summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_engines.c36
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_engines.h14
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt.c42
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt.h39
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt_pm.c601
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt_pm.h14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs.c66
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c57
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.h74
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h250
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c281
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c242
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c108
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.h31
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pool.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h102
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h29
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c295
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.c17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c156
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.h19
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.c116
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.h8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h20
-rw-r--r--drivers/gpu/drm/i915/gt/intel_hangcheck.c361
-rw-r--r--drivers/gpu/drm/i915/gt/intel_llc.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c831
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.h34
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc_reg.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.c176
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.c114
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6_types.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_renderstate.c98
-rw-r--r--drivers/gpu/drm/i915/gt/intel_renderstate.h17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c110
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.c318
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.h131
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c (renamed from drivers/gpu/drm/i915/gt/intel_ringbuffer.c)537
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_types.h51
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c1903
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.h39
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps_types.h93
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c131
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline_types.h19
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c68
-rw-r--r--drivers/gpu/drm/i915/gt/mock_engine.c51
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_context.c123
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_cs.c360
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c374
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_pm.c7
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_gt_pm.c22
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c206
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_llc.c7
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c1070
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_mocs.c419
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.c203
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.h13
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_reset.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_timeline.c7
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_workarounds.c82
-rw-r--r--drivers/gpu/drm/i915/gt/selftests/mock_timeline.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftests/mock_timeline.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c223
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h48
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c24
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c309
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h52
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h3
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.c53
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.h4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c736
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h54
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c74
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c83
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc.c299
84 files changed, 8820 insertions, 3816 deletions
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c b/drivers/gpu/drm/i915/gt/debugfs_engines.c
new file mode 100644
index 000000000000..6a5e9ab20b94
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_engines.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+
+#include "debugfs_engines.h"
+#include "debugfs_gt.h"
+#include "i915_drv.h" /* for_each_engine! */
+#include "intel_engine.h"
+
+static int engines_show(struct seq_file *m, void *data)
+{
+ struct intel_gt *gt = m->private;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct drm_printer p;
+
+ p = drm_seq_file_printer(m);
+ for_each_engine(engine, gt, id)
+ intel_engine_dump(engine, &p, "%s\n", engine->name);
+
+ return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(engines);
+
+void debugfs_engines_register(struct intel_gt *gt, struct dentry *root)
+{
+ static const struct debugfs_gt_file files[] = {
+ { "engines", &engines_fops },
+ };
+
+ debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files));
+}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.h b/drivers/gpu/drm/i915/gt/debugfs_engines.h
new file mode 100644
index 000000000000..f69257eaa1cc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_engines.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef DEBUGFS_ENGINES_H
+#define DEBUGFS_ENGINES_H
+
+struct intel_gt;
+struct dentry;
+
+void debugfs_engines_register(struct intel_gt *gt, struct dentry *root);
+
+#endif /* DEBUGFS_ENGINES_H */
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c
new file mode 100644
index 000000000000..75255aaacaed
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/debugfs.h>
+
+#include "debugfs_engines.h"
+#include "debugfs_gt.h"
+#include "debugfs_gt_pm.h"
+#include "i915_drv.h"
+
+void debugfs_gt_register(struct intel_gt *gt)
+{
+ struct dentry *root;
+
+ if (!gt->i915->drm.primary->debugfs_root)
+ return;
+
+ root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root);
+ if (IS_ERR(root))
+ return;
+
+ debugfs_engines_register(gt, root);
+ debugfs_gt_pm_register(gt, root);
+}
+
+void debugfs_gt_register_files(struct intel_gt *gt,
+ struct dentry *root,
+ const struct debugfs_gt_file *files,
+ unsigned long count)
+{
+ while (count--) {
+ if (!files->eval || files->eval(gt))
+ debugfs_create_file(files->name,
+ 0444, root, gt,
+ files->fops);
+
+ files++;
+ }
+}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.h b/drivers/gpu/drm/i915/gt/debugfs_gt.h
new file mode 100644
index 000000000000..4ea0f06cda8f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef DEBUGFS_GT_H
+#define DEBUGFS_GT_H
+
+#include <linux/file.h>
+
+struct intel_gt;
+
+#define DEFINE_GT_DEBUGFS_ATTRIBUTE(__name) \
+ static int __name ## _open(struct inode *inode, struct file *file) \
+{ \
+ return single_open(file, __name ## _show, inode->i_private); \
+} \
+static const struct file_operations __name ## _fops = { \
+ .owner = THIS_MODULE, \
+ .open = __name ## _open, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .release = single_release, \
+}
+
+void debugfs_gt_register(struct intel_gt *gt);
+
+struct debugfs_gt_file {
+ const char *name;
+ const struct file_operations *fops;
+ bool (*eval)(const struct intel_gt *gt);
+};
+
+void debugfs_gt_register_files(struct intel_gt *gt,
+ struct dentry *root,
+ const struct debugfs_gt_file *files,
+ unsigned long count);
+
+#endif /* DEBUGFS_GT_H */
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
new file mode 100644
index 000000000000..059c9e5c002e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
@@ -0,0 +1,601 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/seq_file.h>
+
+#include "debugfs_gt.h"
+#include "debugfs_gt_pm.h"
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_llc.h"
+#include "intel_rc6.h"
+#include "intel_rps.h"
+#include "intel_runtime_pm.h"
+#include "intel_sideband.h"
+#include "intel_uncore.h"
+
+static int fw_domains_show(struct seq_file *m, void *data)
+{
+ struct intel_gt *gt = m->private;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_uncore_forcewake_domain *fw_domain;
+ unsigned int tmp;
+
+ seq_printf(m, "user.bypass_count = %u\n",
+ uncore->user_forcewake_count);
+
+ for_each_fw_domain(fw_domain, uncore, tmp)
+ seq_printf(m, "%s.wake_count = %u\n",
+ intel_uncore_forcewake_domain_to_str(fw_domain->id),
+ READ_ONCE(fw_domain->wake_count));
+
+ return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(fw_domains);
+
+static void print_rc6_res(struct seq_file *m,
+ const char *title,
+ const i915_reg_t reg)
+{
+ struct intel_gt *gt = m->private;
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ seq_printf(m, "%s %u (%llu us)\n", title,
+ intel_uncore_read(gt->uncore, reg),
+ intel_rc6_residency_us(&gt->rc6, reg));
+}
+
+static int vlv_drpc(struct seq_file *m)
+{
+ struct intel_gt *gt = m->private;
+ struct intel_uncore *uncore = gt->uncore;
+ u32 rcctl1, pw_status;
+
+ pw_status = intel_uncore_read(uncore, VLV_GTLC_PW_STATUS);
+ rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+
+ seq_printf(m, "RC6 Enabled: %s\n",
+ yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE |
+ GEN6_RC_CTL_EI_MODE(1))));
+ seq_printf(m, "Render Power Well: %s\n",
+ (pw_status & VLV_GTLC_PW_RENDER_STATUS_MASK) ? "Up" : "Down");
+ seq_printf(m, "Media Power Well: %s\n",
+ (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down");
+
+ print_rc6_res(m, "Render RC6 residency since boot:", VLV_GT_RENDER_RC6);
+ print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6);
+
+ return fw_domains_show(m, NULL);
+}
+
+static int gen6_drpc(struct seq_file *m)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ u32 gt_core_status, rcctl1, rc6vids = 0;
+ u32 gen9_powergate_enable = 0, gen9_powergate_status = 0;
+
+ gt_core_status = intel_uncore_read_fw(uncore, GEN6_GT_CORE_STATUS);
+
+ rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+ if (INTEL_GEN(i915) >= 9) {
+ gen9_powergate_enable =
+ intel_uncore_read(uncore, GEN9_PG_ENABLE);
+ gen9_powergate_status =
+ intel_uncore_read(uncore, GEN9_PWRGT_DOMAIN_STATUS);
+ }
+
+ if (INTEL_GEN(i915) <= 7)
+ sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
+ &rc6vids, NULL);
+
+ seq_printf(m, "RC1e Enabled: %s\n",
+ yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
+ seq_printf(m, "RC6 Enabled: %s\n",
+ yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE));
+ if (INTEL_GEN(i915) >= 9) {
+ seq_printf(m, "Render Well Gating Enabled: %s\n",
+ yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE));
+ seq_printf(m, "Media Well Gating Enabled: %s\n",
+ yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE));
+ }
+ seq_printf(m, "Deep RC6 Enabled: %s\n",
+ yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE));
+ seq_printf(m, "Deepest RC6 Enabled: %s\n",
+ yesno(rcctl1 & GEN6_RC_CTL_RC6pp_ENABLE));
+ seq_puts(m, "Current RC state: ");
+ switch (gt_core_status & GEN6_RCn_MASK) {
+ case GEN6_RC0:
+ if (gt_core_status & GEN6_CORE_CPD_STATE_MASK)
+ seq_puts(m, "Core Power Down\n");
+ else
+ seq_puts(m, "on\n");
+ break;
+ case GEN6_RC3:
+ seq_puts(m, "RC3\n");
+ break;
+ case GEN6_RC6:
+ seq_puts(m, "RC6\n");
+ break;
+ case GEN6_RC7:
+ seq_puts(m, "RC7\n");
+ break;
+ default:
+ seq_puts(m, "Unknown\n");
+ break;
+ }
+
+ seq_printf(m, "Core Power Down: %s\n",
+ yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK));
+ if (INTEL_GEN(i915) >= 9) {
+ seq_printf(m, "Render Power Well: %s\n",
+ (gen9_powergate_status &
+ GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down");
+ seq_printf(m, "Media Power Well: %s\n",
+ (gen9_powergate_status &
+ GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down");
+ }
+
+ /* Not exactly sure what this is */
+ print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:",
+ GEN6_GT_GFX_RC6_LOCKED);
+ print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6);
+ print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p);
+ print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp);
+
+ if (INTEL_GEN(i915) <= 7) {
+ seq_printf(m, "RC6 voltage: %dmV\n",
+ GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff)));
+ seq_printf(m, "RC6+ voltage: %dmV\n",
+ GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff)));
+ seq_printf(m, "RC6++ voltage: %dmV\n",
+ GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff)));
+ }
+
+ return fw_domains_show(m, NULL);
+}
+
+static int ilk_drpc(struct seq_file *m)
+{
+ struct intel_gt *gt = m->private;
+ struct intel_uncore *uncore = gt->uncore;
+ u32 rgvmodectl, rstdbyctl;
+ u16 crstandvid;
+
+ rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
+ rstdbyctl = intel_uncore_read(uncore, RSTDBYCTL);
+ crstandvid = intel_uncore_read16(uncore, CRSTANDVID);
+
+ seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN));
+ seq_printf(m, "Boost freq: %d\n",
+ (rgvmodectl & MEMMODE_BOOST_FREQ_MASK) >>
+ MEMMODE_BOOST_FREQ_SHIFT);
+ seq_printf(m, "HW control enabled: %s\n",
+ yesno(rgvmodectl & MEMMODE_HWIDLE_EN));
+ seq_printf(m, "SW control enabled: %s\n",
+ yesno(rgvmodectl & MEMMODE_SWMODE_EN));
+ seq_printf(m, "Gated voltage change: %s\n",
+ yesno(rgvmodectl & MEMMODE_RCLK_GATE));
+ seq_printf(m, "Starting frequency: P%d\n",
+ (rgvmodectl & MEMMODE_FSTART_MASK) >> MEMMODE_FSTART_SHIFT);
+ seq_printf(m, "Max P-state: P%d\n",
+ (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT);
+ seq_printf(m, "Min P-state: P%d\n", (rgvmodectl & MEMMODE_FMIN_MASK));
+ seq_printf(m, "RS1 VID: %d\n", (crstandvid & 0x3f));
+ seq_printf(m, "RS2 VID: %d\n", ((crstandvid >> 8) & 0x3f));
+ seq_printf(m, "Render standby enabled: %s\n",
+ yesno(!(rstdbyctl & RCX_SW_EXIT)));
+ seq_puts(m, "Current RS state: ");
+ switch (rstdbyctl & RSX_STATUS_MASK) {
+ case RSX_STATUS_ON:
+ seq_puts(m, "on\n");
+ break;
+ case RSX_STATUS_RC1:
+ seq_puts(m, "RC1\n");
+ break;
+ case RSX_STATUS_RC1E:
+ seq_puts(m, "RC1E\n");
+ break;
+ case RSX_STATUS_RS1:
+ seq_puts(m, "RS1\n");
+ break;
+ case RSX_STATUS_RS2:
+ seq_puts(m, "RS2 (RC6)\n");
+ break;
+ case RSX_STATUS_RS3:
+ seq_puts(m, "RC3 (RC6+)\n");
+ break;
+ default:
+ seq_puts(m, "unknown\n");
+ break;
+ }
+
+ return 0;
+}
+
+static int drpc_show(struct seq_file *m, void *unused)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_i915_private *i915 = gt->i915;
+ intel_wakeref_t wakeref;
+ int err = -ENODEV;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+ err = vlv_drpc(m);
+ else if (INTEL_GEN(i915) >= 6)
+ err = gen6_drpc(m);
+ else
+ err = ilk_drpc(m);
+ }
+
+ return err;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(drpc);
+
+static int frequency_show(struct seq_file *m, void *unused)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_rps *rps = &gt->rps;
+ intel_wakeref_t wakeref;
+
+ wakeref = intel_runtime_pm_get(uncore->rpm);
+
+ if (IS_GEN(i915, 5)) {
+ u16 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
+ u16 rgvstat = intel_uncore_read16(uncore, MEMSTAT_ILK);
+
+ seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf);
+ seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f);
+ seq_printf(m, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >>
+ MEMSTAT_VID_SHIFT);
+ seq_printf(m, "Current P-state: %d\n",
+ (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT);
+ } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+ u32 rpmodectl, freq_sts;
+
+ rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
+ seq_printf(m, "Video Turbo Mode: %s\n",
+ yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
+ seq_printf(m, "HW control enabled: %s\n",
+ yesno(rpmodectl & GEN6_RP_ENABLE));
+ seq_printf(m, "SW control enabled: %s\n",
+ yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
+ GEN6_RP_MEDIA_SW_MODE));
+
+ vlv_punit_get(i915);
+ freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+ vlv_punit_put(i915);
+
+ seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
+ seq_printf(m, "DDR freq: %d MHz\n", i915->mem_freq);
+
+ seq_printf(m, "actual GPU freq: %d MHz\n",
+ intel_gpu_freq(rps, (freq_sts >> 8) & 0xff));
+
+ seq_printf(m, "current GPU freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->cur_freq));
+
+ seq_printf(m, "max GPU freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->max_freq));
+
+ seq_printf(m, "min GPU freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->min_freq));
+
+ seq_printf(m, "idle GPU freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->idle_freq));
+
+ seq_printf(m, "efficient (RPe) frequency: %d MHz\n",
+ intel_gpu_freq(rps, rps->efficient_freq));
+ } else if (INTEL_GEN(i915) >= 6) {
+ u32 rp_state_limits;
+ u32 gt_perf_status;
+ u32 rp_state_cap;
+ u32 rpmodectl, rpinclimit, rpdeclimit;
+ u32 rpstat, cagf, reqf;
+ u32 rpupei, rpcurup, rpprevup;
+ u32 rpdownei, rpcurdown, rpprevdown;
+ u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
+ int max_freq;
+
+ rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
+ if (IS_GEN9_LP(i915)) {
+ rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
+ gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
+ } else {
+ rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+ gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
+ }
+
+ /* RPSTAT1 is in the GT power well */
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
+ if (INTEL_GEN(i915) >= 9) {
+ reqf >>= 23;
+ } else {
+ reqf &= ~GEN6_TURBO_DISABLE;
+ if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+ reqf >>= 24;
+ else
+ reqf >>= 25;
+ }
+ reqf = intel_gpu_freq(rps, reqf);
+
+ rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
+ rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
+ rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
+
+ rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
+ rpupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
+ rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
+ rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
+ rpdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
+ rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
+ rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
+ cagf = intel_rps_read_actual_frequency(rps);
+
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+ if (INTEL_GEN(i915) >= 11) {
+ pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
+ pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
+ /*
+ * The equivalent to the PM ISR & IIR cannot be read
+ * without affecting the current state of the system
+ */
+ pm_isr = 0;
+ pm_iir = 0;
+ } else if (INTEL_GEN(i915) >= 8) {
+ pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
+ pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
+ pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
+ pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
+ } else {
+ pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
+ pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
+ pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
+ pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
+ }
+ pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
+
+ seq_printf(m, "Video Turbo Mode: %s\n",
+ yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
+ seq_printf(m, "HW control enabled: %s\n",
+ yesno(rpmodectl & GEN6_RP_ENABLE));
+ seq_printf(m, "SW control enabled: %s\n",
+ yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
+ GEN6_RP_MEDIA_SW_MODE));
+
+ seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
+ pm_ier, pm_imr, pm_mask);
+ if (INTEL_GEN(i915) <= 10)
+ seq_printf(m, "PM ISR=0x%08x IIR=0x%08x\n",
+ pm_isr, pm_iir);
+ seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n",
+ rps->pm_intrmsk_mbz);
+ seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
+ seq_printf(m, "Render p-state ratio: %d\n",
+ (gt_perf_status & (INTEL_GEN(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
+ seq_printf(m, "Render p-state VID: %d\n",
+ gt_perf_status & 0xff);
+ seq_printf(m, "Render p-state limit: %d\n",
+ rp_state_limits & 0xff);
+ seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat);
+ seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl);
+ seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit);
+ seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
+ seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
+ seq_printf(m, "CAGF: %dMHz\n", cagf);
+ seq_printf(m, "RP CUR UP EI: %d (%dus)\n",
+ rpupei, GT_PM_INTERVAL_TO_US(i915, rpupei));
+ seq_printf(m, "RP CUR UP: %d (%dus)\n",
+ rpcurup, GT_PM_INTERVAL_TO_US(i915, rpcurup));
+ seq_printf(m, "RP PREV UP: %d (%dus)\n",
+ rpprevup, GT_PM_INTERVAL_TO_US(i915, rpprevup));
+ seq_printf(m, "Up threshold: %d%%\n",
+ rps->power.up_threshold);
+
+ seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n",
+ rpdownei, GT_PM_INTERVAL_TO_US(i915, rpdownei));
+ seq_printf(m, "RP CUR DOWN: %d (%dus)\n",
+ rpcurdown, GT_PM_INTERVAL_TO_US(i915, rpcurdown));
+ seq_printf(m, "RP PREV DOWN: %d (%dus)\n",
+ rpprevdown, GT_PM_INTERVAL_TO_US(i915, rpprevdown));
+ seq_printf(m, "Down threshold: %d%%\n",
+ rps->power.down_threshold);
+
+ max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 :
+ rp_state_cap >> 16) & 0xff;
+ max_freq *= (IS_GEN9_BC(i915) ||
+ INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1);
+ seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
+ intel_gpu_freq(rps, max_freq));
+
+ max_freq = (rp_state_cap & 0xff00) >> 8;
+ max_freq *= (IS_GEN9_BC(i915) ||
+ INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1);
+ seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
+ intel_gpu_freq(rps, max_freq));
+
+ max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 :
+ rp_state_cap >> 0) & 0xff;
+ max_freq *= (IS_GEN9_BC(i915) ||
+ INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1);
+ seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
+ intel_gpu_freq(rps, max_freq));
+ seq_printf(m, "Max overclocked frequency: %dMHz\n",
+ intel_gpu_freq(rps, rps->max_freq));
+
+ seq_printf(m, "Current freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->cur_freq));
+ seq_printf(m, "Actual freq: %d MHz\n", cagf);
+ seq_printf(m, "Idle freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->idle_freq));
+ seq_printf(m, "Min freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->min_freq));
+ seq_printf(m, "Boost freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->boost_freq));
+ seq_printf(m, "Max freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->max_freq));
+ seq_printf(m,
+ "efficient (RPe) frequency: %d MHz\n",
+ intel_gpu_freq(rps, rps->efficient_freq));
+ } else {
+ seq_puts(m, "no P-state info available\n");
+ }
+
+ seq_printf(m, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk);
+ seq_printf(m, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq);
+ seq_printf(m, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq);
+
+ intel_runtime_pm_put(uncore->rpm, wakeref);
+
+ return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(frequency);
+
+static int llc_show(struct seq_file *m, void *data)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_i915_private *i915 = gt->i915;
+ const bool edram = INTEL_GEN(i915) > 8;
+ struct intel_rps *rps = &gt->rps;
+ unsigned int max_gpu_freq, min_gpu_freq;
+ intel_wakeref_t wakeref;
+ int gpu_freq, ia_freq;
+
+ seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(i915)));
+ seq_printf(m, "%s: %uMB\n", edram ? "eDRAM" : "eLLC",
+ i915->edram_size_mb);
+
+ min_gpu_freq = rps->min_freq;
+ max_gpu_freq = rps->max_freq;
+ if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
+ /* Convert GT frequency to 50 HZ units */
+ min_gpu_freq /= GEN9_FREQ_SCALER;
+ max_gpu_freq /= GEN9_FREQ_SCALER;
+ }
+
+ seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
+
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+ for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
+ ia_freq = gpu_freq;
+ sandybridge_pcode_read(i915,
+ GEN6_PCODE_READ_MIN_FREQ_TABLE,
+ &ia_freq, NULL);
+ seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
+ intel_gpu_freq(rps,
+ (gpu_freq *
+ (IS_GEN9_BC(i915) ||
+ INTEL_GEN(i915) >= 10 ?
+ GEN9_FREQ_SCALER : 1))),
+ ((ia_freq >> 0) & 0xff) * 100,
+ ((ia_freq >> 8) & 0xff) * 100);
+ }
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+
+ return 0;
+}
+
+static bool llc_eval(const struct intel_gt *gt)
+{
+ return HAS_LLC(gt->i915);
+}
+
+DEFINE_GT_DEBUGFS_ATTRIBUTE(llc);
+
+static const char *rps_power_to_str(unsigned int power)
+{
+ static const char * const strings[] = {
+ [LOW_POWER] = "low power",
+ [BETWEEN] = "mixed",
+ [HIGH_POWER] = "high power",
+ };
+
+ if (power >= ARRAY_SIZE(strings) || !strings[power])
+ return "unknown";
+
+ return strings[power];
+}
+
+static int rps_boost_show(struct seq_file *m, void *data)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_rps *rps = &gt->rps;
+
+ seq_printf(m, "RPS enabled? %d\n", rps->enabled);
+ seq_printf(m, "GPU busy? %s\n", yesno(gt->awake));
+ seq_printf(m, "Boosts outstanding? %d\n",
+ atomic_read(&rps->num_waiters));
+ seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
+ seq_printf(m, "Frequency requested %d, actual %d\n",
+ intel_gpu_freq(rps, rps->cur_freq),
+ intel_rps_read_actual_frequency(rps));
+ seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n",
+ intel_gpu_freq(rps, rps->min_freq),
+ intel_gpu_freq(rps, rps->min_freq_softlimit),
+ intel_gpu_freq(rps, rps->max_freq_softlimit),
+ intel_gpu_freq(rps, rps->max_freq));
+ seq_printf(m, " idle:%d, efficient:%d, boost:%d\n",
+ intel_gpu_freq(rps, rps->idle_freq),
+ intel_gpu_freq(rps, rps->efficient_freq),
+ intel_gpu_freq(rps, rps->boost_freq));
+
+ seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
+
+ if (INTEL_GEN(i915) >= 6 && rps->enabled && gt->awake) {
+ struct intel_uncore *uncore = gt->uncore;
+ u32 rpup, rpupei;
+ u32 rpdown, rpdownei;
+
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ rpup = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP) & GEN6_RP_EI_MASK;
+ rpupei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP_EI) & GEN6_RP_EI_MASK;
+ rpdown = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN) & GEN6_RP_EI_MASK;
+ rpdownei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_RP_EI_MASK;
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+ seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n",
+ rps_power_to_str(rps->power.mode));
+ seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n",
+ rpup && rpupei ? 100 * rpup / rpupei : 0,
+ rps->power.up_threshold);
+ seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n",
+ rpdown && rpdownei ? 100 * rpdown / rpdownei : 0,
+ rps->power.down_threshold);
+ } else {
+ seq_puts(m, "\nRPS Autotuning inactive\n");
+ }
+
+ return 0;
+}
+
+static bool rps_eval(const struct intel_gt *gt)
+{
+ return HAS_RPS(gt->i915);
+}
+
+DEFINE_GT_DEBUGFS_ATTRIBUTE(rps_boost);
+
+void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root)
+{
+ static const struct debugfs_gt_file files[] = {
+ { "drpc", &drpc_fops, NULL },
+ { "frequency", &frequency_fops, NULL },
+ { "forcewake", &fw_domains_fops, NULL },
+ { "llc", &llc_fops, llc_eval },
+ { "rps_boost", &rps_boost_fops, rps_eval },
+ };
+
+ debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files));
+}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h
new file mode 100644
index 000000000000..4cf5f5c9da7d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef DEBUGFS_GT_PM_H
+#define DEBUGFS_GT_PM_H
+
+struct intel_gt;
+struct dentry;
+
+void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root);
+
+#endif /* DEBUGFS_GT_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 55317081d48b..0ba524a414c6 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -28,6 +28,8 @@
#include "i915_drv.h"
#include "i915_trace.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
static void irq_enable(struct intel_engine_cs *engine)
{
@@ -53,15 +55,17 @@ static void irq_disable(struct intel_engine_cs *engine)
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
+ struct intel_engine_cs *engine =
+ container_of(b, struct intel_engine_cs, breadcrumbs);
+
lockdep_assert_held(&b->irq_lock);
GEM_BUG_ON(!b->irq_enabled);
if (!--b->irq_enabled)
- irq_disable(container_of(b,
- struct intel_engine_cs,
- breadcrumbs));
+ irq_disable(engine);
b->irq_armed = false;
+ intel_gt_pm_put_async(engine->gt);
}
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
@@ -127,16 +131,23 @@ __dma_fence_signal__notify(struct dma_fence *fence,
}
}
-void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
+static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ struct intel_engine_cs *engine =
+ container_of(b, struct intel_engine_cs, breadcrumbs);
+
+ intel_engine_add_retire(engine, tl);
+}
+
+static void signal_irq_work(struct irq_work *work)
+{
+ struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
const ktime_t timestamp = ktime_get();
struct intel_context *ce, *cn;
struct list_head *pos, *next;
- unsigned long flags;
LIST_HEAD(signal);
- spin_lock_irqsave(&b->irq_lock, flags);
+ spin_lock(&b->irq_lock);
if (b->irq_armed && list_empty(&b->signalers))
__intel_breadcrumbs_disarm_irq(b);
@@ -177,44 +188,41 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
if (!list_is_first(pos, &ce->signals)) {
/* Advance the list to the first incomplete request */
__list_del_many(&ce->signals, pos);
- if (&ce->signals == pos) /* now empty */
+ if (&ce->signals == pos) { /* now empty */
list_del_init(&ce->signal_link);
+ add_retire(b, ce->timeline);
+ }
}
}
- spin_unlock_irqrestore(&b->irq_lock, flags);
+ spin_unlock(&b->irq_lock);
list_for_each_safe(pos, next, &signal) {
struct i915_request *rq =
list_entry(pos, typeof(*rq), signal_link);
struct list_head cb_list;
- spin_lock_irqsave(&rq->lock, flags);
+ spin_lock(&rq->lock);
list_replace(&rq->fence.cb_list, &cb_list);
__dma_fence_signal__timestamp(&rq->fence, timestamp);
__dma_fence_signal__notify(&rq->fence, &cb_list);
- spin_unlock_irqrestore(&rq->lock, flags);
+ spin_unlock(&rq->lock);
i915_request_put(rq);
}
}
-static void signal_irq_work(struct irq_work *work)
-{
- struct intel_engine_cs *engine =
- container_of(work, typeof(*engine), breadcrumbs.irq_work);
-
- intel_engine_breadcrumbs_irq(engine);
-}
-
-static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
+static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
struct intel_engine_cs *engine =
container_of(b, struct intel_engine_cs, breadcrumbs);
lockdep_assert_held(&b->irq_lock);
if (b->irq_armed)
- return;
+ return true;
+
+ if (!intel_gt_pm_get_if_awake(engine->gt))
+ return false;
/*
* The breadcrumb irq will be disarmed on the interrupt after the
@@ -234,6 +242,8 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
if (!b->irq_enabled++)
irq_enable(engine);
+
+ return true;
}
void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
@@ -271,19 +281,20 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
- struct intel_context *ce = rq->hw_context;
+ struct intel_context *ce = rq->context;
struct list_head *pos;
spin_lock(&b->irq_lock);
GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
- __intel_breadcrumbs_arm_irq(b);
+ if (!__intel_breadcrumbs_arm_irq(b))
+ goto unlock;
/*
* We keep the seqno in retirement order, so we can break
- * inside intel_engine_breadcrumbs_irq as soon as we've passed
- * the last completed request (or seen a request that hasn't
- * event started). We could iterate the timeline->requests list,
+ * inside intel_engine_signal_breadcrumbs as soon as we've
+ * passed the last completed request (or seen a request that
+ * hasn't event started). We could walk the timeline->requests,
* but keeping a separate signalers_list has the advantage of
* hopefully being much smaller than the full list and so
* provides faster iteration and detection when there are no
@@ -306,6 +317,7 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
GEM_BUG_ON(!check_signal_order(ce, rq));
set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+unlock:
spin_unlock(&b->irq_lock);
}
@@ -326,7 +338,7 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
*/
spin_lock(&b->irq_lock);
if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
- struct intel_context *ce = rq->hw_context;
+ struct intel_context *ce = rq->context;
list_del(&rq->signal_link);
if (list_empty(&ce->signals))
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 59c3083c1ec1..fbaa9df6f436 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -13,6 +13,7 @@
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
+#include "intel_ring.h"
static struct i915_global_context {
struct i915_global base;
@@ -30,8 +31,7 @@ void intel_context_free(struct intel_context *ce)
}
struct intel_context *
-intel_context_create(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
+intel_context_create(struct intel_engine_cs *engine)
{
struct intel_context *ce;
@@ -39,7 +39,7 @@ intel_context_create(struct i915_gem_context *ctx,
if (!ce)
return ERR_PTR(-ENOMEM);
- intel_context_init(ce, ctx, engine);
+ intel_context_init(ce, engine);
return ce;
}
@@ -67,11 +67,8 @@ int __intel_context_do_pin(struct intel_context *ce)
if (err)
goto err;
- GEM_TRACE("%s context:%llx pin ring:{head:%04x, tail:%04x}\n",
- ce->engine->name, ce->timeline->fence_context,
- ce->ring->head, ce->ring->tail);
-
- i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
+ CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n",
+ ce->ring->head, ce->ring->tail);
smp_mb__before_atomic(); /* flush pin before it is visible */
}
@@ -97,12 +94,10 @@ void intel_context_unpin(struct intel_context *ce)
mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
if (likely(atomic_dec_and_test(&ce->pin_count))) {
- GEM_TRACE("%s context:%llx retire\n",
- ce->engine->name, ce->timeline->fence_context);
+ CE_TRACE(ce, "retire\n");
ce->ops->unpin(ce);
- i915_gem_context_put(ce->gem_context);
intel_context_active_release(ce);
}
@@ -112,13 +107,10 @@ void intel_context_unpin(struct intel_context *ce)
static int __context_pin_state(struct i915_vma *vma)
{
- u64 flags;
+ unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
int err;
- flags = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
- flags |= PIN_HIGH | PIN_GLOBAL;
-
- err = i915_vma_pin(vma, 0, 0, flags);
+ err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH);
if (err)
return err;
@@ -143,9 +135,9 @@ static void __intel_context_retire(struct i915_active *active)
{
struct intel_context *ce = container_of(active, typeof(*ce), active);
- GEM_TRACE("%s context:%llx retire\n",
- ce->engine->name, ce->timeline->fence_context);
+ CE_TRACE(ce, "retire\n");
+ set_bit(CONTEXT_VALID_BIT, &ce->flags);
if (ce->state)
__context_unpin_state(ce->state);
@@ -197,7 +189,7 @@ int intel_context_active_acquire(struct intel_context *ce)
return err;
/* Preallocate tracking nodes */
- if (!i915_gem_context_is_kernel(ce->gem_context)) {
+ if (!intel_context_is_barrier(ce)) {
err = i915_active_acquire_preallocate_barrier(&ce->active,
ce->engine);
if (err) {
@@ -218,30 +210,19 @@ void intel_context_active_release(struct intel_context *ce)
void
intel_context_init(struct intel_context *ce,
- struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
- struct i915_address_space *vm;
-
GEM_BUG_ON(!engine->cops);
+ GEM_BUG_ON(!engine->gt->vm);
kref_init(&ce->ref);
- ce->gem_context = ctx;
- rcu_read_lock();
- vm = rcu_dereference(ctx->vm);
- if (vm)
- ce->vm = i915_vm_get(vm);
- else
- ce->vm = i915_vm_get(&engine->gt->ggtt->vm);
- rcu_read_unlock();
- if (ctx->timeline)
- ce->timeline = intel_timeline_get(ctx->timeline);
-
ce->engine = engine;
ce->ops = engine->cops;
ce->sseu = engine->sseu;
- ce->ring = __intel_context_ring_size(SZ_16K);
+ ce->ring = __intel_context_ring_size(SZ_4K);
+
+ ce->vm = i915_vm_get(engine->gt->vm);
INIT_LIST_HEAD(&ce->signal_link);
INIT_LIST_HEAD(&ce->signals);
@@ -306,17 +287,11 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
int err;
/* Only suitable for use in remotely modifying this context */
- GEM_BUG_ON(rq->hw_context == ce);
+ GEM_BUG_ON(rq->context == ce);
if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
- err = mutex_lock_interruptible_nested(&tl->mutex,
- SINGLE_DEPTH_NESTING);
- if (err)
- return err;
-
/* Queue this switch after current activity by this context. */
err = i915_active_fence_set(&tl->last_request, rq);
- mutex_unlock(&tl->mutex);
if (err)
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index dd742ac2fbdb..1d4a1b1357cf 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -7,21 +7,29 @@
#ifndef __INTEL_CONTEXT_H__
#define __INTEL_CONTEXT_H__
+#include <linux/bitops.h>
#include <linux/lockdep.h>
+#include <linux/types.h>
#include "i915_active.h"
#include "intel_context_types.h"
#include "intel_engine_types.h"
+#include "intel_ring_types.h"
#include "intel_timeline_types.h"
+#define CE_TRACE(ce, fmt, ...) do { \
+ const struct intel_context *ce__ = (ce); \
+ ENGINE_TRACE(ce__->engine, "context:%llx" fmt, \
+ ce__->timeline->fence_context, \
+ ##__VA_ARGS__); \
+} while (0)
+
void intel_context_init(struct intel_context *ce,
- struct i915_gem_context *ctx,
struct intel_engine_cs *engine);
void intel_context_fini(struct intel_context *ce);
struct intel_context *
-intel_context_create(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine);
+intel_context_create(struct intel_engine_cs *engine);
void intel_context_free(struct intel_context *ce);
@@ -152,4 +160,64 @@ static inline struct intel_ring *__intel_context_ring_size(u64 sz)
return u64_to_ptr(struct intel_ring, sz);
}
+static inline bool intel_context_is_barrier(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
+}
+
+static inline bool intel_context_use_semaphores(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
+}
+
+static inline void intel_context_set_use_semaphores(struct intel_context *ce)
+{
+ set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
+}
+
+static inline void intel_context_clear_use_semaphores(struct intel_context *ce)
+{
+ clear_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
+}
+
+static inline bool intel_context_is_banned(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_BANNED, &ce->flags);
+}
+
+static inline bool intel_context_set_banned(struct intel_context *ce)
+{
+ return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
+}
+
+static inline bool
+intel_context_force_single_submission(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags);
+}
+
+static inline void
+intel_context_set_single_submission(struct intel_context *ce)
+{
+ __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags);
+}
+
+static inline bool
+intel_context_nopreempt(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_NOPREEMPT, &ce->flags);
+}
+
+static inline void
+intel_context_set_nopreempt(struct intel_context *ce)
+{
+ set_bit(CONTEXT_NOPREEMPT, &ce->flags);
+}
+
+static inline void
+intel_context_clear_nopreempt(struct intel_context *ce)
+{
+ clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
+}
+
#endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 6959b05ae5f8..9527a659546c 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -44,7 +44,7 @@ struct intel_context {
#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2)
struct i915_address_space *vm;
- struct i915_gem_context *gem_context;
+ struct i915_gem_context __rcu *gem_context;
struct list_head signal_link;
struct list_head signals;
@@ -54,7 +54,13 @@ struct intel_context {
struct intel_timeline *timeline;
unsigned long flags;
-#define CONTEXT_ALLOC_BIT 0
+#define CONTEXT_BARRIER_BIT 0
+#define CONTEXT_ALLOC_BIT 1
+#define CONTEXT_VALID_BIT 2
+#define CONTEXT_USE_SEMAPHORES 3
+#define CONTEXT_BANNED 4
+#define CONTEXT_FORCE_SINGLE_SUBMISSION 5
+#define CONTEXT_NOPREEMPT 6
u32 *lrc_reg_state;
u64 lrc_desc;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 93ea367fe624..79ecac5ac0ab 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -19,6 +19,7 @@
#include "intel_workarounds.h"
struct drm_printer;
+struct intel_gt;
/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
* but keeps the logic simple. Indeed, the whole purpose of this macro is just
@@ -28,6 +29,13 @@ struct drm_printer;
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
+#define ENGINE_TRACE(e, fmt, ...) do { \
+ const struct intel_engine_cs *e__ __maybe_unused = (e); \
+ GEM_TRACE("%s %s: " fmt, \
+ dev_name(e__->i915->drm.dev), e__->name, \
+ ##__VA_ARGS__); \
+} while (0)
+
/*
* The register defines to be used with the following macros need to accept a
* base param, e.g:
@@ -89,38 +97,6 @@ struct drm_printer;
/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
* do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
*/
-enum intel_engine_hangcheck_action {
- ENGINE_IDLE = 0,
- ENGINE_WAIT,
- ENGINE_ACTIVE_SEQNO,
- ENGINE_ACTIVE_HEAD,
- ENGINE_ACTIVE_SUBUNITS,
- ENGINE_WAIT_KICK,
- ENGINE_DEAD,
-};
-
-static inline const char *
-hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
-{
- switch (a) {
- case ENGINE_IDLE:
- return "idle";
- case ENGINE_WAIT:
- return "wait";
- case ENGINE_ACTIVE_SEQNO:
- return "active seqno";
- case ENGINE_ACTIVE_HEAD:
- return "active head";
- case ENGINE_ACTIVE_SUBUNITS:
- return "active subunits";
- case ENGINE_WAIT_KICK:
- return "wait kick";
- case ENGINE_DEAD:
- return "dead";
- }
-
- return "unknown";
-}
static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
@@ -131,9 +107,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
static inline struct i915_request *
execlists_active(const struct intel_engine_execlists *execlists)
{
- GEM_BUG_ON(execlists->active - execlists->inflight >
- execlists_num_ports(execlists));
- return READ_ONCE(*execlists->active);
+ return *READ_ONCE(execlists->active);
}
static inline void
@@ -206,132 +180,19 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
#define I915_HWS_CSB_WRITE_INDEX 0x1f
#define CNL_HWS_CSB_WRITE_INDEX 0x2f
-struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine, int size);
-int intel_ring_pin(struct intel_ring *ring);
-void intel_ring_reset(struct intel_ring *ring, u32 tail);
-unsigned int intel_ring_update_space(struct intel_ring *ring);
-void intel_ring_unpin(struct intel_ring *ring);
-void intel_ring_free(struct kref *ref);
-
-static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
-{
- kref_get(&ring->ref);
- return ring;
-}
-
-static inline void intel_ring_put(struct intel_ring *ring)
-{
- kref_put(&ring->ref, intel_ring_free);
-}
-
void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);
-int __must_check intel_ring_cacheline_align(struct i915_request *rq);
-
-u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
+int intel_engines_init_mmio(struct intel_gt *gt);
+int intel_engines_init(struct intel_gt *gt);
-static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
-{
- /* Dummy function.
- *
- * This serves as a placeholder in the code so that the reader
- * can compare against the preceding intel_ring_begin() and
- * check that the number of dwords emitted matches the space
- * reserved for the command packet (i.e. the value passed to
- * intel_ring_begin()).
- */
- GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
-}
-
-static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
-{
- return pos & (ring->size - 1);
-}
-
-static inline bool
-intel_ring_offset_valid(const struct intel_ring *ring,
- unsigned int pos)
-{
- if (pos & -ring->size) /* must be strictly within the ring */
- return false;
-
- if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
- return false;
-
- return true;
-}
-
-static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
-{
- /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
- u32 offset = addr - rq->ring->vaddr;
- GEM_BUG_ON(offset > rq->ring->size);
- return intel_ring_wrap(rq->ring, offset);
-}
-
-static inline void
-assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
-{
- GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
-
- /*
- * "Ring Buffer Use"
- * Gen2 BSpec "1. Programming Environment" / 1.4.4.6
- * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
- * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
- * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
- * same cacheline, the Head Pointer must not be greater than the Tail
- * Pointer."
- *
- * We use ring->head as the last known location of the actual RING_HEAD,
- * it may have advanced but in the worst case it is equally the same
- * as ring->head and so we should never program RING_TAIL to advance
- * into the same cacheline as ring->head.
- */
-#define cacheline(a) round_down(a, CACHELINE_BYTES)
- GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
- tail < ring->head);
-#undef cacheline
-}
-
-static inline unsigned int
-intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
-{
- /* Whilst writes to the tail are strictly order, there is no
- * serialisation between readers and the writers. The tail may be
- * read by i915_request_retire() just as it is being updated
- * by execlists, as although the breadcrumb is complete, the context
- * switch hasn't been seen.
- */
- assert_ring_tail_valid(ring, tail);
- ring->tail = tail;
- return tail;
-}
-
-static inline unsigned int
-__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
-{
- /*
- * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
- * same cacheline, the Head Pointer must not be greater than the Tail
- * Pointer."
- */
- GEM_BUG_ON(!is_power_of_2(size));
- return (head - tail - CACHELINE_BYTES) & (size - 1);
-}
-
-int intel_engines_init_mmio(struct drm_i915_private *i915);
-int intel_engines_setup(struct drm_i915_private *i915);
-int intel_engines_init(struct drm_i915_private *i915);
-void intel_engines_cleanup(struct drm_i915_private *i915);
+void intel_engines_release(struct intel_gt *gt);
+void intel_engines_free(struct intel_gt *gt);
int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);
int intel_ring_submission_setup(struct intel_engine_cs *engine);
-int intel_ring_submission_init(struct intel_engine_cs *engine);
int intel_engine_stop_cs(struct intel_engine_cs *engine);
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
@@ -352,13 +213,11 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
static inline void
-intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)
+intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
{
irq_work_queue(&engine->breadcrumbs.irq_work);
}
-void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);
-
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
@@ -416,14 +275,14 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
static inline void __intel_engine_reset(struct intel_engine_cs *engine,
bool stalled)
{
- if (engine->reset.reset)
- engine->reset.reset(engine, stalled);
+ if (engine->reset.rewind)
+ engine->reset.rewind(engine, stalled);
engine->serial++; /* contexts lost */
}
bool intel_engines_are_idle(struct intel_gt *gt);
bool intel_engine_is_idle(struct intel_engine_cs *engine);
-void intel_engine_flush_submission(struct intel_engine_cs *engine);
+bool intel_engine_flush_submission(struct intel_engine_cs *engine);
void intel_engines_reset_default_submission(struct intel_gt *gt);
@@ -434,61 +293,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
struct drm_printer *m,
const char *header, ...);
-static inline void intel_engine_context_in(struct intel_engine_cs *engine)
-{
- unsigned long flags;
-
- if (READ_ONCE(engine->stats.enabled) == 0)
- return;
-
- write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (engine->stats.enabled > 0) {
- if (engine->stats.active++ == 0)
- engine->stats.start = ktime_get();
- GEM_BUG_ON(engine->stats.active == 0);
- }
-
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
-}
-
-static inline void intel_engine_context_out(struct intel_engine_cs *engine)
-{
- unsigned long flags;
-
- if (READ_ONCE(engine->stats.enabled) == 0)
- return;
-
- write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (engine->stats.enabled > 0) {
- ktime_t last;
-
- if (engine->stats.active && --engine->stats.active == 0) {
- /*
- * Decrement the active context count and in case GPU
- * is now idle add up to the running total.
- */
- last = ktime_sub(ktime_get(), engine->stats.start);
-
- engine->stats.total = ktime_add(engine->stats.total,
- last);
- } else if (engine->stats.active == 0) {
- /*
- * After turning on engine stats, context out might be
- * the first event in which case we account from the
- * time stats gathering was turned on.
- */
- last = ktime_sub(ktime_get(), engine->stats.enabled_at);
-
- engine->stats.total = ktime_add(engine->stats.total,
- last);
- }
- }
-
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
-}
-
int intel_enable_engine_stats(struct intel_engine_cs *engine);
void intel_disable_engine_stats(struct intel_engine_cs *engine);
@@ -497,7 +301,7 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
struct i915_request *
intel_engine_find_active_request(struct intel_engine_cs *engine);
-u32 intel_engine_context_size(struct drm_i915_private *i915, u8 class);
+u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
@@ -525,4 +329,22 @@ void intel_engine_init_active(struct intel_engine_cs *engine,
#define ENGINE_MOCK 1
#define ENGINE_VIRTUAL 2
+static inline bool
+intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
+{
+ if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ return false;
+
+ return intel_engine_has_preemption(engine);
+}
+
+static inline bool
+intel_engine_has_timeslices(const struct intel_engine_cs *engine)
+{
+ if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ return false;
+
+ return intel_engine_has_semaphores(engine);
+}
+
#endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 051734c9b733..ddf9543b1261 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -28,15 +28,16 @@
#include "i915_drv.h"
-#include "gt/intel_gt.h"
-
+#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
#include "intel_engine_pool.h"
#include "intel_engine_user.h"
-#include "intel_context.h"
+#include "intel_gt.h"
+#include "intel_gt_requests.h"
#include "intel_lrc.h"
#include "intel_reset.h"
+#include "intel_ring.h"
/* Haswell does have the CXT_SIZE register however it does not appear to be
* valid. Now, docs explain in dwords what is in the context object. The full
@@ -140,7 +141,7 @@ static const struct engine_info intel_engines[] = {
/**
* intel_engine_context_size() - return the size of the context for an engine
- * @dev_priv: i915 device private
+ * @gt: the gt
* @class: engine class
*
* Each engine class may require a different amount of space for a context
@@ -152,17 +153,18 @@ static const struct engine_info intel_engines[] = {
* in LRC mode, but does not include the "shared data page" used with
* GuC submission. The caller should account for this if using the GuC.
*/
-u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
+u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
{
+ struct intel_uncore *uncore = gt->uncore;
u32 cxt_size;
BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
switch (class) {
case RENDER_CLASS:
- switch (INTEL_GEN(dev_priv)) {
+ switch (INTEL_GEN(gt->i915)) {
default:
- MISSING_CASE(INTEL_GEN(dev_priv));
+ MISSING_CASE(INTEL_GEN(gt->i915));
return DEFAULT_LR_CONTEXT_RENDER_SIZE;
case 12:
case 11:
@@ -174,14 +176,14 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
case 8:
return GEN8_LR_CONTEXT_RENDER_SIZE;
case 7:
- if (IS_HASWELL(dev_priv))
+ if (IS_HASWELL(gt->i915))
return HSW_CXT_TOTAL_SIZE;
- cxt_size = I915_READ(GEN7_CXT_SIZE);
+ cxt_size = intel_uncore_read(uncore, GEN7_CXT_SIZE);
return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
PAGE_SIZE);
case 6:
- cxt_size = I915_READ(CXT_SIZE);
+ cxt_size = intel_uncore_read(uncore, CXT_SIZE);
return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
PAGE_SIZE);
case 5:
@@ -196,9 +198,9 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
* minimum allocation anyway so it should all come
* out in the wash.
*/
- cxt_size = I915_READ(CXT_SIZE) + 1;
+ cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1;
DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n",
- INTEL_GEN(dev_priv),
+ INTEL_GEN(gt->i915),
cxt_size * 64,
cxt_size - 1);
return round_up(cxt_size * 64, PAGE_SIZE);
@@ -215,7 +217,7 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
case VIDEO_DECODE_CLASS:
case VIDEO_ENHANCEMENT_CLASS:
case COPY_ENGINE_CLASS:
- if (INTEL_GEN(dev_priv) < 8)
+ if (INTEL_GEN(gt->i915) < 8)
return 0;
return GEN8_LR_CONTEXT_OTHER_SIZE;
}
@@ -308,14 +310,16 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->instance = info->instance;
__sprint_engine_name(engine);
- /*
- * To be overridden by the backend on setup. However to facilitate
- * cleanup on error during setup, we always provide the destroy vfunc.
- */
- engine->destroy = (typeof(engine->destroy))kfree;
+ engine->props.heartbeat_interval_ms =
+ CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
+ engine->props.preempt_timeout_ms =
+ CONFIG_DRM_I915_PREEMPT_TIMEOUT;
+ engine->props.stop_timeout_ms =
+ CONFIG_DRM_I915_STOP_TIMEOUT;
+ engine->props.timeslice_duration_ms =
+ CONFIG_DRM_I915_TIMESLICE_DURATION;
- engine->context_size = intel_engine_context_size(gt->i915,
- engine->class);
+ engine->context_size = intel_engine_context_size(gt, engine->class);
if (WARN_ON(engine->context_size > BIT(20)))
engine->context_size = 0;
if (engine->context_size)
@@ -324,6 +328,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
/* Nothing to do here, execute in order of dependencies */
engine->schedule = NULL;
+ ewma__engine_latency_init(&engine->latency);
seqlock_init(&engine->stats.lock);
ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
@@ -334,7 +339,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
gt->engine_class[info->class][info->instance] = engine;
gt->engine[id] = engine;
- intel_engine_add_user(engine);
gt->i915->engine[id] = engine;
return 0;
@@ -370,38 +374,58 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine)
}
}
-static void intel_setup_engine_capabilities(struct drm_i915_private *i915)
+static void intel_setup_engine_capabilities(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, i915, id)
+ for_each_engine(engine, gt, id)
__setup_engine_capabilities(engine);
}
/**
- * intel_engines_cleanup() - free the resources allocated for Command Streamers
- * @i915: the i915 devic
+ * intel_engines_release() - free the resources allocated for Command Streamers
+ * @gt: pointer to struct intel_gt
*/
-void intel_engines_cleanup(struct drm_i915_private *i915)
+void intel_engines_release(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, i915, id) {
- engine->destroy(engine);
- i915->engine[id] = NULL;
+ /* Decouple the backend; but keep the layout for late GPU resets */
+ for_each_engine(engine, gt, id) {
+ if (!engine->release)
+ continue;
+
+ engine->release(engine);
+ engine->release = NULL;
+
+ memset(&engine->reset, 0, sizeof(engine->reset));
+
+ gt->i915->engine[id] = NULL;
+ }
+}
+
+void intel_engines_free(struct intel_gt *gt)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, gt, id) {
+ kfree(engine);
+ gt->engine[id] = NULL;
}
}
/**
* intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
- * @i915: the i915 device
+ * @gt: pointer to struct intel_gt
*
* Return: non-zero if the initialization failed.
*/
-int intel_engines_init_mmio(struct drm_i915_private *i915)
+int intel_engines_init_mmio(struct intel_gt *gt)
{
+ struct drm_i915_private *i915 = gt->i915;
struct intel_device_info *device_info = mkwrite_device_info(i915);
const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
unsigned int mask = 0;
@@ -419,7 +443,7 @@ int intel_engines_init_mmio(struct drm_i915_private *i915)
if (!HAS_ENGINE(i915, i))
continue;
- err = intel_engine_setup(&i915->gt, i);
+ err = intel_engine_setup(gt, i);
if (err)
goto cleanup;
@@ -436,45 +460,14 @@ int intel_engines_init_mmio(struct drm_i915_private *i915)
RUNTIME_INFO(i915)->num_engines = hweight32(mask);
- intel_gt_check_and_clear_faults(&i915->gt);
+ intel_gt_check_and_clear_faults(gt);
- intel_setup_engine_capabilities(i915);
+ intel_setup_engine_capabilities(gt);
return 0;
cleanup:
- intel_engines_cleanup(i915);
- return err;
-}
-
-/**
- * intel_engines_init() - init the Engine Command Streamers
- * @i915: i915 device private
- *
- * Return: non-zero if the initialization failed.
- */
-int intel_engines_init(struct drm_i915_private *i915)
-{
- int (*init)(struct intel_engine_cs *engine);
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int err;
-
- if (HAS_EXECLISTS(i915))
- init = intel_execlists_submission_init;
- else
- init = intel_ring_submission_init;
-
- for_each_engine(engine, i915, id) {
- err = init(engine);
- if (err)
- goto cleanup;
- }
-
- return 0;
-
-cleanup:
- intel_engines_cleanup(i915);
+ intel_engines_free(gt);
return err;
}
@@ -518,7 +511,7 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
unsigned int flags;
flags = PIN_GLOBAL;
- if (!HAS_LLC(engine->i915))
+ if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
/*
* On g33, we cannot place HWS above 256MiB, so
* restrict its pinning to the low mappable arena.
@@ -589,7 +582,7 @@ err:
return ret;
}
-static int intel_engine_setup_common(struct intel_engine_cs *engine)
+static int engine_setup_common(struct intel_engine_cs *engine)
{
int err;
@@ -602,9 +595,9 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init_active(engine, ENGINE_PHYSICAL);
intel_engine_init_breadcrumbs(engine);
intel_engine_init_execlists(engine);
- intel_engine_init_hangcheck(engine);
intel_engine_init_cmd_parser(engine);
intel_engine_init__pm(engine);
+ intel_engine_init_retire(engine);
intel_engine_pool_init(&engine->pool);
@@ -619,49 +612,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
return 0;
}
-/**
- * intel_engines_setup- setup engine state not requiring hw access
- * @i915: Device to setup.
- *
- * Initializes engine structure members shared between legacy and execlists
- * submission modes which do not require hardware access.
- *
- * Typically done early in the submission mode specific engine setup stage.
- */
-int intel_engines_setup(struct drm_i915_private *i915)
-{
- int (*setup)(struct intel_engine_cs *engine);
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int err;
-
- if (HAS_EXECLISTS(i915))
- setup = intel_execlists_submission_setup;
- else
- setup = intel_ring_submission_setup;
-
- for_each_engine(engine, i915, id) {
- err = intel_engine_setup_common(engine);
- if (err)
- goto cleanup;
-
- err = setup(engine);
- if (err)
- goto cleanup;
-
- /* We expect the backend to take control over its state */
- GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree);
-
- GEM_BUG_ON(!engine->cops);
- }
-
- return 0;
-
-cleanup:
- intel_engines_cleanup(i915);
- return err;
-}
-
struct measure_breadcrumb {
struct i915_request rq;
struct intel_timeline timeline;
@@ -745,13 +695,13 @@ create_kernel_context(struct intel_engine_cs *engine)
struct intel_context *ce;
int err;
- ce = intel_context_create(engine->i915->kernel_context, engine);
+ ce = intel_context_create(engine);
if (IS_ERR(ce))
return ce;
- ce->ring = __intel_context_ring_size(SZ_4K);
+ __set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
- err = intel_context_pin(ce);
+ err = intel_context_pin(ce); /* perma-pin so it is always available */
if (err) {
intel_context_put(ce);
return ERR_PTR(err);
@@ -779,13 +729,19 @@ create_kernel_context(struct intel_engine_cs *engine)
*
* Returns zero on success or an error code on failure.
*/
-int intel_engine_init_common(struct intel_engine_cs *engine)
+static int engine_init_common(struct intel_engine_cs *engine)
{
struct intel_context *ce;
int ret;
engine->set_default_submission(engine);
+ ret = measure_breadcrumb_dw(engine);
+ if (ret < 0)
+ return ret;
+
+ engine->emit_fini_breadcrumb_dw = ret;
+
/*
* We may need to do things with the shrinker which
* require us to immediately switch back to the default
@@ -800,18 +756,38 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
engine->kernel_context = ce;
- ret = measure_breadcrumb_dw(engine);
- if (ret < 0)
- goto err_unpin;
+ return 0;
+}
- engine->emit_fini_breadcrumb_dw = ret;
+int intel_engines_init(struct intel_gt *gt)
+{
+ int (*setup)(struct intel_engine_cs *engine);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err;
- return 0;
+ if (HAS_EXECLISTS(gt->i915))
+ setup = intel_execlists_submission_setup;
+ else
+ setup = intel_ring_submission_setup;
-err_unpin:
- intel_context_unpin(ce);
- intel_context_put(ce);
- return ret;
+ for_each_engine(engine, gt, id) {
+ err = engine_setup_common(engine);
+ if (err)
+ return err;
+
+ err = setup(engine);
+ if (err)
+ return err;
+
+ err = engine_init_common(engine);
+ if (err)
+ return err;
+
+ intel_engine_add_user(engine);
+ }
+
+ return 0;
}
/**
@@ -824,9 +800,11 @@ err_unpin:
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
GEM_BUG_ON(!list_empty(&engine->active.requests));
+ tasklet_kill(&engine->execlists.tasklet); /* flush the callback */
cleanup_status_page(engine);
+ intel_engine_fini_retire(engine);
intel_engine_pool_fini(&engine->pool);
intel_engine_fini_breadcrumbs(engine);
intel_engine_cleanup_cmd_parser(engine);
@@ -873,6 +851,21 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
return bbaddr;
}
+static unsigned long stop_timeout(const struct intel_engine_cs *engine)
+{
+ if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */
+ return 0;
+
+ /*
+ * If we are doing a normal GPU reset, we can take our time and allow
+ * the engine to quiesce. We've stopped submission to the engine, and
+ * if we wait long enough an innocent context should complete and
+ * leave the engine idle. So they should not be caught unaware by
+ * the forthcoming GPU reset (which usually follows the stop_cs)!
+ */
+ return READ_ONCE(engine->props.stop_timeout_ms);
+}
+
int intel_engine_stop_cs(struct intel_engine_cs *engine)
{
struct intel_uncore *uncore = engine->uncore;
@@ -883,16 +876,16 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
if (INTEL_GEN(engine->i915) < 3)
return -ENODEV;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
err = 0;
if (__intel_wait_for_register_fw(uncore,
mode, MODE_IDLE, MODE_IDLE,
- 1000, 0,
+ 1000, stop_timeout(engine),
NULL)) {
- GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
+ ENGINE_TRACE(engine, "timed out on STOP_RING -> IDLE\n");
err = -ETIMEDOUT;
}
@@ -904,7 +897,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
{
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
}
@@ -1054,9 +1047,10 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
return idle;
}
-void intel_engine_flush_submission(struct intel_engine_cs *engine)
+bool intel_engine_flush_submission(struct intel_engine_cs *engine)
{
struct tasklet_struct *t = &engine->execlists.tasklet;
+ bool active = tasklet_is_locked(t);
if (__tasklet_is_scheduled(t)) {
local_bh_disable();
@@ -1067,10 +1061,13 @@ void intel_engine_flush_submission(struct intel_engine_cs *engine)
tasklet_unlock(t);
}
local_bh_enable();
+ active = true;
}
/* Otherwise flush the tasklet if it was running on another cpu */
tasklet_unlock_wait(t);
+
+ return active;
}
/**
@@ -1318,10 +1315,11 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
unsigned int idx;
u8 read, write;
- drm_printf(m, "\tExeclist tasklet queued? %s (%s), timeslice? %s\n",
+ drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n",
yesno(test_bit(TASKLET_STATE_SCHED,
&engine->execlists.tasklet.state)),
enableddisabled(!atomic_read(&engine->execlists.tasklet.count)),
+ repr_timer(&engine->execlists.preempt),
repr_timer(&engine->execlists.timer));
read = execlists->csb_head;
@@ -1345,6 +1343,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
}
execlists_active_lock_bh(execlists);
+ rcu_read_lock();
for (port = execlists->active; (rq = *port); port++) {
char hdr[80];
int len;
@@ -1382,6 +1381,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
if (tl)
intel_timeline_put(tl);
}
+ rcu_read_unlock();
execlists_active_unlock_bh(execlists);
} else if (INTEL_GEN(dev_priv) > 6) {
drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
@@ -1447,8 +1447,17 @@ void intel_engine_dump(struct intel_engine_cs *engine,
drm_printf(m, "*** WEDGED ***\n");
drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
- drm_printf(m, "\tHangcheck: %d ms ago\n",
- jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
+ drm_printf(m, "\tBarriers?: %s\n",
+ yesno(!llist_empty(&engine->barrier_tasks)));
+ drm_printf(m, "\tLatency: %luus\n",
+ ewma__engine_latency_read(&engine->latency));
+
+ rcu_read_lock();
+ rq = READ_ONCE(engine->heartbeat.systole);
+ if (rq)
+ drm_printf(m, "\tHeartbeat: %d ms ago\n",
+ jiffies_to_msecs(jiffies - rq->emitted_jiffies));
+ rcu_read_unlock();
drm_printf(m, "\tReset count: %d (global %d)\n",
i915_reset_engine_count(error, engine),
i915_reset_count(error));
@@ -1481,9 +1490,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
print_request_ring(m, rq);
- if (rq->hw_context->lrc_reg_state) {
+ if (rq->context->lrc_reg_state) {
drm_printf(m, "Logical Ring Context:\n");
- hexdump(m, rq->hw_context->lrc_reg_state, PAGE_SIZE);
+ hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
}
}
spin_unlock_irqrestore(&engine->active.lock, flags);
@@ -1544,7 +1553,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
for (port = execlists->pending; (rq = *port); port++) {
/* Exclude any contexts already counted in active */
- if (!intel_context_inflight_count(rq->hw_context))
+ if (!intel_context_inflight_count(rq->context))
engine->stats.active++;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
new file mode 100644
index 000000000000..742628e40201
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -0,0 +1,242 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_request.h"
+
+#include "intel_context.h"
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
+#include "intel_engine.h"
+#include "intel_gt.h"
+#include "intel_reset.h"
+
+/*
+ * While the engine is active, we send a periodic pulse along the engine
+ * to check on its health and to flush any idle-barriers. If that request
+ * is stuck, and we fail to preempt it, we declare the engine hung and
+ * issue a reset -- in the hope that restores progress.
+ */
+
+static bool next_heartbeat(struct intel_engine_cs *engine)
+{
+ long delay;
+
+ delay = READ_ONCE(engine->props.heartbeat_interval_ms);
+ if (!delay)
+ return false;
+
+ delay = msecs_to_jiffies_timeout(delay);
+ if (delay >= HZ)
+ delay = round_jiffies_up_relative(delay);
+ schedule_delayed_work(&engine->heartbeat.work, delay);
+
+ return true;
+}
+
+static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
+{
+ engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
+ i915_request_add_active_barriers(rq);
+}
+
+static void show_heartbeat(const struct i915_request *rq,
+ struct intel_engine_cs *engine)
+{
+ struct drm_printer p = drm_debug_printer("heartbeat");
+
+ intel_engine_dump(engine, &p,
+ "%s heartbeat {prio:%d} not ticking\n",
+ engine->name,
+ rq->sched.attr.priority);
+}
+
+static void heartbeat(struct work_struct *wrk)
+{
+ struct i915_sched_attr attr = {
+ .priority = I915_USER_PRIORITY(I915_PRIORITY_MIN),
+ };
+ struct intel_engine_cs *engine =
+ container_of(wrk, typeof(*engine), heartbeat.work.work);
+ struct intel_context *ce = engine->kernel_context;
+ struct i915_request *rq;
+
+ rq = engine->heartbeat.systole;
+ if (rq && i915_request_completed(rq)) {
+ i915_request_put(rq);
+ engine->heartbeat.systole = NULL;
+ }
+
+ if (!intel_engine_pm_get_if_awake(engine))
+ return;
+
+ if (intel_gt_is_wedged(engine->gt))
+ goto out;
+
+ if (engine->heartbeat.systole) {
+ if (engine->schedule &&
+ rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
+ /*
+ * Gradually raise the priority of the heartbeat to
+ * give high priority work [which presumably desires
+ * low latency and no jitter] the chance to naturally
+ * complete before being preempted.
+ */
+ attr.priority = I915_PRIORITY_MASK;
+ if (rq->sched.attr.priority >= attr.priority)
+ attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT);
+ if (rq->sched.attr.priority >= attr.priority)
+ attr.priority = I915_PRIORITY_BARRIER;
+
+ local_bh_disable();
+ engine->schedule(rq, &attr);
+ local_bh_enable();
+ } else {
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ show_heartbeat(rq, engine);
+
+ intel_gt_handle_error(engine->gt, engine->mask,
+ I915_ERROR_CAPTURE,
+ "stopped heartbeat on %s",
+ engine->name);
+ }
+ goto out;
+ }
+
+ if (engine->wakeref_serial == engine->serial)
+ goto out;
+
+ mutex_lock(&ce->timeline->mutex);
+
+ intel_context_enter(ce);
+ rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+ intel_context_exit(ce);
+ if (IS_ERR(rq))
+ goto unlock;
+
+ idle_pulse(engine, rq);
+ if (i915_modparams.enable_hangcheck)
+ engine->heartbeat.systole = i915_request_get(rq);
+
+ __i915_request_commit(rq);
+ __i915_request_queue(rq, &attr);
+
+unlock:
+ mutex_unlock(&ce->timeline->mutex);
+out:
+ if (!next_heartbeat(engine))
+ i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
+ intel_engine_pm_put(engine);
+}
+
+void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
+{
+ if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
+ return;
+
+ next_heartbeat(engine);
+}
+
+void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
+{
+ if (cancel_delayed_work(&engine->heartbeat.work))
+ i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
+}
+
+void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
+{
+ INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
+}
+
+int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
+ unsigned long delay)
+{
+ int err;
+
+ /* Send one last pulse before to cleanup persistent hogs */
+ if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) {
+ err = intel_engine_pulse(engine);
+ if (err)
+ return err;
+ }
+
+ WRITE_ONCE(engine->props.heartbeat_interval_ms, delay);
+
+ if (intel_engine_pm_get_if_awake(engine)) {
+ if (delay)
+ intel_engine_unpark_heartbeat(engine);
+ else
+ intel_engine_park_heartbeat(engine);
+ intel_engine_pm_put(engine);
+ }
+
+ return 0;
+}
+
+int intel_engine_pulse(struct intel_engine_cs *engine)
+{
+ struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
+ struct intel_context *ce = engine->kernel_context;
+ struct i915_request *rq;
+ int err = 0;
+
+ if (!intel_engine_has_preemption(engine))
+ return -ENODEV;
+
+ if (!intel_engine_pm_get_if_awake(engine))
+ return 0;
+
+ if (mutex_lock_interruptible(&ce->timeline->mutex))
+ goto out_rpm;
+
+ intel_context_enter(ce);
+ rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+ intel_context_exit(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_unlock;
+ }
+
+ rq->flags |= I915_REQUEST_SENTINEL;
+ idle_pulse(engine, rq);
+
+ __i915_request_commit(rq);
+ __i915_request_queue(rq, &attr);
+
+out_unlock:
+ mutex_unlock(&ce->timeline->mutex);
+out_rpm:
+ intel_engine_pm_put(engine);
+ return err;
+}
+
+int intel_engine_flush_barriers(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+ int err = 0;
+
+ if (llist_empty(&engine->barrier_tasks))
+ return 0;
+
+ if (!intel_engine_pm_get_if_awake(engine))
+ return 0;
+
+ rq = i915_request_create(engine->kernel_context);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_rpm;
+ }
+
+ idle_pulse(engine, rq);
+ i915_request_add(rq);
+
+out_rpm:
+ intel_engine_pm_put(engine);
+ return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_engine_heartbeat.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
new file mode 100644
index 000000000000..a7b8c0f9e005
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
@@ -0,0 +1,23 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_HEARTBEAT_H
+#define INTEL_ENGINE_HEARTBEAT_H
+
+struct intel_engine_cs;
+
+void intel_engine_init_heartbeat(struct intel_engine_cs *engine);
+
+int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
+ unsigned long delay);
+
+void intel_engine_park_heartbeat(struct intel_engine_cs *engine);
+void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine);
+
+int intel_engine_pulse(struct intel_engine_cs *engine);
+int intel_engine_flush_barriers(struct intel_engine_cs *engine);
+
+#endif /* INTEL_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 67eb6183648a..010620b78202 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -6,12 +6,15 @@
#include "i915_drv.h"
+#include "intel_context.h"
#include "intel_engine.h"
+#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_pool.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
+#include "intel_ring.h"
static int __engine_unpark(struct intel_wakeref *wf)
{
@@ -19,7 +22,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
container_of(wf, typeof(*engine), wakeref);
void *map;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
intel_gt_pm_get(engine->gt);
@@ -34,7 +37,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
if (engine->unpark)
engine->unpark(engine);
- intel_engine_init_hangcheck(engine);
+ intel_engine_unpark_heartbeat(engine);
return 0;
}
@@ -53,7 +56,7 @@ static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
static inline void __timeline_mark_unlock(struct intel_context *ce,
unsigned long flags)
{
- mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_);
+ mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
local_irq_restore(flags);
}
@@ -71,12 +74,57 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
+static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+ struct i915_request *rq = to_request(fence);
+
+ ewma__engine_latency_add(&rq->engine->latency,
+ ktime_us_delta(rq->fence.timestamp,
+ rq->duration.emitted));
+}
+
+static void
+__queue_and_release_pm(struct i915_request *rq,
+ struct intel_timeline *tl,
+ struct intel_engine_cs *engine)
+{
+ struct intel_gt_timelines *timelines = &engine->gt->timelines;
+
+ ENGINE_TRACE(engine, "\n");
+
+ /*
+ * We have to serialise all potential retirement paths with our
+ * submission, as we don't want to underflow either the
+ * engine->wakeref.counter or our timeline->active_count.
+ *
+ * Equally, we cannot allow a new submission to start until
+ * after we finish queueing, nor could we allow that submitter
+ * to retire us before we are ready!
+ */
+ spin_lock(&timelines->lock);
+
+ /* Let intel_gt_retire_requests() retire us (acquired under lock) */
+ if (!atomic_fetch_inc(&tl->active_count))
+ list_add_tail(&tl->link, &timelines->active_list);
+
+ /* Hand the request over to HW and so engine_retire() */
+ __i915_request_queue(rq, NULL);
+
+ /* Let new submissions commence (and maybe retire this timeline) */
+ __intel_wakeref_defer_park(&engine->wakeref);
+
+ spin_unlock(&timelines->lock);
+}
+
static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
+ struct intel_context *ce = engine->kernel_context;
struct i915_request *rq;
unsigned long flags;
bool result = true;
+ GEM_BUG_ON(!intel_context_is_barrier(ce));
+
/* Already inside the kernel context, safe to power down. */
if (engine->wakeref_serial == engine->serial)
return true;
@@ -96,31 +144,56 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
* This should hold true as we can only park the engine after
* retiring the last request, thus all rings should be empty and
* all timelines idle.
+ *
+ * For unlocking, there are 2 other parties and the GPU who have a
+ * stake here.
+ *
+ * A new gpu user will be waiting on the engine-pm to start their
+ * engine_unpark. New waiters are predicated on engine->wakeref.count
+ * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
+ * engine->wakeref.
+ *
+ * The other party is intel_gt_retire_requests(), which is walking the
+ * list of active timelines looking for completions. Meanwhile as soon
+ * as we call __i915_request_queue(), the GPU may complete our request.
+ * Ergo, if we put ourselves on the timelines.active_list
+ * (se intel_timeline_enter()) before we increment the
+ * engine->wakeref.count, we may see the request completion and retire
+ * it causing an undeflow of the engine->wakeref.
*/
- flags = __timeline_mark_lock(engine->kernel_context);
+ flags = __timeline_mark_lock(ce);
+ GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
- rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
+ rq = __i915_request_create(ce, GFP_NOWAIT);
if (IS_ERR(rq))
/* Context switch failed, hope for the best! Maybe reset? */
goto out_unlock;
- intel_timeline_enter(i915_request_timeline(rq));
-
/* Check again on the next retirement. */
engine->wakeref_serial = engine->serial + 1;
i915_request_add_active_barriers(rq);
/* Install ourselves as a preemption barrier */
- rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE;
- __i915_request_commit(rq);
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+ if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
+ /*
+ * Use an interrupt for precise measurement of duration,
+ * otherwise we rely on someone else retiring all the requests
+ * which may delay the signaling (i.e. we will likely wait
+ * until the background request retirement running every
+ * second or two).
+ */
+ BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq));
+ dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
+ rq->duration.emitted = ktime_get();
+ }
- /* Release our exclusive hold on the engine */
- __intel_wakeref_defer_park(&engine->wakeref);
- __i915_request_queue(rq, NULL);
+ /* Expose ourselves to the world */
+ __queue_and_release_pm(rq, ce->timeline, engine);
result = false;
out_unlock:
- __timeline_mark_unlock(engine->kernel_context, flags);
+ __timeline_mark_unlock(ce, flags);
return result;
}
@@ -133,7 +206,7 @@ static void call_idle_barriers(struct intel_engine_cs *engine)
container_of((struct list_head *)node,
typeof(*cb), node);
- cb->func(NULL, cb);
+ cb->func(ERR_PTR(-EAGAIN), cb);
}
}
@@ -154,10 +227,11 @@ static int __engine_park(struct intel_wakeref *wf)
if (!switch_to_kernel_context(engine))
return -EBUSY;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
call_idle_barriers(engine); /* cleanup after wedging */
+ intel_engine_park_heartbeat(engine);
intel_engine_disarm_breadcrumbs(engine);
intel_engine_pool_park(&engine->pool);
@@ -174,7 +248,8 @@ static int __engine_park(struct intel_wakeref *wf)
engine->execlists.no_priolist = false;
- intel_gt_pm_put(engine->gt);
+ /* While gt calls i915_vma_parked(), we have to break the lock cycle */
+ intel_gt_pm_put_async(engine->gt);
return 0;
}
@@ -188,6 +263,7 @@ void intel_engine_init__pm(struct intel_engine_cs *engine)
struct intel_runtime_pm *rpm = engine->uncore->rpm;
intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
+ intel_engine_init_heartbeat(engine);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 739c50fefcef..e52c2b0cb245 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -7,6 +7,7 @@
#ifndef INTEL_ENGINE_PM_H
#define INTEL_ENGINE_PM_H
+#include "i915_request.h"
#include "intel_engine_types.h"
#include "intel_wakeref.h"
@@ -31,6 +32,36 @@ static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
intel_wakeref_put(&engine->wakeref);
}
+static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine)
+{
+ intel_wakeref_put_async(&engine->wakeref);
+}
+
+static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
+{
+ intel_wakeref_unlock_wait(&engine->wakeref);
+}
+
+static inline struct i915_request *
+intel_engine_create_kernel_request(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+
+ /*
+ * The engine->kernel_context is special as it is used inside
+ * the engine-pm barrier (see __engine_park()), circumventing
+ * the usual mutexes and relying on the engine-pm barrier
+ * instead. So whenever we use the engine->kernel_context
+ * outside of the barrier, we must manually handle the
+ * engine wakeref to serialise with the use inside.
+ */
+ intel_engine_pm_get(engine);
+ rq = i915_request_create(engine->kernel_context);
+ intel_engine_pm_put(engine);
+
+ return rq;
+}
+
void intel_engine_init__pm(struct intel_engine_cs *engine);
#endif /* INTEL_ENGINE_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
index 3cdbd5f8b5be..397186818305 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -104,6 +104,8 @@ node_create(struct intel_engine_pool *pool, size_t sz)
return ERR_CAST(obj);
}
+ i915_gem_object_set_readonly(obj);
+
node->obj = obj;
return node;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 3451be034caf..00287515e7af 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -7,6 +7,7 @@
#ifndef __INTEL_ENGINE_TYPES__
#define __INTEL_ENGINE_TYPES__
+#include <linux/average.h>
#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/kref.h>
@@ -15,6 +16,7 @@
#include <linux/rbtree.h>
#include <linux/timer.h>
#include <linux/types.h>
+#include <linux/workqueue.h>
#include "i915_gem.h"
#include "i915_pmu.h"
@@ -58,6 +60,7 @@ struct i915_gem_context;
struct i915_request;
struct i915_sched_attr;
struct intel_gt;
+struct intel_ring;
struct intel_uncore;
typedef u8 intel_engine_mask_t;
@@ -76,40 +79,6 @@ struct intel_instdone {
u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};
-struct intel_engine_hangcheck {
- u64 acthd;
- u32 last_ring;
- u32 last_head;
- unsigned long action_timestamp;
- struct intel_instdone instdone;
-};
-
-struct intel_ring {
- struct kref ref;
- struct i915_vma *vma;
- void *vaddr;
-
- /*
- * As we have two types of rings, one global to the engine used
- * by ringbuffer submission and those that are exclusive to a
- * context used by execlists, we have to play safe and allow
- * atomic updates to the pin_count. However, the actual pinning
- * of the context is either done during initialisation for
- * ringbuffer submission or serialised as part of the context
- * pinning for execlists, and so we do not need a mutex ourselves
- * to serialise intel_ring_pin/intel_ring_unpin.
- */
- atomic_t pin_count;
-
- u32 head;
- u32 tail;
- u32 emit;
-
- u32 space;
- u32 size;
- u32 effective_size;
-};
-
/*
* we use a single page to load ctx workarounds so all of these
* values are referred in terms of dwords
@@ -151,6 +120,9 @@ enum intel_engine_id {
#define INVALID_ENGINE ((enum intel_engine_id)-1)
};
+/* A simple estimator for the round-trip latency of an engine */
+DECLARE_EWMA(_engine_latency, 6, 4)
+
struct st_preempt_hang {
struct completion completion;
unsigned int count;
@@ -175,6 +147,11 @@ struct intel_engine_execlists {
struct timer_list timer;
/**
+ * @preempt: reset the current context if it fails to give way
+ */
+ struct timer_list preempt;
+
+ /**
* @default_priolist: priority list for I915_PRIORITY_NORMAL
*/
struct i915_priolist default_priolist;
@@ -326,6 +303,11 @@ struct intel_engine_cs {
intel_engine_mask_t saturated; /* submitting semaphores too late? */
+ struct {
+ struct delayed_work work;
+ struct i915_request *systole;
+ } heartbeat;
+
unsigned long serial;
unsigned long wakeref_serial;
@@ -338,6 +320,13 @@ struct intel_engine_cs {
struct intel_timeline *timeline;
} legacy;
+ /*
+ * We track the average duration of the idle pulse on parking the
+ * engine to keep an estimate of the how the fast the engine is
+ * under ideal conditions.
+ */
+ struct ewma__engine_latency latency;
+
/* Rather than have every client wait upon all user interrupts,
* with the herd waking after every interrupt and each doing the
* heavyweight seqno dance, we delegate the task (of being the
@@ -411,7 +400,10 @@ struct intel_engine_cs {
struct {
void (*prepare)(struct intel_engine_cs *engine);
- void (*reset)(struct intel_engine_cs *engine, bool stalled);
+
+ void (*rewind)(struct intel_engine_cs *engine, bool stalled);
+ void (*cancel)(struct intel_engine_cs *engine);
+
void (*finish)(struct intel_engine_cs *engine);
} reset;
@@ -461,30 +453,29 @@ struct intel_engine_cs {
void (*schedule)(struct i915_request *request,
const struct i915_sched_attr *attr);
- /*
- * Cancel all requests on the hardware, or queued for execution.
- * This should only cancel the ready requests that have been
- * submitted to the engine (via the engine->submit_request callback).
- * This is called when marking the device as wedged.
- */
- void (*cancel_requests)(struct intel_engine_cs *engine);
-
- void (*destroy)(struct intel_engine_cs *engine);
+ void (*release)(struct intel_engine_cs *engine);
struct intel_engine_execlists execlists;
+ /*
+ * Keep track of completed timelines on this engine for early
+ * retirement with the goal of quickly enabling powersaving as
+ * soon as the engine is idle.
+ */
+ struct intel_timeline *retire;
+ struct work_struct retire_work;
+
/* status_notifier: list of callbacks for context-switch changes */
struct atomic_notifier_head context_status_notifier;
- struct intel_engine_hangcheck hangcheck;
-
-#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_USING_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
#define I915_ENGINE_IS_VIRTUAL BIT(5)
#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
unsigned int flags;
/*
@@ -542,12 +533,25 @@ struct intel_engine_cs {
*/
ktime_t total;
} stats;
+
+ struct {
+ unsigned long heartbeat_interval_ms;
+ unsigned long preempt_timeout_ms;
+ unsigned long stop_timeout_ms;
+ unsigned long timeslice_duration_ms;
+ } props;
};
static inline bool
-intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
+intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_USING_CMD_PARSER;
+}
+
+static inline bool
+intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
{
- return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
+ return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}
static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 4294f146f13c..51b8718513bc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -7,6 +7,8 @@
#ifndef _INTEL_GPU_COMMANDS_H_
#define _INTEL_GPU_COMMANDS_H_
+#include <linux/bitops.h>
+
/*
* Target address alignments required for GPU access e.g.
* MI_STORE_DWORD_IMM.
@@ -319,4 +321,31 @@
#define COLOR_BLT ((0x2<<29)|(0x40<<22))
#define SRC_COPY_BLT ((0x2<<29)|(0x43<<22))
+/*
+ * Used to convert any address to canonical form.
+ * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
+ * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
+ * addresses to be in a canonical form:
+ * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
+ * canonical form [63:48] == [47]."
+ */
+#define GEN8_HIGH_ADDRESS_BIT 47
+static inline u64 gen8_canonical_addr(u64 address)
+{
+ return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
+}
+
+static inline u64 gen8_noncanonical_addr(u64 address)
+{
+ return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
+}
+
+static inline u32 *__gen6_emit_bb_start(u32 *cs, u32 addr, unsigned int flags)
+{
+ *cs++ = MI_BATCH_BUFFER_START | flags;
+ *cs++ = addr;
+
+ return cs;
+}
+
#endif /* _INTEL_GPU_COMMANDS_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 1c4b6c9642ad..ec84b5e62fef 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -3,12 +3,16 @@
* Copyright © 2019 Intel Corporation
*/
+#include "debugfs_gt.h"
#include "i915_drv.h"
+#include "intel_context.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_mocs.h"
#include "intel_rc6.h"
+#include "intel_renderstate.h"
+#include "intel_rps.h"
#include "intel_uncore.h"
#include "intel_pm.h"
@@ -22,19 +26,20 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);
- intel_gt_init_hangcheck(gt);
intel_gt_init_reset(gt);
intel_gt_init_requests(gt);
+ intel_gt_init_timelines(gt);
intel_gt_pm_init_early(gt);
+
+ intel_rps_init_early(&gt->rps);
intel_uc_init_early(&gt->uc);
}
-void intel_gt_init_hw_early(struct drm_i915_private *i915)
+void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt)
{
- i915->gt.ggtt = &i915->ggtt;
+ gt->ggtt = ggtt;
- /* BIOS often leaves RC6 enabled, but disable it for hw init */
- intel_gt_pm_disable(&i915->gt);
+ intel_gt_sanitize(gt, false);
}
static void init_unused_ring(struct intel_gt *gt, u32 base)
@@ -72,7 +77,6 @@ int intel_gt_init_hw(struct intel_gt *gt)
struct intel_uncore *uncore = gt->uncore;
int ret;
- BUG_ON(!i915->kernel_context);
ret = intel_gt_terminally_wedged(gt);
if (ret)
return ret;
@@ -302,7 +306,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
intel_gt_chipset_flush(gt);
- with_intel_runtime_pm(uncore->rpm, wakeref) {
+ with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) {
unsigned long flags;
spin_lock_irqsave(&uncore->lock, flags);
@@ -321,8 +325,9 @@ void intel_gt_chipset_flush(struct intel_gt *gt)
void intel_gt_driver_register(struct intel_gt *gt)
{
- if (IS_GEN(gt->i915, 5))
- intel_gpu_ips_init(gt->i915);
+ intel_rps_driver_register(&gt->rps);
+
+ debugfs_gt_register(gt);
}
static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
@@ -364,41 +369,297 @@ static void intel_gt_fini_scratch(struct intel_gt *gt)
i915_vma_unpin_and_release(&gt->scratch, 0);
}
+static struct i915_address_space *kernel_vm(struct intel_gt *gt)
+{
+ if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
+ return &i915_ppgtt_create(gt->i915)->vm;
+ else
+ return i915_vm_get(&gt->ggtt->vm);
+}
+
+static int __intel_context_flush_retire(struct intel_context *ce)
+{
+ struct intel_timeline *tl;
+
+ tl = intel_context_timeline_lock(ce);
+ if (IS_ERR(tl))
+ return PTR_ERR(tl);
+
+ intel_context_timeline_unlock(tl);
+ return 0;
+}
+
+static int __engines_record_defaults(struct intel_gt *gt)
+{
+ struct i915_request *requests[I915_NUM_ENGINES] = {};
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * As we reset the gpu during very early sanitisation, the current
+ * register state on the GPU should reflect its defaults values.
+ * We load a context onto the hw (with restore-inhibit), then switch
+ * over to a second context to save that default register state. We
+ * can then prime every new context with that state so they all start
+ * from the same default HW values.
+ */
+
+ for_each_engine(engine, gt, id) {
+ struct intel_renderstate so;
+ struct intel_context *ce;
+ struct i915_request *rq;
+
+ err = intel_renderstate_init(&so, engine);
+ if (err)
+ goto out;
+
+ /* We must be able to switch to something! */
+ GEM_BUG_ON(!engine->kernel_context);
+ engine->serial++; /* force the kernel context switch */
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out;
+ }
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ intel_context_put(ce);
+ goto out;
+ }
+
+ err = intel_engine_emit_ctx_wa(rq);
+ if (err)
+ goto err_rq;
+
+ err = intel_renderstate_emit(&so, rq);
+ if (err)
+ goto err_rq;
+
+err_rq:
+ requests[id] = i915_request_get(rq);
+ i915_request_add(rq);
+ intel_renderstate_fini(&so);
+ if (err)
+ goto out;
+ }
+
+ /* Flush the default context image to memory, and enable powersaving. */
+ if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+ err = -EIO;
+ goto out;
+ }
+
+ for (id = 0; id < ARRAY_SIZE(requests); id++) {
+ struct i915_request *rq;
+ struct i915_vma *state;
+ void *vaddr;
+
+ rq = requests[id];
+ if (!rq)
+ continue;
+
+ GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
+ state = rq->context->state;
+ if (!state)
+ continue;
+
+ /* Serialise with retirement on another CPU */
+ GEM_BUG_ON(!i915_request_completed(rq));
+ err = __intel_context_flush_retire(rq->context);
+ if (err)
+ goto out;
+
+ /* We want to be able to unbind the state from the GGTT */
+ GEM_BUG_ON(intel_context_is_pinned(rq->context));
+
+ /*
+ * As we will hold a reference to the logical state, it will
+ * not be torn down with the context, and importantly the
+ * object will hold onto its vma (making it possible for a
+ * stray GTT write to corrupt our defaults). Unmap the vma
+ * from the GTT to prevent such accidents and reclaim the
+ * space.
+ */
+ err = i915_vma_unbind(state);
+ if (err)
+ goto out;
+
+ i915_gem_object_lock(state->obj);
+ err = i915_gem_object_set_to_cpu_domain(state->obj, false);
+ i915_gem_object_unlock(state->obj);
+ if (err)
+ goto out;
+
+ i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC);
+
+ /* Check we can acquire the image of the context state */
+ vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto out;
+ }
+
+ rq->engine->default_state = i915_gem_object_get(state->obj);
+ i915_gem_object_unpin_map(state->obj);
+ }
+
+out:
+ /*
+ * If we have to abandon now, we expect the engines to be idle
+ * and ready to be torn-down. The quickest way we can accomplish
+ * this is by declaring ourselves wedged.
+ */
+ if (err)
+ intel_gt_set_wedged(gt);
+
+ for (id = 0; id < ARRAY_SIZE(requests); id++) {
+ struct intel_context *ce;
+ struct i915_request *rq;
+
+ rq = requests[id];
+ if (!rq)
+ continue;
+
+ ce = rq->context;
+ i915_request_put(rq);
+ intel_context_put(ce);
+ }
+ return err;
+}
+
+static int __engines_verify_workarounds(struct intel_gt *gt)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ return 0;
+
+ for_each_engine(engine, gt, id) {
+ if (intel_engine_verify_workarounds(engine, "load"))
+ err = -EIO;
+ }
+
+ return err;
+}
+
+static void __intel_gt_disable(struct intel_gt *gt)
+{
+ intel_gt_set_wedged_on_init(gt);
+
+ intel_gt_suspend_prepare(gt);
+ intel_gt_suspend_late(gt);
+
+ GEM_BUG_ON(intel_gt_pm_is_awake(gt));
+}
+
int intel_gt_init(struct intel_gt *gt)
{
int err;
- err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K);
+ err = i915_inject_probe_error(gt->i915, -ENODEV);
if (err)
return err;
+ /*
+ * This is just a security blanket to placate dragons.
+ * On some systems, we very sporadically observe that the first TLBs
+ * used by the CS may be stale, despite us poking the TLB reset. If
+ * we hold the forcewake during initialisation these problems
+ * just magically go away.
+ */
+ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+
+ err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K);
+ if (err)
+ goto out_fw;
+
intel_gt_pm_init(gt);
- return 0;
+ gt->vm = kernel_vm(gt);
+ if (!gt->vm) {
+ err = -ENOMEM;
+ goto err_pm;
+ }
+
+ err = intel_engines_init(gt);
+ if (err)
+ goto err_engines;
+
+ intel_uc_init(&gt->uc);
+
+ err = intel_gt_resume(gt);
+ if (err)
+ goto err_uc_init;
+
+ err = __engines_record_defaults(gt);
+ if (err)
+ goto err_gt;
+
+ err = __engines_verify_workarounds(gt);
+ if (err)
+ goto err_gt;
+
+ err = i915_inject_probe_error(gt->i915, -EIO);
+ if (err)
+ goto err_gt;
+
+ goto out_fw;
+err_gt:
+ __intel_gt_disable(gt);
+ intel_uc_fini_hw(&gt->uc);
+err_uc_init:
+ intel_uc_fini(&gt->uc);
+err_engines:
+ intel_engines_release(gt);
+ i915_vm_put(fetch_and_zero(&gt->vm));
+err_pm:
+ intel_gt_pm_fini(gt);
+ intel_gt_fini_scratch(gt);
+out_fw:
+ if (err)
+ intel_gt_set_wedged_on_init(gt);
+ intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
+ return err;
}
void intel_gt_driver_remove(struct intel_gt *gt)
{
- GEM_BUG_ON(gt->awake);
- intel_gt_pm_disable(gt);
+ __intel_gt_disable(gt);
+
+ intel_uc_fini_hw(&gt->uc);
+ intel_uc_fini(&gt->uc);
+
+ intel_engines_release(gt);
}
void intel_gt_driver_unregister(struct intel_gt *gt)
{
- intel_gpu_ips_teardown();
+ intel_rps_driver_unregister(&gt->rps);
}
void intel_gt_driver_release(struct intel_gt *gt)
{
- /* Paranoia: make sure we have disabled everything before we exit. */
- intel_gt_pm_disable(gt);
- intel_gt_pm_fini(gt);
+ struct i915_address_space *vm;
+ vm = fetch_and_zero(&gt->vm);
+ if (vm) /* FIXME being called twice on error paths :( */
+ i915_vm_put(vm);
+
+ intel_gt_pm_fini(gt);
intel_gt_fini_scratch(gt);
}
void intel_gt_driver_late_release(struct intel_gt *gt)
{
intel_uc_driver_late_release(&gt->uc);
+ intel_gt_fini_requests(gt);
intel_gt_fini_reset(gt);
+ intel_gt_fini_timelines(gt);
+ intel_engines_free(gt);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index e6ab0bff0efb..2355cf129e9c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -12,6 +12,12 @@
struct drm_i915_private;
+#define GT_TRACE(gt, fmt, ...) do { \
+ const struct intel_gt *gt__ __maybe_unused = (gt); \
+ GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \
+ ##__VA_ARGS__); \
+} while (0)
+
static inline struct intel_gt *uc_to_gt(struct intel_uc *uc)
{
return container_of(uc, struct intel_gt, uc);
@@ -28,7 +34,7 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc)
}
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
-void intel_gt_init_hw_early(struct drm_i915_private *i915);
+void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt);
int __must_check intel_gt_init_hw(struct intel_gt *gt);
int intel_gt_init(struct intel_gt *gt);
void intel_gt_driver_register(struct intel_gt *gt);
@@ -46,8 +52,6 @@ void intel_gt_clear_error_registers(struct intel_gt *gt,
void intel_gt_flush_ggtt_writes(struct intel_gt *gt);
void intel_gt_chipset_flush(struct intel_gt *gt);
-void intel_gt_init_hangcheck(struct intel_gt *gt);
-
static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt,
enum intel_gt_scratch_field field)
{
@@ -59,6 +63,4 @@ static inline bool intel_gt_is_wedged(struct intel_gt *gt)
return __intel_reset_failed(&gt->reset);
}
-void intel_gt_queue_hangcheck(struct intel_gt *gt);
-
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 34a4fb624bf7..f796bdf1ed30 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -11,6 +11,7 @@
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_uncore.h"
+#include "intel_rps.h"
static void guc_irq_handler(struct intel_guc *guc, u16 iir)
{
@@ -27,7 +28,7 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
tasklet = true;
if (iir & GT_RENDER_USER_INTERRUPT) {
- intel_engine_breadcrumbs_irq(engine);
+ intel_engine_signal_breadcrumbs(engine);
tasklet |= intel_engine_needs_breadcrumb_tasklet(engine);
}
@@ -77,7 +78,7 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance,
return guc_irq_handler(&gt->uc.guc, iir);
if (instance == OTHER_GTPM_INSTANCE)
- return gen11_rps_irq_handler(gt, iir);
+ return gen11_rps_irq_handler(&gt->rps, iir);
WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
instance, iir);
@@ -244,9 +245,9 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
{
if (gt_iir & GT_RENDER_USER_INTERRUPT)
- intel_engine_breadcrumbs_irq(gt->engine_class[RENDER_CLASS][0]);
+ intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
if (gt_iir & ILK_BSD_USER_INTERRUPT)
- intel_engine_breadcrumbs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0]);
+ intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
}
static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
@@ -270,11 +271,11 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
{
if (gt_iir & GT_RENDER_USER_INTERRUPT)
- intel_engine_breadcrumbs_irq(gt->engine_class[RENDER_CLASS][0]);
+ intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
if (gt_iir & GT_BSD_USER_INTERRUPT)
- intel_engine_breadcrumbs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0]);
+ intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
if (gt_iir & GT_BLT_USER_INTERRUPT)
- intel_engine_breadcrumbs_irq(gt->engine_class[COPY_ENGINE_CLASS][0]);
+ intel_engine_signal_breadcrumbs(gt->engine_class[COPY_ENGINE_CLASS][0]);
if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
GT_BSD_CS_ERROR_INTERRUPT |
@@ -336,7 +337,7 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4])
}
if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
- gen6_rps_irq_handler(gt->i915, gt_iir[2]);
+ gen6_rps_irq_handler(&gt->rps, gt_iir[2]);
guc_irq_handler(&gt->uc.guc, gt_iir[2] >> 16);
}
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index b866d5b1eee0..45b68a17da4d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -4,6 +4,8 @@
* Copyright © 2019 Intel Corporation
*/
+#include <linux/suspend.h>
+
#include "i915_drv.h"
#include "i915_globals.h"
#include "i915_params.h"
@@ -12,13 +14,28 @@
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
+#include "intel_llc.h"
#include "intel_pm.h"
#include "intel_rc6.h"
+#include "intel_rps.h"
#include "intel_wakeref.h"
-static void pm_notify(struct intel_gt *gt, int state)
+static void user_forcewake(struct intel_gt *gt, bool suspend)
{
- blocking_notifier_call_chain(&gt->pm_notifications, state, gt->i915);
+ int count = atomic_read(&gt->user_wakeref);
+
+ /* Inside suspend/resume so single threaded, no races to worry about. */
+ if (likely(!count))
+ return;
+
+ intel_gt_pm_get(gt);
+ if (suspend) {
+ GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
+ atomic_sub(count, &gt->wakeref.count);
+ } else {
+ atomic_add(count, &gt->wakeref.count);
+ }
+ intel_gt_pm_put(gt);
}
static int __gt_unpark(struct intel_wakeref *wf)
@@ -26,7 +43,7 @@ static int __gt_unpark(struct intel_wakeref *wf)
struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
struct drm_i915_private *i915 = gt->i915;
- GEM_TRACE("\n");
+ GT_TRACE(gt, "\n");
i915_globals_unpark();
@@ -44,19 +61,12 @@ static int __gt_unpark(struct intel_wakeref *wf)
gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
GEM_BUG_ON(!gt->awake);
- intel_enable_gt_powersave(i915);
-
- i915_update_gfx_val(i915);
- if (INTEL_GEN(i915) >= 6)
- gen6_rps_busy(i915);
-
+ intel_rc6_unpark(&gt->rc6);
+ intel_rps_unpark(&gt->rps);
i915_pmu_gt_unparked(i915);
- intel_gt_queue_hangcheck(gt);
intel_gt_unpark_requests(gt);
- pm_notify(gt, INTEL_GT_UNPARK);
-
return 0;
}
@@ -66,20 +76,21 @@ static int __gt_park(struct intel_wakeref *wf)
intel_wakeref_t wakeref = fetch_and_zero(&gt->awake);
struct drm_i915_private *i915 = gt->i915;
- GEM_TRACE("\n");
+ GT_TRACE(gt, "\n");
- pm_notify(gt, INTEL_GT_PARK);
intel_gt_park_requests(gt);
+ i915_vma_parked(gt);
i915_pmu_gt_parked(i915);
- if (INTEL_GEN(i915) >= 6)
- gen6_rps_idle(i915);
+ intel_rps_park(&gt->rps);
+ intel_rc6_park(&gt->rc6);
/* Everything switched off, flush any residual interrupt just in case */
intel_synchronize_irq(i915);
+ /* Defer dropping the display power well for 100ms, it's slow! */
GEM_BUG_ON(!wakeref);
- intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
+ intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref);
i915_globals_park();
@@ -89,14 +100,11 @@ static int __gt_park(struct intel_wakeref *wf)
static const struct intel_wakeref_ops wf_ops = {
.get = __gt_unpark,
.put = __gt_park,
- .flags = INTEL_WAKEREF_PUT_ASYNC,
};
void intel_gt_pm_init_early(struct intel_gt *gt)
{
intel_wakeref_init(&gt->wakeref, gt->uncore->rpm, &wf_ops);
-
- BLOCKING_INIT_NOTIFIER_HEAD(&gt->pm_notifications);
}
void intel_gt_pm_init(struct intel_gt *gt)
@@ -107,6 +115,7 @@ void intel_gt_pm_init(struct intel_gt *gt)
* user.
*/
intel_rc6_init(&gt->rc6);
+ intel_rps_init(&gt->rps);
}
static bool reset_engines(struct intel_gt *gt)
@@ -131,8 +140,22 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
+
+ GT_TRACE(gt, "force:%s", yesno(force));
- GEM_TRACE("\n");
+ /* Use a raw wakeref to avoid calling intel_display_power_get early */
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+
+ /*
+ * As we have just resumed the machine and woken the device up from
+ * deep PCI sleep (presumably D3_cold), assume the HW has been reset
+ * back to defaults, recovering from whatever wedged state we left it
+ * in and so worth trying to use the device once more.
+ */
+ if (intel_gt_is_wedged(gt))
+ intel_gt_unset_wedged(gt);
intel_uc_sanitize(&gt->uc);
@@ -140,6 +163,8 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force)
if (engine->reset.prepare)
engine->reset.prepare(engine);
+ intel_uc_reset_prepare(&gt->uc);
+
if (reset_engines(gt) || force) {
for_each_engine(engine, gt, id)
__intel_engine_reset(engine, false);
@@ -148,12 +173,9 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force)
for_each_engine(engine, gt, id)
if (engine->reset.finish)
engine->reset.finish(engine);
-}
-void intel_gt_pm_disable(struct intel_gt *gt)
-{
- if (!is_mock_gt(gt))
- intel_sanitize_gt_powersave(gt->i915);
+ intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
}
void intel_gt_pm_fini(struct intel_gt *gt)
@@ -165,7 +187,9 @@ int intel_gt_resume(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- int err = 0;
+ int err;
+
+ GT_TRACE(gt, "\n");
/*
* After resume, we may need to poke into the pinned kernel
@@ -174,9 +198,22 @@ int intel_gt_resume(struct intel_gt *gt)
* allowing us to fixup the user contexts on their first pin.
*/
intel_gt_pm_get(gt);
+
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
intel_rc6_sanitize(&gt->rc6);
+ /* Only when the HW is re-initialised, can we replay the requests */
+ err = intel_gt_init_hw(gt);
+ if (err) {
+ dev_err(gt->i915->drm.dev,
+ "Failed to initialize GPU, declaring it wedged!\n");
+ intel_gt_set_wedged(gt);
+ goto err_fw;
+ }
+
+ intel_rps_enable(&gt->rps);
+ intel_llc_enable(&gt->llc);
+
for_each_engine(engine, gt, id) {
struct intel_context *ce;
@@ -185,9 +222,7 @@ int intel_gt_resume(struct intel_gt *gt)
ce = engine->kernel_context;
if (ce) {
GEM_BUG_ON(!intel_context_is_pinned(ce));
- mutex_acquire(&ce->pin_mutex.dep_map, 0, 0, _THIS_IP_);
ce->ops->reset(ce);
- mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_);
}
engine->serial++; /* kernel context lost */
@@ -203,43 +238,98 @@ int intel_gt_resume(struct intel_gt *gt)
}
intel_rc6_enable(&gt->rc6);
+
+ intel_uc_resume(&gt->uc);
+
+ user_forcewake(gt, false);
+
+err_fw:
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
intel_gt_pm_put(gt);
return err;
}
-static void wait_for_idle(struct intel_gt *gt)
+static void wait_for_suspend(struct intel_gt *gt)
{
+ if (!intel_gt_pm_is_awake(gt))
+ return;
+
if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
/*
* Forcibly cancel outstanding work and leave
* the gpu quiet.
*/
intel_gt_set_wedged(gt);
+ intel_gt_retire_requests(gt);
}
intel_gt_pm_wait_for_idle(gt);
}
-void intel_gt_suspend(struct intel_gt *gt)
+void intel_gt_suspend_prepare(struct intel_gt *gt)
+{
+ user_forcewake(gt, true);
+ wait_for_suspend(gt);
+
+ intel_uc_suspend(&gt->uc);
+}
+
+static suspend_state_t pm_suspend_target(void)
+{
+#if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP)
+ return pm_suspend_target_state;
+#else
+ return PM_SUSPEND_TO_IDLE;
+#endif
+}
+
+void intel_gt_suspend_late(struct intel_gt *gt)
{
intel_wakeref_t wakeref;
/* We expect to be idle already; but also want to be independent */
- wait_for_idle(gt);
+ wait_for_suspend(gt);
+
+ if (is_mock_gt(gt))
+ return;
+
+ GEM_BUG_ON(gt->awake);
+
+ /*
+ * On disabling the device, we want to turn off HW access to memory
+ * that we no longer own.
+ *
+ * However, not all suspend-states disable the device. S0 (s2idle)
+ * is effectively runtime-suspend, the device is left powered on
+ * but needs to be put into a low power state. We need to keep
+ * powermanagement enabled, but we also retain system state and so
+ * it remains safe to keep on using our allocated memory.
+ */
+ if (pm_suspend_target() == PM_SUSPEND_TO_IDLE)
+ return;
- with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
+ intel_rps_disable(&gt->rps);
intel_rc6_disable(&gt->rc6);
+ intel_llc_disable(&gt->llc);
+ }
+
+ intel_gt_sanitize(gt, false);
+
+ GT_TRACE(gt, "\n");
}
void intel_gt_runtime_suspend(struct intel_gt *gt)
{
intel_uc_runtime_suspend(&gt->uc);
+
+ GT_TRACE(gt, "\n");
}
int intel_gt_runtime_resume(struct intel_gt *gt)
{
+ GT_TRACE(gt, "\n");
intel_gt_init_swizzling(gt);
return intel_uc_runtime_resume(&gt->uc);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index 997770d3a968..4a9e48c12bd4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -12,11 +12,6 @@
#include "intel_gt_types.h"
#include "intel_wakeref.h"
-enum {
- INTEL_GT_UNPARK,
- INTEL_GT_PARK,
-};
-
static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt)
{
return intel_wakeref_is_active(&gt->wakeref);
@@ -27,6 +22,11 @@ static inline void intel_gt_pm_get(struct intel_gt *gt)
intel_wakeref_get(&gt->wakeref);
}
+static inline void __intel_gt_pm_get(struct intel_gt *gt)
+{
+ __intel_wakeref_get(&gt->wakeref);
+}
+
static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt)
{
return intel_wakeref_get_if_active(&gt->wakeref);
@@ -37,6 +37,11 @@ static inline void intel_gt_pm_put(struct intel_gt *gt)
intel_wakeref_put(&gt->wakeref);
}
+static inline void intel_gt_pm_put_async(struct intel_gt *gt)
+{
+ intel_wakeref_put_async(&gt->wakeref);
+}
+
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
return intel_wakeref_wait_for_idle(&gt->wakeref);
@@ -44,13 +49,13 @@ static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
void intel_gt_pm_init_early(struct intel_gt *gt);
void intel_gt_pm_init(struct intel_gt *gt);
-void intel_gt_pm_disable(struct intel_gt *gt);
void intel_gt_pm_fini(struct intel_gt *gt);
void intel_gt_sanitize(struct intel_gt *gt, bool force);
+void intel_gt_suspend_prepare(struct intel_gt *gt);
+void intel_gt_suspend_late(struct intel_gt *gt);
int intel_gt_resume(struct intel_gt *gt);
-void intel_gt_suspend(struct intel_gt *gt);
void intel_gt_runtime_suspend(struct intel_gt *gt);
int intel_gt_runtime_resume(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index b73229a84d85..b4f04614230e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -4,8 +4,11 @@
* Copyright © 2019 Intel Corporation
*/
+#include <linux/workqueue.h>
+
#include "i915_drv.h" /* for_each_engine() */
#include "i915_request.h"
+#include "intel_engine_heartbeat.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
@@ -20,13 +23,88 @@ static void retire_requests(struct intel_timeline *tl)
break;
}
-static void flush_submission(struct intel_gt *gt)
+static bool flush_submission(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ bool active = false;
+
+ for_each_engine(engine, gt, id) {
+ active |= intel_engine_flush_submission(engine);
+ active |= flush_work(&engine->retire_work);
+ }
+
+ return active;
+}
+
+static void engine_retire(struct work_struct *work)
+{
+ struct intel_engine_cs *engine =
+ container_of(work, typeof(*engine), retire_work);
+ struct intel_timeline *tl = xchg(&engine->retire, NULL);
+
+ do {
+ struct intel_timeline *next = xchg(&tl->retire, NULL);
+
+ /*
+ * Our goal here is to retire _idle_ timelines as soon as
+ * possible (as they are idle, we do not expect userspace
+ * to be cleaning up anytime soon).
+ *
+ * If the timeline is currently locked, either it is being
+ * retired elsewhere or about to be!
+ */
+ if (mutex_trylock(&tl->mutex)) {
+ retire_requests(tl);
+ mutex_unlock(&tl->mutex);
+ }
+ intel_timeline_put(tl);
+
+ GEM_BUG_ON(!next);
+ tl = ptr_mask_bits(next, 1);
+ } while (tl);
+}
+
+static bool add_retire(struct intel_engine_cs *engine,
+ struct intel_timeline *tl)
+{
+#define STUB ((struct intel_timeline *)1)
+ struct intel_timeline *first;
+
+ /*
+ * We open-code a llist here to include the additional tag [BIT(0)]
+ * so that we know when the timeline is already on a
+ * retirement queue: either this engine or another.
+ */
+
+ if (cmpxchg(&tl->retire, NULL, STUB)) /* already queued */
+ return false;
+
+ intel_timeline_get(tl);
+ first = READ_ONCE(engine->retire);
+ do
+ tl->retire = ptr_pack_bits(first, 1, 1);
+ while (!try_cmpxchg(&engine->retire, &first, tl));
+
+ return !first;
+}
+
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+ struct intel_timeline *tl)
+{
+ if (add_retire(engine, tl))
+ schedule_work(&engine->retire_work);
+}
- for_each_engine(engine, gt, id)
- intel_engine_flush_submission(engine);
+void intel_engine_init_retire(struct intel_engine_cs *engine)
+{
+ INIT_WORK(&engine->retire_work, engine_retire);
+}
+
+void intel_engine_fini_retire(struct intel_engine_cs *engine)
+{
+ flush_work(&engine->retire_work);
+ GEM_BUG_ON(engine->retire);
}
long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
@@ -34,7 +112,6 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
struct intel_gt_timelines *timelines = &gt->timelines;
struct intel_timeline *tl, *tn;
unsigned long active_count = 0;
- unsigned long flags;
bool interruptible;
LIST_HEAD(free);
@@ -44,7 +121,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
flush_submission(gt); /* kick the ksoftirqd tasklets */
- spin_lock_irqsave(&timelines->lock, flags);
+ spin_lock(&timelines->lock);
list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
if (!mutex_trylock(&tl->mutex)) {
active_count++; /* report busy to caller, try again? */
@@ -52,9 +129,9 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
}
intel_timeline_get(tl);
- GEM_BUG_ON(!tl->active_count);
- tl->active_count++; /* pin the list element */
- spin_unlock_irqrestore(&timelines->lock, flags);
+ GEM_BUG_ON(!atomic_read(&tl->active_count));
+ atomic_inc(&tl->active_count); /* pin the list element */
+ spin_unlock(&timelines->lock);
if (timeout > 0) {
struct dma_fence *fence;
@@ -70,28 +147,31 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
retire_requests(tl);
- spin_lock_irqsave(&timelines->lock, flags);
+ spin_lock(&timelines->lock);
/* Resume iteration after dropping lock */
list_safe_reset_next(tl, tn, link);
- if (--tl->active_count)
- active_count += !!rcu_access_pointer(tl->last_request.fence);
- else
+ if (atomic_dec_and_test(&tl->active_count))
list_del(&tl->link);
+ else
+ active_count += i915_active_fence_isset(&tl->last_request);
mutex_unlock(&tl->mutex);
/* Defer the final release to after the spinlock */
if (refcount_dec_and_test(&tl->kref.refcount)) {
- GEM_BUG_ON(tl->active_count);
+ GEM_BUG_ON(atomic_read(&tl->active_count));
list_add(&tl->link, &free);
}
}
- spin_unlock_irqrestore(&timelines->lock, flags);
+ spin_unlock(&timelines->lock);
list_for_each_entry_safe(tl, tn, &free, link)
__intel_timeline_free(&tl->kref);
+ if (flush_submission(gt))
+ active_count++;
+
return active_count ? timeout : 0;
}
@@ -115,9 +195,9 @@ static void retire_work_handler(struct work_struct *work)
struct intel_gt *gt =
container_of(work, typeof(*gt), requests.retire_work.work);
- intel_gt_retire_requests(gt);
schedule_delayed_work(&gt->requests.retire_work,
round_jiffies_up_relative(HZ));
+ intel_gt_retire_requests(gt);
}
void intel_gt_init_requests(struct intel_gt *gt)
@@ -135,3 +215,9 @@ void intel_gt_unpark_requests(struct intel_gt *gt)
schedule_delayed_work(&gt->requests.retire_work,
round_jiffies_up_relative(HZ));
}
+
+void intel_gt_fini_requests(struct intel_gt *gt)
+{
+ /* Wait until the work is marked as finished before unloading! */
+ cancel_delayed_work_sync(&gt->requests.retire_work);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
index bd31cbce47e0..dbac53baf1cb 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -7,7 +7,9 @@
#ifndef INTEL_GT_REQUESTS_H
#define INTEL_GT_REQUESTS_H
+struct intel_engine_cs;
struct intel_gt;
+struct intel_timeline;
long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
static inline void intel_gt_retire_requests(struct intel_gt *gt)
@@ -15,10 +17,16 @@ static inline void intel_gt_retire_requests(struct intel_gt *gt)
intel_gt_retire_requests_timeout(gt, 0);
}
+void intel_engine_init_retire(struct intel_engine_cs *engine);
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+ struct intel_timeline *tl);
+void intel_engine_fini_retire(struct intel_engine_cs *engine);
+
int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
void intel_gt_init_requests(struct intel_gt *gt);
void intel_gt_park_requests(struct intel_gt *gt);
void intel_gt_unpark_requests(struct intel_gt *gt);
+void intel_gt_fini_requests(struct intel_gt *gt);
#endif /* INTEL_GT_REQUESTS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index ae4aaf75ac78..96890dd12b5f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -20,6 +20,7 @@
#include "intel_llc_types.h"
#include "intel_reset_types.h"
#include "intel_rc6_types.h"
+#include "intel_rps_types.h"
#include "intel_wakeref.h"
struct drm_i915_private;
@@ -27,14 +28,6 @@ struct i915_ggtt;
struct intel_engine_cs;
struct intel_uncore;
-struct intel_hangcheck {
- /* For hangcheck timer */
-#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
-#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
-
- struct delayed_work work;
-};
-
struct intel_gt {
struct drm_i915_private *i915;
struct intel_uncore *uncore;
@@ -68,7 +61,6 @@ struct intel_gt {
struct list_head closed_vma;
spinlock_t closed_lock; /* guards the list of closed_vma */
- struct intel_hangcheck hangcheck;
struct intel_reset reset;
/**
@@ -82,8 +74,7 @@ struct intel_gt {
struct intel_llc llc;
struct intel_rc6 rc6;
-
- struct blocking_notifier_head pm_notifications;
+ struct intel_rps rps;
ktime_t last_init_time;
@@ -99,6 +90,13 @@ struct intel_gt {
struct intel_engine_cs *engine[I915_NUM_ENGINES];
struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
[MAX_ENGINE_INSTANCE + 1];
+
+ /*
+ * Default address space (either GGTT or ppGTT depending on arch).
+ *
+ * Reserved for exclusive use by the kernel.
+ */
+ struct i915_address_space *vm;
};
enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
deleted file mode 100644
index 0fdef00af9e4..000000000000
--- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "i915_drv.h"
-#include "intel_engine.h"
-#include "intel_gt.h"
-#include "intel_reset.h"
-
-struct hangcheck {
- u64 acthd;
- u32 ring;
- u32 head;
- enum intel_engine_hangcheck_action action;
- unsigned long action_timestamp;
- int deadlock;
- struct intel_instdone instdone;
- bool wedged:1;
- bool stalled:1;
-};
-
-static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
-{
- u32 tmp = current_instdone | *old_instdone;
- bool unchanged;
-
- unchanged = tmp == *old_instdone;
- *old_instdone |= tmp;
-
- return unchanged;
-}
-
-static bool subunits_stuck(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
- struct intel_instdone instdone;
- struct intel_instdone *accu_instdone = &engine->hangcheck.instdone;
- bool stuck;
- int slice;
- int subslice;
-
- intel_engine_get_instdone(engine, &instdone);
-
- /* There might be unstable subunit states even when
- * actual head is not moving. Filter out the unstable ones by
- * accumulating the undone -> done transitions and only
- * consider those as progress.
- */
- stuck = instdone_unchanged(instdone.instdone,
- &accu_instdone->instdone);
- stuck &= instdone_unchanged(instdone.slice_common,
- &accu_instdone->slice_common);
-
- for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) {
- stuck &= instdone_unchanged(instdone.sampler[slice][subslice],
- &accu_instdone->sampler[slice][subslice]);
- stuck &= instdone_unchanged(instdone.row[slice][subslice],
- &accu_instdone->row[slice][subslice]);
- }
-
- return stuck;
-}
-
-static enum intel_engine_hangcheck_action
-head_stuck(struct intel_engine_cs *engine, u64 acthd)
-{
- if (acthd != engine->hangcheck.acthd) {
-
- /* Clear subunit states on head movement */
- memset(&engine->hangcheck.instdone, 0,
- sizeof(engine->hangcheck.instdone));
-
- return ENGINE_ACTIVE_HEAD;
- }
-
- if (!subunits_stuck(engine))
- return ENGINE_ACTIVE_SUBUNITS;
-
- return ENGINE_DEAD;
-}
-
-static enum intel_engine_hangcheck_action
-engine_stuck(struct intel_engine_cs *engine, u64 acthd)
-{
- enum intel_engine_hangcheck_action ha;
- u32 tmp;
-
- ha = head_stuck(engine, acthd);
- if (ha != ENGINE_DEAD)
- return ha;
-
- if (IS_GEN(engine->i915, 2))
- return ENGINE_DEAD;
-
- /* Is the chip hanging on a WAIT_FOR_EVENT?
- * If so we can simply poke the RB_WAIT bit
- * and break the hang. This should work on
- * all but the second generation chipsets.
- */
- tmp = ENGINE_READ(engine, RING_CTL);
- if (tmp & RING_WAIT) {
- intel_gt_handle_error(engine->gt, engine->mask, 0,
- "stuck wait on %s", engine->name);
- ENGINE_WRITE(engine, RING_CTL, tmp);
- return ENGINE_WAIT_KICK;
- }
-
- return ENGINE_DEAD;
-}
-
-static void hangcheck_load_sample(struct intel_engine_cs *engine,
- struct hangcheck *hc)
-{
- hc->acthd = intel_engine_get_active_head(engine);
- hc->ring = ENGINE_READ(engine, RING_START);
- hc->head = ENGINE_READ(engine, RING_HEAD);
-}
-
-static void hangcheck_store_sample(struct intel_engine_cs *engine,
- const struct hangcheck *hc)
-{
- engine->hangcheck.acthd = hc->acthd;
- engine->hangcheck.last_ring = hc->ring;
- engine->hangcheck.last_head = hc->head;
-}
-
-static enum intel_engine_hangcheck_action
-hangcheck_get_action(struct intel_engine_cs *engine,
- const struct hangcheck *hc)
-{
- if (intel_engine_is_idle(engine))
- return ENGINE_IDLE;
-
- if (engine->hangcheck.last_ring != hc->ring)
- return ENGINE_ACTIVE_SEQNO;
-
- if (engine->hangcheck.last_head != hc->head)
- return ENGINE_ACTIVE_SEQNO;
-
- return engine_stuck(engine, hc->acthd);
-}
-
-static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
- struct hangcheck *hc)
-{
- unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT;
-
- hc->action = hangcheck_get_action(engine, hc);
-
- /* We always increment the progress
- * if the engine is busy and still processing
- * the same request, so that no single request
- * can run indefinitely (such as a chain of
- * batches). The only time we do not increment
- * the hangcheck score on this ring, if this
- * engine is in a legitimate wait for another
- * engine. In that case the waiting engine is a
- * victim and we want to be sure we catch the
- * right culprit. Then every time we do kick
- * the ring, make it as a progress as the seqno
- * advancement might ensure and if not, it
- * will catch the hanging engine.
- */
-
- switch (hc->action) {
- case ENGINE_IDLE:
- case ENGINE_ACTIVE_SEQNO:
- /* Clear head and subunit states on seqno movement */
- hc->acthd = 0;
-
- memset(&engine->hangcheck.instdone, 0,
- sizeof(engine->hangcheck.instdone));
-
- /* Intentional fall through */
- case ENGINE_WAIT_KICK:
- case ENGINE_WAIT:
- engine->hangcheck.action_timestamp = jiffies;
- break;
-
- case ENGINE_ACTIVE_HEAD:
- case ENGINE_ACTIVE_SUBUNITS:
- /*
- * Seqno stuck with still active engine gets leeway,
- * in hopes that it is just a long shader.
- */
- timeout = I915_SEQNO_DEAD_TIMEOUT;
- break;
-
- case ENGINE_DEAD:
- break;
-
- default:
- MISSING_CASE(hc->action);
- }
-
- hc->stalled = time_after(jiffies,
- engine->hangcheck.action_timestamp + timeout);
- hc->wedged = time_after(jiffies,
- engine->hangcheck.action_timestamp +
- I915_ENGINE_WEDGED_TIMEOUT);
-}
-
-static void hangcheck_declare_hang(struct intel_gt *gt,
- intel_engine_mask_t hung,
- intel_engine_mask_t stuck)
-{
- struct intel_engine_cs *engine;
- intel_engine_mask_t tmp;
- char msg[80];
- int len;
-
- /* If some rings hung but others were still busy, only
- * blame the hanging rings in the synopsis.
- */
- if (stuck != hung)
- hung &= ~stuck;
- len = scnprintf(msg, sizeof(msg),
- "%s on ", stuck == hung ? "no progress" : "hang");
- for_each_engine_masked(engine, gt, hung, tmp)
- len += scnprintf(msg + len, sizeof(msg) - len,
- "%s, ", engine->name);
- msg[len-2] = '\0';
-
- return intel_gt_handle_error(gt, hung, I915_ERROR_CAPTURE, "%s", msg);
-}
-
-/*
- * This is called when the chip hasn't reported back with completed
- * batchbuffers in a long time. We keep track per ring seqno progress and
- * if there are no progress, hangcheck score for that ring is increased.
- * Further, acthd is inspected to see if the ring is stuck. On stuck case
- * we kick the ring. If we see no progress on three subsequent calls
- * we assume chip is wedged and try to fix it by resetting the chip.
- */
-static void hangcheck_elapsed(struct work_struct *work)
-{
- struct intel_gt *gt =
- container_of(work, typeof(*gt), hangcheck.work.work);
- intel_engine_mask_t hung = 0, stuck = 0, wedged = 0;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
-
- if (!i915_modparams.enable_hangcheck)
- return;
-
- if (!READ_ONCE(gt->awake))
- return;
-
- if (intel_gt_is_wedged(gt))
- return;
-
- wakeref = intel_runtime_pm_get_if_in_use(gt->uncore->rpm);
- if (!wakeref)
- return;
-
- /* As enabling the GPU requires fairly extensive mmio access,
- * periodically arm the mmio checker to see if we are triggering
- * any invalid access.
- */
- intel_uncore_arm_unclaimed_mmio_detection(gt->uncore);
-
- for_each_engine(engine, gt, id) {
- struct hangcheck hc;
-
- intel_engine_breadcrumbs_irq(engine);
-
- hangcheck_load_sample(engine, &hc);
- hangcheck_accumulate_sample(engine, &hc);
- hangcheck_store_sample(engine, &hc);
-
- if (hc.stalled) {
- hung |= engine->mask;
- if (hc.action != ENGINE_DEAD)
- stuck |= engine->mask;
- }
-
- if (hc.wedged)
- wedged |= engine->mask;
- }
-
- if (GEM_SHOW_DEBUG() && (hung | stuck)) {
- struct drm_printer p = drm_debug_printer("hangcheck");
-
- for_each_engine(engine, gt, id) {
- if (intel_engine_is_idle(engine))
- continue;
-
- intel_engine_dump(engine, &p, "%s\n", engine->name);
- }
- }
-
- if (wedged) {
- dev_err(gt->i915->drm.dev,
- "GPU recovery timed out,"
- " cancelling all in-flight rendering.\n");
- GEM_TRACE_DUMP();
- intel_gt_set_wedged(gt);
- }
-
- if (hung)
- hangcheck_declare_hang(gt, hung, stuck);
-
- intel_runtime_pm_put(gt->uncore->rpm, wakeref);
-
- /* Reset timer in case GPU hangs without another request being added */
- intel_gt_queue_hangcheck(gt);
-}
-
-void intel_gt_queue_hangcheck(struct intel_gt *gt)
-{
- unsigned long delay;
-
- if (unlikely(!i915_modparams.enable_hangcheck))
- return;
-
- /*
- * Don't continually defer the hangcheck so that it is always run at
- * least once after work has been scheduled on any ring. Otherwise,
- * we will ignore a hung ring if a second ring is kept busy.
- */
-
- delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES);
- queue_delayed_work(system_long_wq, &gt->hangcheck.work, delay);
-}
-
-void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
-{
- memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
- engine->hangcheck.action_timestamp = jiffies;
-}
-
-void intel_gt_init_hangcheck(struct intel_gt *gt)
-{
- INIT_DELAYED_WORK(&gt->hangcheck.work, hangcheck_elapsed);
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftest_hangcheck.c"
-#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c
index 35093eb5f24e..ceb785b75c25 100644
--- a/drivers/gpu/drm/i915/gt/intel_llc.c
+++ b/drivers/gpu/drm/i915/gt/intel_llc.c
@@ -48,7 +48,7 @@ static bool get_ia_constants(struct intel_llc *llc,
struct ia_constants *consts)
{
struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
- struct intel_rps *rps = &i915->gt_pm.rps;
+ struct intel_rps *rps = &llc_to_gt(llc)->rps;
if (rps->max_freq <= rps->min_freq)
return false;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d0088d020220..4fb70a7716e3 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -133,18 +133,19 @@
*/
#include <linux/interrupt.h>
-#include "gem/i915_gem_context.h"
-
#include "i915_drv.h"
#include "i915_perf.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
+#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#include "intel_reset.h"
+#include "intel_ring.h"
#include "intel_workarounds.h"
#define RING_EXECLIST_QFULL (1 << 0x2)
@@ -234,16 +235,9 @@ static void execlists_init_reg_state(u32 *reg_state,
const struct intel_engine_cs *engine,
const struct intel_ring *ring,
bool close);
-
-static void __context_pin_acquire(struct intel_context *ce)
-{
- mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _RET_IP_);
-}
-
-static void __context_pin_release(struct intel_context *ce)
-{
- mutex_release(&ce->pin_mutex.dep_map, 0, _RET_IP_);
-}
+static void
+__execlists_update_reg_state(const struct intel_context *ce,
+ const struct intel_engine_cs *engine);
static void mark_eio(struct i915_request *rq)
{
@@ -256,6 +250,23 @@ static void mark_eio(struct i915_request *rq)
i915_request_mark_complete(rq);
}
+static struct i915_request *
+active_request(const struct intel_timeline * const tl, struct i915_request *rq)
+{
+ struct i915_request *active = rq;
+
+ rcu_read_lock();
+ list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
+ if (i915_request_completed(rq))
+ break;
+
+ active = rq;
+ }
+ rcu_read_unlock();
+
+ return active;
+}
+
static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
{
return (i915_ggtt_offset(engine->status_page.vma) +
@@ -460,8 +471,7 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
if (IS_GEN(engine->i915, 8))
desc |= GEN8_CTX_L3LLC_COHERENT;
- desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
- /* bits 12-31 */
+ desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
/*
* The following 32bits are copied into the OA reports (dword 2).
* Consider updating oa_get_render_ctx_id in i915_perf.c when changing
@@ -834,12 +844,6 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
}
}
-static void unwind_wa_tail(struct i915_request *rq)
-{
- rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
- assert_ring_tail_valid(rq->ring, rq->tail);
-}
-
static struct i915_request *
__unwind_incomplete_requests(struct intel_engine_cs *engine)
{
@@ -852,12 +856,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_for_each_entry_safe_reverse(rq, rn,
&engine->active.requests,
sched.link) {
-
if (i915_request_completed(rq))
continue; /* XXX */
__i915_request_unsubmit(rq);
- unwind_wa_tail(rq);
/*
* Push the request back into the queue for later resubmission.
@@ -877,7 +879,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_move(&rq->sched.link, pl);
active = rq;
} else {
- struct intel_engine_cs *owner = rq->hw_context->engine;
+ struct intel_engine_cs *owner = rq->context->engine;
/*
* Decouple the virtual breadcrumb before moving it
@@ -925,14 +927,180 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
status, rq);
}
+static void intel_engine_context_in(struct intel_engine_cs *engine)
+{
+ unsigned long flags;
+
+ if (READ_ONCE(engine->stats.enabled) == 0)
+ return;
+
+ write_seqlock_irqsave(&engine->stats.lock, flags);
+
+ if (engine->stats.enabled > 0) {
+ if (engine->stats.active++ == 0)
+ engine->stats.start = ktime_get();
+ GEM_BUG_ON(engine->stats.active == 0);
+ }
+
+ write_sequnlock_irqrestore(&engine->stats.lock, flags);
+}
+
+static void intel_engine_context_out(struct intel_engine_cs *engine)
+{
+ unsigned long flags;
+
+ if (READ_ONCE(engine->stats.enabled) == 0)
+ return;
+
+ write_seqlock_irqsave(&engine->stats.lock, flags);
+
+ if (engine->stats.enabled > 0) {
+ ktime_t last;
+
+ if (engine->stats.active && --engine->stats.active == 0) {
+ /*
+ * Decrement the active context count and in case GPU
+ * is now idle add up to the running total.
+ */
+ last = ktime_sub(ktime_get(), engine->stats.start);
+
+ engine->stats.total = ktime_add(engine->stats.total,
+ last);
+ } else if (engine->stats.active == 0) {
+ /*
+ * After turning on engine stats, context out might be
+ * the first event in which case we account from the
+ * time stats gathering was turned on.
+ */
+ last = ktime_sub(ktime_get(), engine->stats.enabled_at);
+
+ engine->stats.total = ktime_add(engine->stats.total,
+ last);
+ }
+ }
+
+ write_sequnlock_irqrestore(&engine->stats.lock, flags);
+}
+
+static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x60;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return 0x54;
+ else if (engine->class == RENDER_CLASS)
+ return 0x58;
+ else
+ return -1;
+}
+
+static void
+execlists_check_context(const struct intel_context *ce,
+ const struct intel_engine_cs *engine)
+{
+ const struct intel_ring *ring = ce->ring;
+ u32 *regs = ce->lrc_reg_state;
+ bool valid = true;
+ int x;
+
+ if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
+ pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
+ engine->name,
+ regs[CTX_RING_START],
+ i915_ggtt_offset(ring->vma));
+ regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
+ valid = false;
+ }
+
+ if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
+ (RING_CTL_SIZE(ring->size) | RING_VALID)) {
+ pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
+ engine->name,
+ regs[CTX_RING_CTL],
+ (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+ valid = false;
+ }
+
+ x = lrc_ring_mi_mode(engine);
+ if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
+ pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
+ engine->name, regs[x + 1]);
+ regs[x + 1] &= ~STOP_RING;
+ regs[x + 1] |= STOP_RING << 16;
+ valid = false;
+ }
+
+ WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
+}
+
+static void restore_default_state(struct intel_context *ce,
+ struct intel_engine_cs *engine)
+{
+ u32 *regs = ce->lrc_reg_state;
+
+ if (engine->pinned_default_state)
+ memcpy(regs, /* skip restoring the vanilla PPHWSP */
+ engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+ engine->context_size - PAGE_SIZE);
+
+ execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+}
+
+static void reset_active(struct i915_request *rq,
+ struct intel_engine_cs *engine)
+{
+ struct intel_context * const ce = rq->context;
+ u32 head;
+
+ /*
+ * The executing context has been cancelled. We want to prevent
+ * further execution along this context and propagate the error on
+ * to anything depending on its results.
+ *
+ * In __i915_request_submit(), we apply the -EIO and remove the
+ * requests' payloads for any banned requests. But first, we must
+ * rewind the context back to the start of the incomplete request so
+ * that we do not jump back into the middle of the batch.
+ *
+ * We preserve the breadcrumbs and semaphores of the incomplete
+ * requests so that inter-timeline dependencies (i.e other timelines)
+ * remain correctly ordered. And we defer to __i915_request_submit()
+ * so that all asynchronous waits are correctly handled.
+ */
+ ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
+ rq->fence.context, rq->fence.seqno);
+
+ /* On resubmission of the active request, payload will be scrubbed */
+ if (i915_request_completed(rq))
+ head = rq->tail;
+ else
+ head = active_request(ce->timeline, rq)->head;
+ ce->ring->head = intel_ring_wrap(ce->ring, head);
+ intel_ring_update_space(ce->ring);
+
+ /* Scrub the context image to prevent replaying the previous batch */
+ restore_default_state(ce, engine);
+ __execlists_update_reg_state(ce, engine);
+
+ /* We've switched away, so this should be a no-op, but intent matters */
+ ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+}
+
static inline struct intel_engine_cs *
__execlists_schedule_in(struct i915_request *rq)
{
struct intel_engine_cs * const engine = rq->engine;
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
intel_context_get(ce);
+ if (unlikely(intel_context_is_banned(ce)))
+ reset_active(rq, engine);
+
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ execlists_check_context(ce, engine);
+
if (ce->tag) {
/* Use a fixed tag for OA and friends */
ce->lrc_desc |= (u64)ce->tag << 32;
@@ -945,7 +1113,7 @@ __execlists_schedule_in(struct i915_request *rq)
BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
}
- intel_gt_pm_get(engine->gt);
+ __intel_gt_pm_get(engine->gt);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
intel_engine_context_in(engine);
@@ -955,7 +1123,7 @@ __execlists_schedule_in(struct i915_request *rq)
static inline struct i915_request *
execlists_schedule_in(struct i915_request *rq, int idx)
{
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
struct intel_engine_cs *old;
GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
@@ -986,11 +1154,25 @@ static inline void
__execlists_schedule_out(struct i915_request *rq,
struct intel_engine_cs * const engine)
{
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
+
+ /*
+ * NB process_csb() is not under the engine->active.lock and hence
+ * schedule_out can race with schedule_in meaning that we should
+ * refrain from doing non-trivial work here.
+ */
+
+ /*
+ * If we have just completed this context, the engine may now be
+ * idle and we want to re-enter powersaving.
+ */
+ if (list_is_last(&rq->link, &ce->timeline->requests) &&
+ i915_request_completed(rq))
+ intel_engine_add_retire(engine, ce->timeline);
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
- intel_gt_pm_put(engine->gt);
+ intel_gt_pm_put_async(engine->gt);
/*
* If this is part of a virtual engine, its next request may
@@ -1010,7 +1192,7 @@ __execlists_schedule_out(struct i915_request *rq,
static inline void
execlists_schedule_out(struct i915_request *rq)
{
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
struct intel_engine_cs *cur, *old;
trace_i915_request_out(rq);
@@ -1025,13 +1207,29 @@ execlists_schedule_out(struct i915_request *rq)
i915_request_put(rq);
}
-static u64 execlists_update_context(const struct i915_request *rq)
+static u64 execlists_update_context(struct i915_request *rq)
{
- struct intel_context *ce = rq->hw_context;
- u64 desc;
+ struct intel_context *ce = rq->context;
+ u64 desc = ce->lrc_desc;
+ u32 tail;
- ce->lrc_reg_state[CTX_RING_TAIL] =
- intel_ring_set_tail(rq->ring, rq->tail);
+ /*
+ * WaIdleLiteRestore:bdw,skl
+ *
+ * We should never submit the context with the same RING_TAIL twice
+ * just in case we submit an empty ring, which confuses the HW.
+ *
+ * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
+ * the normal request to be able to always advance the RING_TAIL on
+ * subsequent resubmissions (for lite restore). Should that fail us,
+ * and we try and submit the same tail again, force the context
+ * reload.
+ */
+ tail = intel_ring_set_tail(rq->ring, rq->tail);
+ if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail))
+ desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc_reg_state[CTX_RING_TAIL] = tail;
+ rq->tail = rq->wa_tail;
/*
* Make sure the context image is complete before we submit it to HW.
@@ -1042,21 +1240,14 @@ static u64 execlists_update_context(const struct i915_request *rq)
* may not be visible to the HW prior to the completion of the UC
* register write and that we may begin execution from the context
* before its image is complete leading to invalid PD chasing.
- *
- * Furthermore, Braswell, at least, wants a full mb to be sure that
- * the writes are coherent in memory (visible to the GPU) prior to
- * execution, and not just visible to other CPUs (as is the result of
- * wmb).
*/
- mb();
-
- desc = ce->lrc_desc;
- ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
+ wmb();
/* Wa_1607138340:tgl */
if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0))
desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
return desc;
}
@@ -1082,15 +1273,14 @@ trace_ports(const struct intel_engine_execlists *execlists,
if (!ports[0])
return;
- GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
- engine->name, msg,
- ports[0]->fence.context,
- ports[0]->fence.seqno,
- i915_request_completed(ports[0]) ? "!" :
- i915_request_started(ports[0]) ? "*" :
- "",
- ports[1] ? ports[1]->fence.context : 0,
- ports[1] ? ports[1]->fence.seqno : 0);
+ ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
+ ports[0]->fence.context,
+ ports[0]->fence.seqno,
+ i915_request_completed(ports[0]) ? "!" :
+ i915_request_started(ports[0]) ? "*" :
+ "",
+ ports[1] ? ports[1]->fence.context : 0,
+ ports[1] ? ports[1]->fence.seqno : 0);
}
static __maybe_unused bool
@@ -1114,33 +1304,56 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
}
for (port = execlists->pending; (rq = *port); port++) {
- if (ce == rq->hw_context) {
- GEM_TRACE_ERR("Duplicate context in pending[%zd]\n",
+ unsigned long flags;
+ bool ok = true;
+
+ GEM_BUG_ON(!kref_read(&rq->fence.refcount));
+ GEM_BUG_ON(!i915_request_is_active(rq));
+
+ if (ce == rq->context) {
+ GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
port - execlists->pending);
return false;
}
+ ce = rq->context;
- ce = rq->hw_context;
- if (i915_request_completed(rq))
+ /* Hold tightly onto the lock to prevent concurrent retires! */
+ if (!spin_trylock_irqsave(&rq->lock, flags))
continue;
- if (i915_active_is_idle(&ce->active)) {
- GEM_TRACE_ERR("Inactive context in pending[%zd]\n",
+ if (i915_request_completed(rq))
+ goto unlock;
+
+ if (i915_active_is_idle(&ce->active) &&
+ !intel_context_is_barrier(ce)) {
+ GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
port - execlists->pending);
- return false;
+ ok = false;
+ goto unlock;
}
if (!i915_vma_is_pinned(ce->state)) {
- GEM_TRACE_ERR("Unpinned context in pending[%zd]\n",
+ GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
port - execlists->pending);
- return false;
+ ok = false;
+ goto unlock;
}
if (!i915_vma_is_pinned(ce->ring->vma)) {
- GEM_TRACE_ERR("Unpinned ringbuffer in pending[%zd]\n",
+ GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
port - execlists->pending);
- return false;
+ ok = false;
+ goto unlock;
}
+
+unlock:
+ spin_unlock_irqrestore(&rq->lock, flags);
+ if (!ok)
+ return false;
}
return ce;
@@ -1185,7 +1398,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
static bool ctx_single_port_submission(const struct intel_context *ce)
{
return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
- i915_gem_context_force_single_submission(ce->gem_context));
+ intel_context_force_single_submission(ce));
}
static bool can_merge_ctx(const struct intel_context *prev,
@@ -1221,7 +1434,7 @@ static bool can_merge_rq(const struct i915_request *prev,
(I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL)))
return false;
- if (!can_merge_ctx(prev->hw_context, next->hw_context))
+ if (!can_merge_ctx(prev->context, next->context))
return false;
return true;
@@ -1269,7 +1482,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
if (!list_empty(&ve->context.signal_link)) {
list_move_tail(&ve->context.signal_link,
&engine->breadcrumbs.signalers);
- intel_engine_queue_breadcrumbs(engine);
+ intel_engine_signal_breadcrumbs(engine);
}
spin_unlock(&old->breadcrumbs.irq_lock);
}
@@ -1345,7 +1558,7 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
{
int hint;
- if (!intel_engine_has_semaphores(engine))
+ if (!intel_engine_has_timeslices(engine))
return false;
if (list_is_last(&rq->sched.link, &engine->active.requests))
@@ -1366,15 +1579,32 @@ switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
return rq_prio(list_next_entry(rq, sched.link));
}
-static bool
-enable_timeslice(const struct intel_engine_execlists *execlists)
+static inline unsigned long
+timeslice(const struct intel_engine_cs *engine)
{
- const struct i915_request *rq = *execlists->active;
+ return READ_ONCE(engine->props.timeslice_duration_ms);
+}
+
+static unsigned long
+active_timeslice(const struct intel_engine_cs *engine)
+{
+ const struct i915_request *rq = *engine->execlists.active;
if (i915_request_completed(rq))
- return false;
+ return 0;
+
+ if (engine->execlists.switch_priority_hint < effective_prio(rq))
+ return 0;
+
+ return timeslice(engine);
+}
- return execlists->switch_priority_hint >= effective_prio(rq);
+static void set_timeslice(struct intel_engine_cs *engine)
+{
+ if (!intel_engine_has_timeslices(engine))
+ return;
+
+ set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
}
static void record_preemption(struct intel_engine_execlists *execlists)
@@ -1382,6 +1612,30 @@ static void record_preemption(struct intel_engine_execlists *execlists)
(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
}
+static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+
+ rq = last_active(&engine->execlists);
+ if (!rq)
+ return 0;
+
+ /* Force a fast reset for terminated contexts (ignoring sysfs!) */
+ if (unlikely(intel_context_is_banned(rq->context)))
+ return 1;
+
+ return READ_ONCE(engine->props.preempt_timeout_ms);
+}
+
+static void set_preempt_timeout(struct intel_engine_cs *engine)
+{
+ if (!intel_engine_has_preempt_reset(engine))
+ return;
+
+ set_timer_ms(&engine->execlists.preempt,
+ active_preempt_timeout(engine));
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1444,12 +1698,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
last = last_active(execlists);
if (last) {
if (need_preempt(engine, last, rb)) {
- GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n",
- engine->name,
- last->fence.context,
- last->fence.seqno,
- last->sched.attr.priority,
- execlists->queue_priority_hint);
+ ENGINE_TRACE(engine,
+ "preempting last=%llx:%lld, prio=%d, hint=%d\n",
+ last->fence.context,
+ last->fence.seqno,
+ last->sched.attr.priority,
+ execlists->queue_priority_hint);
record_preemption(execlists);
/*
@@ -1475,16 +1729,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* tendency to ignore us rewinding the TAIL to the
* end of an earlier request.
*/
- last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+ last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
last = NULL;
} else if (need_timeslice(engine, last) &&
timer_expired(&engine->execlists.timer)) {
- GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n",
- engine->name,
- last->fence.context,
- last->fence.seqno,
- last->sched.attr.priority,
- execlists->queue_priority_hint);
+ ENGINE_TRACE(engine,
+ "expired last=%llx:%lld, prio=%d, hint=%d\n",
+ last->fence.context,
+ last->fence.seqno,
+ last->sched.attr.priority,
+ execlists->queue_priority_hint);
ring_set_paused(engine, 1);
defer_active(engine);
@@ -1521,20 +1775,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
*/
if (!execlists->timer.expires &&
need_timeslice(engine, last))
- mod_timer(&execlists->timer,
- jiffies + 1);
+ set_timer_ms(&execlists->timer,
+ timeslice(engine));
+
return;
}
-
- /*
- * WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent
- * ring:HEAD == rq:TAIL as we resubmit the
- * request. See gen8_emit_fini_breadcrumb() for
- * where we prepare the padding after the
- * end of the request.
- */
- last->tail = last->wa_tail;
}
}
@@ -1556,7 +1801,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(rq != ve->request);
GEM_BUG_ON(rq->engine != &ve->base);
- GEM_BUG_ON(rq->hw_context != &ve->context);
+ GEM_BUG_ON(rq->context != &ve->context);
if (rq_prio(rq) >= queue_prio(execlists)) {
if (!virtual_matches(ve, rq, engine)) {
@@ -1570,14 +1815,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
return; /* leave this for another */
}
- GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
- engine->name,
- rq->fence.context,
- rq->fence.seqno,
- i915_request_completed(rq) ? "!" :
- i915_request_started(rq) ? "*" :
- "",
- yesno(engine != ve->siblings[0]));
+ ENGINE_TRACE(engine,
+ "virtual rq=%llx:%lld%s, new engine? %s\n",
+ rq->fence.context,
+ rq->fence.seqno,
+ i915_request_completed(rq) ? "!" :
+ i915_request_started(rq) ? "*" :
+ "",
+ yesno(engine != ve->siblings[0]));
ve->request = NULL;
ve->base.execlists.queue_priority_hint = INT_MIN;
@@ -1675,7 +1920,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* same LRCA, i.e. we must submit 2 different
* contexts if we submit 2 ELSP.
*/
- if (last->hw_context == rq->hw_context)
+ if (last->context == rq->context)
goto done;
if (i915_request_has_sentinel(last))
@@ -1688,8 +1933,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* the same context (even though a different
* request) to the second port.
*/
- if (ctx_single_port_submission(last->hw_context) ||
- ctx_single_port_submission(rq->hw_context))
+ if (ctx_single_port_submission(last->context) ||
+ ctx_single_port_submission(rq->context))
goto done;
merge = false;
@@ -1703,8 +1948,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
GEM_BUG_ON(last &&
- !can_merge_ctx(last->hw_context,
- rq->hw_context));
+ !can_merge_ctx(last->context,
+ rq->context));
submit = true;
last = rq;
@@ -1733,9 +1978,6 @@ done:
* interrupt for secondary ports).
*/
execlists->queue_priority_hint = queue_prio(execlists);
- GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n",
- engine->name, execlists->queue_priority_hint,
- yesno(submit));
if (submit) {
*port = execlists_schedule_in(last, port - execlists->pending);
@@ -1757,6 +1999,8 @@ done:
memset(port + 1, 0, (last_port - port) * sizeof(*port));
execlists_submit_ports(engine);
+
+ set_preempt_timeout(engine);
} else {
skip_submit:
ring_set_paused(engine, 0);
@@ -1766,16 +2010,17 @@ skip_submit:
static void
cancel_port_requests(struct intel_engine_execlists * const execlists)
{
- struct i915_request * const *port, *rq;
+ struct i915_request * const *port;
- for (port = execlists->pending; (rq = *port); port++)
- execlists_schedule_out(rq);
+ for (port = execlists->pending; *port; port++)
+ execlists_schedule_out(*port);
memset(execlists->pending, 0, sizeof(execlists->pending));
- for (port = execlists->active; (rq = *port); port++)
- execlists_schedule_out(rq);
- execlists->active =
- memset(execlists->inflight, 0, sizeof(execlists->inflight));
+ /* Mark the end of active before we overwrite *active */
+ for (port = xchg(&execlists->active, execlists->pending); *port; port++)
+ execlists_schedule_out(*port);
+ WRITE_ONCE(execlists->active,
+ memset(execlists->inflight, 0, sizeof(execlists->inflight)));
}
static inline void
@@ -1867,7 +2112,7 @@ static void process_csb(struct intel_engine_cs *engine)
*/
GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
!reset_in_progress(execlists));
- GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
+ GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
/*
* Note that csb_write, csb_status may be either in HWSP or mmio.
@@ -1881,7 +2126,7 @@ static void process_csb(struct intel_engine_cs *engine)
*/
head = execlists->csb_head;
tail = READ_ONCE(*execlists->csb_write);
- GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
+ ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
if (unlikely(head == tail))
return;
@@ -1919,35 +2164,35 @@ static void process_csb(struct intel_engine_cs *engine)
* status notifier.
*/
- GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n",
- engine->name, head,
- buf[2 * head + 0], buf[2 * head + 1]);
+ ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
+ head, buf[2 * head + 0], buf[2 * head + 1]);
if (INTEL_GEN(engine->i915) >= 12)
promote = gen12_csb_parse(execlists, buf + 2 * head);
else
promote = gen8_csb_parse(execlists, buf + 2 * head);
if (promote) {
+ struct i915_request * const *old = execlists->active;
+
+ /* Point active to the new ELSP; prevent overwriting */
+ WRITE_ONCE(execlists->active, execlists->pending);
+ set_timeslice(engine);
+
if (!inject_preempt_hang(execlists))
ring_set_paused(engine, 0);
/* cancel old inflight, prepare for switch */
- trace_ports(execlists, "preempted", execlists->active);
- while (*execlists->active)
- execlists_schedule_out(*execlists->active++);
+ trace_ports(execlists, "preempted", old);
+ while (*old)
+ execlists_schedule_out(*old++);
/* switch pending to inflight */
GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
- execlists->active =
- memcpy(execlists->inflight,
- execlists->pending,
- execlists_num_ports(execlists) *
- sizeof(*execlists->pending));
-
- if (enable_timeslice(execlists))
- mod_timer(&execlists->timer, jiffies + 1);
- else
- cancel_timer(&execlists->timer);
+ WRITE_ONCE(execlists->active,
+ memcpy(execlists->inflight,
+ execlists->pending,
+ execlists_num_ports(execlists) *
+ sizeof(*execlists->pending)));
WRITE_ONCE(execlists->pending[0], NULL);
} else {
@@ -1997,6 +2242,42 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
}
}
+static noinline void preempt_reset(struct intel_engine_cs *engine)
+{
+ const unsigned int bit = I915_RESET_ENGINE + engine->id;
+ unsigned long *lock = &engine->gt->reset.flags;
+
+ if (i915_modparams.reset < 3)
+ return;
+
+ if (test_and_set_bit(bit, lock))
+ return;
+
+ /* Mark this tasklet as disabled to avoid waiting for it to complete */
+ tasklet_disable_nosync(&engine->execlists.tasklet);
+
+ ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
+ READ_ONCE(engine->props.preempt_timeout_ms),
+ jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
+ intel_engine_reset(engine, "preemption time out");
+
+ tasklet_enable(&engine->execlists.tasklet);
+ clear_and_wake_up_bit(bit, lock);
+}
+
+static bool preempt_timeout(const struct intel_engine_cs *const engine)
+{
+ const struct timer_list *t = &engine->execlists.preempt;
+
+ if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
+ return false;
+
+ if (!timer_expired(t))
+ return false;
+
+ return READ_ONCE(engine->execlists.pending[0]);
+}
+
/*
* Check the unread Context Status Buffers and manage the submission of new
* contexts to the ELSP accordingly.
@@ -2004,23 +2285,39 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
static void execlists_submission_tasklet(unsigned long data)
{
struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
- unsigned long flags;
+ bool timeout = preempt_timeout(engine);
process_csb(engine);
- if (!READ_ONCE(engine->execlists.pending[0])) {
+ if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
+ unsigned long flags;
+
spin_lock_irqsave(&engine->active.lock, flags);
__execlists_submission_tasklet(engine);
spin_unlock_irqrestore(&engine->active.lock, flags);
+
+ /* Recheck after serialising with direct-submission */
+ if (timeout && preempt_timeout(engine))
+ preempt_reset(engine);
}
}
-static void execlists_submission_timer(struct timer_list *timer)
+static void __execlists_kick(struct intel_engine_execlists *execlists)
{
- struct intel_engine_cs *engine =
- from_timer(engine, timer, execlists.timer);
-
/* Kick the tasklet for some interrupt coalescing and reset handling */
- tasklet_hi_schedule(&engine->execlists.tasklet);
+ tasklet_hi_schedule(&execlists->tasklet);
+}
+
+#define execlists_kick(t, member) \
+ __execlists_kick(container_of(t, struct intel_engine_execlists, member))
+
+static void execlists_timeslice(struct timer_list *timer)
+{
+ execlists_kick(timer, timer);
+}
+
+static void execlists_preempt(struct timer_list *timer)
+{
+ execlists_kick(timer, preempt);
}
static void queue_request(struct intel_engine_cs *engine,
@@ -2100,7 +2397,6 @@ set_redzone(void *vaddr, const struct intel_engine_cs *engine)
if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
return;
- vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
vaddr += engine->context_size;
memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
@@ -2112,7 +2408,6 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
return;
- vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
vaddr += engine->context_size;
if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE))
@@ -2140,7 +2435,7 @@ __execlists_update_reg_state(const struct intel_context *ce,
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
- regs[CTX_RING_BUFFER_START] = i915_ggtt_offset(ring->vma);
+ regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
regs[CTX_RING_HEAD] = ring->head;
regs[CTX_RING_TAIL] = ring->tail;
@@ -2268,7 +2563,7 @@ static int execlists_request_alloc(struct i915_request *request)
{
int ret;
- GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
+ GEM_BUG_ON(!intel_context_is_pinned(request->context));
/*
* Flush enough space to reduce the likelihood of waiting after
@@ -2669,8 +2964,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
struct intel_engine_execlists * const execlists = &engine->execlists;
unsigned long flags;
- GEM_TRACE("%s: depth<-%d\n", engine->name,
- atomic_read(&execlists->tasklet.count));
+ ENGINE_TRACE(engine, "depth<-%d\n",
+ atomic_read(&execlists->tasklet.count));
/*
* Prevent request submission to the hardware until we have
@@ -2723,41 +3018,28 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
WRITE_ONCE(*execlists->csb_write, reset_value);
wmb(); /* Make sure this is visible to HW (paranoia?) */
+ /*
+ * Sometimes Icelake forgets to reset its pointers on a GPU reset.
+ * Bludgeon them with a mmio update to be sure.
+ */
+ ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
+ reset_value << 8 | reset_value);
+ ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
+
invalidate_csb_entries(&execlists->csb_status[0],
&execlists->csb_status[reset_value]);
}
-static struct i915_request *active_request(struct i915_request *rq)
-{
- const struct intel_context * const ce = rq->hw_context;
- struct i915_request *active = NULL;
- struct list_head *list;
-
- if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
- return rq;
-
- list = &i915_request_active_timeline(rq)->requests;
- list_for_each_entry_from_reverse(rq, list, link) {
- if (i915_request_completed(rq))
- break;
-
- if (rq->hw_context != ce)
- break;
-
- active = rq;
- }
-
- return active;
-}
-
static void __execlists_reset_reg_state(const struct intel_context *ce,
const struct intel_engine_cs *engine)
{
u32 *regs = ce->lrc_reg_state;
+ int x;
- if (INTEL_GEN(engine->i915) >= 9) {
- regs[GEN9_CTX_RING_MI_MODE + 1] &= ~STOP_RING;
- regs[GEN9_CTX_RING_MI_MODE + 1] |= STOP_RING << 16;
+ x = lrc_ring_mi_mode(engine);
+ if (x != -1) {
+ regs[x + 1] &= ~STOP_RING;
+ regs[x + 1] |= STOP_RING << 16;
}
}
@@ -2766,7 +3048,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
struct intel_engine_execlists * const execlists = &engine->execlists;
struct intel_context *ce;
struct i915_request *rq;
- u32 *regs;
mb(); /* paranoia: read the CSB pointers from after the reset */
clflush(execlists->csb_write);
@@ -2789,22 +3070,20 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
/* We still have requests in-flight; the engine should be active */
GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
- ce = rq->hw_context;
+ ce = rq->context;
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
- /* Proclaim we have exclusive access to the context image! */
- __context_pin_acquire(ce);
-
- rq = active_request(rq);
- if (!rq) {
+ if (i915_request_completed(rq)) {
/* Idle context; tidy up the ring so we can restart afresh */
- ce->ring->head = ce->ring->tail;
+ ce->ring->head = intel_ring_wrap(ce->ring, rq->tail);
goto out_replay;
}
/* Context has requests still in-flight; it should not be idle! */
GEM_BUG_ON(i915_active_is_idle(&ce->active));
+ rq = active_request(ce->timeline, rq);
ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
+ GEM_BUG_ON(ce->ring->head == ce->ring->tail);
/*
* If this request hasn't started yet, e.g. it is waiting on a
@@ -2845,22 +3124,15 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* to recreate its own state.
*/
GEM_BUG_ON(!intel_context_is_pinned(ce));
- regs = ce->lrc_reg_state;
- if (engine->pinned_default_state) {
- memcpy(regs, /* skip restoring the vanilla PPHWSP */
- engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
- engine->context_size - PAGE_SIZE);
- }
- execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+ restore_default_state(ce, engine);
out_replay:
- GEM_TRACE("%s replay {head:%04x, tail:%04x\n",
- engine->name, ce->ring->head, ce->ring->tail);
+ ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
+ ce->ring->head, ce->ring->tail);
intel_ring_update_space(ce->ring);
__execlists_reset_reg_state(ce, engine);
__execlists_update_reg_state(ce, engine);
ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
- __context_pin_release(ce);
unwind:
/* Push back any incomplete requests for replay after the reset. */
@@ -2868,11 +3140,11 @@ unwind:
__unwind_incomplete_requests(engine);
}
-static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
+static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
unsigned long flags;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
spin_lock_irqsave(&engine->active.lock, flags);
@@ -2886,14 +3158,14 @@ static void nop_submission_tasklet(unsigned long data)
/* The driver is wedged; don't process any more events. */
}
-static void execlists_cancel_requests(struct intel_engine_cs *engine)
+static void execlists_reset_cancel(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request *rq, *rn;
struct rb_node *rb;
unsigned long flags;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
/*
* Before we call engine->cancel_requests(), we should have exclusive
@@ -2980,13 +3252,13 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
if (__tasklet_enable(&execlists->tasklet))
/* And kick in case we missed a new request submission. */
tasklet_hi_schedule(&execlists->tasklet);
- GEM_TRACE("%s: depth->%d\n", engine->name,
- atomic_read(&execlists->tasklet.count));
+ ENGINE_TRACE(engine, "depth->%d\n",
+ atomic_read(&execlists->tasklet.count));
}
-static int gen8_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- const unsigned int flags)
+static int gen8_emit_bb_start_noarb(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
{
u32 *cs;
@@ -3020,7 +3292,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
return 0;
}
-static int gen9_emit_bb_start(struct i915_request *rq,
+static int gen8_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
{
@@ -3469,17 +3741,18 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
static void execlists_park(struct intel_engine_cs *engine)
{
cancel_timer(&engine->execlists.timer);
+ cancel_timer(&engine->execlists.preempt);
}
void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
{
engine->submit_request = execlists_submit_request;
- engine->cancel_requests = execlists_cancel_requests;
engine->schedule = i915_schedule;
engine->execlists.tasklet.func = execlists_submission_tasklet;
engine->reset.prepare = execlists_reset_prepare;
- engine->reset.reset = execlists_reset;
+ engine->reset.rewind = execlists_reset_rewind;
+ engine->reset.cancel = execlists_reset_cancel;
engine->reset.finish = execlists_reset_finish;
engine->park = execlists_park;
@@ -3494,13 +3767,27 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
if (INTEL_GEN(engine->i915) >= 12)
engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
+
+ if (intel_engine_has_preemption(engine))
+ engine->emit_bb_start = gen8_emit_bb_start;
+ else
+ engine->emit_bb_start = gen8_emit_bb_start_noarb;
}
-static void execlists_destroy(struct intel_engine_cs *engine)
+static void execlists_shutdown(struct intel_engine_cs *engine)
{
+ /* Synchronise with residual timers and any softirq they raise */
+ del_timer_sync(&engine->execlists.timer);
+ del_timer_sync(&engine->execlists.preempt);
+ tasklet_kill(&engine->execlists.tasklet);
+}
+
+static void execlists_release(struct intel_engine_cs *engine)
+{
+ execlists_shutdown(engine);
+
intel_engine_cleanup_common(engine);
lrc_destroy_wa_ctx(engine);
- kfree(engine);
}
static void
@@ -3508,13 +3795,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
{
/* Default vfuncs which can be overriden by each engine. */
- engine->destroy = execlists_destroy;
+ engine->release = execlists_release;
engine->resume = execlists_resume;
- engine->reset.prepare = execlists_reset_prepare;
- engine->reset.reset = execlists_reset;
- engine->reset.finish = execlists_reset_finish;
-
engine->cops = &execlists_context_ops;
engine->request_alloc = execlists_request_alloc;
@@ -3537,10 +3820,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
* until a more refined solution exists.
*/
}
- if (IS_GEN(engine->i915, 8))
- engine->emit_bb_start = gen8_emit_bb_start;
- else
- engine->emit_bb_start = gen9_emit_bb_start;
}
static inline void
@@ -3584,9 +3863,15 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
int intel_execlists_submission_setup(struct intel_engine_cs *engine)
{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct drm_i915_private *i915 = engine->i915;
+ struct intel_uncore *uncore = engine->uncore;
+ u32 base = engine->mmio_base;
+
tasklet_init(&engine->execlists.tasklet,
execlists_submission_tasklet, (unsigned long)engine);
- timer_setup(&engine->execlists.timer, execlists_submission_timer, 0);
+ timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
+ timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
logical_ring_default_vfuncs(engine);
logical_ring_default_irqs(engine);
@@ -3594,21 +3879,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
if (engine->class == RENDER_CLASS)
rcs_submission_override(engine);
- return 0;
-}
-
-int intel_execlists_submission_init(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
- struct drm_i915_private *i915 = engine->i915;
- struct intel_uncore *uncore = engine->uncore;
- u32 base = engine->mmio_base;
- int ret;
-
- ret = intel_engine_init_common(engine);
- if (ret)
- return ret;
-
if (intel_init_workaround_bb(engine))
/*
* We continue even if we fail to initialize WA batch
@@ -3689,7 +3959,7 @@ static void init_common_reg_state(u32 * const regs,
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE);
- regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
regs[CTX_BB_STATE] = RING_BB_PPGTT;
}
@@ -3796,12 +4066,6 @@ populate_lr_context(struct intel_context *ce,
set_redzone(vaddr, engine);
if (engine->default_state) {
- /*
- * We only want to copy over the template context state;
- * skipping over the headers reserved for GuC communication,
- * leaving those as zero.
- */
- const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE;
void *defaults;
defaults = i915_gem_object_pin_map(engine->default_state,
@@ -3811,8 +4075,9 @@ populate_lr_context(struct intel_context *ce,
goto err_unpin_ctx;
}
- memcpy(vaddr + start, defaults + start, engine->context_size);
+ memcpy(vaddr, defaults, engine->context_size);
i915_gem_object_unpin_map(engine->default_state);
+ __set_bit(CONTEXT_VALID_BIT, &ce->flags);
inhibit = false;
}
@@ -3826,9 +4091,7 @@ populate_lr_context(struct intel_context *ce,
ret = 0;
err_unpin_ctx:
- __i915_gem_object_flush_map(ctx_obj,
- LRC_HEADER_PAGES * PAGE_SIZE,
- engine->context_size);
+ __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
i915_gem_object_unpin_map(ctx_obj);
return ret;
}
@@ -3845,11 +4108,6 @@ static int __execlists_context_alloc(struct intel_context *ce,
GEM_BUG_ON(ce->state);
context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
- /*
- * Before the actual start of the context image, we insert a few pages
- * for our own use and for sharing with the GuC.
- */
- context_size += LRC_HEADER_PAGES * PAGE_SIZE;
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
context_size += I915_GTT_PAGE_SIZE; /* for redzone */
@@ -3917,17 +4175,18 @@ static void virtual_context_destroy(struct kref *kref)
for (n = 0; n < ve->num_siblings; n++) {
struct intel_engine_cs *sibling = ve->siblings[n];
struct rb_node *node = &ve->nodes[sibling->id].rb;
+ unsigned long flags;
if (RB_EMPTY_NODE(node))
continue;
- spin_lock_irq(&sibling->active.lock);
+ spin_lock_irqsave(&sibling->active.lock, flags);
/* Detachment is lazily performed in the execlists tasklet */
if (!RB_EMPTY_NODE(node))
rb_erase_cached(node, &sibling->execlists.virtual);
- spin_unlock_irq(&sibling->active.lock);
+ spin_unlock_irqrestore(&sibling->active.lock, flags);
}
GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
@@ -3966,6 +4225,13 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve)
ve->siblings[0]);
}
+static int virtual_context_alloc(struct intel_context *ce)
+{
+ struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+
+ return __execlists_context_alloc(ce, ve->siblings[0]);
+}
+
static int virtual_context_pin(struct intel_context *ce)
{
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
@@ -4003,6 +4269,8 @@ static void virtual_context_exit(struct intel_context *ce)
}
static const struct intel_context_ops virtual_context_ops = {
+ .alloc = virtual_context_alloc,
+
.pin = virtual_context_pin,
.unpin = execlists_context_unpin,
@@ -4029,10 +4297,9 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
mask = ve->siblings[0]->mask;
}
- GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
- ve->base.name,
- rq->fence.context, rq->fence.seqno,
- mask, ve->base.execlists.queue_priority_hint);
+ ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
+ rq->fence.context, rq->fence.seqno,
+ mask, ve->base.execlists.queue_priority_hint);
return mask;
}
@@ -4123,10 +4390,9 @@ static void virtual_submit_request(struct i915_request *rq)
struct i915_request *old;
unsigned long flags;
- GEM_TRACE("%s: rq=%llx:%lld\n",
- ve->base.name,
- rq->fence.context,
- rq->fence.seqno);
+ ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
+ rq->fence.context,
+ rq->fence.seqno);
GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
@@ -4194,8 +4460,7 @@ virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
}
struct intel_context *
-intel_execlists_create_virtual(struct i915_gem_context *ctx,
- struct intel_engine_cs **siblings,
+intel_execlists_create_virtual(struct intel_engine_cs **siblings,
unsigned int count)
{
struct virtual_engine *ve;
@@ -4206,13 +4471,13 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
return ERR_PTR(-EINVAL);
if (count == 1)
- return intel_context_create(ctx, siblings[0]);
+ return intel_context_create(siblings[0]);
ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
if (!ve)
return ERR_PTR(-ENOMEM);
- ve->base.i915 = ctx->i915;
+ ve->base.i915 = siblings[0]->i915;
ve->base.gt = siblings[0]->gt;
ve->base.uncore = siblings[0]->uncore;
ve->base.id = -1;
@@ -4239,7 +4504,6 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
intel_engine_init_breadcrumbs(&ve->base);
-
intel_engine_init_execlists(&ve->base);
ve->base.cops = &virtual_context_ops;
@@ -4255,7 +4519,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
virtual_submission_tasklet,
(unsigned long)ve);
- intel_context_init(&ve->context, ctx, &ve->base);
+ intel_context_init(&ve->context, &ve->base);
for (n = 0; n < count; n++) {
struct intel_engine_cs *sibling = siblings[n];
@@ -4322,12 +4586,6 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
- err = __execlists_context_alloc(&ve->context, siblings[0]);
- if (err)
- goto err_put;
-
- __set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags);
-
return &ve->context;
err_put:
@@ -4336,14 +4594,12 @@ err_put:
}
struct intel_context *
-intel_execlists_clone_virtual(struct i915_gem_context *ctx,
- struct intel_engine_cs *src)
+intel_execlists_clone_virtual(struct intel_engine_cs *src)
{
struct virtual_engine *se = to_virtual_engine(src);
struct intel_context *dst;
- dst = intel_execlists_create_virtual(ctx,
- se->siblings,
+ dst = intel_execlists_create_virtual(se->siblings,
se->num_siblings);
if (IS_ERR(dst))
return dst;
@@ -4502,7 +4758,6 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
bool scrub)
{
GEM_BUG_ON(!intel_context_is_pinned(ce));
- __context_pin_acquire(ce);
/*
* We want a simple context + ring to execute the breadcrumb update.
@@ -4512,23 +4767,21 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
* future request will be after userspace has had the opportunity
* to recreate its own state.
*/
- if (scrub) {
- u32 *regs = ce->lrc_reg_state;
-
- if (engine->pinned_default_state) {
- memcpy(regs, /* skip restoring the vanilla PPHWSP */
- engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
- engine->context_size - PAGE_SIZE);
- }
- execlists_init_reg_state(regs, ce, engine, ce->ring, false);
- }
+ if (scrub)
+ restore_default_state(ce, engine);
/* Rerun the request; its payload has been neutered (if guilty). */
ce->ring->head = head;
intel_ring_update_space(ce->ring);
__execlists_update_reg_state(ce, engine);
- __context_pin_release(ce);
+}
+
+bool
+intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
+{
+ return engine->set_default_submission ==
+ intel_execlists_set_default_submission;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 99dc576a4e25..dfbc214e14f5 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -43,6 +43,7 @@ struct intel_engine_cs;
#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0)
#define CTX_CTRL_RS_CTX_ENABLE (1 << 1)
#define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2)
+#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8)
#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0)
#define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510)
#define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550)
@@ -82,34 +83,14 @@ enum {
void intel_logical_ring_cleanup(struct intel_engine_cs *engine);
int intel_execlists_submission_setup(struct intel_engine_cs *engine);
-int intel_execlists_submission_init(struct intel_engine_cs *engine);
/* Logical Ring Contexts */
-
-/*
- * We allocate a header at the start of the context image for our own
- * use, therefore the actual location of the logical state is offset
- * from the start of the VMA. The layout is
- *
- * | [guc] | [hwsp] [logical state] |
- * |<- our header ->|<- context image ->|
- *
- */
-/* The first page is used for sharing data with the GuC */
-#define LRC_GUCSHR_PN (0)
-#define LRC_GUCSHR_SZ (1)
/* At the start of the context image is its per-process HWS page */
-#define LRC_PPHWSP_PN (LRC_GUCSHR_PN + LRC_GUCSHR_SZ)
+#define LRC_PPHWSP_PN (0)
#define LRC_PPHWSP_SZ (1)
-/* Finally we have the logical state for the context */
+/* After the PPHWSP we have the logical state for the context */
#define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ)
-/*
- * Currently we include the PPHWSP in __intel_engine_context_size() so
- * the size of the header is synonymous with the start of the PPHWSP.
- */
-#define LRC_HEADER_PAGES LRC_PPHWSP_PN
-
/* Space within PPHWSP reserved to be used as scratch */
#define LRC_PPHWSP_SCRATCH 0x34
#define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32))
@@ -129,13 +110,11 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
unsigned int max);
struct intel_context *
-intel_execlists_create_virtual(struct i915_gem_context *ctx,
- struct intel_engine_cs **siblings,
+intel_execlists_create_virtual(struct intel_engine_cs **siblings,
unsigned int count);
struct intel_context *
-intel_execlists_clone_virtual(struct i915_gem_context *ctx,
- struct intel_engine_cs *src);
+intel_execlists_clone_virtual(struct intel_engine_cs *src);
int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
const struct intel_engine_cs *master,
@@ -145,4 +124,7 @@ struct intel_engine_cs *
intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
unsigned int sibling);
+bool
+intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
+
#endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 06ab0276e10e..08a3be65f700 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -13,8 +13,8 @@
#define CTX_CONTEXT_CONTROL (0x02 + 1)
#define CTX_RING_HEAD (0x04 + 1)
#define CTX_RING_TAIL (0x06 + 1)
-#define CTX_RING_BUFFER_START (0x08 + 1)
-#define CTX_RING_BUFFER_CONTROL (0x0a + 1)
+#define CTX_RING_START (0x08 + 1)
+#define CTX_RING_CTL (0x0a + 1)
#define CTX_BB_STATE (0x10 + 1)
#define CTX_BB_PER_CTX_PTR (0x18 + 1)
#define CTX_PDP3_UDW (0x24 + 1)
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 5bac3966906b..893249ea48d4 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -26,6 +26,7 @@
#include "intel_gt.h"
#include "intel_mocs.h"
#include "intel_lrc.h"
+#include "intel_ring.h"
/* structures required */
struct drm_i915_mocs_entry {
@@ -199,14 +200,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
MOCS_ENTRY(15, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
L3_3_WB), \
- /* Bypass LLC - Uncached (EHL+) */ \
- MOCS_ENTRY(16, \
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
- L3_1_UC), \
- /* Bypass LLC - L3 (Read-Only) (EHL+) */ \
- MOCS_ENTRY(17, \
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
- L3_3_WB), \
/* Self-Snoop - L3 + LLC */ \
MOCS_ENTRY(18, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
@@ -270,7 +263,7 @@ static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = {
L3_1_UC),
/* HW Special Case (Displayable) */
MOCS_ENTRY(61,
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1),
+ LE_1_UC | LE_TC_1_LLC,
L3_3_WB),
};
@@ -290,65 +283,42 @@ static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
static bool get_mocs_settings(const struct drm_i915_private *i915,
struct drm_i915_mocs_table *table)
{
- bool result = false;
-
if (INTEL_GEN(i915) >= 12) {
table->size = ARRAY_SIZE(tigerlake_mocs_table);
table->table = tigerlake_mocs_table;
table->n_entries = GEN11_NUM_MOCS_ENTRIES;
- result = true;
} else if (IS_GEN(i915, 11)) {
table->size = ARRAY_SIZE(icelake_mocs_table);
table->table = icelake_mocs_table;
table->n_entries = GEN11_NUM_MOCS_ENTRIES;
- result = true;
} else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) {
table->size = ARRAY_SIZE(skylake_mocs_table);
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->table = skylake_mocs_table;
- result = true;
} else if (IS_GEN9_LP(i915)) {
table->size = ARRAY_SIZE(broxton_mocs_table);
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->table = broxton_mocs_table;
- result = true;
} else {
WARN_ONCE(INTEL_GEN(i915) >= 9,
"Platform that should have a MOCS table does not.\n");
+ return false;
}
+ if (GEM_DEBUG_WARN_ON(table->size > table->n_entries))
+ return false;
+
/* WaDisableSkipCaching:skl,bxt,kbl,glk */
if (IS_GEN(i915, 9)) {
int i;
for (i = 0; i < table->size; i++)
- if (WARN_ON(table->table[i].l3cc_value &
- (L3_ESC(1) | L3_SCC(0x7))))
+ if (GEM_DEBUG_WARN_ON(table->table[i].l3cc_value &
+ (L3_ESC(1) | L3_SCC(0x7))))
return false;
}
- return result;
-}
-
-static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index)
-{
- switch (engine->id) {
- case RCS0:
- return GEN9_GFX_MOCS(index);
- case VCS0:
- return GEN9_MFX0_MOCS(index);
- case BCS0:
- return GEN9_BLT_MOCS(index);
- case VECS0:
- return GEN9_VEBOX_MOCS(index);
- case VCS1:
- return GEN9_MFX1_MOCS(index);
- case VCS2:
- return GEN11_MFX2_MOCS(index);
- default:
- MISSING_CASE(engine->id);
- return INVALID_MMIO_REG;
- }
+ return true;
}
/*
@@ -358,29 +328,47 @@ static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index)
static u32 get_entry_control(const struct drm_i915_mocs_table *table,
unsigned int index)
{
- if (table->table[index].used)
+ if (index < table->size && table->table[index].used)
return table->table[index].control_value;
return table->table[I915_MOCS_PTE].control_value;
}
-static void init_mocs_table(struct intel_engine_cs *engine,
- const struct drm_i915_mocs_table *table)
+#define for_each_mocs(mocs, t, i) \
+ for (i = 0; \
+ i < (t)->n_entries ? (mocs = get_entry_control((t), i)), 1 : 0;\
+ i++)
+
+static void __init_mocs_table(struct intel_uncore *uncore,
+ const struct drm_i915_mocs_table *table,
+ u32 addr)
{
- struct intel_uncore *uncore = engine->uncore;
- u32 unused_value = table->table[I915_MOCS_PTE].control_value;
unsigned int i;
+ u32 mocs;
+
+ for_each_mocs(mocs, table, i)
+ intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs);
+}
- for (i = 0; i < table->size; i++)
- intel_uncore_write_fw(uncore,
- mocs_register(engine, i),
- get_entry_control(table, i));
+static u32 mocs_offset(const struct intel_engine_cs *engine)
+{
+ static const u32 offset[] = {
+ [RCS0] = __GEN9_RCS0_MOCS0,
+ [VCS0] = __GEN9_VCS0_MOCS0,
+ [VCS1] = __GEN9_VCS1_MOCS0,
+ [VECS0] = __GEN9_VECS0_MOCS0,
+ [BCS0] = __GEN9_BCS0_MOCS0,
+ [VCS2] = __GEN11_VCS2_MOCS0,
+ };
+
+ GEM_BUG_ON(engine->id >= ARRAY_SIZE(offset));
+ return offset[engine->id];
+}
- /* All remaining entries are unused */
- for (; i < table->n_entries; i++)
- intel_uncore_write_fw(uncore,
- mocs_register(engine, i),
- unused_value);
+static void init_mocs_table(struct intel_engine_cs *engine,
+ const struct drm_i915_mocs_table *table)
+{
+ __init_mocs_table(engine->uncore, table, mocs_offset(engine));
}
/*
@@ -390,51 +378,34 @@ static void init_mocs_table(struct intel_engine_cs *engine,
static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table,
unsigned int index)
{
- if (table->table[index].used)
+ if (index < table->size && table->table[index].used)
return table->table[index].l3cc_value;
return table->table[I915_MOCS_PTE].l3cc_value;
}
-static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
- u16 low,
- u16 high)
+static inline u32 l3cc_combine(u16 low, u16 high)
{
return low | (u32)high << 16;
}
+#define for_each_l3cc(l3cc, t, i) \
+ for (i = 0; \
+ i < ((t)->n_entries + 1) / 2 ? \
+ (l3cc = l3cc_combine(get_entry_l3cc((t), 2 * i), \
+ get_entry_l3cc((t), 2 * i + 1))), 1 : \
+ 0; \
+ i++)
+
static void init_l3cc_table(struct intel_engine_cs *engine,
const struct drm_i915_mocs_table *table)
{
struct intel_uncore *uncore = engine->uncore;
- u16 unused_value = table->table[I915_MOCS_PTE].l3cc_value;
unsigned int i;
+ u32 l3cc;
- for (i = 0; i < table->size / 2; i++) {
- u16 low = get_entry_l3cc(table, 2 * i);
- u16 high = get_entry_l3cc(table, 2 * i + 1);
-
- intel_uncore_write(uncore,
- GEN9_LNCFCMOCS(i),
- l3cc_combine(table, low, high));
- }
-
- /* Odd table size - 1 left over */
- if (table->size & 1) {
- u16 low = get_entry_l3cc(table, 2 * i);
-
- intel_uncore_write(uncore,
- GEN9_LNCFCMOCS(i),
- l3cc_combine(table, low, unused_value));
- i++;
- }
-
- /* All remaining entries are also unused */
- for (; i < table->n_entries / 2; i++)
- intel_uncore_write(uncore,
- GEN9_LNCFCMOCS(i),
- l3cc_combine(table, unused_value,
- unused_value));
+ for_each_l3cc(l3cc, table, i)
+ intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc);
}
void intel_mocs_init_engine(struct intel_engine_cs *engine)
@@ -455,38 +426,33 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
init_l3cc_table(engine, &table);
}
-static void intel_mocs_init_global(struct intel_gt *gt)
+static u32 global_mocs_offset(void)
{
- struct intel_uncore *uncore = gt->uncore;
- struct drm_i915_mocs_table table;
- unsigned int index;
+ return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
+}
- GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915));
+static void init_global_mocs(struct intel_gt *gt)
+{
+ struct drm_i915_mocs_table table;
- if (!get_mocs_settings(gt->i915, &table))
+ /*
+ * LLC and eDRAM control values are not applicable to dgfx
+ */
+ if (IS_DGFX(gt->i915))
return;
- if (GEM_DEBUG_WARN_ON(table.size > table.n_entries))
+ if (!get_mocs_settings(gt->i915, &table))
return;
- for (index = 0; index < table.size; index++)
- intel_uncore_write(uncore,
- GEN12_GLOBAL_MOCS(index),
- table.table[index].control_value);
-
- /*
- * Ok, now set the unused entries to the invalid entry (index 0). These
- * entries are officially undefined and no contract for the contents and
- * settings is given for these entries.
- */
- for (; index < table.n_entries; index++)
- intel_uncore_write(uncore,
- GEN12_GLOBAL_MOCS(index),
- table.table[0].control_value);
+ __init_mocs_table(gt->uncore, &table, global_mocs_offset());
}
void intel_mocs_init(struct intel_gt *gt)
{
if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
- intel_mocs_init_global(gt);
+ init_global_mocs(gt);
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_mocs.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 70f0e01a38b9..9e303c29d6e3 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -88,21 +88,18 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
* do not want the enable hysteresis to less than the wakeup latency.
*
* igt/gem_exec_nop/sequential provides a rough estimate for the
- * service latency, and puts it around 10us for Broadwell (and other
- * big core) and around 40us for Broxton (and other low power cores).
- * [Note that for legacy ringbuffer submission, this is less than 1us!]
- * However, the wakeup latency on Broxton is closer to 100us. To be
- * conservative, we have to factor in a context switch on top (due
- * to ksoftirqd).
+ * service latency, and puts it under 10us for Icelake, similar to
+ * Broadwell+, To be conservative, we want to factor in a context
+ * switch on top (due to ksoftirqd).
*/
- set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
- set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
+ set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60);
+ set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);
/* 3a: Enable RC6 */
- set(uncore, GEN6_RC_CONTROL,
- GEN6_RC_CTL_HW_ENABLE |
- GEN6_RC_CTL_RC6_ENABLE |
- GEN6_RC_CTL_EI_MODE(1));
+ rc6->ctl_enable =
+ GEN6_RC_CTL_HW_ENABLE |
+ GEN6_RC_CTL_RC6_ENABLE |
+ GEN6_RC_CTL_EI_MODE(1);
set(uncore, GEN9_PG_ENABLE,
GEN9_RENDER_PG_ENABLE |
@@ -173,13 +170,18 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
else
rc6_mode = GEN6_RC_CTL_EI_MODE(1);
- set(uncore, GEN6_RC_CONTROL,
- GEN6_RC_CTL_HW_ENABLE |
- GEN6_RC_CTL_RC6_ENABLE |
- rc6_mode);
+ rc6->ctl_enable =
+ GEN6_RC_CTL_HW_ENABLE |
+ GEN6_RC_CTL_RC6_ENABLE |
+ rc6_mode;
- set(uncore, GEN9_PG_ENABLE,
- GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
+ /*
+ * WaRsDisableCoarsePowerGating:skl,cnl
+ * - Render/Media PG need to be disabled with RC6.
+ */
+ if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6)))
+ set(uncore, GEN9_PG_ENABLE,
+ GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
}
static void gen8_rc6_enable(struct intel_rc6 *rc6)
@@ -198,10 +200,10 @@ static void gen8_rc6_enable(struct intel_rc6 *rc6)
set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
/* 3: Enable RC6 */
- set(uncore, GEN6_RC_CONTROL,
+ rc6->ctl_enable =
GEN6_RC_CTL_HW_ENABLE |
GEN7_RC_CTL_TO_MODE |
- GEN6_RC_CTL_RC6_ENABLE);
+ GEN6_RC_CTL_RC6_ENABLE;
}
static void gen6_rc6_enable(struct intel_rc6 *rc6)
@@ -237,10 +239,10 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6)
rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
if (HAS_RC6pp(i915))
rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
- set(uncore, GEN6_RC_CONTROL,
+ rc6->ctl_enable =
rc6_mask |
GEN6_RC_CTL_EI_MODE(1) |
- GEN6_RC_CTL_HW_ENABLE);
+ GEN6_RC_CTL_HW_ENABLE;
rc6vids = 0;
ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
@@ -358,7 +360,7 @@ static void chv_rc6_enable(struct intel_rc6 *rc6)
VLV_RENDER_RC6_COUNT_EN));
/* 3: Enable RC6 */
- set(uncore, GEN6_RC_CONTROL, GEN7_RC_CTL_TO_MODE);
+ rc6->ctl_enable = GEN7_RC_CTL_TO_MODE;
}
static void vlv_rc6_enable(struct intel_rc6 *rc6)
@@ -384,8 +386,8 @@ static void vlv_rc6_enable(struct intel_rc6 *rc6)
VLV_MEDIA_RC6_COUNT_EN |
VLV_RENDER_RC6_COUNT_EN));
- set(uncore, GEN6_RC_CONTROL,
- GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
+ rc6->ctl_enable =
+ GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
}
static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
@@ -486,6 +488,21 @@ static void rpm_put(struct intel_rc6 *rc6)
rc6->wakeref = false;
}
+static bool pctx_corrupted(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+
+ if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ return false;
+
+ if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO))
+ return false;
+
+ dev_notice(i915->drm.dev,
+ "RC6 context corruption, disabling runtime power management\n");
+ return true;
+}
+
static void __intel_rc6_disable(struct intel_rc6 *rc6)
{
struct drm_i915_private *i915 = rc6_to_i915(rc6);
@@ -525,6 +542,11 @@ void intel_rc6_init(struct intel_rc6 *rc6)
void intel_rc6_sanitize(struct intel_rc6 *rc6)
{
+ if (rc6->enabled) { /* unbalanced suspend/resume */
+ rpm_get(rc6);
+ rc6->enabled = false;
+ }
+
if (rc6->supported)
__intel_rc6_disable(rc6);
}
@@ -554,13 +576,51 @@ void intel_rc6_enable(struct intel_rc6 *rc6)
else if (INTEL_GEN(i915) >= 6)
gen6_rc6_enable(rc6);
+ rc6->manual = rc6->ctl_enable & GEN6_RC_CTL_RC6_ENABLE;
+ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ rc6->ctl_enable = 0;
+
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+ if (unlikely(pctx_corrupted(rc6)))
+ return;
+
/* rc6 is ready, runtime-pm is go! */
rpm_put(rc6);
rc6->enabled = true;
}
+void intel_rc6_unpark(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+ if (!rc6->enabled)
+ return;
+
+ /* Restore HW timers for automatic RC6 entry while busy */
+ set(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
+}
+
+void intel_rc6_park(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+ if (!rc6->enabled)
+ return;
+
+ if (unlikely(pctx_corrupted(rc6))) {
+ intel_rc6_disable(rc6);
+ return;
+ }
+
+ if (!rc6->manual)
+ return;
+
+ /* Turn off the HW timers and go directly to rc6 */
+ set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);
+ set(uncore, GEN6_RC_STATE, 0x4 << RC_SW_TARGET_STATE_SHIFT);
+}
+
void intel_rc6_disable(struct intel_rc6 *rc6)
{
if (!rc6->enabled)
@@ -710,3 +770,7 @@ u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg)
{
return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000);
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_rc6.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h
index 5e6711f36457..9f0f23fca8af 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.h
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.h
@@ -15,6 +15,9 @@ struct intel_rc6;
void intel_rc6_init(struct intel_rc6 *rc6);
void intel_rc6_fini(struct intel_rc6 *rc6);
+void intel_rc6_unpark(struct intel_rc6 *rc6);
+void intel_rc6_park(struct intel_rc6 *rc6);
+
void intel_rc6_sanitize(struct intel_rc6 *rc6);
void intel_rc6_enable(struct intel_rc6 *rc6);
void intel_rc6_disable(struct intel_rc6 *rc6);
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
index 214f354d6ae4..bfbb623f7a4f 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
@@ -18,10 +18,13 @@ struct intel_rc6 {
u64 prev_hw_residency[4];
u64 cur_residency[4];
+ u32 ctl_enable;
+
struct drm_i915_gem_object *pctx;
bool supported : 1;
bool enabled : 1;
+ bool manual : 1;
bool wakeref : 1;
};
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index 6d05f9c64178..5954ecc3207f 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -27,16 +27,7 @@
#include "i915_drv.h"
#include "intel_renderstate.h"
-
-struct intel_renderstate {
- const struct intel_renderstate_rodata *rodata;
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
- u32 batch_offset;
- u32 batch_size;
- u32 aux_offset;
- u32 aux_size;
-};
+#include "intel_ring.h"
static const struct intel_renderstate_rodata *
render_state_get_rodata(const struct intel_engine_cs *engine)
@@ -83,11 +74,11 @@ static int render_state_setup(struct intel_renderstate *so,
u32 *d;
int ret;
- ret = i915_gem_object_prepare_write(so->obj, &needs_clflush);
+ ret = i915_gem_object_prepare_write(so->vma->obj, &needs_clflush);
if (ret)
return ret;
- d = kmap_atomic(i915_gem_object_get_dirty_page(so->obj, 0));
+ d = kmap_atomic(i915_gem_object_get_dirty_page(so->vma->obj, 0));
while (i < rodata->batch_items) {
u32 s = rodata->batch[i];
@@ -165,7 +156,7 @@ static int render_state_setup(struct intel_renderstate *so,
ret = 0;
out:
- i915_gem_object_finish_access(so->obj);
+ i915_gem_object_finish_access(so->vma->obj);
return ret;
err:
@@ -176,61 +167,84 @@ err:
#undef OUT_BATCH
-int intel_renderstate_emit(struct i915_request *rq)
+int intel_renderstate_init(struct intel_renderstate *so,
+ struct intel_engine_cs *engine)
{
- struct intel_engine_cs *engine = rq->engine;
- struct intel_renderstate so = {}; /* keep the compiler happy */
+ struct drm_i915_gem_object *obj;
int err;
- so.rodata = render_state_get_rodata(engine);
- if (!so.rodata)
+ memset(so, 0, sizeof(*so));
+
+ so->rodata = render_state_get_rodata(engine);
+ if (!so->rodata)
return 0;
- if (so.rodata->batch_items * 4 > PAGE_SIZE)
+ if (so->rodata->batch_items * 4 > PAGE_SIZE)
return -EINVAL;
- so.obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
- if (IS_ERR(so.obj))
- return PTR_ERR(so.obj);
+ obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- so.vma = i915_vma_instance(so.obj, &engine->gt->ggtt->vm, NULL);
- if (IS_ERR(so.vma)) {
- err = PTR_ERR(so.vma);
+ so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+ if (IS_ERR(so->vma)) {
+ err = PTR_ERR(so->vma);
goto err_obj;
}
- err = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
goto err_vma;
- err = render_state_setup(&so, rq->i915);
+ err = render_state_setup(so, engine->i915);
if (err)
goto err_unpin;
+ return 0;
+
+err_unpin:
+ i915_vma_unpin(so->vma);
+err_vma:
+ i915_vma_close(so->vma);
+err_obj:
+ i915_gem_object_put(obj);
+ so->vma = NULL;
+ return err;
+}
+
+int intel_renderstate_emit(struct intel_renderstate *so,
+ struct i915_request *rq)
+{
+ struct intel_engine_cs *engine = rq->engine;
+ int err;
+
+ if (!so->vma)
+ return 0;
+
err = engine->emit_bb_start(rq,
- so.batch_offset, so.batch_size,
+ so->batch_offset, so->batch_size,
I915_DISPATCH_SECURE);
if (err)
- goto err_unpin;
+ return err;
- if (so.aux_size > 8) {
+ if (so->aux_size > 8) {
err = engine->emit_bb_start(rq,
- so.aux_offset, so.aux_size,
+ so->aux_offset, so->aux_size,
I915_DISPATCH_SECURE);
if (err)
- goto err_unpin;
+ return err;
}
- i915_vma_lock(so.vma);
- err = i915_request_await_object(rq, so.vma->obj, false);
+ i915_vma_lock(so->vma);
+ err = i915_request_await_object(rq, so->vma->obj, false);
if (err == 0)
- err = i915_vma_move_to_active(so.vma, rq, 0);
- i915_vma_unlock(so.vma);
-err_unpin:
- i915_vma_unpin(so.vma);
-err_vma:
- i915_vma_close(so.vma);
-err_obj:
- i915_gem_object_put(so.obj);
+ err = i915_vma_move_to_active(so->vma, rq, 0);
+ i915_vma_unlock(so->vma);
+
return err;
}
+
+void intel_renderstate_fini(struct intel_renderstate *so)
+{
+ i915_vma_unpin_and_release(&so->vma, 0);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h
index 8d5079145054..5700be69a05a 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.h
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h
@@ -27,6 +27,8 @@
#include <linux/types.h>
struct i915_request;
+struct intel_engine_cs;
+struct i915_vma;
struct intel_renderstate_rodata {
const u32 *reloc;
@@ -46,6 +48,19 @@ extern const struct intel_renderstate_rodata gen7_null_state;
extern const struct intel_renderstate_rodata gen8_null_state;
extern const struct intel_renderstate_rodata gen9_null_state;
-int intel_renderstate_emit(struct i915_request *rq);
+struct intel_renderstate {
+ const struct intel_renderstate_rodata *rodata;
+ struct i915_vma *vma;
+ u32 batch_offset;
+ u32 batch_size;
+ u32 aux_offset;
+ u32 aux_size;
+};
+
+int intel_renderstate_init(struct intel_renderstate *so,
+ struct intel_engine_cs *engine);
+int intel_renderstate_emit(struct intel_renderstate *so,
+ struct i915_request *rq);
+void intel_renderstate_fini(struct intel_renderstate *so);
#endif /* _INTEL_RENDERSTATE_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index bf8d1ed4b1d8..1c51296646e0 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -21,6 +21,7 @@
#include "intel_reset.h"
#include "uc/intel_guc.h"
+#include "uc/intel_guc_submission.h"
#define RESET_MAX_RETRIES 3
@@ -40,27 +41,29 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
static void engine_skip_context(struct i915_request *rq)
{
struct intel_engine_cs *engine = rq->engine;
- struct i915_gem_context *hung_ctx = rq->gem_context;
+ struct intel_context *hung_ctx = rq->context;
if (!i915_request_is_active(rq))
return;
lockdep_assert_held(&engine->active.lock);
list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
- if (rq->gem_context == hung_ctx)
+ if (rq->context == hung_ctx)
i915_request_skip(rq, -EIO);
}
-static void client_mark_guilty(struct drm_i915_file_private *file_priv,
- const struct i915_gem_context *ctx)
+static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
{
- unsigned int score;
+ struct drm_i915_file_private *file_priv = ctx->file_priv;
unsigned long prev_hang;
+ unsigned int score;
+
+ if (IS_ERR_OR_NULL(file_priv))
+ return;
- if (i915_gem_context_is_banned(ctx))
+ score = 0;
+ if (banned)
score = I915_CLIENT_SCORE_CONTEXT_BAN;
- else
- score = 0;
prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
@@ -75,17 +78,38 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv,
}
}
-static bool context_mark_guilty(struct i915_gem_context *ctx)
+static bool mark_guilty(struct i915_request *rq)
{
+ struct i915_gem_context *ctx;
unsigned long prev_hang;
bool banned;
int i;
+ rcu_read_lock();
+ ctx = rcu_dereference(rq->context->gem_context);
+ if (ctx && !kref_get_unless_zero(&ctx->ref))
+ ctx = NULL;
+ rcu_read_unlock();
+ if (!ctx)
+ return false;
+
+ if (i915_gem_context_is_closed(ctx)) {
+ intel_context_set_banned(rq->context);
+ banned = true;
+ goto out;
+ }
+
atomic_inc(&ctx->guilty_count);
/* Cool contexts are too cool to be banned! (Used for reset testing.) */
- if (!i915_gem_context_is_bannable(ctx))
- return false;
+ if (!i915_gem_context_is_bannable(ctx)) {
+ banned = false;
+ goto out;
+ }
+
+ dev_notice(ctx->i915->drm.dev,
+ "%s context reset due to GPU hang\n",
+ ctx->name);
/* Record the timestamp for the last N hangs */
prev_hang = ctx->hang_timestamp[0];
@@ -100,18 +124,25 @@ static bool context_mark_guilty(struct i915_gem_context *ctx)
if (banned) {
DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
ctx->name, atomic_read(&ctx->guilty_count));
- i915_gem_context_set_banned(ctx);
+ intel_context_set_banned(rq->context);
}
- if (!IS_ERR_OR_NULL(ctx->file_priv))
- client_mark_guilty(ctx->file_priv, ctx);
+ client_mark_guilty(ctx, banned);
+out:
+ i915_gem_context_put(ctx);
return banned;
}
-static void context_mark_innocent(struct i915_gem_context *ctx)
+static void mark_innocent(struct i915_request *rq)
{
- atomic_inc(&ctx->active_count);
+ struct i915_gem_context *ctx;
+
+ rcu_read_lock();
+ ctx = rcu_dereference(rq->context->gem_context);
+ if (ctx)
+ atomic_inc(&ctx->active_count);
+ rcu_read_unlock();
}
void __i915_request_reset(struct i915_request *rq, bool guilty)
@@ -124,14 +155,16 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
GEM_BUG_ON(i915_request_completed(rq));
+ rcu_read_lock(); /* protect the GEM context */
if (guilty) {
i915_request_skip(rq, -EIO);
- if (context_mark_guilty(rq->gem_context))
+ if (mark_guilty(rq))
engine_skip_context(rq);
} else {
dma_fence_set_error(&rq->fence, -EAGAIN);
- context_mark_innocent(rq->gem_context);
+ mark_innocent(rq);
}
+ rcu_read_unlock();
}
static bool i915_in_reset(struct pci_dev *pdev)
@@ -647,7 +680,8 @@ static void reset_prepare_engine(struct intel_engine_cs *engine)
* GPU state upon resume, i.e. fail to restart after a reset.
*/
intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
- engine->reset.prepare(engine);
+ if (engine->reset.prepare)
+ engine->reset.prepare(engine);
}
static void revoke_mmaps(struct intel_gt *gt)
@@ -667,8 +701,13 @@ static void revoke_mmaps(struct intel_gt *gt)
continue;
GEM_BUG_ON(vma->fence != &gt->ggtt->fence_regs[i]);
- node = &vma->obj->base.vma_node;
+
+ if (!vma->mmo)
+ continue;
+
+ node = &vma->mmo->vma_node;
vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
+
unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping,
drm_vma_node_offset_addr(node) + vma_offset,
vma->size,
@@ -722,10 +761,11 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
static void reset_finish_engine(struct intel_engine_cs *engine)
{
- engine->reset.finish(engine);
+ if (engine->reset.finish)
+ engine->reset.finish(engine);
intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
- intel_engine_breadcrumbs_irq(engine);
+ intel_engine_signal_breadcrumbs(engine);
}
static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
@@ -754,7 +794,7 @@ static void nop_submit_request(struct i915_request *request)
i915_request_mark_complete(request);
spin_unlock_irqrestore(&engine->active.lock, flags);
- intel_engine_queue_breadcrumbs(engine);
+ intel_engine_signal_breadcrumbs(engine);
}
static void __intel_gt_set_wedged(struct intel_gt *gt)
@@ -799,7 +839,8 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
/* Mark all executing requests as skipped */
for_each_engine(engine, gt, id)
- engine->cancel_requests(engine);
+ if (engine->reset.cancel)
+ engine->reset.cancel(engine);
reset_finish(gt, awake);
@@ -820,7 +861,6 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
{
struct intel_gt_timelines *timelines = &gt->timelines;
struct intel_timeline *tl;
- unsigned long flags;
bool ok;
if (!test_bit(I915_WEDGED, &gt->reset.flags))
@@ -842,7 +882,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
*
* No more can be submitted until we reset the wedged bit.
*/
- spin_lock_irqsave(&timelines->lock, flags);
+ spin_lock(&timelines->lock);
list_for_each_entry(tl, &timelines->active_list, link) {
struct dma_fence *fence;
@@ -850,7 +890,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
if (!fence)
continue;
- spin_unlock_irqrestore(&timelines->lock, flags);
+ spin_unlock(&timelines->lock);
/*
* All internal dependencies (i915_requests) will have
@@ -863,10 +903,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
dma_fence_put(fence);
/* Restart iteration after droping lock */
- spin_lock_irqsave(&timelines->lock, flags);
+ spin_lock(&timelines->lock);
tl = list_entry(&timelines->active_list, typeof(*tl), link);
}
- spin_unlock_irqrestore(&timelines->lock, flags);
+ spin_unlock(&timelines->lock);
/* We must reset pending GPU events before restoring our submission */
ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
@@ -1024,8 +1064,6 @@ void intel_gt_reset(struct intel_gt *gt,
if (ret)
goto taint;
- intel_gt_queue_hangcheck(gt);
-
finish:
reset_finish(gt, awake);
unlock:
@@ -1072,9 +1110,10 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine)
int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
{
struct intel_gt *gt = engine->gt;
+ bool uses_guc = intel_engine_in_guc_submission_mode(engine);
int ret;
- GEM_TRACE("%s flags=%lx\n", engine->name, gt->reset.flags);
+ ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags);
GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags));
if (!intel_engine_pm_get_if_awake(engine))
@@ -1087,14 +1126,14 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
"Resetting %s for %s\n", engine->name, msg);
atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
- if (!engine->gt->uc.guc.execbuf_client)
+ if (!uses_guc)
ret = intel_gt_reset_engine(engine);
else
ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
if (ret) {
/* If we fail here, we expect to fallback to a global reset */
DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
- engine->gt->uc.guc.execbuf_client ? "GuC " : "",
+ uses_guc ? "GuC " : "",
engine->name, ret);
goto out;
}
@@ -1116,7 +1155,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
out:
intel_engine_cancel_stop_cs(engine);
reset_finish_engine(engine);
- intel_engine_pm_put(engine);
+ intel_engine_pm_put_async(engine);
return ret;
}
@@ -1353,4 +1392,5 @@ void __intel_fini_wedge(struct intel_wedge_me *w)
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_reset.c"
+#include "selftest_hangcheck.c"
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
new file mode 100644
index 000000000000..374b28f13ca0
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -0,0 +1,318 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gem/i915_gem_object.h"
+#include "i915_drv.h"
+#include "i915_vma.h"
+#include "intel_engine.h"
+#include "intel_ring.h"
+#include "intel_timeline.h"
+
+unsigned int intel_ring_update_space(struct intel_ring *ring)
+{
+ unsigned int space;
+
+ space = __intel_ring_space(ring->head, ring->emit, ring->size);
+
+ ring->space = space;
+ return space;
+}
+
+int intel_ring_pin(struct intel_ring *ring)
+{
+ struct i915_vma *vma = ring->vma;
+ unsigned int flags;
+ void *addr;
+ int ret;
+
+ if (atomic_fetch_inc(&ring->pin_count))
+ return 0;
+
+ flags = PIN_GLOBAL;
+
+ /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
+ flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+
+ if (vma->obj->stolen)
+ flags |= PIN_MAPPABLE;
+ else
+ flags |= PIN_HIGH;
+
+ ret = i915_vma_pin(vma, 0, 0, flags);
+ if (unlikely(ret))
+ goto err_unpin;
+
+ if (i915_vma_is_map_and_fenceable(vma))
+ addr = (void __force *)i915_vma_pin_iomap(vma);
+ else
+ addr = i915_gem_object_pin_map(vma->obj,
+ i915_coherent_map_type(vma->vm->i915));
+ if (IS_ERR(addr)) {
+ ret = PTR_ERR(addr);
+ goto err_ring;
+ }
+
+ i915_vma_make_unshrinkable(vma);
+
+ /* Discard any unused bytes beyond that submitted to hw. */
+ intel_ring_reset(ring, ring->emit);
+
+ ring->vaddr = addr;
+ return 0;
+
+err_ring:
+ i915_vma_unpin(vma);
+err_unpin:
+ atomic_dec(&ring->pin_count);
+ return ret;
+}
+
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+ tail = intel_ring_wrap(ring, tail);
+ ring->tail = tail;
+ ring->head = tail;
+ ring->emit = tail;
+ intel_ring_update_space(ring);
+}
+
+void intel_ring_unpin(struct intel_ring *ring)
+{
+ struct i915_vma *vma = ring->vma;
+
+ if (!atomic_dec_and_test(&ring->pin_count))
+ return;
+
+ i915_vma_unset_ggtt_write(vma);
+ if (i915_vma_is_map_and_fenceable(vma))
+ i915_vma_unpin_iomap(vma);
+ else
+ i915_gem_object_unpin_map(vma->obj);
+
+ i915_vma_make_purgeable(vma);
+ i915_vma_unpin(vma);
+}
+
+static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
+{
+ struct i915_address_space *vm = &ggtt->vm;
+ struct drm_i915_private *i915 = vm->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+
+ obj = ERR_PTR(-ENODEV);
+ if (i915_ggtt_has_aperture(ggtt))
+ obj = i915_gem_object_create_stolen(i915, size);
+ if (IS_ERR(obj))
+ obj = i915_gem_object_create_internal(i915, size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ /*
+ * Mark ring buffers as read-only from GPU side (so no stray overwrites)
+ * if supported by the platform's GGTT.
+ */
+ if (vm->has_read_only)
+ i915_gem_object_set_readonly(obj);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma))
+ goto err;
+
+ return vma;
+
+err:
+ i915_gem_object_put(obj);
+ return vma;
+}
+
+struct intel_ring *
+intel_engine_create_ring(struct intel_engine_cs *engine, int size)
+{
+ struct drm_i915_private *i915 = engine->i915;
+ struct intel_ring *ring;
+ struct i915_vma *vma;
+
+ GEM_BUG_ON(!is_power_of_2(size));
+ GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
+
+ ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+ if (!ring)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&ring->ref);
+ ring->size = size;
+
+ /*
+ * Workaround an erratum on the i830 which causes a hang if
+ * the TAIL pointer points to within the last 2 cachelines
+ * of the buffer.
+ */
+ ring->effective_size = size;
+ if (IS_I830(i915) || IS_I845G(i915))
+ ring->effective_size -= 2 * CACHELINE_BYTES;
+
+ intel_ring_update_space(ring);
+
+ vma = create_ring_vma(engine->gt->ggtt, size);
+ if (IS_ERR(vma)) {
+ kfree(ring);
+ return ERR_CAST(vma);
+ }
+ ring->vma = vma;
+
+ return ring;
+}
+
+void intel_ring_free(struct kref *ref)
+{
+ struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
+
+ i915_vma_put(ring->vma);
+ kfree(ring);
+}
+
+static noinline int
+wait_for_space(struct intel_ring *ring,
+ struct intel_timeline *tl,
+ unsigned int bytes)
+{
+ struct i915_request *target;
+ long timeout;
+
+ if (intel_ring_update_space(ring) >= bytes)
+ return 0;
+
+ GEM_BUG_ON(list_empty(&tl->requests));
+ list_for_each_entry(target, &tl->requests, link) {
+ if (target->ring != ring)
+ continue;
+
+ /* Would completion of this request free enough space? */
+ if (bytes <= __intel_ring_space(target->postfix,
+ ring->emit, ring->size))
+ break;
+ }
+
+ if (GEM_WARN_ON(&target->link == &tl->requests))
+ return -ENOSPC;
+
+ timeout = i915_request_wait(target,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
+ if (timeout < 0)
+ return timeout;
+
+ i915_request_retire_upto(target);
+
+ intel_ring_update_space(ring);
+ GEM_BUG_ON(ring->space < bytes);
+ return 0;
+}
+
+u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
+{
+ struct intel_ring *ring = rq->ring;
+ const unsigned int remain_usable = ring->effective_size - ring->emit;
+ const unsigned int bytes = num_dwords * sizeof(u32);
+ unsigned int need_wrap = 0;
+ unsigned int total_bytes;
+ u32 *cs;
+
+ /* Packets must be qword aligned. */
+ GEM_BUG_ON(num_dwords & 1);
+
+ total_bytes = bytes + rq->reserved_space;
+ GEM_BUG_ON(total_bytes > ring->effective_size);
+
+ if (unlikely(total_bytes > remain_usable)) {
+ const int remain_actual = ring->size - ring->emit;
+
+ if (bytes > remain_usable) {
+ /*
+ * Not enough space for the basic request. So need to
+ * flush out the remainder and then wait for
+ * base + reserved.
+ */
+ total_bytes += remain_actual;
+ need_wrap = remain_actual | 1;
+ } else {
+ /*
+ * The base request will fit but the reserved space
+ * falls off the end. So we don't need an immediate
+ * wrap and only need to effectively wait for the
+ * reserved size from the start of ringbuffer.
+ */
+ total_bytes = rq->reserved_space + remain_actual;
+ }
+ }
+
+ if (unlikely(total_bytes > ring->space)) {
+ int ret;
+
+ /*
+ * Space is reserved in the ringbuffer for finalising the
+ * request, as that cannot be allowed to fail. During request
+ * finalisation, reserved_space is set to 0 to stop the
+ * overallocation and the assumption is that then we never need
+ * to wait (which has the risk of failing with EINTR).
+ *
+ * See also i915_request_alloc() and i915_request_add().
+ */
+ GEM_BUG_ON(!rq->reserved_space);
+
+ ret = wait_for_space(ring,
+ i915_request_timeline(rq),
+ total_bytes);
+ if (unlikely(ret))
+ return ERR_PTR(ret);
+ }
+
+ if (unlikely(need_wrap)) {
+ need_wrap &= ~1;
+ GEM_BUG_ON(need_wrap > ring->space);
+ GEM_BUG_ON(ring->emit + need_wrap > ring->size);
+ GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
+
+ /* Fill the tail with MI_NOOP */
+ memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
+ ring->space -= need_wrap;
+ ring->emit = 0;
+ }
+
+ GEM_BUG_ON(ring->emit > ring->size - bytes);
+ GEM_BUG_ON(ring->space < bytes);
+ cs = ring->vaddr + ring->emit;
+ GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
+ ring->emit += bytes;
+ ring->space -= bytes;
+
+ return cs;
+}
+
+/* Align the ring tail to a cacheline boundary */
+int intel_ring_cacheline_align(struct i915_request *rq)
+{
+ int num_dwords;
+ void *cs;
+
+ num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
+ if (num_dwords == 0)
+ return 0;
+
+ num_dwords = CACHELINE_DWORDS - num_dwords;
+ GEM_BUG_ON(num_dwords & 1);
+
+ cs = intel_ring_begin(rq, num_dwords);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
+ intel_ring_advance(rq, cs + num_dwords);
+
+ GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
new file mode 100644
index 000000000000..ea2839d9e044
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -0,0 +1,131 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RING_H
+#define INTEL_RING_H
+
+#include "i915_gem.h" /* GEM_BUG_ON */
+#include "i915_request.h"
+#include "intel_ring_types.h"
+
+struct intel_engine_cs;
+
+struct intel_ring *
+intel_engine_create_ring(struct intel_engine_cs *engine, int size);
+
+u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords);
+int intel_ring_cacheline_align(struct i915_request *rq);
+
+unsigned int intel_ring_update_space(struct intel_ring *ring);
+
+int intel_ring_pin(struct intel_ring *ring);
+void intel_ring_unpin(struct intel_ring *ring);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+
+void intel_ring_free(struct kref *ref);
+
+static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
+{
+ kref_get(&ring->ref);
+ return ring;
+}
+
+static inline void intel_ring_put(struct intel_ring *ring)
+{
+ kref_put(&ring->ref, intel_ring_free);
+}
+
+static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
+{
+ /* Dummy function.
+ *
+ * This serves as a placeholder in the code so that the reader
+ * can compare against the preceding intel_ring_begin() and
+ * check that the number of dwords emitted matches the space
+ * reserved for the command packet (i.e. the value passed to
+ * intel_ring_begin()).
+ */
+ GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
+}
+
+static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
+{
+ return pos & (ring->size - 1);
+}
+
+static inline bool
+intel_ring_offset_valid(const struct intel_ring *ring,
+ unsigned int pos)
+{
+ if (pos & -ring->size) /* must be strictly within the ring */
+ return false;
+
+ if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
+ return false;
+
+ return true;
+}
+
+static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
+{
+ /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
+ u32 offset = addr - rq->ring->vaddr;
+ GEM_BUG_ON(offset > rq->ring->size);
+ return intel_ring_wrap(rq->ring, offset);
+}
+
+static inline void
+assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
+{
+ GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
+
+ /*
+ * "Ring Buffer Use"
+ * Gen2 BSpec "1. Programming Environment" / 1.4.4.6
+ * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
+ * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
+ * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+ * same cacheline, the Head Pointer must not be greater than the Tail
+ * Pointer."
+ *
+ * We use ring->head as the last known location of the actual RING_HEAD,
+ * it may have advanced but in the worst case it is equally the same
+ * as ring->head and so we should never program RING_TAIL to advance
+ * into the same cacheline as ring->head.
+ */
+#define cacheline(a) round_down(a, CACHELINE_BYTES)
+ GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
+ tail < ring->head);
+#undef cacheline
+}
+
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+ /* Whilst writes to the tail are strictly order, there is no
+ * serialisation between readers and the writers. The tail may be
+ * read by i915_request_retire() just as it is being updated
+ * by execlists, as although the breadcrumb is complete, the context
+ * switch hasn't been seen.
+ */
+ assert_ring_tail_valid(ring, tail);
+ ring->tail = tail;
+ return tail;
+}
+
+static inline unsigned int
+__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
+{
+ /*
+ * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+ * same cacheline, the Head Pointer must not be greater than the Tail
+ * Pointer."
+ */
+ GEM_BUG_ON(!is_power_of_2(size));
+ return (head - tail - CACHELINE_BYTES) & (size - 1);
+}
+
+#endif /* INTEL_RING_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index bf631f15aa78..81f872f9ef03 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -40,6 +40,7 @@
#include "intel_gt_irq.h"
#include "intel_gt_pm_irq.h"
#include "intel_reset.h"
+#include "intel_ring.h"
#include "intel_workarounds.h"
/* Rough estimate of the typical request size, performing a flush,
@@ -47,16 +48,6 @@
*/
#define LEGACY_REQUEST_SIZE 200
-unsigned int intel_ring_update_space(struct intel_ring *ring)
-{
- unsigned int space;
-
- space = __intel_ring_space(ring->head, ring->emit, ring->size);
-
- ring->space = space;
- return space;
-}
-
static int
gen2_render_ring_flush(struct i915_request *rq, u32 mode)
{
@@ -371,6 +362,12 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
*/
flags |= PIPE_CONTROL_CS_STALL;
+ /*
+ * CS_STALL suggests at least a post-sync write.
+ */
+ flags |= PIPE_CONTROL_QW_WRITE;
+ flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
/* Just flush everything. Experiments have shown that reducing the
* number of bits based on the write domains has little performance
* impact.
@@ -389,13 +386,6 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
- /*
- * TLB invalidate requires a post-sync write.
- */
- flags |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
-
- flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
/* Workaround: we must issue a pipe_control with CS-stall bit
* set before a pipe_control command that has the state cache
@@ -463,7 +453,8 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
- *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+ *cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB |
+ MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = rq->fence.seqno;
@@ -505,14 +496,13 @@ static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
{
- struct drm_i915_private *dev_priv = engine->i915;
u32 addr;
addr = lower_32_bits(phys);
- if (INTEL_GEN(dev_priv) >= 4)
+ if (INTEL_GEN(engine->i915) >= 4)
addr |= (phys >> 28) & 0xf0;
- I915_WRITE(HWS_PGA, addr);
+ intel_uncore_write(engine->uncore, HWS_PGA, addr);
}
static struct page *status_page(struct intel_engine_cs *engine)
@@ -531,14 +521,13 @@ static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
{
- struct drm_i915_private *dev_priv = engine->i915;
i915_reg_t hwsp;
/*
* The ring status page addresses are no longer next to the rest of
* the ring registers as of gen7.
*/
- if (IS_GEN(dev_priv, 7)) {
+ if (IS_GEN(engine->i915, 7)) {
switch (engine->id) {
/*
* No more rings exist on Gen7. Default case is only to shut up
@@ -560,14 +549,14 @@ static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
hwsp = VEBOX_HWS_PGA_GEN7;
break;
}
- } else if (IS_GEN(dev_priv, 6)) {
+ } else if (IS_GEN(engine->i915, 6)) {
hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
} else {
hwsp = RING_HWS_PGA(engine->mmio_base);
}
- I915_WRITE(hwsp, offset);
- POSTING_READ(hwsp);
+ intel_uncore_write(engine->uncore, hwsp, offset);
+ intel_uncore_posting_read(engine->uncore, hwsp);
}
static void flush_cs_tlb(struct intel_engine_cs *engine)
@@ -642,8 +631,8 @@ static int xcs_resume(struct intel_engine_cs *engine)
struct intel_ring *ring = engine->legacy.ring;
int ret = 0;
- GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n",
- engine->name, ring->head, ring->tail);
+ ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
+ ring->head, ring->tail);
intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
@@ -730,7 +719,7 @@ static int xcs_resume(struct intel_engine_cs *engine)
}
/* Papering over lost _interrupts_ immediately following the restart */
- intel_engine_queue_breadcrumbs(engine);
+ intel_engine_signal_breadcrumbs(engine);
out:
intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
@@ -756,10 +745,10 @@ static void reset_prepare(struct intel_engine_cs *engine)
*
* FIXME: Wa for more modern gens needs to be validated
*/
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
if (intel_engine_stop_cs(engine))
- GEM_TRACE("%s: timed out on STOP_RING\n", engine->name);
+ ENGINE_TRACE(engine, "timed out on STOP_RING\n");
intel_uncore_write_fw(uncore,
RING_HEAD(base),
@@ -775,12 +764,11 @@ static void reset_prepare(struct intel_engine_cs *engine)
/* Check acts as a post */
if (intel_uncore_read_fw(uncore, RING_HEAD(base)))
- GEM_TRACE("%s: ring head [%x] not parked\n",
- engine->name,
- intel_uncore_read_fw(uncore, RING_HEAD(base)));
+ ENGINE_TRACE(engine, "ring head [%x] not parked\n",
+ intel_uncore_read_fw(uncore, RING_HEAD(base)));
}
-static void reset_ring(struct intel_engine_cs *engine, bool stalled)
+static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
struct i915_request *pos, *rq;
unsigned long flags;
@@ -851,7 +839,8 @@ static void reset_finish(struct intel_engine_cs *engine)
static int rcs_resume(struct intel_engine_cs *engine)
{
- struct drm_i915_private *dev_priv = engine->i915;
+ struct drm_i915_private *i915 = engine->i915;
+ struct intel_uncore *uncore = engine->uncore;
/*
* Disable CONSTANT_BUFFER before it is loaded from the context
@@ -863,13 +852,14 @@ static int rcs_resume(struct intel_engine_cs *engine)
* they are already accustomed to from before contexts were
* enabled.
*/
- if (IS_GEN(dev_priv, 4))
- I915_WRITE(ECOSKPD,
+ if (IS_GEN(i915, 4))
+ intel_uncore_write(uncore, ECOSKPD,
_MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE));
/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
- if (IS_GEN_RANGE(dev_priv, 4, 6))
- I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
+ if (IS_GEN_RANGE(i915, 4, 6))
+ intel_uncore_write(uncore, MI_MODE,
+ _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
/* We need to disable the AsyncFlip performance optimisations in order
* to use MI_WAIT_FOR_EVENT within the CS. It should already be
@@ -877,38 +867,40 @@ static int rcs_resume(struct intel_engine_cs *engine)
*
* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
*/
- if (IS_GEN_RANGE(dev_priv, 6, 7))
- I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
+ if (IS_GEN_RANGE(i915, 6, 7))
+ intel_uncore_write(uncore, MI_MODE,
+ _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
/* Required for the hardware to program scanline values for waiting */
/* WaEnableFlushTlbInvalidationMode:snb */
- if (IS_GEN(dev_priv, 6))
- I915_WRITE(GFX_MODE,
+ if (IS_GEN(i915, 6))
+ intel_uncore_write(uncore, GFX_MODE,
_MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
- if (IS_GEN(dev_priv, 7))
- I915_WRITE(GFX_MODE_GEN7,
+ if (IS_GEN(i915, 7))
+ intel_uncore_write(uncore, GFX_MODE_GEN7,
_MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
_MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
- if (IS_GEN(dev_priv, 6)) {
+ if (IS_GEN(i915, 6)) {
/* From the Sandybridge PRM, volume 1 part 3, page 24:
* "If this bit is set, STCunit will have LRA as replacement
* policy. [...] This bit must be reset. LRA replacement
* policy is not supported."
*/
- I915_WRITE(CACHE_MODE_0,
+ intel_uncore_write(uncore, CACHE_MODE_0,
_MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
}
- if (IS_GEN_RANGE(dev_priv, 6, 7))
- I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
+ if (IS_GEN_RANGE(i915, 6, 7))
+ intel_uncore_write(uncore, INSTPM,
+ _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
return xcs_resume(engine);
}
-static void cancel_requests(struct intel_engine_cs *engine)
+static void reset_cancel(struct intel_engine_cs *engine)
{
struct i915_request *request;
unsigned long flags;
@@ -1186,162 +1178,6 @@ i915_emit_bb_start(struct i915_request *rq,
return 0;
}
-int intel_ring_pin(struct intel_ring *ring)
-{
- struct i915_vma *vma = ring->vma;
- unsigned int flags;
- void *addr;
- int ret;
-
- if (atomic_fetch_inc(&ring->pin_count))
- return 0;
-
- flags = PIN_GLOBAL;
-
- /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
- flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-
- if (vma->obj->stolen)
- flags |= PIN_MAPPABLE;
- else
- flags |= PIN_HIGH;
-
- ret = i915_vma_pin(vma, 0, 0, flags);
- if (unlikely(ret))
- goto err_unpin;
-
- if (i915_vma_is_map_and_fenceable(vma))
- addr = (void __force *)i915_vma_pin_iomap(vma);
- else
- addr = i915_gem_object_pin_map(vma->obj,
- i915_coherent_map_type(vma->vm->i915));
- if (IS_ERR(addr)) {
- ret = PTR_ERR(addr);
- goto err_ring;
- }
-
- i915_vma_make_unshrinkable(vma);
-
- GEM_BUG_ON(ring->vaddr);
- ring->vaddr = addr;
-
- return 0;
-
-err_ring:
- i915_vma_unpin(vma);
-err_unpin:
- atomic_dec(&ring->pin_count);
- return ret;
-}
-
-void intel_ring_reset(struct intel_ring *ring, u32 tail)
-{
- tail = intel_ring_wrap(ring, tail);
- ring->tail = tail;
- ring->head = tail;
- ring->emit = tail;
- intel_ring_update_space(ring);
-}
-
-void intel_ring_unpin(struct intel_ring *ring)
-{
- struct i915_vma *vma = ring->vma;
-
- if (!atomic_dec_and_test(&ring->pin_count))
- return;
-
- /* Discard any unused bytes beyond that submitted to hw. */
- intel_ring_reset(ring, ring->emit);
-
- i915_vma_unset_ggtt_write(vma);
- if (i915_vma_is_map_and_fenceable(vma))
- i915_vma_unpin_iomap(vma);
- else
- i915_gem_object_unpin_map(vma->obj);
-
- GEM_BUG_ON(!ring->vaddr);
- ring->vaddr = NULL;
-
- i915_vma_unpin(vma);
- i915_vma_make_purgeable(vma);
-}
-
-static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
-{
- struct i915_address_space *vm = &ggtt->vm;
- struct drm_i915_private *i915 = vm->i915;
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
-
- obj = i915_gem_object_create_stolen(i915, size);
- if (IS_ERR(obj))
- obj = i915_gem_object_create_internal(i915, size);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
-
- /*
- * Mark ring buffers as read-only from GPU side (so no stray overwrites)
- * if supported by the platform's GGTT.
- */
- if (vm->has_read_only)
- i915_gem_object_set_readonly(obj);
-
- vma = i915_vma_instance(obj, vm, NULL);
- if (IS_ERR(vma))
- goto err;
-
- return vma;
-
-err:
- i915_gem_object_put(obj);
- return vma;
-}
-
-struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine, int size)
-{
- struct drm_i915_private *i915 = engine->i915;
- struct intel_ring *ring;
- struct i915_vma *vma;
-
- GEM_BUG_ON(!is_power_of_2(size));
- GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
-
- ring = kzalloc(sizeof(*ring), GFP_KERNEL);
- if (!ring)
- return ERR_PTR(-ENOMEM);
-
- kref_init(&ring->ref);
-
- ring->size = size;
- /* Workaround an erratum on the i830 which causes a hang if
- * the TAIL pointer points to within the last 2 cachelines
- * of the buffer.
- */
- ring->effective_size = size;
- if (IS_I830(i915) || IS_I845G(i915))
- ring->effective_size -= 2 * CACHELINE_BYTES;
-
- intel_ring_update_space(ring);
-
- vma = create_ring_vma(engine->gt->ggtt, size);
- if (IS_ERR(vma)) {
- kfree(ring);
- return ERR_CAST(vma);
- }
- ring->vma = vma;
-
- return ring;
-}
-
-void intel_ring_free(struct kref *ref)
-{
- struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
-
- i915_vma_put(ring->vma);
- kfree(ring);
-}
-
static void __ring_context_fini(struct intel_context *ce)
{
i915_vma_put(ce->state);
@@ -1483,6 +1319,8 @@ static int ring_context_alloc(struct intel_context *ce)
return PTR_ERR(vma);
ce->state = vma;
+ if (engine->default_state)
+ __set_bit(CONTEXT_VALID_BIT, &ce->flags);
}
return 0;
@@ -1525,45 +1363,37 @@ static const struct intel_context_ops ring_context_ops = {
.destroy = ring_context_destroy,
};
-static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt)
+static int load_pd_dir(struct i915_request *rq,
+ const struct i915_ppgtt *ppgtt,
+ u32 valid)
{
const struct intel_engine_cs * const engine = rq->engine;
u32 *cs;
- cs = intel_ring_begin(rq, 6);
+ cs = intel_ring_begin(rq, 12);
if (IS_ERR(cs))
return PTR_ERR(cs);
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
- *cs++ = PP_DIR_DCLV_2G;
+ *cs++ = valid;
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static int flush_pd_dir(struct i915_request *rq)
-{
- const struct intel_engine_cs * const engine = rq->engine;
- u32 *cs;
-
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /* Stall until the page table load is complete */
+ /* Stall until the page table load is complete? */
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
- *cs++ = intel_gt_scratch_offset(rq->engine->gt,
+ *cs++ = intel_gt_scratch_offset(engine->gt,
INTEL_GT_SCRATCH_FIELD_DEFAULT);
- *cs++ = MI_NOOP;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+ *cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
intel_ring_advance(rq, cs);
+
return 0;
}
@@ -1650,7 +1480,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
*cs++ = MI_NOOP;
*cs++ = MI_SET_CONTEXT;
- *cs++ = i915_ggtt_offset(rq->hw_context->state) | flags;
+ *cs++ = i915_ggtt_offset(rq->context->state) | flags;
/*
* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
* WaMiSetContext_Hang:snb,ivb,vlv
@@ -1720,10 +1550,10 @@ static int remap_l3_slice(struct i915_request *rq, int slice)
static int remap_l3(struct i915_request *rq)
{
- struct i915_gem_context *ctx = rq->gem_context;
+ struct i915_gem_context *ctx = i915_request_gem_context(rq);
int i, err;
- if (!ctx->remap_slice)
+ if (!ctx || !ctx->remap_slice)
return 0;
for (i = 0; i < MAX_L3_SLICES; i++) {
@@ -1739,34 +1569,50 @@ static int remap_l3(struct i915_request *rq)
return 0;
}
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
+{
+ int ret;
+
+ if (!vm)
+ return 0;
+
+ ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
+ if (ret)
+ return ret;
+
+ /*
+ * Not only do we need a full barrier (post-sync write) after
+ * invalidating the TLBs, but we need to wait a little bit
+ * longer. Whether this is merely delaying us, or the
+ * subsequent flush is a key part of serialising with the
+ * post-sync op, this extra pass appears vital before a
+ * mm switch!
+ */
+ ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+ if (ret)
+ return ret;
+
+ return rq->engine->emit_flush(rq, EMIT_FLUSH);
+}
+
static int switch_context(struct i915_request *rq)
{
- struct intel_context *ce = rq->hw_context;
- struct i915_address_space *vm = vm_alias(ce);
+ struct intel_context *ce = rq->context;
int ret;
GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
- if (vm) {
- ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm));
- if (ret)
- return ret;
- }
+ ret = switch_mm(rq, vm_alias(ce));
+ if (ret)
+ return ret;
if (ce->state) {
u32 hw_flags;
GEM_BUG_ON(rq->engine->id != RCS0);
- /*
- * The kernel context(s) is treated as pure scratch and is not
- * expected to retain any state (as we sacrifice it during
- * suspend and on resume it may be corrupted). This is ok,
- * as nothing actually executes using the kernel context; it
- * is purely used for flushing user contexts.
- */
hw_flags = 0;
- if (i915_gem_context_is_kernel(rq->gem_context))
+ if (!test_bit(CONTEXT_VALID_BIT, &ce->flags))
hw_flags = MI_RESTORE_INHIBIT;
ret = mi_set_context(rq, hw_flags);
@@ -1774,34 +1620,6 @@ static int switch_context(struct i915_request *rq)
return ret;
}
- if (vm) {
- struct intel_engine_cs *engine = rq->engine;
-
- ret = engine->emit_flush(rq, EMIT_INVALIDATE);
- if (ret)
- return ret;
-
- ret = flush_pd_dir(rq);
- if (ret)
- return ret;
-
- /*
- * Not only do we need a full barrier (post-sync write) after
- * invalidating the TLBs, but we need to wait a little bit
- * longer. Whether this is merely delaying us, or the
- * subsequent flush is a key part of serialising with the
- * post-sync op, this extra pass appears vital before a
- * mm switch!
- */
- ret = engine->emit_flush(rq, EMIT_INVALIDATE);
- if (ret)
- return ret;
-
- ret = engine->emit_flush(rq, EMIT_FLUSH);
- if (ret)
- return ret;
- }
-
ret = remap_l3(rq);
if (ret)
return ret;
@@ -1813,7 +1631,7 @@ static int ring_request_alloc(struct i915_request *request)
{
int ret;
- GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
+ GEM_BUG_ON(!intel_context_is_pinned(request->context));
GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);
/*
@@ -1836,148 +1654,6 @@ static int ring_request_alloc(struct i915_request *request)
return 0;
}
-static noinline int
-wait_for_space(struct intel_ring *ring,
- struct intel_timeline *tl,
- unsigned int bytes)
-{
- struct i915_request *target;
- long timeout;
-
- if (intel_ring_update_space(ring) >= bytes)
- return 0;
-
- GEM_BUG_ON(list_empty(&tl->requests));
- list_for_each_entry(target, &tl->requests, link) {
- if (target->ring != ring)
- continue;
-
- /* Would completion of this request free enough space? */
- if (bytes <= __intel_ring_space(target->postfix,
- ring->emit, ring->size))
- break;
- }
-
- if (GEM_WARN_ON(&target->link == &tl->requests))
- return -ENOSPC;
-
- timeout = i915_request_wait(target,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT);
- if (timeout < 0)
- return timeout;
-
- i915_request_retire_upto(target);
-
- intel_ring_update_space(ring);
- GEM_BUG_ON(ring->space < bytes);
- return 0;
-}
-
-u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
-{
- struct intel_ring *ring = rq->ring;
- const unsigned int remain_usable = ring->effective_size - ring->emit;
- const unsigned int bytes = num_dwords * sizeof(u32);
- unsigned int need_wrap = 0;
- unsigned int total_bytes;
- u32 *cs;
-
- /* Packets must be qword aligned. */
- GEM_BUG_ON(num_dwords & 1);
-
- total_bytes = bytes + rq->reserved_space;
- GEM_BUG_ON(total_bytes > ring->effective_size);
-
- if (unlikely(total_bytes > remain_usable)) {
- const int remain_actual = ring->size - ring->emit;
-
- if (bytes > remain_usable) {
- /*
- * Not enough space for the basic request. So need to
- * flush out the remainder and then wait for
- * base + reserved.
- */
- total_bytes += remain_actual;
- need_wrap = remain_actual | 1;
- } else {
- /*
- * The base request will fit but the reserved space
- * falls off the end. So we don't need an immediate
- * wrap and only need to effectively wait for the
- * reserved size from the start of ringbuffer.
- */
- total_bytes = rq->reserved_space + remain_actual;
- }
- }
-
- if (unlikely(total_bytes > ring->space)) {
- int ret;
-
- /*
- * Space is reserved in the ringbuffer for finalising the
- * request, as that cannot be allowed to fail. During request
- * finalisation, reserved_space is set to 0 to stop the
- * overallocation and the assumption is that then we never need
- * to wait (which has the risk of failing with EINTR).
- *
- * See also i915_request_alloc() and i915_request_add().
- */
- GEM_BUG_ON(!rq->reserved_space);
-
- ret = wait_for_space(ring,
- i915_request_timeline(rq),
- total_bytes);
- if (unlikely(ret))
- return ERR_PTR(ret);
- }
-
- if (unlikely(need_wrap)) {
- need_wrap &= ~1;
- GEM_BUG_ON(need_wrap > ring->space);
- GEM_BUG_ON(ring->emit + need_wrap > ring->size);
- GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
-
- /* Fill the tail with MI_NOOP */
- memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
- ring->space -= need_wrap;
- ring->emit = 0;
- }
-
- GEM_BUG_ON(ring->emit > ring->size - bytes);
- GEM_BUG_ON(ring->space < bytes);
- cs = ring->vaddr + ring->emit;
- GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
- ring->emit += bytes;
- ring->space -= bytes;
-
- return cs;
-}
-
-/* Align the ring tail to a cacheline boundary */
-int intel_ring_cacheline_align(struct i915_request *rq)
-{
- int num_dwords;
- void *cs;
-
- num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
- if (num_dwords == 0)
- return 0;
-
- num_dwords = CACHELINE_DWORDS - num_dwords;
- GEM_BUG_ON(num_dwords & 1);
-
- cs = intel_ring_begin(rq, num_dwords);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
- intel_ring_advance(rq, cs);
-
- GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
- return 0;
-}
-
static void gen6_bsd_submit_request(struct i915_request *request)
{
struct intel_uncore *uncore = request->engine->uncore;
@@ -2111,7 +1787,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode)
static void i9xx_set_default_submission(struct intel_engine_cs *engine)
{
engine->submit_request = i9xx_submit_request;
- engine->cancel_requests = cancel_requests;
engine->park = NULL;
engine->unpark = NULL;
@@ -2123,7 +1798,7 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
engine->submit_request = gen6_bsd_submit_request;
}
-static void ring_destroy(struct intel_engine_cs *engine)
+static void ring_release(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
@@ -2137,8 +1812,6 @@ static void ring_destroy(struct intel_engine_cs *engine)
intel_timeline_unpin(engine->legacy.timeline);
intel_timeline_put(engine->legacy.timeline);
-
- kfree(engine);
}
static void setup_irq(struct intel_engine_cs *engine)
@@ -2169,11 +1842,12 @@ static void setup_common(struct intel_engine_cs *engine)
setup_irq(engine);
- engine->destroy = ring_destroy;
+ engine->release = ring_release;
engine->resume = xcs_resume;
engine->reset.prepare = reset_prepare;
- engine->reset.reset = reset_ring;
+ engine->reset.rewind = reset_rewind;
+ engine->reset.cancel = reset_cancel;
engine->reset.finish = reset_finish;
engine->cops = &ring_context_ops;
@@ -2284,6 +1958,10 @@ static void setup_vecs(struct intel_engine_cs *engine)
int intel_ring_submission_setup(struct intel_engine_cs *engine)
{
+ struct intel_timeline *timeline;
+ struct intel_ring *ring;
+ int err;
+
setup_common(engine);
switch (engine->class) {
@@ -2304,15 +1982,6 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
return -ENODEV;
}
- return 0;
-}
-
-int intel_ring_submission_init(struct intel_engine_cs *engine)
-{
- struct intel_timeline *timeline;
- struct intel_ring *ring;
- int err;
-
timeline = intel_timeline_create(engine->gt, engine->status_page.vma);
if (IS_ERR(timeline)) {
err = PTR_ERR(timeline);
@@ -2338,16 +2007,10 @@ int intel_ring_submission_init(struct intel_engine_cs *engine)
engine->legacy.ring = ring;
engine->legacy.timeline = timeline;
- err = intel_engine_init_common(engine);
- if (err)
- goto err_ring_unpin;
-
GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
return 0;
-err_ring_unpin:
- intel_ring_unpin(ring);
err_ring:
intel_ring_put(ring);
err_timeline_unpin:
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h
new file mode 100644
index 000000000000..d9f17f38e0cc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h
@@ -0,0 +1,51 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RING_TYPES_H
+#define INTEL_RING_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/kref.h>
+#include <linux/types.h>
+
+/*
+ * Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
+ * but keeps the logic simple. Indeed, the whole purpose of this macro is just
+ * to give some inclination as to some of the magic values used in the various
+ * workarounds!
+ */
+#define CACHELINE_BYTES 64
+#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
+
+struct i915_vma;
+
+struct intel_ring {
+ struct kref ref;
+ struct i915_vma *vma;
+ void *vaddr;
+
+ /*
+ * As we have two types of rings, one global to the engine used
+ * by ringbuffer submission and those that are exclusive to a
+ * context used by execlists, we have to play safe and allow
+ * atomic updates to the pin_count. However, the actual pinning
+ * of the context is either done during initialisation for
+ * ringbuffer submission or serialised as part of the context
+ * pinning for execlists, and so we do not need a mutex ourselves
+ * to serialise intel_ring_pin/intel_ring_unpin.
+ */
+ atomic_t pin_count;
+
+ u32 head;
+ u32 tail;
+ u32 emit;
+
+ u32 space;
+ u32 size;
+ u32 effective_size;
+};
+
+#endif /* INTEL_RING_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
new file mode 100644
index 000000000000..f232036c3c7a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -0,0 +1,1903 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_irq.h"
+#include "intel_gt_pm_irq.h"
+#include "intel_rps.h"
+#include "intel_sideband.h"
+#include "../../../platform/x86/intel_ips.h"
+
+/*
+ * Lock protecting IPS related data structures
+ */
+static DEFINE_SPINLOCK(mchdev_lock);
+
+static struct intel_gt *rps_to_gt(struct intel_rps *rps)
+{
+ return container_of(rps, struct intel_gt, rps);
+}
+
+static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
+{
+ return rps_to_gt(rps)->i915;
+}
+
+static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
+{
+ return rps_to_gt(rps)->uncore;
+}
+
+static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
+{
+ return mask & ~rps->pm_intrmsk_mbz;
+}
+
+static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
+{
+ intel_uncore_write_fw(uncore, reg, val);
+}
+
+static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
+{
+ u32 mask = 0;
+
+ /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
+ if (val > rps->min_freq_softlimit)
+ mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
+ GEN6_PM_RP_DOWN_THRESHOLD |
+ GEN6_PM_RP_DOWN_TIMEOUT);
+
+ if (val < rps->max_freq_softlimit)
+ mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
+
+ mask &= rps->pm_events;
+
+ return rps_pm_sanitize_mask(rps, ~mask);
+}
+
+static void rps_reset_ei(struct intel_rps *rps)
+{
+ memset(&rps->ei, 0, sizeof(rps->ei));
+}
+
+static void rps_enable_interrupts(struct intel_rps *rps)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+
+ rps_reset_ei(rps);
+
+ if (IS_VALLEYVIEW(gt->i915))
+ /* WaGsvRC0ResidencyMethod:vlv */
+ rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
+ else
+ rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
+ GEN6_PM_RP_DOWN_THRESHOLD |
+ GEN6_PM_RP_DOWN_TIMEOUT);
+
+ spin_lock_irq(&gt->irq_lock);
+ gen6_gt_pm_enable_irq(gt, rps->pm_events);
+ spin_unlock_irq(&gt->irq_lock);
+
+ set(gt->uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, rps->cur_freq));
+}
+
+static void gen6_rps_reset_interrupts(struct intel_rps *rps)
+{
+ gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS);
+}
+
+static void gen11_rps_reset_interrupts(struct intel_rps *rps)
+{
+ while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM))
+ ;
+}
+
+static void rps_reset_interrupts(struct intel_rps *rps)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+
+ spin_lock_irq(&gt->irq_lock);
+ if (INTEL_GEN(gt->i915) >= 11)
+ gen11_rps_reset_interrupts(rps);
+ else
+ gen6_rps_reset_interrupts(rps);
+
+ rps->pm_iir = 0;
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+static void rps_disable_interrupts(struct intel_rps *rps)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+
+ rps->pm_events = 0;
+
+ set(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
+
+ spin_lock_irq(&gt->irq_lock);
+ gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
+ spin_unlock_irq(&gt->irq_lock);
+
+ intel_synchronize_irq(gt->i915);
+
+ /*
+ * Now that we will not be generating any more work, flush any
+ * outstanding tasks. As we are called on the RPS idle path,
+ * we will reset the GPU to minimum frequencies, so the current
+ * state of the worker can be discarded.
+ */
+ cancel_work_sync(&rps->work);
+
+ rps_reset_interrupts(rps);
+}
+
+static const struct cparams {
+ u16 i;
+ u16 t;
+ u16 m;
+ u16 c;
+} cparams[] = {
+ { 1, 1333, 301, 28664 },
+ { 1, 1066, 294, 24460 },
+ { 1, 800, 294, 25192 },
+ { 0, 1333, 276, 27605 },
+ { 0, 1066, 276, 27605 },
+ { 0, 800, 231, 23784 },
+};
+
+static void gen5_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u8 fmax, fmin, fstart;
+ u32 rgvmodectl;
+ int c_m, i;
+
+ if (i915->fsb_freq <= 3200)
+ c_m = 0;
+ else if (i915->fsb_freq <= 4800)
+ c_m = 1;
+ else
+ c_m = 2;
+
+ for (i = 0; i < ARRAY_SIZE(cparams); i++) {
+ if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
+ rps->ips.m = cparams[i].m;
+ rps->ips.c = cparams[i].c;
+ break;
+ }
+ }
+
+ rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
+
+ /* Set up min, max, and cur for interrupt handling */
+ fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
+ fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
+ fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
+ MEMMODE_FSTART_SHIFT;
+ DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
+ fmax, fmin, fstart);
+
+ rps->min_freq = fmax;
+ rps->max_freq = fmin;
+
+ rps->idle_freq = rps->min_freq;
+ rps->cur_freq = rps->idle_freq;
+}
+
+static unsigned long
+__ips_chipset_val(struct intel_ips *ips)
+{
+ struct intel_uncore *uncore =
+ rps_to_uncore(container_of(ips, struct intel_rps, ips));
+ unsigned long now = jiffies_to_msecs(jiffies), dt;
+ unsigned long result;
+ u64 total, delta;
+
+ lockdep_assert_held(&mchdev_lock);
+
+ /*
+ * Prevent division-by-zero if we are asking too fast.
+ * Also, we don't get interesting results if we are polling
+ * faster than once in 10ms, so just return the saved value
+ * in such cases.
+ */
+ dt = now - ips->last_time1;
+ if (dt <= 10)
+ return ips->chipset_power;
+
+ /* FIXME: handle per-counter overflow */
+ total = intel_uncore_read(uncore, DMIEC);
+ total += intel_uncore_read(uncore, DDREC);
+ total += intel_uncore_read(uncore, CSIEC);
+
+ delta = total - ips->last_count1;
+
+ result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10);
+
+ ips->last_count1 = total;
+ ips->last_time1 = now;
+
+ ips->chipset_power = result;
+
+ return result;
+}
+
+static unsigned long ips_mch_val(struct intel_uncore *uncore)
+{
+ unsigned int m, x, b;
+ u32 tsfs;
+
+ tsfs = intel_uncore_read(uncore, TSFS);
+ x = intel_uncore_read8(uncore, TR1);
+
+ b = tsfs & TSFS_INTR_MASK;
+ m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT;
+
+ return m * x / 127 - b;
+}
+
+static int _pxvid_to_vd(u8 pxvid)
+{
+ if (pxvid == 0)
+ return 0;
+
+ if (pxvid >= 8 && pxvid < 31)
+ pxvid = 31;
+
+ return (pxvid + 2) * 125;
+}
+
+static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid)
+{
+ const int vd = _pxvid_to_vd(pxvid);
+
+ if (INTEL_INFO(i915)->is_mobile)
+ return max(vd - 1125, 0);
+
+ return vd;
+}
+
+static void __gen5_ips_update(struct intel_ips *ips)
+{
+ struct intel_uncore *uncore =
+ rps_to_uncore(container_of(ips, struct intel_rps, ips));
+ u64 now, delta, dt;
+ u32 count;
+
+ lockdep_assert_held(&mchdev_lock);
+
+ now = ktime_get_raw_ns();
+ dt = now - ips->last_time2;
+ do_div(dt, NSEC_PER_MSEC);
+
+ /* Don't divide by 0 */
+ if (dt <= 10)
+ return;
+
+ count = intel_uncore_read(uncore, GFXEC);
+ delta = count - ips->last_count2;
+
+ ips->last_count2 = count;
+ ips->last_time2 = now;
+
+ /* More magic constants... */
+ ips->gfx_power = div_u64(delta * 1181, dt * 10);
+}
+
+static void gen5_rps_update(struct intel_rps *rps)
+{
+ spin_lock_irq(&mchdev_lock);
+ __gen5_ips_update(&rps->ips);
+ spin_unlock_irq(&mchdev_lock);
+}
+
+static bool gen5_rps_set(struct intel_rps *rps, u8 val)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u16 rgvswctl;
+
+ lockdep_assert_held(&mchdev_lock);
+
+ rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
+ if (rgvswctl & MEMCTL_CMD_STS) {
+ DRM_DEBUG("gpu busy, RCS change rejected\n");
+ return false; /* still busy with another command */
+ }
+
+ /* Invert the frequency bin into an ips delay */
+ val = rps->max_freq - val;
+ val = rps->min_freq + val;
+
+ rgvswctl =
+ (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
+ (val << MEMCTL_FREQ_SHIFT) |
+ MEMCTL_SFCAVM;
+ intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
+ intel_uncore_posting_read16(uncore, MEMSWCTL);
+
+ rgvswctl |= MEMCTL_CMD_STS;
+ intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
+
+ return true;
+}
+
+static unsigned long intel_pxfreq(u32 vidfreq)
+{
+ int div = (vidfreq & 0x3f0000) >> 16;
+ int post = (vidfreq & 0x3000) >> 12;
+ int pre = (vidfreq & 0x7);
+
+ if (!pre)
+ return 0;
+
+ return div * 133333 / (pre << post);
+}
+
+static unsigned int init_emon(struct intel_uncore *uncore)
+{
+ u8 pxw[16];
+ int i;
+
+ /* Disable to program */
+ intel_uncore_write(uncore, ECR, 0);
+ intel_uncore_posting_read(uncore, ECR);
+
+ /* Program energy weights for various events */
+ intel_uncore_write(uncore, SDEW, 0x15040d00);
+ intel_uncore_write(uncore, CSIEW0, 0x007f0000);
+ intel_uncore_write(uncore, CSIEW1, 0x1e220004);
+ intel_uncore_write(uncore, CSIEW2, 0x04000004);
+
+ for (i = 0; i < 5; i++)
+ intel_uncore_write(uncore, PEW(i), 0);
+ for (i = 0; i < 3; i++)
+ intel_uncore_write(uncore, DEW(i), 0);
+
+ /* Program P-state weights to account for frequency power adjustment */
+ for (i = 0; i < 16; i++) {
+ u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i));
+ unsigned int freq = intel_pxfreq(pxvidfreq);
+ unsigned int vid =
+ (pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
+ unsigned int val;
+
+ val = vid * vid * freq / 1000 * 255;
+ val /= 127 * 127 * 900;
+
+ pxw[i] = val;
+ }
+ /* Render standby states get 0 weight */
+ pxw[14] = 0;
+ pxw[15] = 0;
+
+ for (i = 0; i < 4; i++) {
+ intel_uncore_write(uncore, PXW(i),
+ pxw[i * 4 + 0] << 24 |
+ pxw[i * 4 + 1] << 16 |
+ pxw[i * 4 + 2] << 8 |
+ pxw[i * 4 + 3] << 0);
+ }
+
+ /* Adjust magic regs to magic values (more experimental results) */
+ intel_uncore_write(uncore, OGW0, 0);
+ intel_uncore_write(uncore, OGW1, 0);
+ intel_uncore_write(uncore, EG0, 0x00007f00);
+ intel_uncore_write(uncore, EG1, 0x0000000e);
+ intel_uncore_write(uncore, EG2, 0x000e0000);
+ intel_uncore_write(uncore, EG3, 0x68000300);
+ intel_uncore_write(uncore, EG4, 0x42000000);
+ intel_uncore_write(uncore, EG5, 0x00140031);
+ intel_uncore_write(uncore, EG6, 0);
+ intel_uncore_write(uncore, EG7, 0);
+
+ for (i = 0; i < 8; i++)
+ intel_uncore_write(uncore, PXWL(i), 0);
+
+ /* Enable PMON + select events */
+ intel_uncore_write(uncore, ECR, 0x80000019);
+
+ return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK;
+}
+
+static bool gen5_rps_enable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u8 fstart, vstart;
+ u32 rgvmodectl;
+
+ spin_lock_irq(&mchdev_lock);
+
+ rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
+
+ /* Enable temp reporting */
+ intel_uncore_write16(uncore, PMMISC,
+ intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN);
+ intel_uncore_write16(uncore, TSC1,
+ intel_uncore_read16(uncore, TSC1) | TSE);
+
+ /* 100ms RC evaluation intervals */
+ intel_uncore_write(uncore, RCUPEI, 100000);
+ intel_uncore_write(uncore, RCDNEI, 100000);
+
+ /* Set max/min thresholds to 90ms and 80ms respectively */
+ intel_uncore_write(uncore, RCBMAXAVG, 90000);
+ intel_uncore_write(uncore, RCBMINAVG, 80000);
+
+ intel_uncore_write(uncore, MEMIHYST, 1);
+
+ /* Set up min, max, and cur for interrupt handling */
+ fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
+ MEMMODE_FSTART_SHIFT;
+
+ vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
+ PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
+
+ intel_uncore_write(uncore,
+ MEMINTREN,
+ MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
+
+ intel_uncore_write(uncore, VIDSTART, vstart);
+ intel_uncore_posting_read(uncore, VIDSTART);
+
+ rgvmodectl |= MEMMODE_SWMODE_EN;
+ intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);
+
+ if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
+ MEMCTL_CMD_STS) == 0, 10))
+ DRM_ERROR("stuck trying to change perf mode\n");
+ mdelay(1);
+
+ gen5_rps_set(rps, rps->cur_freq);
+
+ rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC);
+ rps->ips.last_count1 += intel_uncore_read(uncore, DDREC);
+ rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC);
+ rps->ips.last_time1 = jiffies_to_msecs(jiffies);
+
+ rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
+ rps->ips.last_time2 = ktime_get_raw_ns();
+
+ spin_unlock_irq(&mchdev_lock);
+
+ rps->ips.corr = init_emon(uncore);
+
+ return true;
+}
+
+static void gen5_rps_disable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u16 rgvswctl;
+
+ spin_lock_irq(&mchdev_lock);
+
+ rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
+
+ /* Ack interrupts, disable EFC interrupt */
+ intel_uncore_write(uncore, MEMINTREN,
+ intel_uncore_read(uncore, MEMINTREN) &
+ ~MEMINT_EVAL_CHG_EN);
+ intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
+ intel_uncore_write(uncore, DEIER,
+ intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT);
+ intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT);
+ intel_uncore_write(uncore, DEIMR,
+ intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT);
+
+ /* Go back to the starting frequency */
+ gen5_rps_set(rps, rps->idle_freq);
+ mdelay(1);
+ rgvswctl |= MEMCTL_CMD_STS;
+ intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
+ mdelay(1);
+
+ spin_unlock_irq(&mchdev_lock);
+}
+
+static u32 rps_limits(struct intel_rps *rps, u8 val)
+{
+ u32 limits;
+
+ /*
+ * Only set the down limit when we've reached the lowest level to avoid
+ * getting more interrupts, otherwise leave this clear. This prevents a
+ * race in the hw when coming out of rc6: There's a tiny window where
+ * the hw runs at the minimal clock before selecting the desired
+ * frequency, if the down threshold expires in that window we will not
+ * receive a down interrupt.
+ */
+ if (INTEL_GEN(rps_to_i915(rps)) >= 9) {
+ limits = rps->max_freq_softlimit << 23;
+ if (val <= rps->min_freq_softlimit)
+ limits |= rps->min_freq_softlimit << 14;
+ } else {
+ limits = rps->max_freq_softlimit << 24;
+ if (val <= rps->min_freq_softlimit)
+ limits |= rps->min_freq_softlimit << 16;
+ }
+
+ return limits;
+}
+
+static void rps_set_power(struct intel_rps *rps, int new_power)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 threshold_up = 0, threshold_down = 0; /* in % */
+ u32 ei_up = 0, ei_down = 0;
+
+ lockdep_assert_held(&rps->power.mutex);
+
+ if (new_power == rps->power.mode)
+ return;
+
+ /* Note the units here are not exactly 1us, but 1280ns. */
+ switch (new_power) {
+ case LOW_POWER:
+ /* Upclock if more than 95% busy over 16ms */
+ ei_up = 16000;
+ threshold_up = 95;
+
+ /* Downclock if less than 85% busy over 32ms */
+ ei_down = 32000;
+ threshold_down = 85;
+ break;
+
+ case BETWEEN:
+ /* Upclock if more than 90% busy over 13ms */
+ ei_up = 13000;
+ threshold_up = 90;
+
+ /* Downclock if less than 75% busy over 32ms */
+ ei_down = 32000;
+ threshold_down = 75;
+ break;
+
+ case HIGH_POWER:
+ /* Upclock if more than 85% busy over 10ms */
+ ei_up = 10000;
+ threshold_up = 85;
+
+ /* Downclock if less than 60% busy over 32ms */
+ ei_down = 32000;
+ threshold_down = 60;
+ break;
+ }
+
+ /* When byt can survive without system hang with dynamic
+ * sw freq adjustments, this restriction can be lifted.
+ */
+ if (IS_VALLEYVIEW(i915))
+ goto skip_hw_write;
+
+ set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up));
+ set(uncore, GEN6_RP_UP_THRESHOLD,
+ GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100));
+
+ set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down));
+ set(uncore, GEN6_RP_DOWN_THRESHOLD,
+ GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100));
+
+ set(uncore, GEN6_RP_CONTROL,
+ (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
+ GEN6_RP_MEDIA_HW_NORMAL_MODE |
+ GEN6_RP_MEDIA_IS_GFX |
+ GEN6_RP_ENABLE |
+ GEN6_RP_UP_BUSY_AVG |
+ GEN6_RP_DOWN_IDLE_AVG);
+
+skip_hw_write:
+ rps->power.mode = new_power;
+ rps->power.up_threshold = threshold_up;
+ rps->power.down_threshold = threshold_down;
+}
+
+static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
+{
+ int new_power;
+
+ new_power = rps->power.mode;
+ switch (rps->power.mode) {
+ case LOW_POWER:
+ if (val > rps->efficient_freq + 1 &&
+ val > rps->cur_freq)
+ new_power = BETWEEN;
+ break;
+
+ case BETWEEN:
+ if (val <= rps->efficient_freq &&
+ val < rps->cur_freq)
+ new_power = LOW_POWER;
+ else if (val >= rps->rp0_freq &&
+ val > rps->cur_freq)
+ new_power = HIGH_POWER;
+ break;
+
+ case HIGH_POWER:
+ if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
+ val < rps->cur_freq)
+ new_power = BETWEEN;
+ break;
+ }
+ /* Max/min bins are special */
+ if (val <= rps->min_freq_softlimit)
+ new_power = LOW_POWER;
+ if (val >= rps->max_freq_softlimit)
+ new_power = HIGH_POWER;
+
+ mutex_lock(&rps->power.mutex);
+ if (rps->power.interactive)
+ new_power = HIGH_POWER;
+ rps_set_power(rps, new_power);
+ mutex_unlock(&rps->power.mutex);
+}
+
+void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
+{
+ mutex_lock(&rps->power.mutex);
+ if (interactive) {
+ if (!rps->power.interactive++ && rps->active)
+ rps_set_power(rps, HIGH_POWER);
+ } else {
+ GEM_BUG_ON(!rps->power.interactive);
+ rps->power.interactive--;
+ }
+ mutex_unlock(&rps->power.mutex);
+}
+
+static int gen6_rps_set(struct intel_rps *rps, u8 val)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 swreq;
+
+ if (INTEL_GEN(i915) >= 9)
+ swreq = GEN9_FREQUENCY(val);
+ else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+ swreq = HSW_FREQUENCY(val);
+ else
+ swreq = (GEN6_FREQUENCY(val) |
+ GEN6_OFFSET(0) |
+ GEN6_AGGRESSIVE_TURBO);
+ set(uncore, GEN6_RPNSWREQ, swreq);
+
+ return 0;
+}
+
+static int vlv_rps_set(struct intel_rps *rps, u8 val)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ int err;
+
+ vlv_punit_get(i915);
+ err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
+ vlv_punit_put(i915);
+
+ return err;
+}
+
+static int rps_set(struct intel_rps *rps, u8 val, bool update)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ int err;
+
+ if (INTEL_GEN(i915) < 6)
+ return 0;
+
+ if (val == rps->last_freq)
+ return 0;
+
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+ err = vlv_rps_set(rps, val);
+ else
+ err = gen6_rps_set(rps, val);
+ if (err)
+ return err;
+
+ if (update)
+ gen6_rps_set_thresholds(rps, val);
+ rps->last_freq = val;
+
+ return 0;
+}
+
+void intel_rps_unpark(struct intel_rps *rps)
+{
+ u8 freq;
+
+ if (!rps->enabled)
+ return;
+
+ /*
+ * Use the user's desired frequency as a guide, but for better
+ * performance, jump directly to RPe as our starting frequency.
+ */
+ mutex_lock(&rps->lock);
+ rps->active = true;
+ freq = max(rps->cur_freq, rps->efficient_freq),
+ freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit);
+ intel_rps_set(rps, freq);
+ rps->last_adj = 0;
+ mutex_unlock(&rps->lock);
+
+ if (INTEL_GEN(rps_to_i915(rps)) >= 6)
+ rps_enable_interrupts(rps);
+
+ if (IS_GEN(rps_to_i915(rps), 5))
+ gen5_rps_update(rps);
+}
+
+void intel_rps_park(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ if (!rps->enabled)
+ return;
+
+ if (INTEL_GEN(i915) >= 6)
+ rps_disable_interrupts(rps);
+
+ rps->active = false;
+ if (rps->last_freq <= rps->idle_freq)
+ return;
+
+ /*
+ * The punit delays the write of the frequency and voltage until it
+ * determines the GPU is awake. During normal usage we don't want to
+ * waste power changing the frequency if the GPU is sleeping (rc6).
+ * However, the GPU and driver is now idle and we do not want to delay
+ * switching to minimum voltage (reducing power whilst idle) as we do
+ * not expect to be woken in the near future and so must flush the
+ * change by waking the device.
+ *
+ * We choose to take the media powerwell (either would do to trick the
+ * punit into committing the voltage change) as that takes a lot less
+ * power than the render powerwell.
+ */
+ intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
+ rps_set(rps, rps->idle_freq, false);
+ intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
+}
+
+void intel_rps_boost(struct i915_request *rq)
+{
+ struct intel_rps *rps = &rq->engine->gt->rps;
+ unsigned long flags;
+
+ if (i915_request_signaled(rq) || !rps->active)
+ return;
+
+ /* Serializes with i915_request_retire() */
+ spin_lock_irqsave(&rq->lock, flags);
+ if (!i915_request_has_waitboost(rq) &&
+ !dma_fence_is_signaled_locked(&rq->fence)) {
+ rq->flags |= I915_REQUEST_WAITBOOST;
+
+ if (!atomic_fetch_inc(&rps->num_waiters) &&
+ READ_ONCE(rps->cur_freq) < rps->boost_freq)
+ schedule_work(&rps->work);
+
+ atomic_inc(&rps->boosts);
+ }
+ spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+int intel_rps_set(struct intel_rps *rps, u8 val)
+{
+ int err;
+
+ lockdep_assert_held(&rps->lock);
+ GEM_BUG_ON(val > rps->max_freq);
+ GEM_BUG_ON(val < rps->min_freq);
+
+ if (rps->active) {
+ err = rps_set(rps, val, true);
+ if (err)
+ return err;
+
+ /*
+ * Make sure we continue to get interrupts
+ * until we hit the minimum or maximum frequencies.
+ */
+ if (INTEL_GEN(rps_to_i915(rps)) >= 6) {
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ set(uncore,
+ GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val));
+
+ set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val));
+ }
+ }
+
+ rps->cur_freq = val;
+ return 0;
+}
+
+static void gen6_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ /* All of these values are in units of 50MHz */
+
+ /* static values from HW: RP0 > RP1 > RPn (min_freq) */
+ if (IS_GEN9_LP(i915)) {
+ u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
+
+ rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
+ rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
+ rps->min_freq = (rp_state_cap >> 0) & 0xff;
+ } else {
+ u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+
+ rps->rp0_freq = (rp_state_cap >> 0) & 0xff;
+ rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
+ rps->min_freq = (rp_state_cap >> 16) & 0xff;
+ }
+
+ /* hw_max = RP0 until we check for overclocking */
+ rps->max_freq = rps->rp0_freq;
+
+ rps->efficient_freq = rps->rp1_freq;
+ if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
+ IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
+ u32 ddcc_status = 0;
+
+ if (sandybridge_pcode_read(i915,
+ HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
+ &ddcc_status, NULL) == 0)
+ rps->efficient_freq =
+ clamp_t(u8,
+ (ddcc_status >> 8) & 0xff,
+ rps->min_freq,
+ rps->max_freq);
+ }
+
+ if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
+ /* Store the frequency values in 16.66 MHZ units, which is
+ * the natural hardware unit for SKL
+ */
+ rps->rp0_freq *= GEN9_FREQ_SCALER;
+ rps->rp1_freq *= GEN9_FREQ_SCALER;
+ rps->min_freq *= GEN9_FREQ_SCALER;
+ rps->max_freq *= GEN9_FREQ_SCALER;
+ rps->efficient_freq *= GEN9_FREQ_SCALER;
+ }
+}
+
+static bool rps_reset(struct intel_rps *rps)
+{
+ /* force a reset */
+ rps->power.mode = -1;
+ rps->last_freq = -1;
+
+ if (rps_set(rps, rps->min_freq, true)) {
+ DRM_ERROR("Failed to reset RPS to initial values\n");
+ return false;
+ }
+
+ rps->cur_freq = rps->min_freq;
+ return true;
+}
+
+/* See the Gen9_GT_PM_Programming_Guide doc for the below */
+static bool gen9_rps_enable(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ /* Program defaults and thresholds for RPS */
+ if (IS_GEN(i915, 9))
+ intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
+ GEN9_FREQUENCY(rps->rp1_freq));
+
+ /* 1 second timeout */
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
+ GT_INTERVAL_FROM_US(i915, 1000000));
+
+ intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);
+
+ return rps_reset(rps);
+}
+
+static bool gen8_rps_enable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
+ HSW_FREQUENCY(rps->rp1_freq));
+
+ /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
+ 100000000 / 128); /* 1 second timeout */
+
+ intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+
+ return rps_reset(rps);
+}
+
+static bool gen6_rps_enable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ /* Power down if completely idle for over 50ms */
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
+ intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+
+ return rps_reset(rps);
+}
+
+static int chv_rps_max_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
+
+ switch (RUNTIME_INFO(i915)->sseu.eu_total) {
+ case 8:
+ /* (2 * 4) config */
+ val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT;
+ break;
+ case 12:
+ /* (2 * 6) config */
+ val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT;
+ break;
+ case 16:
+ /* (2 * 8) config */
+ default:
+ /* Setting (2 * 8) Min RP0 for any other combination */
+ val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT;
+ break;
+ }
+
+ return val & FB_GFX_FREQ_FUSE_MASK;
+}
+
+static int chv_rps_rpe_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
+ val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;
+
+ return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
+}
+
+static int chv_rps_guar_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
+
+ return val & FB_GFX_FREQ_FUSE_MASK;
+}
+
+static u32 chv_rps_min_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
+ val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;
+
+ return val & FB_GFX_FREQ_FUSE_MASK;
+}
+
+static bool chv_rps_enable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ /* 1: Program defaults and thresholds for RPS*/
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
+ intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
+ intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
+
+ intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+
+ /* 2: Enable RPS */
+ intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
+ GEN6_RP_MEDIA_HW_NORMAL_MODE |
+ GEN6_RP_MEDIA_IS_GFX |
+ GEN6_RP_ENABLE |
+ GEN6_RP_UP_BUSY_AVG |
+ GEN6_RP_DOWN_IDLE_AVG);
+
+ /* Setting Fixed Bias */
+ vlv_punit_get(i915);
+
+ val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
+ vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
+
+ val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+
+ vlv_punit_put(i915);
+
+ /* RPS code assumes GPLL is used */
+ WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
+
+ DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+ DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+
+ return rps_reset(rps);
+}
+
+static int vlv_rps_guar_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val, rp1;
+
+ val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
+
+ rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
+ rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
+
+ return rp1;
+}
+
+static int vlv_rps_max_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val, rp0;
+
+ val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
+
+ rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
+ /* Clamp to max */
+ rp0 = min_t(u32, rp0, 0xea);
+
+ return rp0;
+}
+
+static int vlv_rps_rpe_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val, rpe;
+
+ val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
+ rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
+ val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
+ rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
+
+ return rpe;
+}
+
+static int vlv_rps_min_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
+ /*
+ * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
+ * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
+ * a BYT-M B0 the above register contains 0xbf. Moreover when setting
+ * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
+ * to make sure it matches what Punit accepts.
+ */
+ return max_t(u32, val, 0xc0);
+}
+
+static bool vlv_rps_enable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
+ intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
+ intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
+
+ intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+
+ intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
+ GEN6_RP_MEDIA_TURBO |
+ GEN6_RP_MEDIA_HW_NORMAL_MODE |
+ GEN6_RP_MEDIA_IS_GFX |
+ GEN6_RP_ENABLE |
+ GEN6_RP_UP_BUSY_AVG |
+ GEN6_RP_DOWN_IDLE_CONT);
+
+ vlv_punit_get(i915);
+
+ /* Setting Fixed Bias */
+ val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
+ vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
+
+ val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+
+ vlv_punit_put(i915);
+
+ /* RPS code assumes GPLL is used */
+ WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
+
+ DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+ DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+
+ return rps_reset(rps);
+}
+
+static unsigned long __ips_gfx_val(struct intel_ips *ips)
+{
+ struct intel_rps *rps = container_of(ips, typeof(*rps), ips);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ unsigned long t, corr, state1, corr2, state2;
+ u32 pxvid, ext_v;
+
+ lockdep_assert_held(&mchdev_lock);
+
+ pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq));
+ pxvid = (pxvid >> 24) & 0x7f;
+ ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid);
+
+ state1 = ext_v;
+
+ /* Revel in the empirically derived constants */
+
+ /* Correction factor in 1/100000 units */
+ t = ips_mch_val(uncore);
+ if (t > 80)
+ corr = t * 2349 + 135940;
+ else if (t >= 50)
+ corr = t * 964 + 29317;
+ else /* < 50 */
+ corr = t * 301 + 1004;
+
+ corr = corr * 150142 * state1 / 10000 - 78642;
+ corr /= 100000;
+ corr2 = corr * ips->corr;
+
+ state2 = corr2 * state1 / 10000;
+ state2 /= 100; /* convert to mW */
+
+ __gen5_ips_update(ips);
+
+ return ips->gfx_power + state2;
+}
+
+void intel_rps_enable(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ if (IS_CHERRYVIEW(i915))
+ rps->enabled = chv_rps_enable(rps);
+ else if (IS_VALLEYVIEW(i915))
+ rps->enabled = vlv_rps_enable(rps);
+ else if (INTEL_GEN(i915) >= 9)
+ rps->enabled = gen9_rps_enable(rps);
+ else if (INTEL_GEN(i915) >= 8)
+ rps->enabled = gen8_rps_enable(rps);
+ else if (INTEL_GEN(i915) >= 6)
+ rps->enabled = gen6_rps_enable(rps);
+ else if (IS_IRONLAKE_M(i915))
+ rps->enabled = gen5_rps_enable(rps);
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+ if (!rps->enabled)
+ return;
+
+ WARN_ON(rps->max_freq < rps->min_freq);
+ WARN_ON(rps->idle_freq > rps->max_freq);
+
+ WARN_ON(rps->efficient_freq < rps->min_freq);
+ WARN_ON(rps->efficient_freq > rps->max_freq);
+}
+
+static void gen6_rps_disable(struct intel_rps *rps)
+{
+ set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
+}
+
+void intel_rps_disable(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ rps->enabled = false;
+
+ if (INTEL_GEN(i915) >= 6)
+ gen6_rps_disable(rps);
+ else if (IS_IRONLAKE_M(i915))
+ gen5_rps_disable(rps);
+}
+
+static int byt_gpu_freq(struct intel_rps *rps, int val)
+{
+ /*
+ * N = val - 0xb7
+ * Slow = Fast = GPLL ref * N
+ */
+ return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
+}
+
+static int byt_freq_opcode(struct intel_rps *rps, int val)
+{
+ return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
+}
+
+static int chv_gpu_freq(struct intel_rps *rps, int val)
+{
+ /*
+ * N = val / 2
+ * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
+ */
+ return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
+}
+
+static int chv_freq_opcode(struct intel_rps *rps, int val)
+{
+ /* CHV needs even values */
+ return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
+}
+
+int intel_gpu_freq(struct intel_rps *rps, int val)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ if (INTEL_GEN(i915) >= 9)
+ return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
+ GEN9_FREQ_SCALER);
+ else if (IS_CHERRYVIEW(i915))
+ return chv_gpu_freq(rps, val);
+ else if (IS_VALLEYVIEW(i915))
+ return byt_gpu_freq(rps, val);
+ else
+ return val * GT_FREQUENCY_MULTIPLIER;
+}
+
+int intel_freq_opcode(struct intel_rps *rps, int val)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ if (INTEL_GEN(i915) >= 9)
+ return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
+ GT_FREQUENCY_MULTIPLIER);
+ else if (IS_CHERRYVIEW(i915))
+ return chv_freq_opcode(rps, val);
+ else if (IS_VALLEYVIEW(i915))
+ return byt_freq_opcode(rps, val);
+ else
+ return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
+}
+
+static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ rps->gpll_ref_freq =
+ vlv_get_cck_clock(i915, "GPLL ref",
+ CCK_GPLL_CLOCK_CONTROL,
+ i915->czclk_freq);
+
+ DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq);
+}
+
+static void vlv_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ vlv_iosf_sb_get(i915,
+ BIT(VLV_IOSF_SB_PUNIT) |
+ BIT(VLV_IOSF_SB_NC) |
+ BIT(VLV_IOSF_SB_CCK));
+
+ vlv_init_gpll_ref_freq(rps);
+
+ val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+ switch ((val >> 6) & 3) {
+ case 0:
+ case 1:
+ i915->mem_freq = 800;
+ break;
+ case 2:
+ i915->mem_freq = 1066;
+ break;
+ case 3:
+ i915->mem_freq = 1333;
+ break;
+ }
+ DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+
+ rps->max_freq = vlv_rps_max_freq(rps);
+ rps->rp0_freq = rps->max_freq;
+ DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->max_freq),
+ rps->max_freq);
+
+ rps->efficient_freq = vlv_rps_rpe_freq(rps);
+ DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->efficient_freq),
+ rps->efficient_freq);
+
+ rps->rp1_freq = vlv_rps_guar_freq(rps);
+ DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->rp1_freq),
+ rps->rp1_freq);
+
+ rps->min_freq = vlv_rps_min_freq(rps);
+ DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->min_freq),
+ rps->min_freq);
+
+ vlv_iosf_sb_put(i915,
+ BIT(VLV_IOSF_SB_PUNIT) |
+ BIT(VLV_IOSF_SB_NC) |
+ BIT(VLV_IOSF_SB_CCK));
+}
+
+static void chv_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ vlv_iosf_sb_get(i915,
+ BIT(VLV_IOSF_SB_PUNIT) |
+ BIT(VLV_IOSF_SB_NC) |
+ BIT(VLV_IOSF_SB_CCK));
+
+ vlv_init_gpll_ref_freq(rps);
+
+ val = vlv_cck_read(i915, CCK_FUSE_REG);
+
+ switch ((val >> 2) & 0x7) {
+ case 3:
+ i915->mem_freq = 2000;
+ break;
+ default:
+ i915->mem_freq = 1600;
+ break;
+ }
+ DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+
+ rps->max_freq = chv_rps_max_freq(rps);
+ rps->rp0_freq = rps->max_freq;
+ DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->max_freq),
+ rps->max_freq);
+
+ rps->efficient_freq = chv_rps_rpe_freq(rps);
+ DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->efficient_freq),
+ rps->efficient_freq);
+
+ rps->rp1_freq = chv_rps_guar_freq(rps);
+ DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->rp1_freq),
+ rps->rp1_freq);
+
+ rps->min_freq = chv_rps_min_freq(rps);
+ DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->min_freq),
+ rps->min_freq);
+
+ vlv_iosf_sb_put(i915,
+ BIT(VLV_IOSF_SB_PUNIT) |
+ BIT(VLV_IOSF_SB_NC) |
+ BIT(VLV_IOSF_SB_CCK));
+
+ WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
+ rps->min_freq) & 1,
+ "Odd GPU freq values\n");
+}
+
+static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
+{
+ ei->ktime = ktime_get_raw();
+ ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT);
+ ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT);
+}
+
+static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ const struct intel_rps_ei *prev = &rps->ei;
+ struct intel_rps_ei now;
+ u32 events = 0;
+
+ if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+ return 0;
+
+ vlv_c0_read(uncore, &now);
+
+ if (prev->ktime) {
+ u64 time, c0;
+ u32 render, media;
+
+ time = ktime_us_delta(now.ktime, prev->ktime);
+
+ time *= rps_to_i915(rps)->czclk_freq;
+
+ /* Workload can be split between render + media,
+ * e.g. SwapBuffers being blitted in X after being rendered in
+ * mesa. To account for this we need to combine both engines
+ * into our activity counter.
+ */
+ render = now.render_c0 - prev->render_c0;
+ media = now.media_c0 - prev->media_c0;
+ c0 = max(render, media);
+ c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
+
+ if (c0 > time * rps->power.up_threshold)
+ events = GEN6_PM_RP_UP_THRESHOLD;
+ else if (c0 < time * rps->power.down_threshold)
+ events = GEN6_PM_RP_DOWN_THRESHOLD;
+ }
+
+ rps->ei = now;
+ return events;
+}
+
+static void rps_work(struct work_struct *work)
+{
+ struct intel_rps *rps = container_of(work, typeof(*rps), work);
+ struct intel_gt *gt = rps_to_gt(rps);
+ bool client_boost = false;
+ int new_freq, adj, min, max;
+ u32 pm_iir = 0;
+
+ spin_lock_irq(&gt->irq_lock);
+ pm_iir = fetch_and_zero(&rps->pm_iir);
+ client_boost = atomic_read(&rps->num_waiters);
+ spin_unlock_irq(&gt->irq_lock);
+
+ /* Make sure we didn't queue anything we're not going to process. */
+ if ((pm_iir & rps->pm_events) == 0 && !client_boost)
+ goto out;
+
+ mutex_lock(&rps->lock);
+
+ pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
+
+ adj = rps->last_adj;
+ new_freq = rps->cur_freq;
+ min = rps->min_freq_softlimit;
+ max = rps->max_freq_softlimit;
+ if (client_boost)
+ max = rps->max_freq;
+ if (client_boost && new_freq < rps->boost_freq) {
+ new_freq = rps->boost_freq;
+ adj = 0;
+ } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
+ if (adj > 0)
+ adj *= 2;
+ else /* CHV needs even encode values */
+ adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1;
+
+ if (new_freq >= rps->max_freq_softlimit)
+ adj = 0;
+ } else if (client_boost) {
+ adj = 0;
+ } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
+ if (rps->cur_freq > rps->efficient_freq)
+ new_freq = rps->efficient_freq;
+ else if (rps->cur_freq > rps->min_freq_softlimit)
+ new_freq = rps->min_freq_softlimit;
+ adj = 0;
+ } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
+ if (adj < 0)
+ adj *= 2;
+ else /* CHV needs even encode values */
+ adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1;
+
+ if (new_freq <= rps->min_freq_softlimit)
+ adj = 0;
+ } else { /* unknown event */
+ adj = 0;
+ }
+
+ rps->last_adj = adj;
+
+ /*
+ * Limit deboosting and boosting to keep ourselves at the extremes
+ * when in the respective power modes (i.e. slowly decrease frequencies
+ * while in the HIGH_POWER zone and slowly increase frequencies while
+ * in the LOW_POWER zone). On idle, we will hit the timeout and drop
+ * to the next level quickly, and conversely if busy we expect to
+ * hit a waitboost and rapidly switch into max power.
+ */
+ if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
+ (adj > 0 && rps->power.mode == LOW_POWER))
+ rps->last_adj = 0;
+
+ /* sysfs frequency interfaces may have snuck in while servicing the
+ * interrupt
+ */
+ new_freq += adj;
+ new_freq = clamp_t(int, new_freq, min, max);
+
+ if (intel_rps_set(rps, new_freq)) {
+ DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
+ rps->last_adj = 0;
+ }
+
+ mutex_unlock(&rps->lock);
+
+out:
+ spin_lock_irq(&gt->irq_lock);
+ gen6_gt_pm_unmask_irq(gt, rps->pm_events);
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+ const u32 events = rps->pm_events & pm_iir;
+
+ lockdep_assert_held(&gt->irq_lock);
+
+ if (unlikely(!events))
+ return;
+
+ gen6_gt_pm_mask_irq(gt, events);
+
+ rps->pm_iir |= events;
+ schedule_work(&rps->work);
+}
+
+void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+
+ if (pm_iir & rps->pm_events) {
+ spin_lock(&gt->irq_lock);
+ gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events);
+ rps->pm_iir |= pm_iir & rps->pm_events;
+ schedule_work(&rps->work);
+ spin_unlock(&gt->irq_lock);
+ }
+
+ if (INTEL_GEN(gt->i915) >= 8)
+ return;
+
+ if (pm_iir & PM_VEBOX_USER_INTERRUPT)
+ intel_engine_signal_breadcrumbs(gt->engine[VECS0]);
+
+ if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
+ DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
+}
+
+void gen5_rps_irq_handler(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u32 busy_up, busy_down, max_avg, min_avg;
+ u8 new_freq;
+
+ spin_lock(&mchdev_lock);
+
+ intel_uncore_write16(uncore,
+ MEMINTRSTS,
+ intel_uncore_read(uncore, MEMINTRSTS));
+
+ intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
+ busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
+ busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
+ max_avg = intel_uncore_read(uncore, RCBMAXAVG);
+ min_avg = intel_uncore_read(uncore, RCBMINAVG);
+
+ /* Handle RCS change request from hw */
+ new_freq = rps->cur_freq;
+ if (busy_up > max_avg)
+ new_freq++;
+ else if (busy_down < min_avg)
+ new_freq--;
+ new_freq = clamp(new_freq,
+ rps->min_freq_softlimit,
+ rps->max_freq_softlimit);
+
+ if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq))
+ rps->cur_freq = new_freq;
+
+ spin_unlock(&mchdev_lock);
+}
+
+void intel_rps_init_early(struct intel_rps *rps)
+{
+ mutex_init(&rps->lock);
+ mutex_init(&rps->power.mutex);
+
+ INIT_WORK(&rps->work, rps_work);
+
+ atomic_set(&rps->num_waiters, 0);
+}
+
+void intel_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ if (IS_CHERRYVIEW(i915))
+ chv_rps_init(rps);
+ else if (IS_VALLEYVIEW(i915))
+ vlv_rps_init(rps);
+ else if (INTEL_GEN(i915) >= 6)
+ gen6_rps_init(rps);
+ else if (IS_IRONLAKE_M(i915))
+ gen5_rps_init(rps);
+
+ /* Derive initial user preferences/limits from the hardware limits */
+ rps->max_freq_softlimit = rps->max_freq;
+ rps->min_freq_softlimit = rps->min_freq;
+
+ /* After setting max-softlimit, find the overclock max freq */
+ if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
+ u32 params = 0;
+
+ sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
+ &params, NULL);
+ if (params & BIT(31)) { /* OC supported */
+ DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
+ (rps->max_freq & 0xff) * 50,
+ (params & 0xff) * 50);
+ rps->max_freq = params & 0xff;
+ }
+ }
+
+ /* Finally allow us to boost to max by default */
+ rps->boost_freq = rps->max_freq;
+ rps->idle_freq = rps->min_freq;
+ rps->cur_freq = rps->idle_freq;
+
+ rps->pm_intrmsk_mbz = 0;
+
+ /*
+ * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer
+ * if GEN6_PM_UP_EI_EXPIRED is masked.
+ *
+ * TODO: verify if this can be reproduced on VLV,CHV.
+ */
+ if (INTEL_GEN(i915) <= 7)
+ rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
+
+ if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11)
+ rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+}
+
+u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 cagf;
+
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+ cagf = (rpstat >> 8) & 0xff;
+ else if (INTEL_GEN(i915) >= 9)
+ cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
+ else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+ cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
+ else
+ cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
+
+ return cagf;
+}
+
+static u32 read_cagf(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 freq;
+
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+ vlv_punit_get(i915);
+ freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+ vlv_punit_put(i915);
+ } else {
+ freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1);
+ }
+
+ return intel_rps_get_cagf(rps, freq);
+}
+
+u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
+{
+ struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm;
+ intel_wakeref_t wakeref;
+ u32 freq = 0;
+
+ with_intel_runtime_pm_if_in_use(rpm, wakeref)
+ freq = intel_gpu_freq(rps, read_cagf(rps));
+
+ return freq;
+}
+
+/* External interface for intel_ips.ko */
+
+static struct drm_i915_private __rcu *ips_mchdev;
+
+/**
+ * Tells the intel_ips driver that the i915 driver is now loaded, if
+ * IPS got loaded first.
+ *
+ * This awkward dance is so that neither module has to depend on the
+ * other in order for IPS to do the appropriate communication of
+ * GPU turbo limits to i915.
+ */
+static void
+ips_ping_for_i915_load(void)
+{
+ void (*link)(void);
+
+ link = symbol_get(ips_link_to_i915_driver);
+ if (link) {
+ link();
+ symbol_put(ips_link_to_i915_driver);
+ }
+}
+
+void intel_rps_driver_register(struct intel_rps *rps)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+
+ /*
+ * We only register the i915 ips part with intel-ips once everything is
+ * set up, to avoid intel-ips sneaking in and reading bogus values.
+ */
+ if (IS_GEN(gt->i915, 5)) {
+ GEM_BUG_ON(ips_mchdev);
+ rcu_assign_pointer(ips_mchdev, gt->i915);
+ ips_ping_for_i915_load();
+ }
+}
+
+void intel_rps_driver_unregister(struct intel_rps *rps)
+{
+ if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps))
+ rcu_assign_pointer(ips_mchdev, NULL);
+}
+
+static struct drm_i915_private *mchdev_get(void)
+{
+ struct drm_i915_private *i915;
+
+ rcu_read_lock();
+ i915 = rcu_dereference(ips_mchdev);
+ if (!kref_get_unless_zero(&i915->drm.ref))
+ i915 = NULL;
+ rcu_read_unlock();
+
+ return i915;
+}
+
+/**
+ * i915_read_mch_val - return value for IPS use
+ *
+ * Calculate and return a value for the IPS driver to use when deciding whether
+ * we have thermal and power headroom to increase CPU or GPU power budget.
+ */
+unsigned long i915_read_mch_val(void)
+{
+ struct drm_i915_private *i915;
+ unsigned long chipset_val = 0;
+ unsigned long graphics_val = 0;
+ intel_wakeref_t wakeref;
+
+ i915 = mchdev_get();
+ if (!i915)
+ return 0;
+
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+ struct intel_ips *ips = &i915->gt.rps.ips;
+
+ spin_lock_irq(&mchdev_lock);
+ chipset_val = __ips_chipset_val(ips);
+ graphics_val = __ips_gfx_val(ips);
+ spin_unlock_irq(&mchdev_lock);
+ }
+
+ drm_dev_put(&i915->drm);
+ return chipset_val + graphics_val;
+}
+EXPORT_SYMBOL_GPL(i915_read_mch_val);
+
+/**
+ * i915_gpu_raise - raise GPU frequency limit
+ *
+ * Raise the limit; IPS indicates we have thermal headroom.
+ */
+bool i915_gpu_raise(void)
+{
+ struct drm_i915_private *i915;
+ struct intel_rps *rps;
+
+ i915 = mchdev_get();
+ if (!i915)
+ return false;
+
+ rps = &i915->gt.rps;
+
+ spin_lock_irq(&mchdev_lock);
+ if (rps->max_freq_softlimit < rps->max_freq)
+ rps->max_freq_softlimit++;
+ spin_unlock_irq(&mchdev_lock);
+
+ drm_dev_put(&i915->drm);
+ return true;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_raise);
+
+/**
+ * i915_gpu_lower - lower GPU frequency limit
+ *
+ * IPS indicates we're close to a thermal limit, so throttle back the GPU
+ * frequency maximum.
+ */
+bool i915_gpu_lower(void)
+{
+ struct drm_i915_private *i915;
+ struct intel_rps *rps;
+
+ i915 = mchdev_get();
+ if (!i915)
+ return false;
+
+ rps = &i915->gt.rps;
+
+ spin_lock_irq(&mchdev_lock);
+ if (rps->max_freq_softlimit > rps->min_freq)
+ rps->max_freq_softlimit--;
+ spin_unlock_irq(&mchdev_lock);
+
+ drm_dev_put(&i915->drm);
+ return true;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_lower);
+
+/**
+ * i915_gpu_busy - indicate GPU business to IPS
+ *
+ * Tell the IPS driver whether or not the GPU is busy.
+ */
+bool i915_gpu_busy(void)
+{
+ struct drm_i915_private *i915;
+ bool ret;
+
+ i915 = mchdev_get();
+ if (!i915)
+ return false;
+
+ ret = i915->gt.awake;
+
+ drm_dev_put(&i915->drm);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_busy);
+
+/**
+ * i915_gpu_turbo_disable - disable graphics turbo
+ *
+ * Disable graphics turbo by resetting the max frequency and setting the
+ * current frequency to the default.
+ */
+bool i915_gpu_turbo_disable(void)
+{
+ struct drm_i915_private *i915;
+ struct intel_rps *rps;
+ bool ret;
+
+ i915 = mchdev_get();
+ if (!i915)
+ return false;
+
+ rps = &i915->gt.rps;
+
+ spin_lock_irq(&mchdev_lock);
+ rps->max_freq_softlimit = rps->min_freq;
+ ret = gen5_rps_set(&i915->gt.rps, rps->min_freq);
+ spin_unlock_irq(&mchdev_lock);
+
+ drm_dev_put(&i915->drm);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
new file mode 100644
index 000000000000..dfa98194f3b2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -0,0 +1,39 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RPS_H
+#define INTEL_RPS_H
+
+#include "intel_rps_types.h"
+
+struct i915_request;
+
+void intel_rps_init_early(struct intel_rps *rps);
+void intel_rps_init(struct intel_rps *rps);
+
+void intel_rps_driver_register(struct intel_rps *rps);
+void intel_rps_driver_unregister(struct intel_rps *rps);
+
+void intel_rps_enable(struct intel_rps *rps);
+void intel_rps_disable(struct intel_rps *rps);
+
+void intel_rps_park(struct intel_rps *rps);
+void intel_rps_unpark(struct intel_rps *rps);
+void intel_rps_boost(struct i915_request *rq);
+
+int intel_rps_set(struct intel_rps *rps, u8 val);
+void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive);
+
+int intel_gpu_freq(struct intel_rps *rps, int val);
+int intel_freq_opcode(struct intel_rps *rps, int val);
+u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1);
+u32 intel_rps_read_actual_frequency(struct intel_rps *rps);
+
+void gen5_rps_irq_handler(struct intel_rps *rps);
+void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
+void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
+
+#endif /* INTEL_RPS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h
new file mode 100644
index 000000000000..c2e279154bd5
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h
@@ -0,0 +1,93 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RPS_TYPES_H
+#define INTEL_RPS_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/ktime.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+struct intel_ips {
+ u64 last_count1;
+ unsigned long last_time1;
+ unsigned long chipset_power;
+ u64 last_count2;
+ u64 last_time2;
+ unsigned long gfx_power;
+ u8 corr;
+
+ int c, m;
+};
+
+struct intel_rps_ei {
+ ktime_t ktime;
+ u32 render_c0;
+ u32 media_c0;
+};
+
+struct intel_rps {
+ struct mutex lock; /* protects enabling and the worker */
+
+ /*
+ * work, interrupts_enabled and pm_iir are protected by
+ * dev_priv->irq_lock
+ */
+ struct work_struct work;
+ bool enabled;
+ bool active;
+ u32 pm_iir;
+
+ /* PM interrupt bits that should never be masked */
+ u32 pm_intrmsk_mbz;
+ u32 pm_events;
+
+ /* Frequencies are stored in potentially platform dependent multiples.
+ * In other words, *_freq needs to be multiplied by X to be interesting.
+ * Soft limits are those which are used for the dynamic reclocking done
+ * by the driver (raise frequencies under heavy loads, and lower for
+ * lighter loads). Hard limits are those imposed by the hardware.
+ *
+ * A distinction is made for overclocking, which is never enabled by
+ * default, and is considered to be above the hard limit if it's
+ * possible at all.
+ */
+ u8 cur_freq; /* Current frequency (cached, may not == HW) */
+ u8 last_freq; /* Last SWREQ frequency */
+ u8 min_freq_softlimit; /* Minimum frequency permitted by the driver */
+ u8 max_freq_softlimit; /* Max frequency permitted by the driver */
+ u8 max_freq; /* Maximum frequency, RP0 if not overclocking */
+ u8 min_freq; /* AKA RPn. Minimum frequency */
+ u8 boost_freq; /* Frequency to request when wait boosting */
+ u8 idle_freq; /* Frequency to request when we are idle */
+ u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */
+ u8 rp1_freq; /* "less than" RP0 power/freqency */
+ u8 rp0_freq; /* Non-overclocked max frequency. */
+ u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */
+
+ int last_adj;
+
+ struct {
+ struct mutex mutex;
+
+ enum { LOW_POWER, BETWEEN, HIGH_POWER } mode;
+ unsigned int interactive;
+
+ u8 up_threshold; /* Current %busy required to uplock */
+ u8 down_threshold; /* Current %busy required to downclock */
+ } power;
+
+ atomic_t num_waiters;
+ atomic_t boosts;
+
+ /* manual wa residency calculations */
+ struct intel_rps_ei ei;
+ struct intel_ips ips;
+};
+
+#endif /* INTEL_RPS_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 0f959694303c..ee5dc4fbdeb9 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -4,17 +4,20 @@
* Copyright © 2016-2018 Intel Corporation
*/
-#include "gt/intel_gt_types.h"
-
#include "i915_drv.h"
#include "i915_active.h"
#include "i915_syncmap.h"
-#include "gt/intel_timeline.h"
+#include "intel_gt.h"
+#include "intel_ring.h"
+#include "intel_timeline.h"
#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
+#define CACHELINE_BITS 6
+#define CACHELINE_FREE CACHELINE_BITS
+
struct intel_timeline_hwsp {
struct intel_gt *gt;
struct intel_gt_timelines *gt_timelines;
@@ -23,14 +26,6 @@ struct intel_timeline_hwsp {
u64 free_bitmap;
};
-struct intel_timeline_cacheline {
- struct i915_active active;
- struct intel_timeline_hwsp *hwsp;
- void *vaddr;
-#define CACHELINE_BITS 6
-#define CACHELINE_FREE CACHELINE_BITS
-};
-
static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
@@ -133,7 +128,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
i915_active_fini(&cl->active);
- kfree(cl);
+ kfree_rcu(cl, rcu);
}
__i915_active_call
@@ -254,7 +249,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
mutex_init(&timeline->mutex);
- INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
+ INIT_ACTIVE_FENCE(&timeline->last_request);
INIT_LIST_HEAD(&timeline->requests);
i915_syncmap_init(&timeline->sync);
@@ -262,7 +257,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
return 0;
}
-static void timelines_init(struct intel_gt *gt)
+void intel_gt_init_timelines(struct intel_gt *gt)
{
struct intel_gt_timelines *timelines = &gt->timelines;
@@ -273,15 +268,11 @@ static void timelines_init(struct intel_gt *gt)
INIT_LIST_HEAD(&timelines->hwsp_free_list);
}
-void intel_timelines_init(struct drm_i915_private *i915)
-{
- timelines_init(&i915->gt);
-}
-
void intel_timeline_fini(struct intel_timeline *timeline)
{
GEM_BUG_ON(atomic_read(&timeline->pin_count));
GEM_BUG_ON(!list_empty(&timeline->requests));
+ GEM_BUG_ON(timeline->retire);
if (timeline->hwsp_cacheline)
cacheline_free(timeline->hwsp_cacheline);
@@ -337,34 +328,52 @@ int intel_timeline_pin(struct intel_timeline *tl)
void intel_timeline_enter(struct intel_timeline *tl)
{
struct intel_gt_timelines *timelines = &tl->gt->timelines;
- unsigned long flags;
+ /*
+ * Pretend we are serialised by the timeline->mutex.
+ *
+ * While generally true, there are a few exceptions to the rule
+ * for the engine->kernel_context being used to manage power
+ * transitions. As the engine_park may be called from under any
+ * timeline, it uses the power mutex as a global serialisation
+ * lock to prevent any other request entering its timeline.
+ *
+ * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
+ *
+ * However, intel_gt_retire_request() does not know which engine
+ * it is retiring along and so cannot partake in the engine-pm
+ * barrier, and there we use the tl->active_count as a means to
+ * pin the timeline in the active_list while the locks are dropped.
+ * Ergo, as that is outside of the engine-pm barrier, we need to
+ * use atomic to manipulate tl->active_count.
+ */
lockdep_assert_held(&tl->mutex);
-
GEM_BUG_ON(!atomic_read(&tl->pin_count));
- if (tl->active_count++)
+
+ if (atomic_add_unless(&tl->active_count, 1, 0))
return;
- GEM_BUG_ON(!tl->active_count); /* overflow? */
- spin_lock_irqsave(&timelines->lock, flags);
- list_add(&tl->link, &timelines->active_list);
- spin_unlock_irqrestore(&timelines->lock, flags);
+ spin_lock(&timelines->lock);
+ if (!atomic_fetch_inc(&tl->active_count))
+ list_add_tail(&tl->link, &timelines->active_list);
+ spin_unlock(&timelines->lock);
}
void intel_timeline_exit(struct intel_timeline *tl)
{
struct intel_gt_timelines *timelines = &tl->gt->timelines;
- unsigned long flags;
+ /* See intel_timeline_enter() */
lockdep_assert_held(&tl->mutex);
- GEM_BUG_ON(!tl->active_count);
- if (--tl->active_count)
+ GEM_BUG_ON(!atomic_read(&tl->active_count));
+ if (atomic_add_unless(&tl->active_count, -1, 1))
return;
- spin_lock_irqsave(&timelines->lock, flags);
- list_del(&tl->link);
- spin_unlock_irqrestore(&timelines->lock, flags);
+ spin_lock(&timelines->lock);
+ if (atomic_dec_and_test(&tl->active_count))
+ list_del(&tl->link);
+ spin_unlock(&timelines->lock);
/*
* Since this timeline is idle, all bariers upon which we were waiting
@@ -500,46 +509,35 @@ int intel_timeline_read_hwsp(struct i915_request *from,
struct i915_request *to,
u32 *hwsp)
{
- struct intel_timeline *tl;
+ struct intel_timeline_cacheline *cl;
int err;
+ GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));
+
rcu_read_lock();
- tl = rcu_dereference(from->timeline);
- if (i915_request_completed(from) || !kref_get_unless_zero(&tl->kref))
- tl = NULL;
+ cl = rcu_dereference(from->hwsp_cacheline);
+ if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
+ goto unlock; /* seqno wrapped and completed! */
+ if (unlikely(i915_request_completed(from)))
+ goto release;
rcu_read_unlock();
- if (!tl) /* already completed */
- return 1;
- GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl);
-
- err = -EBUSY;
- if (mutex_trylock(&tl->mutex)) {
- struct intel_timeline_cacheline *cl = from->hwsp_cacheline;
-
- if (i915_request_completed(from)) {
- err = 1;
- goto unlock;
- }
+ err = cacheline_ref(cl, to);
+ if (err)
+ goto out;
- err = cacheline_ref(cl, to);
- if (err)
- goto unlock;
+ *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
+ ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
- if (likely(cl == tl->hwsp_cacheline)) {
- *hwsp = tl->hwsp_offset;
- } else { /* across a seqno wrap, recover the original offset */
- *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
- ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) *
- CACHELINE_BYTES;
- }
+out:
+ i915_active_release(&cl->active);
+ return err;
+release:
+ i915_active_release(&cl->active);
unlock:
- mutex_unlock(&tl->mutex);
- }
- intel_timeline_put(tl);
-
- return err;
+ rcu_read_unlock();
+ return 1;
}
void intel_timeline_unpin(struct intel_timeline *tl)
@@ -562,7 +560,7 @@ void __intel_timeline_free(struct kref *kref)
kfree_rcu(timeline, rcu);
}
-static void timelines_fini(struct intel_gt *gt)
+void intel_gt_fini_timelines(struct intel_gt *gt)
{
struct intel_gt_timelines *timelines = &gt->timelines;
@@ -570,11 +568,6 @@ static void timelines_fini(struct intel_gt *gt)
GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
}
-void intel_timelines_fini(struct drm_i915_private *i915)
-{
- timelines_fini(&i915->gt);
-}
-
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index f583af1ba18d..f5b7eade3809 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -88,7 +88,7 @@ int intel_timeline_read_hwsp(struct i915_request *from,
struct i915_request *until,
u32 *hwsp_offset);
-void intel_timelines_init(struct drm_i915_private *i915);
-void intel_timelines_fini(struct drm_i915_private *i915);
+void intel_gt_init_timelines(struct intel_gt *gt);
+void intel_gt_fini_timelines(struct intel_gt *gt);
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 98d9ee166379..02181c5020db 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -10,14 +10,15 @@
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/mutex.h>
+#include <linux/rcupdate.h>
#include <linux/types.h>
#include "i915_active_types.h"
-struct drm_i915_private;
struct i915_vma;
-struct intel_timeline_cacheline;
struct i915_syncmap;
+struct intel_gt;
+struct intel_timeline_hwsp;
struct intel_timeline {
u64 fence_context;
@@ -42,7 +43,7 @@ struct intel_timeline {
* from the intel_context caller plus internal atomicity.
*/
atomic_t pin_count;
- unsigned int active_count;
+ atomic_t active_count;
const u32 *hwsp_seqno;
struct i915_vma *hwsp_ggtt;
@@ -66,6 +67,9 @@ struct intel_timeline {
*/
struct i915_active_fence last_request;
+ /** A chain of completed timelines ready for early retirement. */
+ struct intel_timeline *retire;
+
/**
* We track the most recent seqno that we wait on in every context so
* that we only have to emit a new await and dependency on a more
@@ -84,4 +88,13 @@ struct intel_timeline {
struct rcu_head rcu;
};
+struct intel_timeline_cacheline {
+ struct i915_active active;
+
+ struct intel_timeline_hwsp *hwsp;
+ void *vaddr;
+
+ struct rcu_head rcu;
+};
+
#endif /* __I915_TIMELINE_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index af8a8183154a..195ccf7db272 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -6,7 +6,9 @@
#include "i915_drv.h"
#include "intel_context.h"
+#include "intel_engine_pm.h"
#include "intel_gt.h"
+#include "intel_ring.h"
#include "intel_workarounds.h"
/**
@@ -145,21 +147,27 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
}
}
-static void
-wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
- u32 val)
+static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
+ u32 val, u32 read_mask)
{
struct i915_wa wa = {
.reg = reg,
.mask = mask,
.val = val,
- .read = mask,
+ .read = read_mask,
};
_wa_add(wal, &wa);
}
static void
+wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
+ u32 val)
+{
+ wa_add(wal, reg, mask, val, mask);
+}
+
+static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
@@ -567,9 +575,24 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
+ u32 val;
+
/* Wa_1409142259:tgl */
WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
+
+ /* Wa_1604555607:tgl */
+ val = intel_uncore_read(engine->uncore, FF_MODE2);
+ val &= ~FF_MODE2_TDS_TIMER_MASK;
+ val |= FF_MODE2_TDS_TIMER_128;
+ /*
+ * FIXME: FF_MODE2 register is not readable till TGL B0. We can
+ * enable verification of WA from the later steppings, which enables
+ * the read of FF_MODE2.
+ */
+ wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, val,
+ IS_TGL_REVID(engine->i915, TGL_REVID_A0, TGL_REVID_A0) ? 0 :
+ FF_MODE2_TDS_TIMER_MASK);
}
static void
@@ -1215,6 +1238,26 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
static void tgl_whitelist_build(struct intel_engine_cs *engine)
{
+ struct i915_wa_list *w = &engine->whitelist;
+
+ switch (engine->class) {
+ case RENDER_CLASS:
+ /*
+ * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
+ *
+ * This covers 4 registers which are next to one another :
+ * - PS_INVOCATION_COUNT
+ * - PS_INVOCATION_COUNT_UDW
+ * - PS_DEPTH_COUNT
+ * - PS_DEPTH_COUNT_UDW
+ */
+ whitelist_reg_ext(w, PS_INVOCATION_COUNT,
+ RING_FORCE_TO_NONPRIV_ACCESS_RD |
+ RING_FORCE_TO_NONPRIV_RANGE_4);
+ break;
+ default:
+ break;
+ }
}
void intel_engine_init_whitelist(struct intel_engine_cs *engine)
@@ -1294,6 +1337,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN6_RC_SLEEP_PSMI_CONTROL,
GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
GEN8_RC_SEMA_IDLE_MSG_DISABLE);
+
+ /*
+ * Wa_1606679103:tgl
+ * (see also Wa_1606682166:icl)
+ */
+ wa_write_or(wal,
+ GEN7_SARCHKMD,
+ GEN7_DISABLE_SAMPLER_PREFETCH);
}
if (IS_GEN(i915, 11)) {
@@ -1553,7 +1604,9 @@ static int engine_wa_list_verify(struct intel_context *ce,
if (IS_ERR(vma))
return PTR_ERR(vma);
+ intel_engine_pm_get(ce->engine);
rq = intel_context_create_request(ce);
+ intel_engine_pm_put(ce->engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_vma;
@@ -1563,16 +1616,17 @@ static int engine_wa_list_verify(struct intel_context *ce,
if (err)
goto err_vma;
+ i915_request_get(rq);
i915_request_add(rq);
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
err = -ETIME;
- goto err_vma;
+ goto err_rq;
}
results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
if (IS_ERR(results)) {
err = PTR_ERR(results);
- goto err_vma;
+ goto err_rq;
}
err = 0;
@@ -1586,6 +1640,8 @@ static int engine_wa_list_verify(struct intel_context *ce,
i915_gem_object_unpin_map(vma->obj);
+err_rq:
+ i915_request_put(rq);
err_vma:
i915_vma_unpin(vma);
i915_vma_put(vma);
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 123db2c3f956..4e1eafa94be9 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -23,6 +23,7 @@
*/
#include "gem/i915_gem_context.h"
+#include "gt/intel_ring.h"
#include "i915_drv.h"
#include "intel_context.h"
@@ -76,7 +77,7 @@ static void advance(struct i915_request *request)
i915_request_mark_complete(request);
GEM_BUG_ON(!i915_request_completed(request));
- intel_engine_queue_breadcrumbs(request->engine);
+ intel_engine_signal_breadcrumbs(request->engine);
}
static void hw_delay_complete(struct timer_list *t)
@@ -206,16 +207,12 @@ static void mock_reset_prepare(struct intel_engine_cs *engine)
{
}
-static void mock_reset(struct intel_engine_cs *engine, bool stalled)
+static void mock_reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
GEM_BUG_ON(stalled);
}
-static void mock_reset_finish(struct intel_engine_cs *engine)
-{
-}
-
-static void mock_cancel_requests(struct intel_engine_cs *engine)
+static void mock_reset_cancel(struct intel_engine_cs *engine)
{
struct i915_request *request;
unsigned long flags;
@@ -233,6 +230,24 @@ static void mock_cancel_requests(struct intel_engine_cs *engine)
spin_unlock_irqrestore(&engine->active.lock, flags);
}
+static void mock_reset_finish(struct intel_engine_cs *engine)
+{
+}
+
+static void mock_engine_release(struct intel_engine_cs *engine)
+{
+ struct mock_engine *mock =
+ container_of(engine, typeof(*mock), base);
+
+ GEM_BUG_ON(timer_pending(&mock->hw_delay));
+
+ intel_context_unpin(engine->kernel_context);
+ intel_context_put(engine->kernel_context);
+
+ intel_engine_fini_retire(engine);
+ intel_engine_fini_breadcrumbs(engine);
+}
+
struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
const char *name,
int id)
@@ -264,9 +279,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
engine->base.submit_request = mock_submit_request;
engine->base.reset.prepare = mock_reset_prepare;
- engine->base.reset.reset = mock_reset;
+ engine->base.reset.rewind = mock_reset_rewind;
+ engine->base.reset.cancel = mock_reset_cancel;
engine->base.reset.finish = mock_reset_finish;
- engine->base.cancel_requests = mock_cancel_requests;
+
+ engine->base.release = mock_engine_release;
i915->gt.engine[id] = &engine->base;
i915->gt.engine_class[0][id] = &engine->base;
@@ -289,6 +306,7 @@ int mock_engine_init(struct intel_engine_cs *engine)
intel_engine_init_breadcrumbs(engine);
intel_engine_init_execlists(engine);
intel_engine_init__pm(engine);
+ intel_engine_init_retire(engine);
intel_engine_pool_init(&engine->pool);
ce = create_kernel_context(engine);
@@ -320,18 +338,3 @@ void mock_engine_flush(struct intel_engine_cs *engine)
void mock_engine_reset(struct intel_engine_cs *engine)
{
}
-
-void mock_engine_free(struct intel_engine_cs *engine)
-{
- struct mock_engine *mock =
- container_of(engine, typeof(*mock), base);
-
- GEM_BUG_ON(timer_pending(&mock->hw_delay));
-
- intel_context_unpin(engine->kernel_context);
- intel_context_put(engine->kernel_context);
-
- intel_engine_fini_breadcrumbs(engine);
-
- kfree(engine);
-}
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index f63a26a3e620..e874dfaa5316 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -5,6 +5,7 @@
*/
#include "i915_selftest.h"
+#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
@@ -47,35 +48,36 @@ static int context_sync(struct intel_context *ce)
mutex_lock(&tl->mutex);
do {
- struct dma_fence *fence;
+ struct i915_request *rq;
long timeout;
- fence = i915_active_fence_get(&tl->last_request);
- if (!fence)
+ if (list_empty(&tl->requests))
break;
- timeout = dma_fence_wait_timeout(fence, false, HZ / 10);
+ rq = list_last_entry(&tl->requests, typeof(*rq), link);
+ i915_request_get(rq);
+
+ timeout = i915_request_wait(rq, 0, HZ / 10);
if (timeout < 0)
err = timeout;
else
- i915_request_retire_upto(to_request(fence));
+ i915_request_retire_upto(rq);
- dma_fence_put(fence);
+ i915_request_put(rq);
} while (!err);
mutex_unlock(&tl->mutex);
return err;
}
-static int __live_context_size(struct intel_engine_cs *engine,
- struct i915_gem_context *fixme)
+static int __live_context_size(struct intel_engine_cs *engine)
{
struct intel_context *ce;
struct i915_request *rq;
void *vaddr;
int err;
- ce = intel_context_create(fixme, engine);
+ ce = intel_context_create(engine);
if (IS_ERR(ce))
return PTR_ERR(ce);
@@ -103,9 +105,6 @@ static int __live_context_size(struct intel_engine_cs *engine,
*
* TLDR; this overlaps with the execlists redzone.
*/
- if (HAS_EXECLISTS(engine->i915))
- vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
-
vaddr += engine->context_size - I915_GTT_PAGE_SIZE;
memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
@@ -121,7 +120,7 @@ static int __live_context_size(struct intel_engine_cs *engine,
goto err_unpin;
/* Force the context switch */
- rq = i915_request_create(engine->kernel_context);
+ rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_unpin;
@@ -146,7 +145,6 @@ static int live_context_size(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
- struct i915_gem_context *fixme;
enum intel_engine_id id;
int err = 0;
@@ -155,10 +153,6 @@ static int live_context_size(void *arg)
* HW tries to write past the end of one.
*/
- fixme = kernel_context(gt->i915);
- if (IS_ERR(fixme))
- return PTR_ERR(fixme);
-
for_each_engine(engine, gt, id) {
struct {
struct drm_i915_gem_object *state;
@@ -183,7 +177,7 @@ static int live_context_size(void *arg)
/* Overlaps with the execlists redzone */
engine->context_size += I915_GTT_PAGE_SIZE;
- err = __live_context_size(engine, fixme);
+ err = __live_context_size(engine);
engine->context_size -= I915_GTT_PAGE_SIZE;
@@ -196,13 +190,12 @@ static int live_context_size(void *arg)
break;
}
- kernel_context_close(fixme);
return err;
}
-static int __live_active_context(struct intel_engine_cs *engine,
- struct i915_gem_context *fixme)
+static int __live_active_context(struct intel_engine_cs *engine)
{
+ unsigned long saved_heartbeat;
struct intel_context *ce;
int pass;
int err;
@@ -226,40 +219,55 @@ static int __live_active_context(struct intel_engine_cs *engine,
return -EINVAL;
}
- ce = intel_context_create(fixme, engine);
+ ce = intel_context_create(engine);
if (IS_ERR(ce))
return PTR_ERR(ce);
+ saved_heartbeat = engine->props.heartbeat_interval_ms;
+ engine->props.heartbeat_interval_ms = 0;
+
for (pass = 0; pass <= 2; pass++) {
struct i915_request *rq;
+ intel_engine_pm_get(engine);
+
rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- goto err;
+ goto out_engine;
}
err = request_sync(rq);
if (err)
- goto err;
+ goto out_engine;
/* Context will be kept active until after an idle-barrier. */
if (i915_active_is_idle(&ce->active)) {
pr_err("context is not active; expected idle-barrier (%s pass %d)\n",
engine->name, pass);
err = -EINVAL;
- goto err;
+ goto out_engine;
}
if (!intel_engine_pm_is_awake(engine)) {
pr_err("%s is asleep before idle-barrier\n",
engine->name);
err = -EINVAL;
- goto err;
+ goto out_engine;
}
+
+out_engine:
+ intel_engine_pm_put(engine);
+ if (err)
+ goto err;
}
/* Now make sure our idle-barriers are flushed */
+ err = intel_engine_flush_barriers(engine);
+ if (err)
+ goto err;
+
+ /* Wait for the barrier and in the process wait for engine to park */
err = context_sync(engine->kernel_context);
if (err)
goto err;
@@ -269,12 +277,15 @@ static int __live_active_context(struct intel_engine_cs *engine,
err = -EINVAL;
}
+ intel_engine_pm_flush(engine);
+
if (intel_engine_pm_is_awake(engine)) {
struct drm_printer p = drm_debug_printer(__func__);
intel_engine_dump(engine, &p,
- "%s is still awake after idle-barriers\n",
- engine->name);
+ "%s is still awake:%d after idle-barriers\n",
+ engine->name,
+ atomic_read(&engine->wakeref.count));
GEM_TRACE_DUMP();
err = -EINVAL;
@@ -282,6 +293,7 @@ static int __live_active_context(struct intel_engine_cs *engine,
}
err:
+ engine->props.heartbeat_interval_ms = saved_heartbeat;
intel_context_put(ce);
return err;
}
@@ -290,23 +302,11 @@ static int live_active_context(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
- struct i915_gem_context *fixme;
enum intel_engine_id id;
- struct drm_file *file;
int err = 0;
- file = mock_file(gt->i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- fixme = live_context(gt->i915, file);
- if (IS_ERR(fixme)) {
- err = PTR_ERR(fixme);
- goto out_file;
- }
-
for_each_engine(engine, gt, id) {
- err = __live_active_context(engine, fixme);
+ err = __live_active_context(engine);
if (err)
break;
@@ -315,8 +315,6 @@ static int live_active_context(void *arg)
break;
}
-out_file:
- mock_file_free(gt->i915, file);
return err;
}
@@ -348,10 +346,10 @@ unpin:
return err;
}
-static int __live_remote_context(struct intel_engine_cs *engine,
- struct i915_gem_context *fixme)
+static int __live_remote_context(struct intel_engine_cs *engine)
{
struct intel_context *local, *remote;
+ unsigned long saved_heartbeat;
int pass;
int err;
@@ -363,16 +361,26 @@ static int __live_remote_context(struct intel_engine_cs *engine,
* clobber the idle-barrier.
*/
- remote = intel_context_create(fixme, engine);
+ if (intel_engine_pm_is_awake(engine)) {
+ pr_err("%s is awake before starting %s!\n",
+ engine->name, __func__);
+ return -EINVAL;
+ }
+
+ remote = intel_context_create(engine);
if (IS_ERR(remote))
return PTR_ERR(remote);
- local = intel_context_create(fixme, engine);
+ local = intel_context_create(engine);
if (IS_ERR(local)) {
err = PTR_ERR(local);
goto err_remote;
}
+ saved_heartbeat = engine->props.heartbeat_interval_ms;
+ engine->props.heartbeat_interval_ms = 0;
+ intel_engine_pm_get(engine);
+
for (pass = 0; pass <= 2; pass++) {
err = __remote_sync(local, remote);
if (err)
@@ -390,6 +398,9 @@ static int __live_remote_context(struct intel_engine_cs *engine,
}
}
+ intel_engine_pm_put(engine);
+ engine->props.heartbeat_interval_ms = saved_heartbeat;
+
intel_context_put(local);
err_remote:
intel_context_put(remote);
@@ -400,23 +411,11 @@ static int live_remote_context(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
- struct i915_gem_context *fixme;
enum intel_engine_id id;
- struct drm_file *file;
int err = 0;
- file = mock_file(gt->i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- fixme = live_context(gt->i915, file);
- if (IS_ERR(fixme)) {
- err = PTR_ERR(fixme);
- goto out_file;
- }
-
for_each_engine(engine, gt, id) {
- err = __live_remote_context(engine, fixme);
+ err = __live_remote_context(engine);
if (err)
break;
@@ -425,8 +424,6 @@ static int live_remote_context(void *arg)
break;
}
-out_file:
- mock_file_free(gt->i915, file);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index 3880f07c29b8..f88e445a1cae 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -4,7 +4,365 @@
* Copyright © 2018 Intel Corporation
*/
-#include "../i915_selftest.h"
+#include <linux/sort.h>
+
+#include "intel_gt_pm.h"
+#include "intel_rps.h"
+
+#include "i915_selftest.h"
+#include "selftests/igt_flush_test.h"
+
+#define COUNT 5
+
+static int cmp_u32(const void *A, const void *B)
+{
+ const u32 *a = A, *b = B;
+
+ return *a - *b;
+}
+
+static void perf_begin(struct intel_gt *gt)
+{
+ intel_gt_pm_get(gt);
+
+ /* Boost gpufreq to max [waitboost] and keep it fixed */
+ atomic_inc(&gt->rps.num_waiters);
+ schedule_work(&gt->rps.work);
+ flush_work(&gt->rps.work);
+}
+
+static int perf_end(struct intel_gt *gt)
+{
+ atomic_dec(&gt->rps.num_waiters);
+ intel_gt_pm_put(gt);
+
+ return igt_flush_test(gt->i915);
+}
+
+static int write_timestamp(struct i915_request *rq, int slot)
+{
+ u32 cmd;
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
+ if (INTEL_GEN(rq->i915) >= 8)
+ cmd++;
+ *cs++ = cmd;
+ *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
+ *cs++ = i915_request_timeline(rq)->hwsp_offset + slot * sizeof(u32);
+ *cs++ = 0;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static struct i915_vma *create_empty_batch(struct intel_context *ce)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
+
+ obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_put;
+ }
+
+ cs[0] = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(obj);
+
+ vma = i915_vma_instance(obj, ce->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_unpin;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto err_unpin;
+
+ i915_gem_object_unpin_map(obj);
+ return vma;
+
+err_unpin:
+ i915_gem_object_unpin_map(obj);
+err_put:
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+}
+
+static u32 trifilter(u32 *a)
+{
+ u64 sum;
+
+ sort(a, COUNT, sizeof(*a), cmp_u32, NULL);
+
+ sum = mul_u32_u32(a[2], 2);
+ sum += a[1];
+ sum += a[3];
+
+ return sum >> 2;
+}
+
+static int perf_mi_bb_start(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
+ return 0;
+
+ perf_begin(gt);
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce = engine->kernel_context;
+ struct i915_vma *batch;
+ u32 cycles[COUNT];
+ int i;
+
+ intel_engine_pm_get(engine);
+
+ batch = create_empty_batch(ce);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ intel_engine_pm_put(engine);
+ break;
+ }
+
+ err = i915_vma_sync(batch);
+ if (err) {
+ intel_engine_pm_put(engine);
+ i915_vma_put(batch);
+ break;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(cycles); i++) {
+ struct i915_request *rq;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ err = write_timestamp(rq, 2);
+ if (err)
+ goto out;
+
+ err = rq->engine->emit_bb_start(rq,
+ batch->node.start, 8,
+ 0);
+ if (err)
+ goto out;
+
+ err = write_timestamp(rq, 3);
+ if (err)
+ goto out;
+
+out:
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -EIO;
+ i915_request_put(rq);
+ if (err)
+ break;
+
+ cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2];
+ }
+ i915_vma_put(batch);
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
+
+ pr_info("%s: MI_BB_START cycles: %u\n",
+ engine->name, trifilter(cycles));
+ }
+ if (perf_end(gt))
+ err = -EIO;
+
+ return err;
+}
+
+static struct i915_vma *create_nop_batch(struct intel_context *ce)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
+
+ obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_put;
+ }
+
+ memset(cs, 0, SZ_64K);
+ cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(obj);
+
+ vma = i915_vma_instance(obj, ce->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_unpin;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto err_unpin;
+
+ i915_gem_object_unpin_map(obj);
+ return vma;
+
+err_unpin:
+ i915_gem_object_unpin_map(obj);
+err_put:
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+}
+
+static int perf_mi_noop(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
+ return 0;
+
+ perf_begin(gt);
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce = engine->kernel_context;
+ struct i915_vma *base, *nop;
+ u32 cycles[COUNT];
+ int i;
+
+ intel_engine_pm_get(engine);
+
+ base = create_empty_batch(ce);
+ if (IS_ERR(base)) {
+ err = PTR_ERR(base);
+ intel_engine_pm_put(engine);
+ break;
+ }
+
+ err = i915_vma_sync(base);
+ if (err) {
+ i915_vma_put(base);
+ intel_engine_pm_put(engine);
+ break;
+ }
+
+ nop = create_nop_batch(ce);
+ if (IS_ERR(nop)) {
+ err = PTR_ERR(nop);
+ i915_vma_put(base);
+ intel_engine_pm_put(engine);
+ break;
+ }
+
+ err = i915_vma_sync(nop);
+ if (err) {
+ i915_vma_put(nop);
+ i915_vma_put(base);
+ intel_engine_pm_put(engine);
+ break;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(cycles); i++) {
+ struct i915_request *rq;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ err = write_timestamp(rq, 2);
+ if (err)
+ goto out;
+
+ err = rq->engine->emit_bb_start(rq,
+ base->node.start, 8,
+ 0);
+ if (err)
+ goto out;
+
+ err = write_timestamp(rq, 3);
+ if (err)
+ goto out;
+
+ err = rq->engine->emit_bb_start(rq,
+ nop->node.start,
+ nop->node.size,
+ 0);
+ if (err)
+ goto out;
+
+ err = write_timestamp(rq, 4);
+ if (err)
+ goto out;
+
+out:
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -EIO;
+ i915_request_put(rq);
+ if (err)
+ break;
+
+ cycles[i] =
+ (rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) -
+ (rq->hwsp_seqno[3] - rq->hwsp_seqno[2]);
+ }
+ i915_vma_put(nop);
+ i915_vma_put(base);
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
+
+ pr_info("%s: 16K MI_NOOP cycles: %u\n",
+ engine->name, trifilter(cycles));
+ }
+ if (perf_end(gt))
+ err = -EIO;
+
+ return err;
+}
+
+int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(perf_mi_bb_start),
+ SUBTEST(perf_mi_noop),
+ };
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
static int intel_mmio_bases_check(void *arg)
{
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
new file mode 100644
index 000000000000..43d4d589749f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -0,0 +1,374 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include <linux/sort.h>
+
+#include "i915_drv.h"
+
+#include "intel_gt_requests.h"
+#include "i915_selftest.h"
+
+static int timeline_sync(struct intel_timeline *tl)
+{
+ struct dma_fence *fence;
+ long timeout;
+
+ fence = i915_active_fence_get(&tl->last_request);
+ if (!fence)
+ return 0;
+
+ timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
+ dma_fence_put(fence);
+ if (timeout < 0)
+ return timeout;
+
+ return 0;
+}
+
+static int engine_sync_barrier(struct intel_engine_cs *engine)
+{
+ return timeline_sync(engine->kernel_context->timeline);
+}
+
+struct pulse {
+ struct i915_active active;
+ struct kref kref;
+};
+
+static int pulse_active(struct i915_active *active)
+{
+ kref_get(&container_of(active, struct pulse, active)->kref);
+ return 0;
+}
+
+static void pulse_free(struct kref *kref)
+{
+ kfree(container_of(kref, struct pulse, kref));
+}
+
+static void pulse_put(struct pulse *p)
+{
+ kref_put(&p->kref, pulse_free);
+}
+
+static void pulse_retire(struct i915_active *active)
+{
+ pulse_put(container_of(active, struct pulse, active));
+}
+
+static struct pulse *pulse_create(void)
+{
+ struct pulse *p;
+
+ p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return p;
+
+ kref_init(&p->kref);
+ i915_active_init(&p->active, pulse_active, pulse_retire);
+
+ return p;
+}
+
+static void pulse_unlock_wait(struct pulse *p)
+{
+ i915_active_unlock_wait(&p->active);
+}
+
+static int __live_idle_pulse(struct intel_engine_cs *engine,
+ int (*fn)(struct intel_engine_cs *cs))
+{
+ struct pulse *p;
+ int err;
+
+ GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+
+ p = pulse_create();
+ if (!p)
+ return -ENOMEM;
+
+ err = i915_active_acquire(&p->active);
+ if (err)
+ goto out;
+
+ err = i915_active_acquire_preallocate_barrier(&p->active, engine);
+ if (err) {
+ i915_active_release(&p->active);
+ goto out;
+ }
+
+ i915_active_acquire_barrier(&p->active);
+ i915_active_release(&p->active);
+
+ GEM_BUG_ON(i915_active_is_idle(&p->active));
+ GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
+
+ err = fn(engine);
+ if (err)
+ goto out;
+
+ GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
+
+ if (engine_sync_barrier(engine)) {
+ struct drm_printer m = drm_err_printer("pulse");
+
+ pr_err("%s: no heartbeat pulse?\n", engine->name);
+ intel_engine_dump(engine, &m, "%s", engine->name);
+
+ err = -ETIME;
+ goto out;
+ }
+
+ GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);
+
+ pulse_unlock_wait(p); /* synchronize with the retirement callback */
+
+ if (!i915_active_is_idle(&p->active)) {
+ struct drm_printer m = drm_err_printer("pulse");
+
+ pr_err("%s: heartbeat pulse did not flush idle tasks\n",
+ engine->name);
+ i915_active_print(&p->active, &m);
+
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ pulse_put(p);
+ return err;
+}
+
+static int live_idle_flush(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /* Check that we can flush the idle barriers */
+
+ for_each_engine(engine, gt, id) {
+ intel_engine_pm_get(engine);
+ err = __live_idle_pulse(engine, intel_engine_flush_barriers);
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static int live_idle_pulse(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /* Check that heartbeat pulses flush the idle barriers */
+
+ for_each_engine(engine, gt, id) {
+ intel_engine_pm_get(engine);
+ err = __live_idle_pulse(engine, intel_engine_pulse);
+ intel_engine_pm_put(engine);
+ if (err && err != -ENODEV)
+ break;
+
+ err = 0;
+ }
+
+ return err;
+}
+
+static int cmp_u32(const void *_a, const void *_b)
+{
+ const u32 *a = _a, *b = _b;
+
+ return *a - *b;
+}
+
+static int __live_heartbeat_fast(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+ ktime_t t0, t1;
+ u32 times[5];
+ int err;
+ int i;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ intel_engine_pm_get(engine);
+
+ err = intel_engine_set_heartbeat(engine, 1);
+ if (err)
+ goto err_pm;
+
+ for (i = 0; i < ARRAY_SIZE(times); i++) {
+ /* Manufacture a tick */
+ do {
+ while (READ_ONCE(engine->heartbeat.systole))
+ flush_delayed_work(&engine->heartbeat.work);
+
+ engine->serial++; /* quick, pretend we are not idle! */
+ flush_delayed_work(&engine->heartbeat.work);
+ if (!delayed_work_pending(&engine->heartbeat.work)) {
+ pr_err("%s: heartbeat did not start\n",
+ engine->name);
+ err = -EINVAL;
+ goto err_pm;
+ }
+
+ rcu_read_lock();
+ rq = READ_ONCE(engine->heartbeat.systole);
+ if (rq)
+ rq = i915_request_get_rcu(rq);
+ rcu_read_unlock();
+ } while (!rq);
+
+ t0 = ktime_get();
+ while (rq == READ_ONCE(engine->heartbeat.systole))
+ yield(); /* work is on the local cpu! */
+ t1 = ktime_get();
+
+ i915_request_put(rq);
+ times[i] = ktime_us_delta(t1, t0);
+ }
+
+ sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
+
+ pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
+ engine->name,
+ times[ARRAY_SIZE(times) / 2],
+ times[0],
+ times[ARRAY_SIZE(times) - 1]);
+
+ /* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */
+ if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) {
+ pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
+ engine->name,
+ times[ARRAY_SIZE(times) / 2],
+ jiffies_to_usecs(6));
+ err = -EINVAL;
+ }
+
+ intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
+err_pm:
+ intel_engine_pm_put(engine);
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_heartbeat_fast(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /* Check that the heartbeat ticks at the desired rate. */
+ if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
+ return 0;
+
+ for_each_engine(engine, gt, id) {
+ err = __live_heartbeat_fast(engine);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static int __live_heartbeat_off(struct intel_engine_cs *engine)
+{
+ int err;
+
+ intel_engine_pm_get(engine);
+
+ engine->serial++;
+ flush_delayed_work(&engine->heartbeat.work);
+ if (!delayed_work_pending(&engine->heartbeat.work)) {
+ pr_err("%s: heartbeat not running\n",
+ engine->name);
+ err = -EINVAL;
+ goto err_pm;
+ }
+
+ err = intel_engine_set_heartbeat(engine, 0);
+ if (err)
+ goto err_pm;
+
+ engine->serial++;
+ flush_delayed_work(&engine->heartbeat.work);
+ if (delayed_work_pending(&engine->heartbeat.work)) {
+ pr_err("%s: heartbeat still running\n",
+ engine->name);
+ err = -EINVAL;
+ goto err_beat;
+ }
+
+ if (READ_ONCE(engine->heartbeat.systole)) {
+ pr_err("%s: heartbeat still allocated\n",
+ engine->name);
+ err = -EINVAL;
+ goto err_beat;
+ }
+
+err_beat:
+ intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
+err_pm:
+ intel_engine_pm_put(engine);
+ return err;
+}
+
+static int live_heartbeat_off(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /* Check that we can turn off heartbeat and not interrupt VIP */
+ if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
+ return 0;
+
+ for_each_engine(engine, gt, id) {
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ err = __live_heartbeat_off(engine);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_idle_flush),
+ SUBTEST(live_idle_pulse),
+ SUBTEST(live_heartbeat_fast),
+ SUBTEST(live_heartbeat_off),
+ };
+ int saved_hangcheck;
+ int err;
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ saved_hangcheck = i915_modparams.enable_hangcheck;
+ i915_modparams.enable_hangcheck = INT_MAX;
+
+ err = intel_gt_live_subtests(tests, &i915->gt);
+
+ i915_modparams.enable_hangcheck = saved_hangcheck;
+ return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
index 20b9c83f43ad..cbf6b0735272 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
@@ -51,11 +51,12 @@ static int live_engine_pm(void *arg)
pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
engine->name, p->name);
else
- intel_engine_pm_put(engine);
- intel_engine_pm_put(engine);
+ intel_engine_pm_put_async(engine);
+ intel_engine_pm_put_async(engine);
p->critical_section_end();
- /* engine wakeref is sync (instant) */
+ intel_engine_pm_flush(engine);
+
if (intel_engine_pm_is_awake(engine)) {
pr_err("%s is still awake after flushing pm\n",
engine->name);
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index 5d429037cdad..09ff8e4f88af 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -6,6 +6,7 @@
*/
#include "selftest_llc.h"
+#include "selftest_rc6.h"
static int live_gt_resume(void *arg)
{
@@ -15,7 +16,8 @@ static int live_gt_resume(void *arg)
/* Do several suspend/resume cycles to check we don't explode! */
do {
- intel_gt_suspend(gt);
+ intel_gt_suspend_prepare(gt);
+ intel_gt_suspend_late(gt);
if (gt->rc6.enabled) {
pr_err("rc6 still enabled after suspend!\n");
@@ -49,6 +51,7 @@ static int live_gt_resume(void *arg)
int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
+ SUBTEST(live_rc6_manual),
SUBTEST(live_gt_resume),
};
@@ -57,3 +60,20 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
return intel_gt_live_subtests(tests, &i915->gt);
}
+
+int intel_gt_pm_late_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ /*
+ * These tests may leave the system in an undesirable state.
+ * They are intended to be run last in CI and the system
+ * rebooted afterwards.
+ */
+ SUBTEST(live_rc6_ctx_wa),
+ };
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 8e0016464325..5dbda2a74272 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -25,7 +25,9 @@
#include <linux/kthread.h>
#include "gem/i915_gem_context.h"
-#include "gt/intel_gt.h"
+
+#include "intel_gt.h"
+#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "i915_selftest.h"
@@ -308,6 +310,24 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq)
1000));
}
+static void engine_heartbeat_disable(struct intel_engine_cs *engine,
+ unsigned long *saved)
+{
+ *saved = engine->props.heartbeat_interval_ms;
+ engine->props.heartbeat_interval_ms = 0;
+
+ intel_engine_pm_get(engine);
+ intel_engine_park_heartbeat(engine);
+}
+
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+ unsigned long saved)
+{
+ intel_engine_pm_put(engine);
+
+ engine->props.heartbeat_interval_ms = saved;
+}
+
static int igt_hang_sanitycheck(void *arg)
{
struct intel_gt *gt = arg;
@@ -377,36 +397,30 @@ static int igt_reset_nop(void *arg)
struct intel_gt *gt = arg;
struct i915_gpu_error *global = &gt->i915->gpu_error;
struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
unsigned int reset_count, count;
enum intel_engine_id id;
- struct drm_file *file;
IGT_TIMEOUT(end_time);
int err = 0;
/* Check that we can reset during non-user portions of requests */
- file = mock_file(gt->i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- ctx = live_context(gt->i915, file);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out;
- }
-
- i915_gem_context_clear_bannable(ctx);
reset_count = i915_reset_count(global);
count = 0;
do {
for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
int i;
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
for (i = 0; i < 16; i++) {
struct i915_request *rq;
- rq = igt_request_alloc(ctx, engine);
+ rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
break;
@@ -414,6 +428,8 @@ static int igt_reset_nop(void *arg)
i915_request_add(rq);
}
+
+ intel_context_put(ce);
}
igt_global_reset_lock(gt);
@@ -437,10 +453,7 @@ static int igt_reset_nop(void *arg)
} while (time_before(jiffies, end_time));
pr_info("%s: %d resets\n", __func__, count);
- err = igt_flush_test(gt->i915);
-out:
- mock_file_free(gt->i915, file);
- if (intel_gt_is_wedged(gt))
+ if (igt_flush_test(gt->i915))
err = -EIO;
return err;
}
@@ -450,36 +463,29 @@ static int igt_reset_nop_engine(void *arg)
struct intel_gt *gt = arg;
struct i915_gpu_error *global = &gt->i915->gpu_error;
struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
enum intel_engine_id id;
- struct drm_file *file;
- int err = 0;
/* Check that we can engine-reset during non-user portions */
if (!intel_has_reset_engine(gt))
return 0;
- file = mock_file(gt->i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- ctx = live_context(gt->i915, file);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out;
- }
-
- i915_gem_context_clear_bannable(ctx);
for_each_engine(engine, gt, id) {
- unsigned int reset_count, reset_engine_count;
- unsigned int count;
+ unsigned int reset_count, reset_engine_count, count;
+ struct intel_context *ce;
+ unsigned long heartbeat;
IGT_TIMEOUT(end_time);
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
reset_count = i915_reset_count(global);
reset_engine_count = i915_reset_engine_count(global, engine);
count = 0;
+ engine_heartbeat_disable(engine, &heartbeat);
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
do {
int i;
@@ -494,7 +500,7 @@ static int igt_reset_nop_engine(void *arg)
for (i = 0; i < 16; i++) {
struct i915_request *rq;
- rq = igt_request_alloc(ctx, engine);
+ rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
break;
@@ -523,22 +529,18 @@ static int igt_reset_nop_engine(void *arg)
}
} while (time_before(jiffies, end_time));
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
- pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
+ engine_heartbeat_enable(engine, heartbeat);
- if (err)
- break;
+ pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
- err = igt_flush_test(gt->i915);
+ intel_context_put(ce);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
if (err)
- break;
+ return err;
}
- err = igt_flush_test(gt->i915);
-out:
- mock_file_free(gt->i915, file);
- if (intel_gt_is_wedged(gt))
- err = -EIO;
- return err;
+ return 0;
}
static int __igt_reset_engine(struct intel_gt *gt, bool active)
@@ -562,6 +564,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
for_each_engine(engine, gt, id) {
unsigned int reset_count, reset_engine_count;
+ unsigned long heartbeat;
IGT_TIMEOUT(end_time);
if (active && !intel_engine_can_store_dword(engine))
@@ -577,7 +580,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
reset_count = i915_reset_count(global);
reset_engine_count = i915_reset_engine_count(global, engine);
- intel_engine_pm_get(engine);
+ engine_heartbeat_disable(engine, &heartbeat);
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
do {
if (active) {
@@ -629,7 +632,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
}
} while (time_before(jiffies, end_time));
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
- intel_engine_pm_put(engine);
+ engine_heartbeat_enable(engine, heartbeat);
if (err)
break;
@@ -699,43 +702,43 @@ static int active_engine(void *data)
struct active_engine *arg = data;
struct intel_engine_cs *engine = arg->engine;
struct i915_request *rq[8] = {};
- struct i915_gem_context *ctx[ARRAY_SIZE(rq)];
- struct drm_file *file;
- unsigned long count = 0;
+ struct intel_context *ce[ARRAY_SIZE(rq)];
+ unsigned long count;
int err = 0;
- file = mock_file(engine->i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- for (count = 0; count < ARRAY_SIZE(ctx); count++) {
- ctx[count] = live_context(engine->i915, file);
- if (IS_ERR(ctx[count])) {
- err = PTR_ERR(ctx[count]);
+ for (count = 0; count < ARRAY_SIZE(ce); count++) {
+ ce[count] = intel_context_create(engine);
+ if (IS_ERR(ce[count])) {
+ err = PTR_ERR(ce[count]);
while (--count)
- i915_gem_context_put(ctx[count]);
- goto err_file;
+ intel_context_put(ce[count]);
+ return err;
}
}
+ count = 0;
while (!kthread_should_stop()) {
unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1);
struct i915_request *old = rq[idx];
struct i915_request *new;
- new = igt_request_alloc(ctx[idx], engine);
+ new = intel_context_create_request(ce[idx]);
if (IS_ERR(new)) {
err = PTR_ERR(new);
break;
}
- if (arg->flags & TEST_PRIORITY)
- ctx[idx]->sched.priority =
- i915_prandom_u32_max_state(512, &prng);
-
rq[idx] = i915_request_get(new);
i915_request_add(new);
+ if (engine->schedule && arg->flags & TEST_PRIORITY) {
+ struct i915_sched_attr attr = {
+ .priority =
+ i915_prandom_u32_max_state(512, &prng),
+ };
+ engine->schedule(rq[idx], &attr);
+ }
+
err = active_request_put(old);
if (err)
break;
@@ -749,10 +752,10 @@ static int active_engine(void *data)
/* Keep the first error */
if (!err)
err = err__;
+
+ intel_context_put(ce[count]);
}
-err_file:
- mock_file_free(engine->i915, file);
return err;
}
@@ -786,6 +789,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
struct active_engine threads[I915_NUM_ENGINES] = {};
unsigned long device = i915_reset_count(global);
unsigned long count = 0, reported;
+ unsigned long heartbeat;
IGT_TIMEOUT(end_time);
if (flags & TEST_ACTIVE &&
@@ -826,7 +830,9 @@ static int __igt_reset_engines(struct intel_gt *gt,
get_task_struct(tsk);
}
- intel_engine_pm_get(engine);
+ yield(); /* start all threads before we begin */
+
+ engine_heartbeat_disable(engine, &heartbeat);
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
do {
struct i915_request *rq = NULL;
@@ -900,7 +906,8 @@ static int __igt_reset_engines(struct intel_gt *gt,
}
} while (time_before(jiffies, end_time));
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
- intel_engine_pm_put(engine);
+ engine_heartbeat_enable(engine, heartbeat);
+
pr_info("i915_reset_engine(%s:%s): %lu resets\n",
engine->name, test_name, count);
@@ -1016,7 +1023,7 @@ static int igt_reset_wait(void *arg)
{
struct intel_gt *gt = arg;
struct i915_gpu_error *global = &gt->i915->gpu_error;
- struct intel_engine_cs *engine = gt->i915->engine[RCS0];
+ struct intel_engine_cs *engine = gt->engine[RCS0];
struct i915_request *rq;
unsigned int reset_count;
struct hang h;
@@ -1143,14 +1150,18 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
int (*fn)(void *),
unsigned int flags)
{
- struct intel_engine_cs *engine = gt->i915->engine[RCS0];
+ struct intel_engine_cs *engine = gt->engine[RCS0];
struct drm_i915_gem_object *obj;
struct task_struct *tsk = NULL;
struct i915_request *rq;
struct evict_vma arg;
struct hang h;
+ unsigned int pin_flags;
int err;
+ if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE)
+ return 0;
+
if (!engine || !intel_engine_can_store_dword(engine))
return 0;
@@ -1186,10 +1197,12 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
goto out_obj;
}
- err = i915_vma_pin(arg.vma, 0, 0,
- i915_vma_is_ggtt(arg.vma) ?
- PIN_GLOBAL | PIN_MAPPABLE :
- PIN_USER);
+ pin_flags = i915_vma_is_ggtt(arg.vma) ? PIN_GLOBAL : PIN_USER;
+
+ if (flags & EXEC_OBJECT_NEEDS_FENCE)
+ pin_flags |= PIN_MAPPABLE;
+
+ err = i915_vma_pin(arg.vma, 0, 0, pin_flags);
if (err) {
i915_request_add(rq);
goto out_obj;
@@ -1292,32 +1305,21 @@ static int igt_reset_evict_ggtt(void *arg)
static int igt_reset_evict_ppgtt(void *arg)
{
struct intel_gt *gt = arg;
- struct i915_gem_context *ctx;
- struct i915_address_space *vm;
- struct drm_file *file;
+ struct i915_ppgtt *ppgtt;
int err;
- file = mock_file(gt->i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
+ /* aliasing == global gtt locking, covered above */
+ if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL)
+ return 0;
- ctx = live_context(gt->i915, file);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out;
- }
+ ppgtt = i915_ppgtt_create(gt->i915);
+ if (IS_ERR(ppgtt))
+ return PTR_ERR(ppgtt);
- err = 0;
- vm = i915_gem_context_get_vm_rcu(ctx);
- if (!i915_is_ggtt(vm)) {
- /* aliasing == global gtt locking, covered above */
- err = __igt_reset_evict_vma(gt, vm,
- evict_vma, EXEC_OBJECT_WRITE);
- }
- i915_vm_put(vm);
+ err = __igt_reset_evict_vma(gt, &ppgtt->vm,
+ evict_vma, EXEC_OBJECT_WRITE);
+ i915_vm_put(&ppgtt->vm);
-out:
- mock_file_free(gt->i915, file);
return err;
}
@@ -1493,7 +1495,7 @@ static int igt_handle_error(void *arg)
{
struct intel_gt *gt = arg;
struct i915_gpu_error *global = &gt->i915->gpu_error;
- struct intel_engine_cs *engine = gt->i915->engine[RCS0];
+ struct intel_engine_cs *engine = gt->engine[RCS0];
struct hang h;
struct i915_request *rq;
struct i915_gpu_state *error;
@@ -1563,7 +1565,7 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
GEM_TRACE("i915_reset_engine(%s:%s) under %s\n",
engine->name, mode, p->name);
- tasklet_disable_nosync(t);
+ tasklet_disable(t);
p->critical_section_begin();
err = intel_engine_reset(engine, NULL);
@@ -1686,7 +1688,6 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
};
struct intel_gt *gt = &i915->gt;
intel_wakeref_t wakeref;
- bool saved_hangcheck;
int err;
if (!intel_has_gpu_reset(gt))
@@ -1696,12 +1697,9 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
return -EIO; /* we're long past hope of a successful reset */
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
- saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
- drain_delayed_work(&gt->hangcheck.work); /* flush param */
err = intel_gt_live_subtests(tests, gt);
- i915_modparams.enable_hangcheck = saved_hangcheck;
intel_runtime_pm_put(gt->uncore->rpm, wakeref);
return err;
diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c
index a7057785e420..fd3770e48ac7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_llc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_llc.c
@@ -6,6 +6,7 @@
#include "intel_pm.h" /* intel_gpu_freq() */
#include "selftest_llc.h"
+#include "intel_rps.h"
static int gen6_verify_ring_freq(struct intel_llc *llc)
{
@@ -25,6 +26,8 @@ static int gen6_verify_ring_freq(struct intel_llc *llc)
for (gpu_freq = consts.min_gpu_freq;
gpu_freq <= consts.max_gpu_freq;
gpu_freq++) {
+ struct intel_rps *rps = &llc_to_gt(llc)->rps;
+
unsigned int ia_freq, ring_freq, found;
u32 val;
@@ -44,7 +47,7 @@ static int gen6_verify_ring_freq(struct intel_llc *llc)
if (found != ia_freq) {
pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected CPU freq, found %d, expected %d\n",
gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq,
- intel_gpu_freq(i915, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)),
+ intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)),
found, ia_freq);
err = -EINVAL;
break;
@@ -54,7 +57,7 @@ static int gen6_verify_ring_freq(struct intel_llc *llc)
if (found != ring_freq) {
pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected ring freq, found %d, expected %d\n",
gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq,
- intel_gpu_freq(i915, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)),
+ intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)),
found, ring_freq);
err = -EINVAL;
break;
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 5dc679781a08..9ec9833c9c7b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -7,6 +7,7 @@
#include <linux/prime_numbers.h>
#include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_reset.h"
#include "i915_selftest.h"
@@ -49,14 +50,31 @@ static struct i915_vma *create_scratch(struct intel_gt *gt)
return vma;
}
+static void engine_heartbeat_disable(struct intel_engine_cs *engine,
+ unsigned long *saved)
+{
+ *saved = engine->props.heartbeat_interval_ms;
+ engine->props.heartbeat_interval_ms = 0;
+
+ intel_engine_pm_get(engine);
+ intel_engine_park_heartbeat(engine);
+}
+
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+ unsigned long saved)
+{
+ intel_engine_pm_put(engine);
+
+ engine->props.heartbeat_interval_ms = saved;
+}
+
static int live_sanitycheck(void *arg)
{
struct intel_gt *gt = arg;
- struct i915_gem_engines_iter it;
- struct i915_gem_context *ctx;
- struct intel_context *ce;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
struct igt_spinner spin;
- int err = -ENOMEM;
+ int err = 0;
if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
return 0;
@@ -64,17 +82,20 @@ static int live_sanitycheck(void *arg)
if (igt_spinner_init(&spin, gt))
return -ENOMEM;
- ctx = kernel_context(gt->i915);
- if (!ctx)
- goto err_spin;
-
- for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
struct i915_request *rq;
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- goto err_ctx;
+ goto out_ctx;
}
i915_request_add(rq);
@@ -83,21 +104,21 @@ static int live_sanitycheck(void *arg)
GEM_TRACE_DUMP();
intel_gt_set_wedged(gt);
err = -EIO;
- goto err_ctx;
+ goto out_ctx;
}
igt_spinner_end(&spin);
if (igt_flush_test(gt->i915)) {
err = -EIO;
- goto err_ctx;
+ goto out_ctx;
}
+
+out_ctx:
+ intel_context_put(ce);
+ if (err)
+ break;
}
- err = 0;
-err_ctx:
- i915_gem_context_unlock_engines(ctx);
- kernel_context_close(ctx);
-err_spin:
igt_spinner_fini(&spin);
return err;
}
@@ -105,7 +126,6 @@ err_spin:
static int live_unlite_restore(struct intel_gt *gt, int prio)
{
struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
enum intel_engine_id id;
struct igt_spinner spin;
int err = -ENOMEM;
@@ -118,15 +138,12 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
if (igt_spinner_init(&spin, gt))
return err;
- ctx = kernel_context(gt->i915);
- if (!ctx)
- goto err_spin;
-
err = 0;
for_each_engine(engine, gt, id) {
struct intel_context *ce[2] = {};
struct i915_request *rq[2];
struct igt_live_test t;
+ unsigned long saved;
int n;
if (prio && !intel_engine_has_preemption(engine))
@@ -139,11 +156,12 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
err = -EIO;
break;
}
+ engine_heartbeat_disable(engine, &saved);
for (n = 0; n < ARRAY_SIZE(ce); n++) {
struct intel_context *tmp;
- tmp = intel_context_create(ctx, engine);
+ tmp = intel_context_create(engine);
if (IS_ERR(tmp)) {
err = PTR_ERR(tmp);
goto err_ce;
@@ -168,12 +186,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
}
GEM_BUG_ON(!ce[1]->ring->size);
intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
-
- local_irq_disable(); /* appease lockdep */
- __context_pin_acquire(ce[1]);
__execlists_update_reg_state(ce[1], engine);
- __context_pin_release(ce[1]);
- local_irq_enable();
rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
if (IS_ERR(rq[0])) {
@@ -251,14 +264,13 @@ err_ce:
intel_context_put(ce[n]);
}
+ engine_heartbeat_enable(engine, saved);
if (igt_live_test_end(&t))
err = -EIO;
if (err)
break;
}
- kernel_context_close(ctx);
-err_spin:
igt_spinner_fini(&spin);
return err;
}
@@ -313,17 +325,17 @@ emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
static struct i915_request *
semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
{
- struct i915_gem_context *ctx;
+ struct intel_context *ce;
struct i915_request *rq;
int err;
- ctx = kernel_context(engine->i915);
- if (!ctx)
- return ERR_PTR(-ENOMEM);
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return ERR_CAST(ce);
- rq = igt_request_alloc(ctx, engine);
+ rq = intel_context_create_request(ce);
if (IS_ERR(rq))
- goto out_ctx;
+ goto out_ce;
err = 0;
if (rq->engine->emit_init_breadcrumb)
@@ -336,8 +348,8 @@ semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
if (err)
rq = ERR_PTR(err);
-out_ctx:
- kernel_context_close(ctx);
+out_ce:
+ intel_context_put(ce);
return rq;
}
@@ -352,7 +364,7 @@ release_queue(struct intel_engine_cs *engine,
struct i915_request *rq;
u32 *cs;
- rq = i915_request_create(engine->kernel_context);
+ rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq))
return PTR_ERR(rq);
@@ -444,6 +456,8 @@ static int live_timeslice_preempt(void *arg)
* need to preempt the current task and replace it with another
* ready task.
*/
+ if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ return 0;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
if (IS_ERR(obj))
@@ -470,12 +484,16 @@ static int live_timeslice_preempt(void *arg)
enum intel_engine_id id;
for_each_engine(engine, gt, id) {
+ unsigned long saved;
+
if (!intel_engine_has_preemption(engine))
continue;
memset(vaddr, 0, PAGE_SIZE);
+ engine_heartbeat_disable(engine, &saved);
err = slice_semaphore_queue(engine, vma, count);
+ engine_heartbeat_enable(engine, saved);
if (err)
goto err_pin;
@@ -499,7 +517,7 @@ static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
struct i915_request *rq;
- rq = i915_request_create(engine->kernel_context);
+ rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq))
return rq;
@@ -518,6 +536,11 @@ static void wait_for_submit(struct intel_engine_cs *engine,
} while (!i915_request_is_active(rq));
}
+static long timeslice_threshold(const struct intel_engine_cs *engine)
+{
+ return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
+}
+
static int live_timeslice_queue(void *arg)
{
struct intel_gt *gt = arg;
@@ -535,6 +558,8 @@ static int live_timeslice_queue(void *arg)
* ELSP[1] is already occupied, so must rely on timeslicing to
* eject ELSP[0] in favour of the queue.)
*/
+ if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ return 0;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
if (IS_ERR(obj))
@@ -561,17 +586,19 @@ static int live_timeslice_queue(void *arg)
.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
};
struct i915_request *rq, *nop;
+ unsigned long saved;
if (!intel_engine_has_preemption(engine))
continue;
+ engine_heartbeat_disable(engine, &saved);
memset(vaddr, 0, PAGE_SIZE);
/* ELSP[0]: semaphore wait */
rq = semaphore_queue(engine, vma, 0);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- goto err_pin;
+ goto err_heartbeat;
}
engine->schedule(rq, &attr);
wait_for_submit(engine, rq);
@@ -580,8 +607,7 @@ static int live_timeslice_queue(void *arg)
nop = nop_request(engine);
if (IS_ERR(nop)) {
err = PTR_ERR(nop);
- i915_request_put(rq);
- goto err_pin;
+ goto err_rq;
}
wait_for_submit(engine, nop);
i915_request_put(nop);
@@ -591,10 +617,8 @@ static int live_timeslice_queue(void *arg)
/* Queue: semaphore signal, matching priority as semaphore */
err = release_queue(engine, vma, 1, effective_prio(rq));
- if (err) {
- i915_request_put(rq);
- goto err_pin;
- }
+ if (err)
+ goto err_rq;
intel_engine_flush_submission(engine);
if (!READ_ONCE(engine->execlists.timer.expires) &&
@@ -612,8 +636,8 @@ static int live_timeslice_queue(void *arg)
err = -EINVAL;
}
- /* Timeslice every jiffie, so within 2 we should signal */
- if (i915_request_wait(rq, 0, 3) < 0) {
+ /* Timeslice every jiffy, so within 2 we should signal */
+ if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
struct drm_printer p =
drm_info_printer(gt->i915->drm.dev);
@@ -625,12 +649,14 @@ static int live_timeslice_queue(void *arg)
memset(vaddr, 0xff, PAGE_SIZE);
err = -EIO;
}
+err_rq:
i915_request_put(rq);
+err_heartbeat:
+ engine_heartbeat_enable(engine, saved);
if (err)
break;
}
-err_pin:
i915_vma_unpin(vma);
err_map:
i915_gem_object_unpin_map(obj);
@@ -743,15 +769,19 @@ static int live_busywait_preempt(void *arg)
*cs++ = 0;
intel_ring_advance(lo, cs);
+
+ i915_request_get(lo);
i915_request_add(lo);
if (wait_for(READ_ONCE(*map), 10)) {
+ i915_request_put(lo);
err = -ETIMEDOUT;
goto err_vma;
}
/* Low priority request should be busywaiting now */
if (i915_request_wait(lo, 0, 1) != -ETIME) {
+ i915_request_put(lo);
pr_err("%s: Busywaiting request did not!\n",
engine->name);
err = -EIO;
@@ -761,6 +791,7 @@ static int live_busywait_preempt(void *arg)
hi = igt_request_alloc(ctx_hi, engine);
if (IS_ERR(hi)) {
err = PTR_ERR(hi);
+ i915_request_put(lo);
goto err_vma;
}
@@ -768,6 +799,7 @@ static int live_busywait_preempt(void *arg)
if (IS_ERR(cs)) {
err = PTR_ERR(cs);
i915_request_add(hi);
+ i915_request_put(lo);
goto err_vma;
}
@@ -788,11 +820,13 @@ static int live_busywait_preempt(void *arg)
intel_engine_dump(engine, &p, "%s\n", engine->name);
GEM_TRACE_DUMP();
+ i915_request_put(lo);
intel_gt_set_wedged(gt);
err = -EIO;
goto err_vma;
}
GEM_BUG_ON(READ_ONCE(*map));
+ i915_request_put(lo);
if (igt_live_test_end(&t)) {
err = -EIO;
@@ -1165,6 +1199,325 @@ err_wedged:
goto err_client_b;
}
+struct live_preempt_cancel {
+ struct intel_engine_cs *engine;
+ struct preempt_client a, b;
+};
+
+static int __cancel_active0(struct live_preempt_cancel *arg)
+{
+ struct i915_request *rq;
+ struct igt_live_test t;
+ int err;
+
+ /* Preempt cancel of ELSP0 */
+ GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+ if (igt_live_test_begin(&t, arg->engine->i915,
+ __func__, arg->engine->name))
+ return -EIO;
+
+ rq = spinner_create_request(&arg->a.spin,
+ arg->a.ctx, arg->engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ clear_bit(CONTEXT_BANNED, &rq->context->flags);
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
+ err = -EIO;
+ goto out;
+ }
+
+ intel_context_set_banned(rq->context);
+ err = intel_engine_pulse(arg->engine);
+ if (err)
+ goto out;
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = -EIO;
+ goto out;
+ }
+
+ if (rq->fence.error != -EIO) {
+ pr_err("Cancelled inflight0 request did not report -EIO\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ i915_request_put(rq);
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ return err;
+}
+
+static int __cancel_active1(struct live_preempt_cancel *arg)
+{
+ struct i915_request *rq[2] = {};
+ struct igt_live_test t;
+ int err;
+
+ /* Preempt cancel of ELSP1 */
+ GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+ if (igt_live_test_begin(&t, arg->engine->i915,
+ __func__, arg->engine->name))
+ return -EIO;
+
+ rq[0] = spinner_create_request(&arg->a.spin,
+ arg->a.ctx, arg->engine,
+ MI_NOOP); /* no preemption */
+ if (IS_ERR(rq[0]))
+ return PTR_ERR(rq[0]);
+
+ clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
+ i915_request_get(rq[0]);
+ i915_request_add(rq[0]);
+ if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
+ err = -EIO;
+ goto out;
+ }
+
+ rq[1] = spinner_create_request(&arg->b.spin,
+ arg->b.ctx, arg->engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq[1])) {
+ err = PTR_ERR(rq[1]);
+ goto out;
+ }
+
+ clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
+ i915_request_get(rq[1]);
+ err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+ i915_request_add(rq[1]);
+ if (err)
+ goto out;
+
+ intel_context_set_banned(rq[1]->context);
+ err = intel_engine_pulse(arg->engine);
+ if (err)
+ goto out;
+
+ igt_spinner_end(&arg->a.spin);
+ if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
+ err = -EIO;
+ goto out;
+ }
+
+ if (rq[0]->fence.error != 0) {
+ pr_err("Normal inflight0 request did not complete\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (rq[1]->fence.error != -EIO) {
+ pr_err("Cancelled inflight1 request did not report -EIO\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ i915_request_put(rq[1]);
+ i915_request_put(rq[0]);
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ return err;
+}
+
+static int __cancel_queued(struct live_preempt_cancel *arg)
+{
+ struct i915_request *rq[3] = {};
+ struct igt_live_test t;
+ int err;
+
+ /* Full ELSP and one in the wings */
+ GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+ if (igt_live_test_begin(&t, arg->engine->i915,
+ __func__, arg->engine->name))
+ return -EIO;
+
+ rq[0] = spinner_create_request(&arg->a.spin,
+ arg->a.ctx, arg->engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq[0]))
+ return PTR_ERR(rq[0]);
+
+ clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
+ i915_request_get(rq[0]);
+ i915_request_add(rq[0]);
+ if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
+ err = -EIO;
+ goto out;
+ }
+
+ rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
+ if (IS_ERR(rq[1])) {
+ err = PTR_ERR(rq[1]);
+ goto out;
+ }
+
+ clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
+ i915_request_get(rq[1]);
+ err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+ i915_request_add(rq[1]);
+ if (err)
+ goto out;
+
+ rq[2] = spinner_create_request(&arg->b.spin,
+ arg->a.ctx, arg->engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq[2])) {
+ err = PTR_ERR(rq[2]);
+ goto out;
+ }
+
+ i915_request_get(rq[2]);
+ err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
+ i915_request_add(rq[2]);
+ if (err)
+ goto out;
+
+ intel_context_set_banned(rq[2]->context);
+ err = intel_engine_pulse(arg->engine);
+ if (err)
+ goto out;
+
+ if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
+ err = -EIO;
+ goto out;
+ }
+
+ if (rq[0]->fence.error != -EIO) {
+ pr_err("Cancelled inflight0 request did not report -EIO\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (rq[1]->fence.error != 0) {
+ pr_err("Normal inflight1 request did not complete\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (rq[2]->fence.error != -EIO) {
+ pr_err("Cancelled queued request did not report -EIO\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ i915_request_put(rq[2]);
+ i915_request_put(rq[1]);
+ i915_request_put(rq[0]);
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ return err;
+}
+
+static int __cancel_hostile(struct live_preempt_cancel *arg)
+{
+ struct i915_request *rq;
+ int err;
+
+ /* Preempt cancel non-preemptible spinner in ELSP0 */
+ if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ return 0;
+
+ GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+ rq = spinner_create_request(&arg->a.spin,
+ arg->a.ctx, arg->engine,
+ MI_NOOP); /* preemption disabled */
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ clear_bit(CONTEXT_BANNED, &rq->context->flags);
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
+ err = -EIO;
+ goto out;
+ }
+
+ intel_context_set_banned(rq->context);
+ err = intel_engine_pulse(arg->engine); /* force reset */
+ if (err)
+ goto out;
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = -EIO;
+ goto out;
+ }
+
+ if (rq->fence.error != -EIO) {
+ pr_err("Cancelled inflight0 request did not report -EIO\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ i915_request_put(rq);
+ if (igt_flush_test(arg->engine->i915))
+ err = -EIO;
+ return err;
+}
+
+static int live_preempt_cancel(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct live_preempt_cancel data;
+ enum intel_engine_id id;
+ int err = -ENOMEM;
+
+ /*
+ * To cancel an inflight context, we need to first remove it from the
+ * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
+ */
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+ return 0;
+
+ if (preempt_client_init(gt, &data.a))
+ return -ENOMEM;
+ if (preempt_client_init(gt, &data.b))
+ goto err_client_a;
+
+ for_each_engine(data.engine, gt, id) {
+ if (!intel_engine_has_preemption(data.engine))
+ continue;
+
+ err = __cancel_active0(&data);
+ if (err)
+ goto err_wedged;
+
+ err = __cancel_active1(&data);
+ if (err)
+ goto err_wedged;
+
+ err = __cancel_queued(&data);
+ if (err)
+ goto err_wedged;
+
+ err = __cancel_hostile(&data);
+ if (err)
+ goto err_wedged;
+ }
+
+ err = 0;
+err_client_b:
+ preempt_client_fini(&data.b);
+err_client_a:
+ preempt_client_fini(&data.a);
+ return err;
+
+err_wedged:
+ GEM_TRACE_DUMP();
+ igt_spinner_end(&data.b.spin);
+ igt_spinner_end(&data.a.spin);
+ intel_gt_set_wedged(gt);
+ goto err_client_b;
+}
+
static int live_suppress_self_preempt(void *arg)
{
struct intel_gt *gt = arg;
@@ -1341,6 +1694,7 @@ static int live_suppress_wait_preempt(void *arg)
{
struct intel_gt *gt = arg;
struct preempt_client client[4];
+ struct i915_request *rq[ARRAY_SIZE(client)] = {};
struct intel_engine_cs *engine;
enum intel_engine_id id;
int err = -ENOMEM;
@@ -1374,7 +1728,6 @@ static int live_suppress_wait_preempt(void *arg)
continue;
for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
- struct i915_request *rq[ARRAY_SIZE(client)];
struct i915_request *dummy;
engine->execlists.preempt_hang.count = 0;
@@ -1384,18 +1737,22 @@ static int live_suppress_wait_preempt(void *arg)
goto err_client_3;
for (i = 0; i < ARRAY_SIZE(client); i++) {
- rq[i] = spinner_create_request(&client[i].spin,
- client[i].ctx, engine,
- MI_NOOP);
- if (IS_ERR(rq[i])) {
- err = PTR_ERR(rq[i]);
+ struct i915_request *this;
+
+ this = spinner_create_request(&client[i].spin,
+ client[i].ctx, engine,
+ MI_NOOP);
+ if (IS_ERR(this)) {
+ err = PTR_ERR(this);
goto err_wedged;
}
/* Disable NEWCLIENT promotion */
- __i915_active_fence_set(&i915_request_timeline(rq[i])->last_request,
+ __i915_active_fence_set(&i915_request_timeline(this)->last_request,
&dummy->fence);
- i915_request_add(rq[i]);
+
+ rq[i] = i915_request_get(this);
+ i915_request_add(this);
}
dummy_request_free(dummy);
@@ -1416,8 +1773,11 @@ static int live_suppress_wait_preempt(void *arg)
goto err_wedged;
}
- for (i = 0; i < ARRAY_SIZE(client); i++)
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
igt_spinner_end(&client[i].spin);
+ i915_request_put(rq[i]);
+ rq[i] = NULL;
+ }
if (igt_flush_test(gt->i915))
goto err_wedged;
@@ -1445,8 +1805,10 @@ err_client_0:
return err;
err_wedged:
- for (i = 0; i < ARRAY_SIZE(client); i++)
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
igt_spinner_end(&client[i].spin);
+ i915_request_put(rq[i]);
+ }
intel_gt_set_wedged(gt);
err = -EIO;
goto err_client_3;
@@ -1491,6 +1853,8 @@ static int live_chain_preempt(void *arg)
MI_ARB_CHECK);
if (IS_ERR(rq))
goto err_wedged;
+
+ i915_request_get(rq);
i915_request_add(rq);
ring_size = rq->wa_tail - rq->head;
@@ -1503,8 +1867,10 @@ static int live_chain_preempt(void *arg)
igt_spinner_end(&lo.spin);
if (i915_request_wait(rq, 0, HZ / 2) < 0) {
pr_err("Timed out waiting to flush %s\n", engine->name);
+ i915_request_put(rq);
goto err_wedged;
}
+ i915_request_put(rq);
if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
@@ -1538,6 +1904,8 @@ static int live_chain_preempt(void *arg)
rq = igt_request_alloc(hi.ctx, engine);
if (IS_ERR(rq))
goto err_wedged;
+
+ i915_request_get(rq);
i915_request_add(rq);
engine->schedule(rq, &attr);
@@ -1550,14 +1918,19 @@ static int live_chain_preempt(void *arg)
count);
intel_engine_dump(engine, &p,
"%s\n", engine->name);
+ i915_request_put(rq);
goto err_wedged;
}
igt_spinner_end(&lo.spin);
+ i915_request_put(rq);
rq = igt_request_alloc(lo.ctx, engine);
if (IS_ERR(rq))
goto err_wedged;
+
+ i915_request_get(rq);
i915_request_add(rq);
+
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
struct drm_printer p =
drm_info_printer(gt->i915->drm.dev);
@@ -1566,8 +1939,11 @@ static int live_chain_preempt(void *arg)
count);
intel_engine_dump(engine, &p,
"%s\n", engine->name);
+
+ i915_request_put(rq);
goto err_wedged;
}
+ i915_request_put(rq);
}
if (igt_live_test_end(&t)) {
@@ -1591,6 +1967,201 @@ err_wedged:
goto err_client_lo;
}
+static int create_gang(struct intel_engine_cs *engine,
+ struct i915_request **prev)
+{
+ struct drm_i915_gem_object *obj;
+ struct intel_context *ce;
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ obj = i915_gem_object_create_internal(engine->i915, 4096);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto err_ce;
+ }
+
+ vma = i915_vma_instance(obj, ce->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_obj;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto err_obj;
+
+ cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(cs))
+ goto err_obj;
+
+ /* Semaphore target: spin until zero */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = lower_32_bits(vma->node.start);
+ *cs++ = upper_32_bits(vma->node.start);
+
+ if (*prev) {
+ u64 offset = (*prev)->batch->node.start;
+
+ /* Terminate the spinner in the next lower priority batch. */
+ *cs++ = MI_STORE_DWORD_IMM_GEN4;
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+ *cs++ = 0;
+ }
+
+ *cs++ = MI_BATCH_BUFFER_END;
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ goto err_obj;
+
+ rq->batch = vma;
+ i915_request_get(rq);
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ vma->node.start,
+ PAGE_SIZE, 0);
+ i915_vma_unlock(vma);
+ i915_request_add(rq);
+ if (err)
+ goto err_rq;
+
+ i915_gem_object_put(obj);
+ intel_context_put(ce);
+
+ rq->client_link.next = &(*prev)->client_link;
+ *prev = rq;
+ return 0;
+
+err_rq:
+ i915_request_put(rq);
+err_obj:
+ i915_gem_object_put(obj);
+err_ce:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_preempt_gang(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+ return 0;
+
+ /*
+ * Build as long a chain of preempters as we can, with each
+ * request higher priority than the last. Once we are ready, we release
+ * the last batch which then precolates down the chain, each releasing
+ * the next oldest in turn. The intent is to simply push as hard as we
+ * can with the number of preemptions, trying to exceed narrow HW
+ * limits. At a minimum, we insist that we can sort all the user
+ * high priority levels into execution order.
+ */
+
+ for_each_engine(engine, gt, id) {
+ struct i915_request *rq = NULL;
+ struct igt_live_test t;
+ IGT_TIMEOUT(end_time);
+ int prio = 0;
+ int err = 0;
+ u32 *cs;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
+ return -EIO;
+
+ do {
+ struct i915_sched_attr attr = {
+ .priority = I915_USER_PRIORITY(prio++),
+ };
+
+ err = create_gang(engine, &rq);
+ if (err)
+ break;
+
+ /* Submit each spinner at increasing priority */
+ engine->schedule(rq, &attr);
+
+ if (prio <= I915_PRIORITY_MAX)
+ continue;
+
+ if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
+ break;
+
+ if (__igt_timeout(end_time, NULL))
+ break;
+ } while (1);
+ pr_debug("%s: Preempt chain of %d requests\n",
+ engine->name, prio);
+
+ /*
+ * Such that the last spinner is the highest priority and
+ * should execute first. When that spinner completes,
+ * it will terminate the next lowest spinner until there
+ * are no more spinners and the gang is complete.
+ */
+ cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
+ if (!IS_ERR(cs)) {
+ *cs = 0;
+ i915_gem_object_unpin_map(rq->batch->obj);
+ } else {
+ err = PTR_ERR(cs);
+ intel_gt_set_wedged(gt);
+ }
+
+ while (rq) { /* wait for each rq from highest to lowest prio */
+ struct i915_request *n =
+ list_next_entry(rq, client_link);
+
+ if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
+ struct drm_printer p =
+ drm_info_printer(engine->i915->drm.dev);
+
+ pr_err("Failed to flush chain of %d requests, at %d\n",
+ prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+
+ err = -ETIME;
+ }
+
+ i915_request_put(rq);
+ rq = n;
+ }
+
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int live_preempt_hang(void *arg)
{
struct intel_gt *gt = arg;
@@ -1702,6 +2273,105 @@ err_spin_hi:
return err;
}
+static int live_preempt_timeout(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct i915_gem_context *ctx_hi, *ctx_lo;
+ struct igt_spinner spin_lo;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = -ENOMEM;
+
+ /*
+ * Check that we force preemption to occur by cancelling the previous
+ * context if it refuses to yield the GPU.
+ */
+ if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ return 0;
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+ return 0;
+
+ if (!intel_has_reset_engine(gt))
+ return 0;
+
+ if (igt_spinner_init(&spin_lo, gt))
+ return -ENOMEM;
+
+ ctx_hi = kernel_context(gt->i915);
+ if (!ctx_hi)
+ goto err_spin_lo;
+ ctx_hi->sched.priority =
+ I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
+
+ ctx_lo = kernel_context(gt->i915);
+ if (!ctx_lo)
+ goto err_ctx_hi;
+ ctx_lo->sched.priority =
+ I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
+
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_timeout;
+ struct i915_request *rq;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ rq = spinner_create_request(&spin_lo, ctx_lo, engine,
+ MI_NOOP); /* preemption disabled */
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_ctx_lo;
+ }
+
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&spin_lo, rq)) {
+ intel_gt_set_wedged(gt);
+ err = -EIO;
+ goto err_ctx_lo;
+ }
+
+ rq = igt_request_alloc(ctx_hi, engine);
+ if (IS_ERR(rq)) {
+ igt_spinner_end(&spin_lo);
+ err = PTR_ERR(rq);
+ goto err_ctx_lo;
+ }
+
+ /* Flush the previous CS ack before changing timeouts */
+ while (READ_ONCE(engine->execlists.pending[0]))
+ cpu_relax();
+
+ saved_timeout = engine->props.preempt_timeout_ms;
+ engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ intel_engine_flush_submission(engine);
+ engine->props.preempt_timeout_ms = saved_timeout;
+
+ if (i915_request_wait(rq, 0, HZ / 10) < 0) {
+ intel_gt_set_wedged(gt);
+ i915_request_put(rq);
+ err = -ETIME;
+ goto err_ctx_lo;
+ }
+
+ igt_spinner_end(&spin_lo);
+ i915_request_put(rq);
+ }
+
+ err = 0;
+err_ctx_lo:
+ kernel_context_close(ctx_lo);
+err_ctx_hi:
+ kernel_context_close(ctx_hi);
+err_spin_lo:
+ igt_spinner_fini(&spin_lo);
+ return err;
+}
+
static int random_range(struct rnd_state *rnd, int min, int max)
{
return i915_prandom_u32_max_state(max - min, rnd) + min;
@@ -1829,6 +2499,8 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
get_task_struct(tsk[id]);
}
+ yield(); /* start all threads before we kthread_stop() */
+
count = 0;
for_each_engine(engine, smoke->gt, id) {
int status;
@@ -1966,28 +2638,18 @@ static int nop_virtual_engine(struct intel_gt *gt,
#define CHAIN BIT(0)
{
IGT_TIMEOUT(end_time);
- struct i915_request *request[16];
- struct i915_gem_context *ctx[16];
+ struct i915_request *request[16] = {};
struct intel_context *ve[16];
unsigned long n, prime, nc;
struct igt_live_test t;
ktime_t times[2] = {};
int err;
- GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
+ GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
for (n = 0; n < nctx; n++) {
- ctx[n] = kernel_context(gt->i915);
- if (!ctx[n]) {
- err = -ENOMEM;
- nctx = n;
- goto out;
- }
-
- ve[n] = intel_execlists_create_virtual(ctx[n],
- siblings, nsibling);
+ ve[n] = intel_execlists_create_virtual(siblings, nsibling);
if (IS_ERR(ve[n])) {
- kernel_context_close(ctx[n]);
err = PTR_ERR(ve[n]);
nctx = n;
goto out;
@@ -1996,7 +2658,6 @@ static int nop_virtual_engine(struct intel_gt *gt,
err = intel_context_pin(ve[n]);
if (err) {
intel_context_put(ve[n]);
- kernel_context_close(ctx[n]);
nctx = n;
goto out;
}
@@ -2012,27 +2673,35 @@ static int nop_virtual_engine(struct intel_gt *gt,
if (flags & CHAIN) {
for (nc = 0; nc < nctx; nc++) {
for (n = 0; n < prime; n++) {
- request[nc] =
- i915_request_create(ve[nc]);
- if (IS_ERR(request[nc])) {
- err = PTR_ERR(request[nc]);
+ struct i915_request *rq;
+
+ rq = i915_request_create(ve[nc]);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
goto out;
}
- i915_request_add(request[nc]);
+ if (request[nc])
+ i915_request_put(request[nc]);
+ request[nc] = i915_request_get(rq);
+ i915_request_add(rq);
}
}
} else {
for (n = 0; n < prime; n++) {
for (nc = 0; nc < nctx; nc++) {
- request[nc] =
- i915_request_create(ve[nc]);
- if (IS_ERR(request[nc])) {
- err = PTR_ERR(request[nc]);
+ struct i915_request *rq;
+
+ rq = i915_request_create(ve[nc]);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
goto out;
}
- i915_request_add(request[nc]);
+ if (request[nc])
+ i915_request_put(request[nc]);
+ request[nc] = i915_request_get(rq);
+ i915_request_add(rq);
}
}
}
@@ -2058,6 +2727,11 @@ static int nop_virtual_engine(struct intel_gt *gt,
if (prime == 1)
times[0] = times[1];
+ for (nc = 0; nc < nctx; nc++) {
+ i915_request_put(request[nc]);
+ request[nc] = NULL;
+ }
+
if (__igt_timeout(end_time, NULL))
break;
}
@@ -2075,9 +2749,9 @@ out:
err = -EIO;
for (nc = 0; nc < nctx; nc++) {
+ i915_request_put(request[nc]);
intel_context_unpin(ve[nc]);
intel_context_put(ve[nc]);
- kernel_context_close(ctx[nc]);
}
return err;
}
@@ -2136,7 +2810,6 @@ static int mask_virtual_engine(struct intel_gt *gt,
unsigned int nsibling)
{
struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
- struct i915_gem_context *ctx;
struct intel_context *ve;
struct igt_live_test t;
unsigned int n;
@@ -2147,11 +2820,7 @@ static int mask_virtual_engine(struct intel_gt *gt,
* restrict it to our desired engine within the virtual engine.
*/
- ctx = kernel_context(gt->i915);
- if (!ctx)
- return -ENOMEM;
-
- ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+ ve = intel_execlists_create_virtual(siblings, nsibling);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_close;
@@ -2219,7 +2888,6 @@ out_unpin:
out_put:
intel_context_put(ve);
out_close:
- kernel_context_close(ctx);
return err;
}
@@ -2259,7 +2927,6 @@ static int preserved_virtual_engine(struct intel_gt *gt,
unsigned int nsibling)
{
struct i915_request *last = NULL;
- struct i915_gem_context *ctx;
struct intel_context *ve;
struct i915_vma *scratch;
struct igt_live_test t;
@@ -2267,17 +2934,11 @@ static int preserved_virtual_engine(struct intel_gt *gt,
int err = 0;
u32 *cs;
- ctx = kernel_context(gt->i915);
- if (!ctx)
- return -ENOMEM;
-
scratch = create_scratch(siblings[0]->gt);
- if (IS_ERR(scratch)) {
- err = PTR_ERR(scratch);
- goto out_close;
- }
+ if (IS_ERR(scratch))
+ return PTR_ERR(scratch);
- ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+ ve = intel_execlists_create_virtual(siblings, nsibling);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_scratch;
@@ -2360,8 +3021,6 @@ out_put:
intel_context_put(ve);
out_scratch:
i915_vma_unpin_and_release(&scratch, 0);
-out_close:
- kernel_context_close(ctx);
return err;
}
@@ -2413,16 +3072,54 @@ static int bond_virtual_engine(struct intel_gt *gt,
#define BOND_SCHEDULE BIT(0)
{
struct intel_engine_cs *master;
- struct i915_gem_context *ctx;
struct i915_request *rq[16];
enum intel_engine_id id;
+ struct igt_spinner spin;
unsigned long n;
int err;
+ /*
+ * A set of bonded requests is intended to be run concurrently
+ * across a number of engines. We use one request per-engine
+ * and a magic fence to schedule each of the bonded requests
+ * at the same time. A consequence of our current scheduler is that
+ * we only move requests to the HW ready queue when the request
+ * becomes ready, that is when all of its prerequisite fences have
+ * been signaled. As one of those fences is the master submit fence,
+ * there is a delay on all secondary fences as the HW may be
+ * currently busy. Equally, as all the requests are independent,
+ * they may have other fences that delay individual request
+ * submission to HW. Ergo, we do not guarantee that all requests are
+ * immediately submitted to HW at the same time, just that if the
+ * rules are abided by, they are ready at the same time as the
+ * first is submitted. Userspace can embed semaphores in its batch
+ * to ensure parallel execution of its phases as it requires.
+ * Though naturally it gets requested that perhaps the scheduler should
+ * take care of parallel execution, even across preemption events on
+ * different HW. (The proper answer is of course "lalalala".)
+ *
+ * With the submit-fence, we have identified three possible phases
+ * of synchronisation depending on the master fence: queued (not
+ * ready), executing, and signaled. The first two are quite simple
+ * and checked below. However, the signaled master fence handling is
+ * contentious. Currently we do not distinguish between a signaled
+ * fence and an expired fence, as once signaled it does not convey
+ * any information about the previous execution. It may even be freed
+ * and hence checking later it may not exist at all. Ergo we currently
+ * do not apply the bonding constraint for an already signaled fence,
+ * as our expectation is that it should not constrain the secondaries
+ * and is outside of the scope of the bonded request API (i.e. all
+ * userspace requests are meant to be running in parallel). As
+ * it imposes no constraint, and is effectively a no-op, we do not
+ * check below as normal execution flows are checked extensively above.
+ *
+ * XXX Is the degenerate handling of signaled submit fences the
+ * expected behaviour for userpace?
+ */
+
GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
- ctx = kernel_context(gt->i915);
- if (!ctx)
+ if (igt_spinner_init(&spin, gt))
return -ENOMEM;
err = 0;
@@ -2435,7 +3132,9 @@ static int bond_virtual_engine(struct intel_gt *gt,
memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
- rq[0] = igt_request_alloc(ctx, master);
+ rq[0] = igt_spinner_create_request(&spin,
+ master->kernel_context,
+ MI_NOOP);
if (IS_ERR(rq[0])) {
err = PTR_ERR(rq[0]);
goto out;
@@ -2448,16 +3147,21 @@ static int bond_virtual_engine(struct intel_gt *gt,
&fence,
GFP_KERNEL);
}
+
i915_request_add(rq[0]);
if (err < 0)
goto out;
+ if (!(flags & BOND_SCHEDULE) &&
+ !igt_wait_for_spinner(&spin, rq[0])) {
+ err = -EIO;
+ goto out;
+ }
+
for (n = 0; n < nsibling; n++) {
struct intel_context *ve;
- ve = intel_execlists_create_virtual(ctx,
- siblings,
- nsibling);
+ ve = intel_execlists_create_virtual(siblings, nsibling);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
onstack_fence_fini(&fence);
@@ -2499,6 +3203,8 @@ static int bond_virtual_engine(struct intel_gt *gt,
}
}
onstack_fence_fini(&fence);
+ intel_engine_flush_submission(master);
+ igt_spinner_end(&spin);
if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
pr_err("Master request did not execute (on %s)!\n",
@@ -2535,7 +3241,7 @@ out:
if (igt_flush_test(gt->i915))
err = -EIO;
- kernel_context_close(ctx);
+ igt_spinner_fini(&spin);
return err;
}
@@ -2599,10 +3305,13 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_preempt),
SUBTEST(live_late_preempt),
SUBTEST(live_nopreempt),
+ SUBTEST(live_preempt_cancel),
SUBTEST(live_suppress_self_preempt),
SUBTEST(live_suppress_wait_preempt),
SUBTEST(live_chain_preempt),
+ SUBTEST(live_preempt_gang),
SUBTEST(live_preempt_hang),
+ SUBTEST(live_preempt_timeout),
SUBTEST(live_preempt_smoke),
SUBTEST(live_virtual_engine),
SUBTEST(live_virtual_mask),
@@ -2749,8 +3458,101 @@ static int live_lrc_layout(void *arg)
return err;
}
-static int __live_lrc_state(struct i915_gem_context *fixme,
- struct intel_engine_cs *engine,
+static int find_offset(const u32 *lri, u32 offset)
+{
+ int i;
+
+ for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
+ if (lri[i] == offset)
+ return i;
+
+ return -1;
+}
+
+static int live_lrc_fixed(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check the assumed register offsets match the actual locations in
+ * the context image.
+ */
+
+ for_each_engine(engine, gt, id) {
+ const struct {
+ u32 reg;
+ u32 offset;
+ const char *name;
+ } tbl[] = {
+ {
+ i915_mmio_reg_offset(RING_START(engine->mmio_base)),
+ CTX_RING_START - 1,
+ "RING_START"
+ },
+ {
+ i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
+ CTX_RING_CTL - 1,
+ "RING_CTL"
+ },
+ {
+ i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
+ CTX_RING_HEAD - 1,
+ "RING_HEAD"
+ },
+ {
+ i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
+ CTX_RING_TAIL - 1,
+ "RING_TAIL"
+ },
+ {
+ i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
+ lrc_ring_mi_mode(engine),
+ "RING_MI_MODE"
+ },
+ {
+ i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
+ CTX_BB_STATE - 1,
+ "BB_STATE"
+ },
+ { },
+ }, *t;
+ u32 *hw;
+
+ if (!engine->default_state)
+ continue;
+
+ hw = i915_gem_object_pin_map(engine->default_state,
+ I915_MAP_WB);
+ if (IS_ERR(hw)) {
+ err = PTR_ERR(hw);
+ break;
+ }
+ hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+
+ for (t = tbl; t->name; t++) {
+ int dw = find_offset(hw, t->reg);
+
+ if (dw != t->offset) {
+ pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
+ engine->name,
+ t->name,
+ t->reg,
+ dw,
+ t->offset);
+ err = -EINVAL;
+ }
+ }
+
+ i915_gem_object_unpin_map(engine->default_state);
+ }
+
+ return err;
+}
+
+static int __live_lrc_state(struct intel_engine_cs *engine,
struct i915_vma *scratch)
{
struct intel_context *ce;
@@ -2765,7 +3567,7 @@ static int __live_lrc_state(struct i915_gem_context *fixme,
int err;
int n;
- ce = intel_context_create(fixme, engine);
+ ce = intel_context_create(engine);
if (IS_ERR(ce))
return PTR_ERR(ce);
@@ -2839,7 +3641,6 @@ static int live_lrc_state(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
- struct i915_gem_context *fixme;
struct i915_vma *scratch;
enum intel_engine_id id;
int err = 0;
@@ -2849,18 +3650,12 @@ static int live_lrc_state(void *arg)
* intel_context.
*/
- fixme = kernel_context(gt->i915);
- if (!fixme)
- return -ENOMEM;
-
scratch = create_scratch(gt);
- if (IS_ERR(scratch)) {
- err = PTR_ERR(scratch);
- goto out_close;
- }
+ if (IS_ERR(scratch))
+ return PTR_ERR(scratch);
for_each_engine(engine, gt, id) {
- err = __live_lrc_state(fixme, engine, scratch);
+ err = __live_lrc_state(engine, scratch);
if (err)
break;
}
@@ -2869,8 +3664,6 @@ static int live_lrc_state(void *arg)
err = -EIO;
i915_vma_unpin_and_release(&scratch, 0);
-out_close:
- kernel_context_close(fixme);
return err;
}
@@ -2880,7 +3673,7 @@ static int gpr_make_dirty(struct intel_engine_cs *engine)
u32 *cs;
int n;
- rq = i915_request_create(engine->kernel_context);
+ rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq))
return PTR_ERR(rq);
@@ -2903,8 +3696,7 @@ static int gpr_make_dirty(struct intel_engine_cs *engine)
return 0;
}
-static int __live_gpr_clear(struct i915_gem_context *fixme,
- struct intel_engine_cs *engine,
+static int __live_gpr_clear(struct intel_engine_cs *engine,
struct i915_vma *scratch)
{
struct intel_context *ce;
@@ -2920,7 +3712,7 @@ static int __live_gpr_clear(struct i915_gem_context *fixme,
if (err)
return err;
- ce = intel_context_create(fixme, engine);
+ ce = intel_context_create(engine);
if (IS_ERR(ce))
return PTR_ERR(ce);
@@ -2982,7 +3774,6 @@ static int live_gpr_clear(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
- struct i915_gem_context *fixme;
struct i915_vma *scratch;
enum intel_engine_id id;
int err = 0;
@@ -2992,18 +3783,12 @@ static int live_gpr_clear(void *arg)
* to avoid leaking any information from previous contexts.
*/
- fixme = kernel_context(gt->i915);
- if (!fixme)
- return -ENOMEM;
-
scratch = create_scratch(gt);
- if (IS_ERR(scratch)) {
- err = PTR_ERR(scratch);
- goto out_close;
- }
+ if (IS_ERR(scratch))
+ return PTR_ERR(scratch);
for_each_engine(engine, gt, id) {
- err = __live_gpr_clear(fixme, engine, scratch);
+ err = __live_gpr_clear(engine, scratch);
if (err)
break;
}
@@ -3012,8 +3797,6 @@ static int live_gpr_clear(void *arg)
err = -EIO;
i915_vma_unpin_and_release(&scratch, 0);
-out_close:
- kernel_context_close(fixme);
return err;
}
@@ -3021,6 +3804,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_lrc_layout),
+ SUBTEST(live_lrc_fixed),
SUBTEST(live_lrc_state),
SUBTEST(live_gpr_clear),
};
diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
new file mode 100644
index 000000000000..de1f83100fb6
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -0,0 +1,419 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gt/intel_engine_pm.h"
+#include "i915_selftest.h"
+
+#include "gem/selftests/mock_context.h"
+#include "selftests/igt_reset.h"
+#include "selftests/igt_spinner.h"
+
+struct live_mocs {
+ struct drm_i915_mocs_table table;
+ struct i915_vma *scratch;
+ void *vaddr;
+};
+
+static int request_add_sync(struct i915_request *rq, int err)
+{
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+
+ return err;
+}
+
+static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
+{
+ int err = 0;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (spin && !igt_wait_for_spinner(spin, rq))
+ err = -ETIME;
+ i915_request_put(rq);
+
+ return err;
+}
+
+static struct i915_vma *create_scratch(struct intel_gt *gt)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int err;
+
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err) {
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+ }
+
+ return vma;
+}
+
+static int live_mocs_init(struct live_mocs *arg, struct intel_gt *gt)
+{
+ int err;
+
+ if (!get_mocs_settings(gt->i915, &arg->table))
+ return -EINVAL;
+
+ arg->scratch = create_scratch(gt);
+ if (IS_ERR(arg->scratch))
+ return PTR_ERR(arg->scratch);
+
+ arg->vaddr = i915_gem_object_pin_map(arg->scratch->obj, I915_MAP_WB);
+ if (IS_ERR(arg->vaddr)) {
+ err = PTR_ERR(arg->vaddr);
+ goto err_scratch;
+ }
+
+ return 0;
+
+err_scratch:
+ i915_vma_unpin_and_release(&arg->scratch, 0);
+ return err;
+}
+
+static void live_mocs_fini(struct live_mocs *arg)
+{
+ i915_vma_unpin_and_release(&arg->scratch, I915_VMA_RELEASE_MAP);
+}
+
+static int read_regs(struct i915_request *rq,
+ u32 addr, unsigned int count,
+ uint32_t *offset)
+{
+ unsigned int i;
+ u32 *cs;
+
+ GEM_BUG_ON(!IS_ALIGNED(*offset, sizeof(u32)));
+
+ cs = intel_ring_begin(rq, 4 * count);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ for (i = 0; i < count; i++) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = addr;
+ *cs++ = *offset;
+ *cs++ = 0;
+
+ addr += sizeof(u32);
+ *offset += sizeof(u32);
+ }
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int read_mocs_table(struct i915_request *rq,
+ const struct drm_i915_mocs_table *table,
+ uint32_t *offset)
+{
+ u32 addr;
+
+ if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915))
+ addr = global_mocs_offset();
+ else
+ addr = mocs_offset(rq->engine);
+
+ return read_regs(rq, addr, table->n_entries, offset);
+}
+
+static int read_l3cc_table(struct i915_request *rq,
+ const struct drm_i915_mocs_table *table,
+ uint32_t *offset)
+{
+ u32 addr = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0));
+
+ return read_regs(rq, addr, (table->n_entries + 1) / 2, offset);
+}
+
+static int check_mocs_table(struct intel_engine_cs *engine,
+ const struct drm_i915_mocs_table *table,
+ uint32_t **vaddr)
+{
+ unsigned int i;
+ u32 expect;
+
+ for_each_mocs(expect, table, i) {
+ if (**vaddr != expect) {
+ pr_err("%s: Invalid MOCS[%d] entry, found %08x, expected %08x\n",
+ engine->name, i, **vaddr, expect);
+ return -EINVAL;
+ }
+ ++*vaddr;
+ }
+
+ return 0;
+}
+
+static bool mcr_range(struct drm_i915_private *i915, u32 offset)
+{
+ /*
+ * Registers in this range are affected by the MCR selector
+ * which only controls CPU initiated MMIO. Routing does not
+ * work for CS access so we cannot verify them on this path.
+ */
+ return INTEL_GEN(i915) >= 8 && offset >= 0xb000 && offset <= 0xb4ff;
+}
+
+static int check_l3cc_table(struct intel_engine_cs *engine,
+ const struct drm_i915_mocs_table *table,
+ uint32_t **vaddr)
+{
+ /* Can we read the MCR range 0xb00 directly? See intel_workarounds! */
+ u32 reg = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0));
+ unsigned int i;
+ u32 expect;
+
+ for_each_l3cc(expect, table, i) {
+ if (!mcr_range(engine->i915, reg) && **vaddr != expect) {
+ pr_err("%s: Invalid L3CC[%d] entry, found %08x, expected %08x\n",
+ engine->name, i, **vaddr, expect);
+ return -EINVAL;
+ }
+ ++*vaddr;
+ reg += 4;
+ }
+
+ return 0;
+}
+
+static int check_mocs_engine(struct live_mocs *arg,
+ struct intel_context *ce)
+{
+ struct i915_vma *vma = arg->scratch;
+ struct i915_request *rq;
+ u32 offset;
+ u32 *vaddr;
+ int err;
+
+ memset32(arg->vaddr, STACK_MAGIC, PAGE_SIZE / sizeof(u32));
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, true);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ i915_vma_unlock(vma);
+
+ /* Read the mocs tables back using SRM */
+ offset = i915_ggtt_offset(vma);
+ if (!err)
+ err = read_mocs_table(rq, &arg->table, &offset);
+ if (!err && ce->engine->class == RENDER_CLASS)
+ err = read_l3cc_table(rq, &arg->table, &offset);
+ offset -= i915_ggtt_offset(vma);
+ GEM_BUG_ON(offset > PAGE_SIZE);
+
+ err = request_add_sync(rq, err);
+ if (err)
+ return err;
+
+ /* Compare the results against the expected tables */
+ vaddr = arg->vaddr;
+ if (!err)
+ err = check_mocs_table(ce->engine, &arg->table, &vaddr);
+ if (!err && ce->engine->class == RENDER_CLASS)
+ err = check_l3cc_table(ce->engine, &arg->table, &vaddr);
+ if (err)
+ return err;
+
+ GEM_BUG_ON(arg->vaddr + offset != vaddr);
+ return 0;
+}
+
+static int live_mocs_kernel(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct live_mocs mocs;
+ int err;
+
+ /* Basic check the system is configured with the expected mocs table */
+
+ err = live_mocs_init(&mocs, gt);
+ if (err)
+ return err;
+
+ for_each_engine(engine, gt, id) {
+ intel_engine_pm_get(engine);
+ err = check_mocs_engine(&mocs, engine->kernel_context);
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
+ }
+
+ live_mocs_fini(&mocs);
+ return err;
+}
+
+static int live_mocs_clean(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct live_mocs mocs;
+ int err;
+
+ /* Every new context should see the same mocs table */
+
+ err = live_mocs_init(&mocs, gt);
+ if (err)
+ return err;
+
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
+ err = check_mocs_engine(&mocs, ce);
+ intel_context_put(ce);
+ if (err)
+ break;
+ }
+
+ live_mocs_fini(&mocs);
+ return err;
+}
+
+static int active_engine_reset(struct intel_context *ce,
+ const char *reason)
+{
+ struct igt_spinner spin;
+ struct i915_request *rq;
+ int err;
+
+ err = igt_spinner_init(&spin, ce->engine->gt);
+ if (err)
+ return err;
+
+ rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+ if (IS_ERR(rq)) {
+ igt_spinner_fini(&spin);
+ return PTR_ERR(rq);
+ }
+
+ err = request_add_spin(rq, &spin);
+ if (err == 0)
+ err = intel_engine_reset(ce->engine, reason);
+
+ igt_spinner_end(&spin);
+ igt_spinner_fini(&spin);
+
+ return err;
+}
+
+static int __live_mocs_reset(struct live_mocs *mocs,
+ struct intel_context *ce)
+{
+ int err;
+
+ err = intel_engine_reset(ce->engine, "mocs");
+ if (err)
+ return err;
+
+ err = check_mocs_engine(mocs, ce);
+ if (err)
+ return err;
+
+ err = active_engine_reset(ce, "mocs");
+ if (err)
+ return err;
+
+ err = check_mocs_engine(mocs, ce);
+ if (err)
+ return err;
+
+ intel_gt_reset(ce->engine->gt, ce->engine->mask, "mocs");
+
+ err = check_mocs_engine(mocs, ce);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int live_mocs_reset(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct live_mocs mocs;
+ int err = 0;
+
+ /* Check the mocs setup is retained over per-engine and global resets */
+
+ if (!intel_has_reset_engine(gt))
+ return 0;
+
+ err = live_mocs_init(&mocs, gt);
+ if (err)
+ return err;
+
+ igt_global_reset_lock(gt);
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
+ intel_engine_pm_get(engine);
+ err = __live_mocs_reset(&mocs, ce);
+ intel_engine_pm_put(engine);
+
+ intel_context_put(ce);
+ if (err)
+ break;
+ }
+ igt_global_reset_unlock(gt);
+
+ live_mocs_fini(&mocs);
+ return err;
+}
+
+int intel_mocs_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_mocs_kernel),
+ SUBTEST(live_mocs_clean),
+ SUBTEST(live_mocs_reset),
+ };
+ struct drm_i915_mocs_table table;
+
+ if (!get_mocs_settings(i915, &table))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
new file mode 100644
index 000000000000..8cc55a0e9e06
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -0,0 +1,203 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_context.h"
+#include "intel_engine_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_ring.h"
+#include "selftest_rc6.h"
+
+#include "selftests/i915_random.h"
+
+int live_rc6_manual(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rc6 *rc6 = &gt->rc6;
+ intel_wakeref_t wakeref;
+ u64 res[2];
+ int err = 0;
+
+ /*
+ * Our claim is that we can "encourage" the GPU to enter rc6 at will.
+ * Let's try it!
+ */
+
+ if (!rc6->enabled)
+ return 0;
+
+ /* bsw/byt use a PCU and decouple RC6 from our manual control */
+ if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915))
+ return 0;
+
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+
+ /* Force RC6 off for starters */
+ __intel_rc6_disable(rc6);
+ msleep(1); /* wakeup is not immediate, takes about 100us on icl */
+
+ res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ msleep(250);
+ res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ if ((res[1] - res[0]) >> 10) {
+ pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n",
+ (res[1] - res[0]) >> 10);
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ /* Manually enter RC6 */
+ intel_rc6_park(rc6);
+
+ res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ msleep(100);
+ res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+
+ if (res[1] == res[0]) {
+ pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x\n",
+ intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE),
+ intel_uncore_read_fw(gt->uncore, GEN6_RC_CONTROL));
+ err = -EINVAL;
+ }
+
+ /* Restore what should have been the original state! */
+ intel_rc6_unpark(rc6);
+
+out_unlock:
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+ return err;
+}
+
+static const u32 *__live_rc6_ctx(struct intel_context *ce)
+{
+ struct i915_request *rq;
+ const u32 *result;
+ u32 cmd;
+ u32 *cs;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return ERR_CAST(rq);
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ return cs;
+ }
+
+ cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
+ if (INTEL_GEN(rq->i915) >= 8)
+ cmd++;
+
+ *cs++ = cmd;
+ *cs++ = i915_mmio_reg_offset(GEN8_RC6_CTX_INFO);
+ *cs++ = ce->timeline->hwsp_offset + 8;
+ *cs++ = 0;
+ intel_ring_advance(rq, cs);
+
+ result = rq->hwsp_seqno + 2;
+ i915_request_add(rq);
+
+ return result;
+}
+
+static struct intel_engine_cs **
+randomised_engines(struct intel_gt *gt,
+ struct rnd_state *prng,
+ unsigned int *count)
+{
+ struct intel_engine_cs *engine, **engines;
+ enum intel_engine_id id;
+ int n;
+
+ n = 0;
+ for_each_engine(engine, gt, id)
+ n++;
+ if (!n)
+ return NULL;
+
+ engines = kmalloc_array(n, sizeof(*engines), GFP_KERNEL);
+ if (!engines)
+ return NULL;
+
+ n = 0;
+ for_each_engine(engine, gt, id)
+ engines[n++] = engine;
+
+ i915_prandom_shuffle(engines, sizeof(*engines), n, prng);
+
+ *count = n;
+ return engines;
+}
+
+int live_rc6_ctx_wa(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs **engines;
+ unsigned int n, count;
+ I915_RND_STATE(prng);
+ int err = 0;
+
+ /* A read of CTX_INFO upsets rc6. Poke the bear! */
+ if (INTEL_GEN(gt->i915) < 8)
+ return 0;
+
+ engines = randomised_engines(gt, &prng, &count);
+ if (!engines)
+ return 0;
+
+ for (n = 0; n < count; n++) {
+ struct intel_engine_cs *engine = engines[n];
+ int pass;
+
+ for (pass = 0; pass < 2; pass++) {
+ struct intel_context *ce;
+ unsigned int resets =
+ i915_reset_engine_count(&gt->i915->gpu_error,
+ engine);
+ const u32 *res;
+
+ /* Use a sacrifical context */
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out;
+ }
+
+ intel_engine_pm_get(engine);
+ res = __live_rc6_ctx(ce);
+ intel_engine_pm_put(engine);
+ intel_context_put(ce);
+ if (IS_ERR(res)) {
+ err = PTR_ERR(res);
+ goto out;
+ }
+
+ if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) {
+ intel_gt_set_wedged(gt);
+ err = -ETIME;
+ goto out;
+ }
+
+ intel_gt_pm_wait_for_idle(gt);
+ pr_debug("%s: CTX_INFO=%0x\n",
+ engine->name, READ_ONCE(*res));
+
+ if (resets !=
+ i915_reset_engine_count(&gt->i915->gpu_error,
+ engine)) {
+ pr_err("%s: GPU reset required\n",
+ engine->name);
+ add_taint_for_CI(TAINT_WARN);
+ err = -EIO;
+ goto out;
+ }
+ }
+ }
+
+out:
+ kfree(engines);
+ return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.h b/drivers/gpu/drm/i915/gt/selftest_rc6.h
new file mode 100644
index 000000000000..762fd442d7b2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.h
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef SELFTEST_RC6_H
+#define SELFTEST_RC6_H
+
+int live_rc6_ctx_wa(void *arg);
+int live_rc6_manual(void *arg);
+
+#endif /* SELFTEST_RC6_H */
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 6efb9221b7fa..6ad6aca315f6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -126,7 +126,7 @@ static int igt_atomic_engine_reset(void *arg)
goto out_unlock;
for_each_engine(engine, gt, id) {
- tasklet_disable_nosync(&engine->execlists.tasklet);
+ tasklet_disable(&engine->execlists.tasklet);
intel_engine_pm_get(engine);
for (p = igt_atomic_phases; p->name; p++) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index dac86f699a4c..e2d78cc22fb4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -9,6 +9,7 @@
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
+#include "intel_ring.h"
#include "../selftests/i915_random.h"
#include "../i915_selftest.h"
@@ -457,7 +458,7 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
goto out;
}
- rq = i915_request_create(engine->kernel_context);
+ rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq))
goto out_unpin;
@@ -674,9 +675,7 @@ static int live_hwsp_wrap(void *arg)
if (!intel_engine_can_store_dword(engine))
continue;
- intel_engine_pm_get(engine);
- rq = i915_request_create(engine->kernel_context);
- intel_engine_pm_put(engine);
+ rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out;
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index ef02920cec29..ac1921854cbf 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -264,22 +264,15 @@ static int
switch_to_scratch_context(struct intel_engine_cs *engine,
struct igt_spinner *spin)
{
- struct i915_gem_context *ctx;
struct intel_context *ce;
struct i915_request *rq;
int err = 0;
- ctx = kernel_context(engine->i915);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- GEM_BUG_ON(i915_gem_context_is_bannable(ctx));
-
- ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
- GEM_BUG_ON(IS_ERR(ce));
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
rq = igt_spinner_create_request(spin, ce, MI_NOOP);
-
intel_context_put(ce);
if (IS_ERR(rq)) {
@@ -293,7 +286,6 @@ err:
if (err && spin)
igt_spinner_end(spin);
- kernel_context_close(ctx);
return err;
}
@@ -367,20 +359,17 @@ out_ctx:
return err;
}
-static struct i915_vma *create_batch(struct i915_gem_context *ctx)
+static struct i915_vma *create_batch(struct i915_address_space *vm)
{
struct drm_i915_gem_object *obj;
- struct i915_address_space *vm;
struct i915_vma *vma;
int err;
- obj = i915_gem_object_create_internal(ctx->i915, 16 * PAGE_SIZE);
+ obj = i915_gem_object_create_internal(vm->i915, 16 * PAGE_SIZE);
if (IS_ERR(obj))
return ERR_CAST(obj);
- vm = i915_gem_context_get_vm_rcu(ctx);
vma = i915_vma_instance(obj, vm, NULL);
- i915_vm_put(vm);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_obj;
@@ -452,8 +441,7 @@ static int whitelist_writable_count(struct intel_engine_cs *engine)
return count;
}
-static int check_dirty_whitelist(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
+static int check_dirty_whitelist(struct intel_context *ce)
{
const u32 values[] = {
0x00000000,
@@ -481,19 +469,17 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
0xffff00ff,
0xffffffff,
};
- struct i915_address_space *vm;
+ struct intel_engine_cs *engine = ce->engine;
struct i915_vma *scratch;
struct i915_vma *batch;
int err = 0, i, v;
u32 *cs, *results;
- vm = i915_gem_context_get_vm_rcu(ctx);
- scratch = create_scratch(vm, 2 * ARRAY_SIZE(values) + 1);
- i915_vm_put(vm);
+ scratch = create_scratch(ce->vm, 2 * ARRAY_SIZE(values) + 1);
if (IS_ERR(scratch))
return PTR_ERR(scratch);
- batch = create_batch(ctx);
+ batch = create_batch(ce->vm);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto out_scratch;
@@ -513,9 +499,12 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
ro_reg = ro_register(reg);
+ /* Clear non priv flags */
+ reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK;
+
srm = MI_STORE_REGISTER_MEM;
lrm = MI_LOAD_REGISTER_MEM;
- if (INTEL_GEN(ctx->i915) >= 8)
+ if (INTEL_GEN(engine->i915) >= 8)
lrm++, srm++;
pr_debug("%s: Writing garbage to %x\n",
@@ -574,7 +563,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
i915_gem_object_unpin_map(batch->obj);
intel_gt_chipset_flush(engine->gt);
- rq = igt_request_alloc(ctx, engine);
+ rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_batch;
@@ -693,7 +682,7 @@ out_unpin:
break;
}
- if (igt_flush_test(ctx->i915))
+ if (igt_flush_test(engine->i915))
err = -EIO;
out_batch:
i915_vma_unpin_and_release(&batch, 0);
@@ -706,38 +695,31 @@ static int live_dirty_whitelist(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
enum intel_engine_id id;
- struct drm_file *file;
- int err = 0;
/* Can the user write to the whitelisted registers? */
if (INTEL_GEN(gt->i915) < 7) /* minimum requirement for LRI, SRM, LRM */
return 0;
- file = mock_file(gt->i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- ctx = live_context(gt->i915, file);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out_file;
- }
-
for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+ int err;
+
if (engine->whitelist.count == 0)
continue;
- err = check_dirty_whitelist(ctx, engine);
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ err = check_dirty_whitelist(ce);
+ intel_context_put(ce);
if (err)
- goto out_file;
+ return err;
}
-out_file:
- mock_file_free(gt->i915, file);
- return err;
+ return 0;
}
static int live_reset_whitelist(void *arg)
@@ -810,8 +792,8 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
u64 offset = results->node.start + sizeof(u32) * i;
u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
- /* Clear access permission field */
- reg &= ~RING_FORCE_TO_NONPRIV_ACCESS_MASK;
+ /* Clear non priv flags */
+ reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK;
*cs++ = srm;
*cs++ = reg;
@@ -827,12 +809,15 @@ err_req:
static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
+ struct i915_address_space *vm;
struct i915_request *rq;
struct i915_vma *batch;
int i, err = 0;
u32 *cs;
- batch = create_batch(ctx);
+ vm = i915_gem_context_get_vm_rcu(ctx);
+ batch = create_batch(vm);
+ i915_vm_put(vm);
if (IS_ERR(batch))
return PTR_ERR(batch);
@@ -849,6 +834,9 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
if (ro_register(reg))
continue;
+ /* Clear non priv flags */
+ reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK;
+
*cs++ = reg;
*cs++ = 0xffffffff;
}
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
index 2a77c051f36a..aeb1d1f616e8 100644
--- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
@@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context)
mutex_init(&timeline->mutex);
- INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
+ INIT_ACTIVE_FENCE(&timeline->last_request);
INIT_LIST_HEAD(&timeline->requests);
i915_syncmap_init(&timeline->sync);
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h
index 689efc66c908..d2bcc3df6183 100644
--- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h
@@ -7,6 +7,8 @@
#ifndef __MOCK_TIMELINE__
#define __MOCK_TIMELINE__
+#include <linux/types.h>
+
struct intel_timeline;
void mock_timeline_init(struct intel_timeline *timeline, u64 context);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 37f7bcbf7dac..5d00a3b2d914 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -4,6 +4,8 @@
*/
#include "gt/intel_gt.h"
+#include "gt/intel_gt_irq.h"
+#include "gt/intel_gt_pm_irq.h"
#include "intel_guc.h"
#include "intel_guc_ads.h"
#include "intel_guc_submission.h"
@@ -30,18 +32,17 @@
* just the HuC, but more are expected to land in the future).
*/
-static void gen8_guc_raise_irq(struct intel_guc *guc)
+void intel_guc_notify(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
- intel_uncore_write(gt->uncore, GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);
-}
-
-static void gen11_guc_raise_irq(struct intel_guc *guc)
-{
- struct intel_gt *gt = guc_to_gt(guc);
-
- intel_uncore_write(gt->uncore, GEN11_GUC_HOST_INTERRUPT, 0);
+ /*
+ * On Gen11+, the value written to the register is passes as a payload
+ * to the FW. However, the FW currently treats all values the same way
+ * (H2G interrupt), so we can just write the value that the HW expects
+ * on older gens.
+ */
+ intel_uncore_write(gt->uncore, guc->notify_reg, GUC_SEND_TRIGGER);
}
static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i)
@@ -77,6 +78,93 @@ void intel_guc_init_send_regs(struct intel_guc *guc)
guc->send_regs.fw_domains = fw_domains;
}
+static void gen9_reset_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ assert_rpm_wakelock_held(&gt->i915->runtime_pm);
+
+ spin_lock_irq(&gt->irq_lock);
+ gen6_gt_pm_reset_iir(gt, gt->pm_guc_events);
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+static void gen9_enable_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ assert_rpm_wakelock_held(&gt->i915->runtime_pm);
+
+ spin_lock_irq(&gt->irq_lock);
+ if (!guc->interrupts.enabled) {
+ WARN_ON_ONCE(intel_uncore_read(gt->uncore, GEN8_GT_IIR(2)) &
+ gt->pm_guc_events);
+ guc->interrupts.enabled = true;
+ gen6_gt_pm_enable_irq(gt, gt->pm_guc_events);
+ }
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+static void gen9_disable_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ assert_rpm_wakelock_held(&gt->i915->runtime_pm);
+
+ spin_lock_irq(&gt->irq_lock);
+ guc->interrupts.enabled = false;
+
+ gen6_gt_pm_disable_irq(gt, gt->pm_guc_events);
+
+ spin_unlock_irq(&gt->irq_lock);
+ intel_synchronize_irq(gt->i915);
+
+ gen9_reset_guc_interrupts(guc);
+}
+
+static void gen11_reset_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ spin_lock_irq(&gt->irq_lock);
+ gen11_gt_reset_one_iir(gt, 0, GEN11_GUC);
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+static void gen11_enable_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ spin_lock_irq(&gt->irq_lock);
+ if (!guc->interrupts.enabled) {
+ u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
+
+ WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC));
+ intel_uncore_write(gt->uncore,
+ GEN11_GUC_SG_INTR_ENABLE, events);
+ intel_uncore_write(gt->uncore,
+ GEN11_GUC_SG_INTR_MASK, ~events);
+ guc->interrupts.enabled = true;
+ }
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+static void gen11_disable_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ spin_lock_irq(&gt->irq_lock);
+ guc->interrupts.enabled = false;
+
+ intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~0);
+ intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, 0);
+
+ spin_unlock_irq(&gt->irq_lock);
+ intel_synchronize_irq(gt->i915);
+
+ gen11_reset_guc_interrupts(guc);
+}
+
void intel_guc_init_early(struct intel_guc *guc)
{
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
@@ -88,47 +176,19 @@ void intel_guc_init_early(struct intel_guc *guc)
mutex_init(&guc->send_mutex);
spin_lock_init(&guc->irq_lock);
- guc->send = intel_guc_send_nop;
- guc->handler = intel_guc_to_host_event_handler_nop;
if (INTEL_GEN(i915) >= 11) {
- guc->notify = gen11_guc_raise_irq;
+ guc->notify_reg = GEN11_GUC_HOST_INTERRUPT;
guc->interrupts.reset = gen11_reset_guc_interrupts;
guc->interrupts.enable = gen11_enable_guc_interrupts;
guc->interrupts.disable = gen11_disable_guc_interrupts;
} else {
- guc->notify = gen8_guc_raise_irq;
+ guc->notify_reg = GUC_SEND_INTERRUPT;
guc->interrupts.reset = gen9_reset_guc_interrupts;
guc->interrupts.enable = gen9_enable_guc_interrupts;
guc->interrupts.disable = gen9_disable_guc_interrupts;
}
}
-static int guc_shared_data_create(struct intel_guc *guc)
-{
- struct i915_vma *vma;
- void *vaddr;
-
- vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
-
- vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- i915_vma_unpin_and_release(&vma, 0);
- return PTR_ERR(vaddr);
- }
-
- guc->shared_data = vma;
- guc->shared_data_vaddr = vaddr;
-
- return 0;
-}
-
-static void guc_shared_data_destroy(struct intel_guc *guc)
-{
- i915_vma_unpin_and_release(&guc->shared_data, I915_VMA_RELEASE_MAP);
-}
-
static u32 guc_ctl_debug_flags(struct intel_guc *guc)
{
u32 level = intel_guc_log_get_level(&guc->log);
@@ -275,14 +335,9 @@ int intel_guc_init(struct intel_guc *guc)
if (ret)
goto err_fetch;
- ret = guc_shared_data_create(guc);
- if (ret)
- goto err_fw;
- GEM_BUG_ON(!guc->shared_data);
-
ret = intel_guc_log_create(&guc->log);
if (ret)
- goto err_shared;
+ goto err_fw;
ret = intel_guc_ads_create(guc);
if (ret)
@@ -317,8 +372,6 @@ err_ads:
intel_guc_ads_destroy(guc);
err_log:
intel_guc_log_destroy(&guc->log);
-err_shared:
- guc_shared_data_destroy(guc);
err_fw:
intel_uc_fw_fini(&guc->fw);
err_fetch:
@@ -343,21 +396,10 @@ void intel_guc_fini(struct intel_guc *guc)
intel_guc_ads_destroy(guc);
intel_guc_log_destroy(&guc->log);
- guc_shared_data_destroy(guc);
intel_uc_fw_fini(&guc->fw);
intel_uc_fw_cleanup_fetch(&guc->fw);
-}
-
-int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len,
- u32 *response_buf, u32 response_buf_size)
-{
- WARN(1, "Unexpected send: action=%#x\n", *action);
- return -ENODEV;
-}
-void intel_guc_to_host_event_handler_nop(struct intel_guc *guc)
-{
- WARN(1, "Unexpected event: no suitable handler\n");
+ intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_DISABLED);
}
/*
@@ -499,6 +541,13 @@ int intel_guc_suspend(struct intel_guc *guc)
};
/*
+ * If GuC communication is enabled but submission is not supported,
+ * we do not need to suspend the GuC.
+ */
+ if (!intel_guc_submission_is_enabled(guc))
+ return 0;
+
+ /*
* The ENTER_S_STATE action queues the save/restore operation in GuC FW
* and then returns, so waiting on the H2G is not enough to guarantee
* GuC is done. When all the processing is done, GuC writes
@@ -539,19 +588,9 @@ int intel_guc_suspend(struct intel_guc *guc)
int intel_guc_reset_engine(struct intel_guc *guc,
struct intel_engine_cs *engine)
{
- u32 data[7];
-
- GEM_BUG_ON(!guc->execbuf_client);
-
- data[0] = INTEL_GUC_ACTION_REQUEST_ENGINE_RESET;
- data[1] = engine->guc_id;
- data[2] = 0;
- data[3] = 0;
- data[4] = 0;
- data[5] = guc->execbuf_client->stage_id;
- data[6] = intel_guc_ggtt_offset(guc, guc->shared_data);
+ /* XXX: to be implemented with submission interface rework */
- return intel_guc_send(guc, data, ARRAY_SIZE(data));
+ return -ENODEV;
}
/**
@@ -565,6 +604,14 @@ int intel_guc_resume(struct intel_guc *guc)
GUC_POWER_D0,
};
+ /*
+ * If GuC communication is enabled but submission is not supported,
+ * we do not need to resume the GuC but we do need to enable the
+ * GuC communication on resume (above).
+ */
+ if (!intel_guc_submission_is_enabled(guc))
+ return 0;
+
return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
@@ -644,3 +691,37 @@ err:
i915_gem_object_put(obj);
return vma;
}
+
+/**
+ * intel_guc_allocate_and_map_vma() - Allocate and map VMA for GuC usage
+ * @guc: the guc
+ * @size: size of area to allocate (both virtual space and memory)
+ * @out_vma: return variable for the allocated vma pointer
+ * @out_vaddr: return variable for the obj mapping
+ *
+ * This wrapper calls intel_guc_allocate_vma() and then maps the allocated
+ * object with I915_MAP_WB.
+ *
+ * Return: 0 if successful, a negative errno code otherwise.
+ */
+int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
+ struct i915_vma **out_vma, void **out_vaddr)
+{
+ struct i915_vma *vma;
+ void *vaddr;
+
+ vma = intel_guc_allocate_vma(guc, size);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ i915_vma_unpin_and_release(&vma, 0);
+ return PTR_ERR(vaddr);
+ }
+
+ *out_vma = vma;
+ *out_vaddr = vaddr;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 2b2f046d3cc3..910d49590068 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -20,8 +20,8 @@ struct __guc_ads_blob;
/*
* Top level structure of GuC. It handles firmware loading and manages client
- * pool and doorbells. intel_guc owns a intel_guc_client to replace the legacy
- * ExecList submission.
+ * pool. intel_guc owns a intel_guc_client to replace the legacy ExecList
+ * submission.
*/
struct intel_guc {
struct intel_uc_fw fw;
@@ -46,15 +46,13 @@ struct intel_guc {
struct i915_vma *stage_desc_pool;
void *stage_desc_pool_vaddr;
- struct ida stage_ids;
- struct i915_vma *shared_data;
- void *shared_data_vaddr;
- struct intel_guc_client *execbuf_client;
+ struct i915_vma *workqueue;
+ void *workqueue_vaddr;
+ spinlock_t wq_lock;
- DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS);
- /* Cyclic counter mod pagesize */
- u32 db_cacheline;
+ struct i915_vma *proc_desc;
+ void *proc_desc_vaddr;
/* Control params for fw initialization */
u32 params[GUC_CTL_MAX_DWORDS];
@@ -66,44 +64,33 @@ struct intel_guc {
enum forcewake_domains fw_domains;
} send_regs;
+ /* register used to send interrupts to the GuC FW */
+ i915_reg_t notify_reg;
+
/* Store msg (e.g. log flush) that we see while CTBs are disabled */
u32 mmio_msg;
/* To serialize the intel_guc_send actions */
struct mutex send_mutex;
-
- /* GuC's FW specific send function */
- int (*send)(struct intel_guc *guc, const u32 *data, u32 len,
- u32 *response_buf, u32 response_buf_size);
-
- /* GuC's FW specific event handler function */
- void (*handler)(struct intel_guc *guc);
-
- /* GuC's FW specific notify function */
- void (*notify)(struct intel_guc *guc);
};
static
inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
{
- return guc->send(guc, action, len, NULL, 0);
+ return intel_guc_ct_send(&guc->ct, action, len, NULL, 0);
}
static inline int
intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len,
u32 *response_buf, u32 response_buf_size)
{
- return guc->send(guc, action, len, response_buf, response_buf_size);
-}
-
-static inline void intel_guc_notify(struct intel_guc *guc)
-{
- guc->notify(guc);
+ return intel_guc_ct_send(&guc->ct, action, len,
+ response_buf, response_buf_size);
}
static inline void intel_guc_to_host_event_handler(struct intel_guc *guc)
{
- guc->handler(guc);
+ intel_guc_ct_event_handler(&guc->ct);
}
/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
@@ -138,12 +125,9 @@ void intel_guc_init_send_regs(struct intel_guc *guc);
void intel_guc_write_params(struct intel_guc *guc);
int intel_guc_init(struct intel_guc *guc);
void intel_guc_fini(struct intel_guc *guc);
-int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len,
- u32 *response_buf, u32 response_buf_size);
+void intel_guc_notify(struct intel_guc *guc);
int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len,
u32 *response_buf, u32 response_buf_size);
-void intel_guc_to_host_event_handler(struct intel_guc *guc);
-void intel_guc_to_host_event_handler_nop(struct intel_guc *guc);
int intel_guc_to_host_process_recv_msg(struct intel_guc *guc,
const u32 *payload, u32 len);
int intel_guc_sample_forcewake(struct intel_guc *guc);
@@ -151,6 +135,8 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset);
int intel_guc_suspend(struct intel_guc *guc);
int intel_guc_resume(struct intel_guc *guc);
struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);
+int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
+ struct i915_vma **out_vma, void **out_vaddr);
static inline bool intel_guc_is_supported(struct intel_guc *guc)
{
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index ca6674b8e00c..101728006ae9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -93,7 +93,8 @@ static void __guc_ads_init(struct intel_guc *guc)
*/
blob->ads.golden_context_lrca[engine_class] = 0;
blob->ads.eng_state_size[engine_class] =
- intel_engine_context_size(dev_priv, engine_class) -
+ intel_engine_context_size(guc_to_gt(guc),
+ engine_class) -
skipped_size;
}
@@ -135,32 +136,19 @@ static void __guc_ads_init(struct intel_guc *guc)
int intel_guc_ads_create(struct intel_guc *guc)
{
const u32 size = PAGE_ALIGN(sizeof(struct __guc_ads_blob));
- struct i915_vma *vma;
- void *blob;
int ret;
GEM_BUG_ON(guc->ads_vma);
- vma = intel_guc_allocate_vma(guc, size);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
+ ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma,
+ (void **)&guc->ads_blob);
- blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
- if (IS_ERR(blob)) {
- ret = PTR_ERR(blob);
- goto err_vma;
- }
-
- guc->ads_vma = vma;
- guc->ads_blob = blob;
+ if (ret)
+ return ret;
__guc_ads_init(guc);
return 0;
-
-err_vma:
- i915_vma_unpin_and_release(&guc->ads_vma, 0);
- return ret;
}
void intel_guc_ads_destroy(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index b49115517510..c6f971a049f9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -37,13 +37,10 @@ static void ct_incoming_request_worker_func(struct work_struct *w);
*/
void intel_guc_ct_init_early(struct intel_guc_ct *ct)
{
- /* we're using static channel owners */
- ct->host_channel.owner = CTB_OWNER_HOST;
-
- spin_lock_init(&ct->lock);
- INIT_LIST_HEAD(&ct->pending_requests);
- INIT_LIST_HEAD(&ct->incoming_requests);
- INIT_WORK(&ct->worker, ct_incoming_request_worker_func);
+ spin_lock_init(&ct->requests.lock);
+ INIT_LIST_HEAD(&ct->requests.pending);
+ INIT_LIST_HEAD(&ct->requests.incoming);
+ INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func);
}
static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct)
@@ -64,14 +61,13 @@ static inline const char *guc_ct_buffer_type_to_str(u32 type)
}
static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
- u32 cmds_addr, u32 size, u32 owner)
+ u32 cmds_addr, u32 size)
{
- CT_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n",
- desc, cmds_addr, size, owner);
+ CT_DEBUG_DRIVER("CT: init addr=%#x size=%u\n", cmds_addr, size);
memset(desc, 0, sizeof(*desc));
desc->addr = cmds_addr;
desc->size = size;
- desc->owner = owner;
+ desc->owner = CTB_OWNER_HOST;
}
static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc)
@@ -104,12 +100,11 @@ static int guc_action_register_ct_buffer(struct intel_guc *guc,
}
static int guc_action_deregister_ct_buffer(struct intel_guc *guc,
- u32 owner,
u32 type)
{
u32 action[] = {
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER,
- owner,
+ CTB_OWNER_HOST,
type
};
int err;
@@ -117,20 +112,27 @@ static int guc_action_deregister_ct_buffer(struct intel_guc *guc,
/* Can't use generic send(), CT deregistration must go over MMIO */
err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
if (err)
- DRM_ERROR("CT: deregister %s buffer failed; owner=%d err=%d\n",
- guc_ct_buffer_type_to_str(type), owner, err);
+ DRM_ERROR("CT: deregister %s buffer failed; err=%d\n",
+ guc_ct_buffer_type_to_str(type), err);
return err;
}
-static int ctch_init(struct intel_guc *guc,
- struct intel_guc_ct_channel *ctch)
+/**
+ * intel_guc_ct_init - Init buffer-based communication
+ * @ct: pointer to CT struct
+ *
+ * Allocate memory required for buffer-based communication.
+ *
+ * Return: 0 on success, a negative errno code on failure.
+ */
+int intel_guc_ct_init(struct intel_guc_ct *ct)
{
- struct i915_vma *vma;
+ struct intel_guc *guc = ct_to_guc(ct);
void *blob;
int err;
int i;
- GEM_BUG_ON(ctch->vma);
+ GEM_BUG_ON(ct->vma);
/* We allocate 1 page to hold both descriptors and both buffers.
* ___________.....................
@@ -154,71 +156,65 @@ static int ctch_init(struct intel_guc *guc,
* other code will need updating as well.
*/
- /* allocate vma */
- vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err_out;
+ err = intel_guc_allocate_and_map_vma(guc, PAGE_SIZE, &ct->vma, &blob);
+ if (err) {
+ DRM_ERROR("CT: channel allocation failed; err=%d\n", err);
+ return err;
}
- ctch->vma = vma;
- /* map first page */
- blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
- if (IS_ERR(blob)) {
- err = PTR_ERR(blob);
- goto err_vma;
- }
CT_DEBUG_DRIVER("CT: vma base=%#x\n",
- intel_guc_ggtt_offset(guc, ctch->vma));
+ intel_guc_ggtt_offset(guc, ct->vma));
/* store pointers to desc and cmds */
- for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
- GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
- ctch->ctbs[i].desc = blob + PAGE_SIZE/4 * i;
- ctch->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2;
+ for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) {
+ GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
+ ct->ctbs[i].desc = blob + PAGE_SIZE/4 * i;
+ ct->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2;
}
return 0;
-
-err_vma:
- i915_vma_unpin_and_release(&ctch->vma, 0);
-err_out:
- CT_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n",
- ctch->owner, err);
- return err;
}
-static void ctch_fini(struct intel_guc *guc,
- struct intel_guc_ct_channel *ctch)
+/**
+ * intel_guc_ct_fini - Fini buffer-based communication
+ * @ct: pointer to CT struct
+ *
+ * Deallocate memory required for buffer-based communication.
+ */
+void intel_guc_ct_fini(struct intel_guc_ct *ct)
{
- GEM_BUG_ON(ctch->enabled);
+ GEM_BUG_ON(ct->enabled);
- i915_vma_unpin_and_release(&ctch->vma, I915_VMA_RELEASE_MAP);
+ i915_vma_unpin_and_release(&ct->vma, I915_VMA_RELEASE_MAP);
}
-static int ctch_enable(struct intel_guc *guc,
- struct intel_guc_ct_channel *ctch)
+/**
+ * intel_guc_ct_enable - Enable buffer based command transport.
+ * @ct: pointer to CT struct
+ *
+ * Return: 0 on success, a negative errno code on failure.
+ */
+int intel_guc_ct_enable(struct intel_guc_ct *ct)
{
+ struct intel_guc *guc = ct_to_guc(ct);
u32 base;
int err;
int i;
- GEM_BUG_ON(!ctch->vma);
-
- GEM_BUG_ON(ctch->enabled);
+ GEM_BUG_ON(ct->enabled);
/* vma should be already allocated and map'ed */
- base = intel_guc_ggtt_offset(guc, ctch->vma);
+ GEM_BUG_ON(!ct->vma);
+ base = intel_guc_ggtt_offset(guc, ct->vma);
/* (re)initialize descriptors
* cmds buffers are in the second half of the blob page
*/
- for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
+ for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) {
GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
- guc_ct_buffer_desc_init(ctch->ctbs[i].desc,
+ guc_ct_buffer_desc_init(ct->ctbs[i].desc,
base + PAGE_SIZE/4 * i + PAGE_SIZE/2,
- PAGE_SIZE/4,
- ctch->owner);
+ PAGE_SIZE/4);
}
/* register buffers, starting wirh RECV buffer
@@ -236,38 +232,42 @@ static int ctch_enable(struct intel_guc *guc,
if (unlikely(err))
goto err_deregister;
- ctch->enabled = true;
+ ct->enabled = true;
return 0;
err_deregister:
guc_action_deregister_ct_buffer(guc,
- ctch->owner,
INTEL_GUC_CT_BUFFER_TYPE_RECV);
err_out:
- DRM_ERROR("CT: can't open channel %d; err=%d\n", ctch->owner, err);
+ DRM_ERROR("CT: can't open channel; err=%d\n", err);
return err;
}
-static void ctch_disable(struct intel_guc *guc,
- struct intel_guc_ct_channel *ctch)
+/**
+ * intel_guc_ct_disable - Disable buffer based command transport.
+ * @ct: pointer to CT struct
+ */
+void intel_guc_ct_disable(struct intel_guc_ct *ct)
{
- GEM_BUG_ON(!ctch->enabled);
+ struct intel_guc *guc = ct_to_guc(ct);
- ctch->enabled = false;
+ GEM_BUG_ON(!ct->enabled);
- guc_action_deregister_ct_buffer(guc,
- ctch->owner,
- INTEL_GUC_CT_BUFFER_TYPE_SEND);
- guc_action_deregister_ct_buffer(guc,
- ctch->owner,
- INTEL_GUC_CT_BUFFER_TYPE_RECV);
+ ct->enabled = false;
+
+ if (intel_guc_is_running(guc)) {
+ guc_action_deregister_ct_buffer(guc,
+ INTEL_GUC_CT_BUFFER_TYPE_SEND);
+ guc_action_deregister_ct_buffer(guc,
+ INTEL_GUC_CT_BUFFER_TYPE_RECV);
+ }
}
-static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch)
+static u32 ct_get_next_fence(struct intel_guc_ct *ct)
{
/* For now it's trivial */
- return ++ctch->next_fence;
+ return ++ct->requests.next_fence;
}
/**
@@ -440,35 +440,34 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
return err;
}
-static int ctch_send(struct intel_guc_ct *ct,
- struct intel_guc_ct_channel *ctch,
- const u32 *action,
- u32 len,
- u32 *response_buf,
- u32 response_buf_size,
- u32 *status)
+static int ct_send(struct intel_guc_ct *ct,
+ const u32 *action,
+ u32 len,
+ u32 *response_buf,
+ u32 response_buf_size,
+ u32 *status)
{
- struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND];
+ struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_SEND];
struct guc_ct_buffer_desc *desc = ctb->desc;
struct ct_request request;
unsigned long flags;
u32 fence;
int err;
- GEM_BUG_ON(!ctch->enabled);
+ GEM_BUG_ON(!ct->enabled);
GEM_BUG_ON(!len);
GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
GEM_BUG_ON(!response_buf && response_buf_size);
- fence = ctch_get_next_fence(ctch);
+ fence = ct_get_next_fence(ct);
request.fence = fence;
request.status = 0;
request.response_len = response_buf_size;
request.response_buf = response_buf;
- spin_lock_irqsave(&ct->lock, flags);
- list_add_tail(&request.link, &ct->pending_requests);
- spin_unlock_irqrestore(&ct->lock, flags);
+ spin_lock_irqsave(&ct->requests.lock, flags);
+ list_add_tail(&request.link, &ct->requests.pending);
+ spin_unlock_irqrestore(&ct->requests.lock, flags);
err = ctb_write(ctb, action, len, fence, !!response_buf);
if (unlikely(err))
@@ -501,9 +500,9 @@ static int ctch_send(struct intel_guc_ct *ct,
}
unlink:
- spin_lock_irqsave(&ct->lock, flags);
+ spin_lock_irqsave(&ct->requests.lock, flags);
list_del(&request.link);
- spin_unlock_irqrestore(&ct->lock, flags);
+ spin_unlock_irqrestore(&ct->requests.lock, flags);
return err;
}
@@ -511,18 +510,21 @@ unlink:
/*
* Command Transport (CT) buffer based GuC send function.
*/
-int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len,
+int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
u32 *response_buf, u32 response_buf_size)
{
- struct intel_guc_ct *ct = &guc->ct;
- struct intel_guc_ct_channel *ctch = &ct->host_channel;
+ struct intel_guc *guc = ct_to_guc(ct);
u32 status = ~0; /* undefined */
int ret;
+ if (unlikely(!ct->enabled)) {
+ WARN(1, "Unexpected send: action=%#x\n", *action);
+ return -ENODEV;
+ }
+
mutex_lock(&guc->send_mutex);
- ret = ctch_send(ct, ctch, action, len, response_buf, response_buf_size,
- &status);
+ ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
if (unlikely(ret < 0)) {
DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n",
action[0], ret, status);
@@ -653,8 +655,8 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status);
- spin_lock(&ct->lock);
- list_for_each_entry(req, &ct->pending_requests, link) {
+ spin_lock(&ct->requests.lock);
+ list_for_each_entry(req, &ct->requests.pending, link) {
if (unlikely(fence != req->fence)) {
CT_DEBUG_DRIVER("CT: request %u awaits response\n",
req->fence);
@@ -672,7 +674,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
found = true;
break;
}
- spin_unlock(&ct->lock);
+ spin_unlock(&ct->requests.lock);
if (!found)
DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg);
@@ -710,13 +712,13 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct)
u32 *payload;
bool done;
- spin_lock_irqsave(&ct->lock, flags);
- request = list_first_entry_or_null(&ct->incoming_requests,
+ spin_lock_irqsave(&ct->requests.lock, flags);
+ request = list_first_entry_or_null(&ct->requests.incoming,
struct ct_incoming_request, link);
if (request)
list_del(&request->link);
- done = !!list_empty(&ct->incoming_requests);
- spin_unlock_irqrestore(&ct->lock, flags);
+ done = !!list_empty(&ct->requests.incoming);
+ spin_unlock_irqrestore(&ct->requests.lock, flags);
if (!request)
return true;
@@ -734,12 +736,13 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct)
static void ct_incoming_request_worker_func(struct work_struct *w)
{
- struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, worker);
+ struct intel_guc_ct *ct =
+ container_of(w, struct intel_guc_ct, requests.worker);
bool done;
done = ct_process_incoming_requests(ct);
if (!done)
- queue_work(system_unbound_wq, &ct->worker);
+ queue_work(system_unbound_wq, &ct->requests.worker);
}
/**
@@ -777,23 +780,28 @@ static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg)
}
memcpy(request->msg, msg, 4 * msglen);
- spin_lock_irqsave(&ct->lock, flags);
- list_add_tail(&request->link, &ct->incoming_requests);
- spin_unlock_irqrestore(&ct->lock, flags);
+ spin_lock_irqsave(&ct->requests.lock, flags);
+ list_add_tail(&request->link, &ct->requests.incoming);
+ spin_unlock_irqrestore(&ct->requests.lock, flags);
- queue_work(system_unbound_wq, &ct->worker);
+ queue_work(system_unbound_wq, &ct->requests.worker);
return 0;
}
-static void ct_process_host_channel(struct intel_guc_ct *ct)
+/*
+ * When we're communicating with the GuC over CT, GuC uses events
+ * to notify us about new messages being posted on the RECV buffer.
+ */
+void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
{
- struct intel_guc_ct_channel *ctch = &ct->host_channel;
- struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_RECV];
+ struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_RECV];
u32 msg[GUC_CT_MSG_LEN_MASK + 1]; /* one extra dw for the header */
int err = 0;
- if (!ctch->enabled)
+ if (unlikely(!ct->enabled)) {
+ WARN(1, "Unexpected GuC event received while CT disabled!\n");
return;
+ }
do {
err = ctb_read(ctb, msg);
@@ -812,86 +820,3 @@ static void ct_process_host_channel(struct intel_guc_ct *ct)
}
}
-/*
- * When we're communicating with the GuC over CT, GuC uses events
- * to notify us about new messages being posted on the RECV buffer.
- */
-void intel_guc_to_host_event_handler_ct(struct intel_guc *guc)
-{
- struct intel_guc_ct *ct = &guc->ct;
-
- ct_process_host_channel(ct);
-}
-
-/**
- * intel_guc_ct_init - Init CT communication
- * @ct: pointer to CT struct
- *
- * Allocate memory required for communication via
- * the CT channel.
- *
- * Return: 0 on success, a negative errno code on failure.
- */
-int intel_guc_ct_init(struct intel_guc_ct *ct)
-{
- struct intel_guc *guc = ct_to_guc(ct);
- struct intel_guc_ct_channel *ctch = &ct->host_channel;
- int err;
-
- err = ctch_init(guc, ctch);
- if (unlikely(err)) {
- DRM_ERROR("CT: can't open channel %d; err=%d\n",
- ctch->owner, err);
- return err;
- }
-
- GEM_BUG_ON(!ctch->vma);
- return 0;
-}
-
-/**
- * intel_guc_ct_fini - Fini CT communication
- * @ct: pointer to CT struct
- *
- * Deallocate memory required for communication via
- * the CT channel.
- */
-void intel_guc_ct_fini(struct intel_guc_ct *ct)
-{
- struct intel_guc *guc = ct_to_guc(ct);
- struct intel_guc_ct_channel *ctch = &ct->host_channel;
-
- ctch_fini(guc, ctch);
-}
-
-/**
- * intel_guc_ct_enable - Enable buffer based command transport.
- * @ct: pointer to CT struct
- *
- * Return: 0 on success, a negative errno code on failure.
- */
-int intel_guc_ct_enable(struct intel_guc_ct *ct)
-{
- struct intel_guc *guc = ct_to_guc(ct);
- struct intel_guc_ct_channel *ctch = &ct->host_channel;
-
- if (ctch->enabled)
- return 0;
-
- return ctch_enable(guc, ctch);
-}
-
-/**
- * intel_guc_ct_disable - Disable buffer based command transport.
- * @ct: pointer to CT struct
- */
-void intel_guc_ct_disable(struct intel_guc_ct *ct)
-{
- struct intel_guc *guc = ct_to_guc(ct);
- struct intel_guc_ct_channel *ctch = &ct->host_channel;
-
- if (!ctch->enabled)
- return;
-
- ctch_disable(guc, ctch);
-}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
index 7c24d83f5c24..3e7fe237cfa5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
@@ -35,44 +35,28 @@ struct intel_guc_ct_buffer {
u32 *cmds;
};
-/** Represents pair of command transport buffers.
- *
- * Buffers go in pairs to allow bi-directional communication.
- * To simplify the code we place both of them in the same vma.
- * Buffers from the same pair must share unique owner id.
- *
- * @vma: pointer to the vma with pair of CT buffers
- * @ctbs: buffers for sending(0) and receiving(1) commands
- * @owner: unique identifier
- * @next_fence: fence to be used with next send command
- */
-struct intel_guc_ct_channel {
- struct i915_vma *vma;
- struct intel_guc_ct_buffer ctbs[2];
- u32 owner;
- u32 next_fence;
- bool enabled;
-};
-/** Holds all command transport channels.
+/** Top-level structure for Command Transport related data
*
- * @host_channel: main channel used by the host
+ * Includes a pair of CT buffers for bi-directional communication and tracking
+ * for the H2G and G2H requests sent and received through the buffers.
*/
struct intel_guc_ct {
- struct intel_guc_ct_channel host_channel;
- /* other channels are tbd */
+ struct i915_vma *vma;
+ bool enabled;
- /** @lock: protects pending requests list */
- spinlock_t lock;
+ /* buffers for sending(0) and receiving(1) commands */
+ struct intel_guc_ct_buffer ctbs[2];
- /** @pending_requests: list of requests waiting for response */
- struct list_head pending_requests;
+ struct {
+ u32 next_fence; /* fence to be used with next request to send */
- /** @incoming_requests: list of incoming requests */
- struct list_head incoming_requests;
+ spinlock_t lock; /* protects pending requests list */
+ struct list_head pending; /* requests waiting for response */
- /** @worker: worker for handling incoming requests */
- struct work_struct worker;
+ struct list_head incoming; /* incoming requests */
+ struct work_struct worker; /* handler for incoming requests */
+ } requests;
};
void intel_guc_ct_init_early(struct intel_guc_ct *ct);
@@ -81,13 +65,13 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct);
int intel_guc_ct_enable(struct intel_guc_ct *ct);
void intel_guc_ct_disable(struct intel_guc_ct *ct);
-static inline void intel_guc_ct_stop(struct intel_guc_ct *ct)
+static inline bool intel_guc_ct_enabled(struct intel_guc_ct *ct)
{
- ct->host_channel.enabled = false;
+ return ct->enabled;
}
-int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len,
+int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
u32 *response_buf, u32 response_buf_size);
-void intel_guc_to_host_event_handler_ct(struct intel_guc *guc);
+void intel_guc_ct_event_handler(struct intel_guc_ct *ct);
#endif /* _INTEL_GUC_CT_H_ */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 5528224448f6..3a1c47d600ea 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -149,7 +149,7 @@ int intel_guc_fw_upload(struct intel_guc *guc)
* Current uCode expects the code to be loaded at 8k; locations below
* this are used for the stack.
*/
- ret = intel_uc_fw_upload(&guc->fw, gt, 0x2000, UOS_MOVE);
+ ret = intel_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE);
if (ret)
goto out;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 1d3cdd67ca2f..a6b733c146c9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -31,7 +31,6 @@
#define GUC_DOORBELL_INVALID 256
-#define GUC_DB_SIZE (PAGE_SIZE)
#define GUC_WQ_SIZE (PAGE_SIZE * 2)
/* Work queue item header definitions */
@@ -548,6 +547,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_ALLOCATE_DOORBELL = 0x10,
INTEL_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20,
INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30,
+ INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40,
INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302,
INTEL_GUC_ACTION_ENTER_S_STATE = 0x501,
INTEL_GUC_ACTION_EXIT_S_STATE = 0x502,
@@ -556,7 +556,6 @@ enum intel_guc_action {
INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
- INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000,
INTEL_GUC_ACTION_LIMIT
};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 2cf2d3314f62..caed0d57e704 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -226,7 +226,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log)
mutex_lock(&log->relay.lock);
- if (WARN_ON(!intel_guc_log_relay_enabled(log)))
+ if (WARN_ON(!intel_guc_log_relay_created(log)))
goto out_unlock;
/* Get the pointer to shared GuC log buffer */
@@ -361,6 +361,7 @@ void intel_guc_log_init_early(struct intel_guc_log *log)
{
mutex_init(&log->relay.lock);
INIT_WORK(&log->relay.flush_work, capture_logs_work);
+ log->relay.started = false;
}
static int guc_log_relay_create(struct intel_guc_log *log)
@@ -546,7 +547,7 @@ out_unlock:
return ret;
}
-bool intel_guc_log_relay_enabled(const struct intel_guc_log *log)
+bool intel_guc_log_relay_created(const struct intel_guc_log *log)
{
return log->relay.buf_addr;
}
@@ -560,7 +561,7 @@ int intel_guc_log_relay_open(struct intel_guc_log *log)
mutex_lock(&log->relay.lock);
- if (intel_guc_log_relay_enabled(log)) {
+ if (intel_guc_log_relay_created(log)) {
ret = -EEXIST;
goto out_unlock;
}
@@ -585,6 +586,21 @@ int intel_guc_log_relay_open(struct intel_guc_log *log)
mutex_unlock(&log->relay.lock);
+ return 0;
+
+out_relay:
+ guc_log_relay_destroy(log);
+out_unlock:
+ mutex_unlock(&log->relay.lock);
+
+ return ret;
+}
+
+int intel_guc_log_relay_start(struct intel_guc_log *log)
+{
+ if (log->relay.started)
+ return -EEXIST;
+
guc_log_enable_flush_events(log);
/*
@@ -594,14 +610,9 @@ int intel_guc_log_relay_open(struct intel_guc_log *log)
*/
queue_work(system_highpri_wq, &log->relay.flush_work);
- return 0;
-
-out_relay:
- guc_log_relay_destroy(log);
-out_unlock:
- mutex_unlock(&log->relay.lock);
+ log->relay.started = true;
- return ret;
+ return 0;
}
void intel_guc_log_relay_flush(struct intel_guc_log *log)
@@ -609,6 +620,9 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
struct intel_guc *guc = log_to_guc(log);
intel_wakeref_t wakeref;
+ if (!log->relay.started)
+ return;
+
/*
* Before initiating the forceful flush, wait for any pending/ongoing
* flush to complete otherwise forceful flush may not actually happen.
@@ -622,18 +636,33 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
guc_log_capture_logs(log);
}
-void intel_guc_log_relay_close(struct intel_guc_log *log)
+/*
+ * Stops the relay log. Called from intel_guc_log_relay_close(), so no
+ * possibility of race with start/flush since relay_write cannot race
+ * relay_close.
+ */
+static void guc_log_relay_stop(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ if (!log->relay.started)
+ return;
+
guc_log_disable_flush_events(log);
intel_synchronize_irq(i915);
flush_work(&log->relay.flush_work);
+ log->relay.started = false;
+}
+
+void intel_guc_log_relay_close(struct intel_guc_log *log)
+{
+ guc_log_relay_stop(log);
+
mutex_lock(&log->relay.lock);
- GEM_BUG_ON(!intel_guc_log_relay_enabled(log));
+ GEM_BUG_ON(!intel_guc_log_relay_created(log));
guc_log_unmap(log);
guc_log_relay_destroy(log);
mutex_unlock(&log->relay.lock);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
index 6f764879acb1..c252c022c5fc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
@@ -47,6 +47,7 @@ struct intel_guc_log {
struct i915_vma *vma;
struct {
void *buf_addr;
+ bool started;
struct work_struct flush_work;
struct rchan *channel;
struct mutex lock;
@@ -65,8 +66,9 @@ int intel_guc_log_create(struct intel_guc_log *log);
void intel_guc_log_destroy(struct intel_guc_log *log);
int intel_guc_log_set_level(struct intel_guc_log *log, u32 level);
-bool intel_guc_log_relay_enabled(const struct intel_guc_log *log);
+bool intel_guc_log_relay_created(const struct intel_guc_log *log);
int intel_guc_log_relay_open(struct intel_guc_log *log);
+int intel_guc_log_relay_start(struct intel_guc_log *log);
void intel_guc_log_relay_flush(struct intel_guc_log *log);
void intel_guc_log_relay_close(struct intel_guc_log *log);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 009e54a3764f..9e42324fdecd 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -6,26 +6,18 @@
#include <linux/circ_buf.h>
#include "gem/i915_gem_context.h"
-
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
+
#include "intel_guc_submission.h"
#include "i915_drv.h"
#include "i915_trace.h"
-enum {
- GUC_PREEMPT_NONE = 0,
- GUC_PREEMPT_INPROGRESS,
- GUC_PREEMPT_FINISHED,
-};
-#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8
-#define GUC_PREEMPT_BREADCRUMB_BYTES \
- (sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS)
-
/**
* DOC: GuC-based command submission
*
@@ -35,25 +27,14 @@ enum {
* code) matches the old submission model and will be updated as part of the
* upgrade to the new flow.
*
- * GuC client:
- * A intel_guc_client refers to a submission path through GuC. Currently, there
- * is only one client, which is charged with all submissions to the GuC. This
- * struct is the owner of a doorbell, a process descriptor and a workqueue (all
- * of them inside a single gem object that contains all required pages for these
- * elements).
- *
* GuC stage descriptor:
* During initialization, the driver allocates a static pool of 1024 such
- * descriptors, and shares them with the GuC.
- * Currently, there exists a 1:1 mapping between a intel_guc_client and a
- * guc_stage_desc (via the client's stage_id), so effectively only one
- * gets used. This stage descriptor lets the GuC know about the doorbell,
- * workqueue and process descriptor. Theoretically, it also lets the GuC
- * know about our HW contexts (context ID, etc...), but we actually
- * employ a kind of submission where the GuC uses the LRCA sent via the work
- * item instead (the single guc_stage_desc associated to execbuf client
- * contains information about the default kernel context only, but this is
- * essentially unused). This is called a "proxy" submission.
+ * descriptors, and shares them with the GuC. Currently, we only use one
+ * descriptor. This stage descriptor lets the GuC know about the workqueue and
+ * process descriptor. Theoretically, it also lets the GuC know about our HW
+ * contexts (context ID, etc...), but we actually employ a kind of submission
+ * where the GuC uses the LRCA sent via the work item instead. This is called
+ * a "proxy" submission.
*
* The Scratch registers:
* There are 16 MMIO-based registers start from 0xC180. The kernel driver writes
@@ -62,11 +43,6 @@ enum {
* Firmware writes a success/fail code back to the action register after
* processes the request. The kernel driver polls waiting for this update and
* then proceeds.
- * See intel_guc_send()
- *
- * Doorbells:
- * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW)
- * mapped into process space.
*
* Work Items:
* There are several types of work items that the host may place into a
@@ -83,213 +59,45 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
return rb_entry(rb, struct i915_priolist, node);
}
-static inline bool is_high_priority(struct intel_guc_client *client)
-{
- return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
- client->priority == GUC_CLIENT_PRIORITY_HIGH);
-}
-
-static int reserve_doorbell(struct intel_guc_client *client)
-{
- unsigned long offset;
- unsigned long end;
- u16 id;
-
- GEM_BUG_ON(client->doorbell_id != GUC_DOORBELL_INVALID);
-
- /*
- * The bitmap tracks which doorbell registers are currently in use.
- * It is split into two halves; the first half is used for normal
- * priority contexts, the second half for high-priority ones.
- */
- offset = 0;
- end = GUC_NUM_DOORBELLS / 2;
- if (is_high_priority(client)) {
- offset = end;
- end += offset;
- }
-
- id = find_next_zero_bit(client->guc->doorbell_bitmap, end, offset);
- if (id == end)
- return -ENOSPC;
-
- __set_bit(id, client->guc->doorbell_bitmap);
- client->doorbell_id = id;
- DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell: %d\n",
- client->stage_id, yesno(is_high_priority(client)),
- id);
- return 0;
-}
-
-static bool has_doorbell(struct intel_guc_client *client)
-{
- if (client->doorbell_id == GUC_DOORBELL_INVALID)
- return false;
-
- return test_bit(client->doorbell_id, client->guc->doorbell_bitmap);
-}
-
-static void unreserve_doorbell(struct intel_guc_client *client)
-{
- GEM_BUG_ON(!has_doorbell(client));
-
- __clear_bit(client->doorbell_id, client->guc->doorbell_bitmap);
- client->doorbell_id = GUC_DOORBELL_INVALID;
-}
-
-/*
- * Tell the GuC to allocate or deallocate a specific doorbell
- */
-
-static int __guc_allocate_doorbell(struct intel_guc *guc, u32 stage_id)
+static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id)
{
- u32 action[] = {
- INTEL_GUC_ACTION_ALLOCATE_DOORBELL,
- stage_id
- };
+ struct guc_stage_desc *base = guc->stage_desc_pool_vaddr;
- return intel_guc_send(guc, action, ARRAY_SIZE(action));
+ return &base[id];
}
-static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 stage_id)
+static int guc_workqueue_create(struct intel_guc *guc)
{
- u32 action[] = {
- INTEL_GUC_ACTION_DEALLOCATE_DOORBELL,
- stage_id
- };
-
- return intel_guc_send(guc, action, ARRAY_SIZE(action));
+ return intel_guc_allocate_and_map_vma(guc, GUC_WQ_SIZE, &guc->workqueue,
+ &guc->workqueue_vaddr);
}
-static struct guc_stage_desc *__get_stage_desc(struct intel_guc_client *client)
+static void guc_workqueue_destroy(struct intel_guc *guc)
{
- struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr;
-
- return &base[client->stage_id];
+ i915_vma_unpin_and_release(&guc->workqueue, I915_VMA_RELEASE_MAP);
}
/*
- * Initialise, update, or clear doorbell data shared with the GuC
- *
- * These functions modify shared data and so need access to the mapped
- * client object which contains the page being used for the doorbell
+ * Initialise the process descriptor shared with the GuC firmware.
*/
-
-static void __update_doorbell_desc(struct intel_guc_client *client, u16 new_id)
+static int guc_proc_desc_create(struct intel_guc *guc)
{
- struct guc_stage_desc *desc;
+ const u32 size = PAGE_ALIGN(sizeof(struct guc_process_desc));
- /* Update the GuC's idea of the doorbell ID */
- desc = __get_stage_desc(client);
- desc->db_id = new_id;
+ return intel_guc_allocate_and_map_vma(guc, size, &guc->proc_desc,
+ &guc->proc_desc_vaddr);
}
-static struct guc_doorbell_info *__get_doorbell(struct intel_guc_client *client)
+static void guc_proc_desc_destroy(struct intel_guc *guc)
{
- return client->vaddr + client->doorbell_offset;
-}
-
-static bool __doorbell_valid(struct intel_guc *guc, u16 db_id)
-{
- struct intel_uncore *uncore = guc_to_gt(guc)->uncore;
-
- GEM_BUG_ON(db_id >= GUC_NUM_DOORBELLS);
- return intel_uncore_read(uncore, GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID;
+ i915_vma_unpin_and_release(&guc->proc_desc, I915_VMA_RELEASE_MAP);
}
-static void __init_doorbell(struct intel_guc_client *client)
-{
- struct guc_doorbell_info *doorbell;
-
- doorbell = __get_doorbell(client);
- doorbell->db_status = GUC_DOORBELL_ENABLED;
- doorbell->cookie = 0;
-}
-
-static void __fini_doorbell(struct intel_guc_client *client)
-{
- struct guc_doorbell_info *doorbell;
- u16 db_id = client->doorbell_id;
-
- doorbell = __get_doorbell(client);
- doorbell->db_status = GUC_DOORBELL_DISABLED;
-
- /* Doorbell release flow requires that we wait for GEN8_DRB_VALID bit
- * to go to zero after updating db_status before we call the GuC to
- * release the doorbell
- */
- if (wait_for_us(!__doorbell_valid(client->guc, db_id), 10))
- WARN_ONCE(true, "Doorbell never became invalid after disable\n");
-}
-
-static int create_doorbell(struct intel_guc_client *client)
-{
- int ret;
-
- if (WARN_ON(!has_doorbell(client)))
- return -ENODEV; /* internal setup error, should never happen */
-
- __update_doorbell_desc(client, client->doorbell_id);
- __init_doorbell(client);
-
- ret = __guc_allocate_doorbell(client->guc, client->stage_id);
- if (ret) {
- __fini_doorbell(client);
- __update_doorbell_desc(client, GUC_DOORBELL_INVALID);
- DRM_DEBUG_DRIVER("Couldn't create client %u doorbell: %d\n",
- client->stage_id, ret);
- return ret;
- }
-
- return 0;
-}
-
-static int destroy_doorbell(struct intel_guc_client *client)
-{
- int ret;
-
- GEM_BUG_ON(!has_doorbell(client));
-
- __fini_doorbell(client);
- ret = __guc_deallocate_doorbell(client->guc, client->stage_id);
- if (ret)
- DRM_ERROR("Couldn't destroy client %u doorbell: %d\n",
- client->stage_id, ret);
-
- __update_doorbell_desc(client, GUC_DOORBELL_INVALID);
-
- return ret;
-}
-
-static unsigned long __select_cacheline(struct intel_guc *guc)
-{
- unsigned long offset;
-
- /* Doorbell uses a single cache line within a page */
- offset = offset_in_page(guc->db_cacheline);
-
- /* Moving to next cache line to reduce contention */
- guc->db_cacheline += cache_line_size();
-
- DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n",
- offset, guc->db_cacheline, cache_line_size());
- return offset;
-}
-
-static inline struct guc_process_desc *
-__get_process_desc(struct intel_guc_client *client)
-{
- return client->vaddr + client->proc_desc_offset;
-}
-
-/*
- * Initialise the process descriptor shared with the GuC firmware.
- */
-static void guc_proc_desc_init(struct intel_guc_client *client)
+static void guc_proc_desc_init(struct intel_guc *guc)
{
struct guc_process_desc *desc;
- desc = memset(__get_process_desc(client), 0, sizeof(*desc));
+ desc = memset(guc->proc_desc_vaddr, 0, sizeof(*desc));
/*
* XXX: pDoorbell and WQVBaseAddress are pointers in process address
@@ -300,47 +108,27 @@ static void guc_proc_desc_init(struct intel_guc_client *client)
desc->wq_base_addr = 0;
desc->db_base_addr = 0;
- desc->stage_id = client->stage_id;
desc->wq_size_bytes = GUC_WQ_SIZE;
desc->wq_status = WQ_STATUS_ACTIVE;
- desc->priority = client->priority;
+ desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
}
-static void guc_proc_desc_fini(struct intel_guc_client *client)
+static void guc_proc_desc_fini(struct intel_guc *guc)
{
- struct guc_process_desc *desc;
-
- desc = __get_process_desc(client);
- memset(desc, 0, sizeof(*desc));
+ memset(guc->proc_desc_vaddr, 0, sizeof(struct guc_process_desc));
}
static int guc_stage_desc_pool_create(struct intel_guc *guc)
{
- struct i915_vma *vma;
- void *vaddr;
-
- vma = intel_guc_allocate_vma(guc,
- PAGE_ALIGN(sizeof(struct guc_stage_desc) *
- GUC_MAX_STAGE_DESCRIPTORS));
- if (IS_ERR(vma))
- return PTR_ERR(vma);
-
- vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- i915_vma_unpin_and_release(&vma, 0);
- return PTR_ERR(vaddr);
- }
+ u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) *
+ GUC_MAX_STAGE_DESCRIPTORS);
- guc->stage_desc_pool = vma;
- guc->stage_desc_pool_vaddr = vaddr;
- ida_init(&guc->stage_ids);
-
- return 0;
+ return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool,
+ &guc->stage_desc_pool_vaddr);
}
static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
{
- ida_destroy(&guc->stage_ids);
i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP);
}
@@ -348,63 +136,49 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
* Initialise/clear the stage descriptor shared with the GuC firmware.
*
* This descriptor tells the GuC where (in GGTT space) to find the important
- * data structures relating to this client (doorbell, process descriptor,
- * write queue, etc).
+ * data structures related to work submission (process descriptor, write queue,
+ * etc).
*/
-static void guc_stage_desc_init(struct intel_guc_client *client)
+static void guc_stage_desc_init(struct intel_guc *guc)
{
- struct intel_guc *guc = client->guc;
struct guc_stage_desc *desc;
- u32 gfx_addr;
- desc = __get_stage_desc(client);
+ /* we only use 1 stage desc, so hardcode it to 0 */
+ desc = __get_stage_desc(guc, 0);
memset(desc, 0, sizeof(*desc));
desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE |
GUC_STAGE_DESC_ATTR_KERNEL;
- if (is_high_priority(client))
- desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT;
- desc->stage_id = client->stage_id;
- desc->priority = client->priority;
- desc->db_id = client->doorbell_id;
- /*
- * The doorbell, process descriptor, and workqueue are all parts
- * of the client object, which the GuC will reference via the GGTT
- */
- gfx_addr = intel_guc_ggtt_offset(guc, client->vma);
- desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) +
- client->doorbell_offset;
- desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client));
- desc->db_trigger_uk = gfx_addr + client->doorbell_offset;
- desc->process_desc = gfx_addr + client->proc_desc_offset;
- desc->wq_addr = gfx_addr + GUC_DB_SIZE;
- desc->wq_size = GUC_WQ_SIZE;
+ desc->stage_id = 0;
+ desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
- desc->desc_private = ptr_to_u64(client);
+ desc->process_desc = intel_guc_ggtt_offset(guc, guc->proc_desc);
+ desc->wq_addr = intel_guc_ggtt_offset(guc, guc->workqueue);
+ desc->wq_size = GUC_WQ_SIZE;
}
-static void guc_stage_desc_fini(struct intel_guc_client *client)
+static void guc_stage_desc_fini(struct intel_guc *guc)
{
struct guc_stage_desc *desc;
- desc = __get_stage_desc(client);
+ desc = __get_stage_desc(guc, 0);
memset(desc, 0, sizeof(*desc));
}
/* Construct a Work Item and append it to the GuC's Work Queue */
-static void guc_wq_item_append(struct intel_guc_client *client,
+static void guc_wq_item_append(struct intel_guc *guc,
u32 target_engine, u32 context_desc,
u32 ring_tail, u32 fence_id)
{
/* wqi_len is in DWords, and does not include the one-word header */
const size_t wqi_size = sizeof(struct guc_wq_item);
const u32 wqi_len = wqi_size / sizeof(u32) - 1;
- struct guc_process_desc *desc = __get_process_desc(client);
+ struct guc_process_desc *desc = guc->proc_desc_vaddr;
struct guc_wq_item *wqi;
u32 wq_off;
- lockdep_assert_held(&client->wq_lock);
+ lockdep_assert_held(&guc->wq_lock);
/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
* should not have the case where structure wqi is across page, neither
@@ -424,58 +198,30 @@ static void guc_wq_item_append(struct intel_guc_client *client,
GUC_WQ_SIZE) < wqi_size);
GEM_BUG_ON(wq_off & (wqi_size - 1));
- /* WQ starts from the page after doorbell / process_desc */
- wqi = client->vaddr + wq_off + GUC_DB_SIZE;
-
- if (I915_SELFTEST_ONLY(client->use_nop_wqi)) {
- wqi->header = WQ_TYPE_NOOP | (wqi_len << WQ_LEN_SHIFT);
- } else {
- /* Now fill in the 4-word work queue item */
- wqi->header = WQ_TYPE_INORDER |
- (wqi_len << WQ_LEN_SHIFT) |
- (target_engine << WQ_TARGET_SHIFT) |
- WQ_NO_WCFLUSH_WAIT;
- wqi->context_desc = context_desc;
- wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT;
- GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX);
- wqi->fence_id = fence_id;
- }
+ wqi = guc->workqueue_vaddr + wq_off;
+
+ /* Now fill in the 4-word work queue item */
+ wqi->header = WQ_TYPE_INORDER |
+ (wqi_len << WQ_LEN_SHIFT) |
+ (target_engine << WQ_TARGET_SHIFT) |
+ WQ_NO_WCFLUSH_WAIT;
+ wqi->context_desc = context_desc;
+ wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT;
+ GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX);
+ wqi->fence_id = fence_id;
/* Make the update visible to GuC */
WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1));
}
-static void guc_ring_doorbell(struct intel_guc_client *client)
-{
- struct guc_doorbell_info *db;
- u32 cookie;
-
- lockdep_assert_held(&client->wq_lock);
-
- /* pointer of current doorbell cacheline */
- db = __get_doorbell(client);
-
- /*
- * We're not expecting the doorbell cookie to change behind our back,
- * we also need to treat 0 as a reserved value.
- */
- cookie = READ_ONCE(db->cookie);
- WARN_ON_ONCE(xchg(&db->cookie, cookie + 1 ?: cookie + 2) != cookie);
-
- /* XXX: doorbell was lost and need to acquire it again */
- GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED);
-}
-
static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
- struct intel_guc_client *client = guc->execbuf_client;
struct intel_engine_cs *engine = rq->engine;
- u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc);
+ u32 ctx_desc = lower_32_bits(rq->context->lrc_desc);
u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
- guc_wq_item_append(client, engine->guc_id, ctx_desc,
+ guc_wq_item_append(guc, engine->guc_id, ctx_desc,
ring_tail, rq->fence.seqno);
- guc_ring_doorbell(client);
}
/*
@@ -487,10 +233,9 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
*/
static void flush_ggtt_writes(struct i915_vma *vma)
{
- struct drm_i915_private *i915 = vma->vm->i915;
-
if (i915_vma_is_map_and_fenceable(vma))
- intel_uncore_posting_read_fw(&i915->uncore, GUC_STATUS);
+ intel_uncore_posting_read_fw(vma->vm->gt->uncore,
+ GUC_STATUS);
}
static void guc_submit(struct intel_engine_cs *engine,
@@ -498,9 +243,8 @@ static void guc_submit(struct intel_engine_cs *engine,
struct i915_request **end)
{
struct intel_guc *guc = &engine->gt->uc.guc;
- struct intel_guc_client *client = guc->execbuf_client;
- spin_lock(&client->wq_lock);
+ spin_lock(&guc->wq_lock);
do {
struct i915_request *rq = *out++;
@@ -509,7 +253,7 @@ static void guc_submit(struct intel_engine_cs *engine,
guc_add_request(guc, rq);
} while (out != end);
- spin_unlock(&client->wq_lock);
+ spin_unlock(&guc->wq_lock);
}
static inline int rq_prio(const struct i915_request *rq)
@@ -528,7 +272,7 @@ static struct i915_request *schedule_in(struct i915_request *rq, int idx)
* required if we generalise the inflight tracking.
*/
- intel_gt_pm_get(rq->engine->gt);
+ __intel_gt_pm_get(rq->engine->gt);
return i915_request_get(rq);
}
@@ -536,7 +280,7 @@ static void schedule_out(struct i915_request *rq)
{
trace_i915_request_out(rq);
- intel_gt_pm_put(rq->engine->gt);
+ intel_gt_pm_put_async(rq->engine->gt);
i915_request_put(rq);
}
@@ -571,7 +315,7 @@ static void __guc_dequeue(struct intel_engine_cs *engine)
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
- if (last && rq->hw_context != last->hw_context) {
+ if (last && rq->context != last->context) {
if (port == last_port)
goto done;
@@ -630,7 +374,7 @@ static void guc_reset_prepare(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
/*
* Prevent request submission to the hardware until we have
@@ -657,7 +401,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
memset(execlists->inflight, 0, sizeof(execlists->inflight));
}
-static void guc_reset(struct intel_engine_cs *engine, bool stalled)
+static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request *rq;
@@ -676,20 +420,20 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
stalled = false;
__i915_request_reset(rq, stalled);
- intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled);
+ intel_lr_context_reset(engine, rq->context, rq->head, stalled);
out_unlock:
spin_unlock_irqrestore(&engine->active.lock, flags);
}
-static void guc_cancel_requests(struct intel_engine_cs *engine)
+static void guc_reset_cancel(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request *rq, *rn;
struct rb_node *rb;
unsigned long flags;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
/*
* Before we call engine->cancel_requests(), we should have exclusive
@@ -750,8 +494,8 @@ static void guc_reset_finish(struct intel_engine_cs *engine)
/* And kick in case we missed a new request submission. */
tasklet_hi_schedule(&execlists->tasklet);
- GEM_TRACE("%s: depth->%d\n", engine->name,
- atomic_read(&execlists->tasklet.count));
+ ENGINE_TRACE(engine, "depth->%d\n",
+ atomic_read(&execlists->tasklet.count));
}
/*
@@ -760,213 +504,6 @@ static void guc_reset_finish(struct intel_engine_cs *engine)
* path of guc_submit() above.
*/
-/* Check that a doorbell register is in the expected state */
-static bool doorbell_ok(struct intel_guc *guc, u16 db_id)
-{
- bool valid;
-
- GEM_BUG_ON(db_id >= GUC_NUM_DOORBELLS);
-
- valid = __doorbell_valid(guc, db_id);
-
- if (test_bit(db_id, guc->doorbell_bitmap) == valid)
- return true;
-
- DRM_DEBUG_DRIVER("Doorbell %u has unexpected state: valid=%s\n",
- db_id, yesno(valid));
-
- return false;
-}
-
-static bool guc_verify_doorbells(struct intel_guc *guc)
-{
- bool doorbells_ok = true;
- u16 db_id;
-
- for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id)
- if (!doorbell_ok(guc, db_id))
- doorbells_ok = false;
-
- return doorbells_ok;
-}
-
-/**
- * guc_client_alloc() - Allocate an intel_guc_client
- * @guc: the intel_guc structure
- * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW
- * The kernel client to replace ExecList submission is created with
- * NORMAL priority. Priority of a client for scheduler can be HIGH,
- * while a preemption context can use CRITICAL.
- *
- * Return: An intel_guc_client object if success, else NULL.
- */
-static struct intel_guc_client *
-guc_client_alloc(struct intel_guc *guc, u32 priority)
-{
- struct intel_guc_client *client;
- struct i915_vma *vma;
- void *vaddr;
- int ret;
-
- client = kzalloc(sizeof(*client), GFP_KERNEL);
- if (!client)
- return ERR_PTR(-ENOMEM);
-
- client->guc = guc;
- client->priority = priority;
- client->doorbell_id = GUC_DOORBELL_INVALID;
- spin_lock_init(&client->wq_lock);
-
- ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS,
- GFP_KERNEL);
- if (ret < 0)
- goto err_client;
-
- client->stage_id = ret;
-
- /* The first page is doorbell/proc_desc. Two followed pages are wq. */
- vma = intel_guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE);
- if (IS_ERR(vma)) {
- ret = PTR_ERR(vma);
- goto err_id;
- }
-
- /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */
- client->vma = vma;
-
- vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- ret = PTR_ERR(vaddr);
- goto err_vma;
- }
- client->vaddr = vaddr;
-
- ret = reserve_doorbell(client);
- if (ret)
- goto err_vaddr;
-
- client->doorbell_offset = __select_cacheline(guc);
-
- /*
- * Since the doorbell only requires a single cacheline, we can save
- * space by putting the application process descriptor in the same
- * page. Use the half of the page that doesn't include the doorbell.
- */
- if (client->doorbell_offset >= (GUC_DB_SIZE / 2))
- client->proc_desc_offset = 0;
- else
- client->proc_desc_offset = (GUC_DB_SIZE / 2);
-
- DRM_DEBUG_DRIVER("new priority %u client %p: stage_id %u\n",
- priority, client, client->stage_id);
- DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%lx\n",
- client->doorbell_id, client->doorbell_offset);
-
- return client;
-
-err_vaddr:
- i915_gem_object_unpin_map(client->vma->obj);
-err_vma:
- i915_vma_unpin_and_release(&client->vma, 0);
-err_id:
- ida_simple_remove(&guc->stage_ids, client->stage_id);
-err_client:
- kfree(client);
- return ERR_PTR(ret);
-}
-
-static void guc_client_free(struct intel_guc_client *client)
-{
- unreserve_doorbell(client);
- i915_vma_unpin_and_release(&client->vma, I915_VMA_RELEASE_MAP);
- ida_simple_remove(&client->guc->stage_ids, client->stage_id);
- kfree(client);
-}
-
-static inline bool ctx_save_restore_disabled(struct intel_context *ce)
-{
- u32 sr = ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1];
-
-#define SR_DISABLED \
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | \
- CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)
-
- return (sr & SR_DISABLED) == SR_DISABLED;
-
-#undef SR_DISABLED
-}
-
-static int guc_clients_create(struct intel_guc *guc)
-{
- struct intel_guc_client *client;
-
- GEM_BUG_ON(guc->execbuf_client);
-
- client = guc_client_alloc(guc, GUC_CLIENT_PRIORITY_KMD_NORMAL);
- if (IS_ERR(client)) {
- DRM_ERROR("Failed to create GuC client for submission!\n");
- return PTR_ERR(client);
- }
- guc->execbuf_client = client;
-
- return 0;
-}
-
-static void guc_clients_destroy(struct intel_guc *guc)
-{
- struct intel_guc_client *client;
-
- client = fetch_and_zero(&guc->execbuf_client);
- if (client)
- guc_client_free(client);
-}
-
-static int __guc_client_enable(struct intel_guc_client *client)
-{
- int ret;
-
- guc_proc_desc_init(client);
- guc_stage_desc_init(client);
-
- ret = create_doorbell(client);
- if (ret)
- goto fail;
-
- return 0;
-
-fail:
- guc_stage_desc_fini(client);
- guc_proc_desc_fini(client);
- return ret;
-}
-
-static void __guc_client_disable(struct intel_guc_client *client)
-{
- /*
- * By the time we're here, GuC may have already been reset. if that is
- * the case, instead of trying (in vain) to communicate with it, let's
- * just cleanup the doorbell HW and our internal state.
- */
- if (intel_guc_is_running(client->guc))
- destroy_doorbell(client);
- else
- __fini_doorbell(client);
-
- guc_stage_desc_fini(client);
- guc_proc_desc_fini(client);
-}
-
-static int guc_clients_enable(struct intel_guc *guc)
-{
- return __guc_client_enable(guc->execbuf_client);
-}
-
-static void guc_clients_disable(struct intel_guc *guc)
-{
- if (guc->execbuf_client)
- __guc_client_disable(guc->execbuf_client);
-}
-
/*
* Set up the memory resources to be shared with the GuC (via the GGTT)
* at firmware loading time.
@@ -987,13 +524,20 @@ int intel_guc_submission_init(struct intel_guc *guc)
*/
GEM_BUG_ON(!guc->stage_desc_pool);
- WARN_ON(!guc_verify_doorbells(guc));
- ret = guc_clients_create(guc);
+ ret = guc_workqueue_create(guc);
if (ret)
goto err_pool;
+ ret = guc_proc_desc_create(guc);
+ if (ret)
+ goto err_workqueue;
+
+ spin_lock_init(&guc->wq_lock);
+
return 0;
+err_workqueue:
+ guc_workqueue_destroy(guc);
err_pool:
guc_stage_desc_pool_destroy(guc);
return ret;
@@ -1001,83 +545,37 @@ err_pool:
void intel_guc_submission_fini(struct intel_guc *guc)
{
- guc_clients_destroy(guc);
- WARN_ON(!guc_verify_doorbells(guc));
-
- if (guc->stage_desc_pool)
+ if (guc->stage_desc_pool) {
+ guc_proc_desc_destroy(guc);
+ guc_workqueue_destroy(guc);
guc_stage_desc_pool_destroy(guc);
+ }
}
static void guc_interrupts_capture(struct intel_gt *gt)
{
- struct intel_rps *rps = &gt->i915->gt_pm.rps;
struct intel_uncore *uncore = gt->uncore;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int irqs;
+ u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT;
+ u32 dmask = irqs << 16 | irqs;
- /* tell all command streamers to forward interrupts (but not vblank)
- * to GuC
- */
- irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING);
- for_each_engine(engine, gt, id)
- ENGINE_WRITE(engine, RING_MODE_GEN7, irqs);
-
- /* route USER_INTERRUPT to Host, all others are sent to GuC. */
- irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
- GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
- /* These three registers have the same bit definitions */
- intel_uncore_write(uncore, GUC_BCS_RCS_IER, ~irqs);
- intel_uncore_write(uncore, GUC_VCS2_VCS1_IER, ~irqs);
- intel_uncore_write(uncore, GUC_WD_VECS_IER, ~irqs);
+ GEM_BUG_ON(INTEL_GEN(gt->i915) < 11);
- /*
- * The REDIRECT_TO_GUC bit of the PMINTRMSK register directs all
- * (unmasked) PM interrupts to the GuC. All other bits of this
- * register *disable* generation of a specific interrupt.
- *
- * 'pm_intrmsk_mbz' indicates bits that are NOT to be set when
- * writing to the PM interrupt mask register, i.e. interrupts
- * that must not be disabled.
- *
- * If the GuC is handling these interrupts, then we must not let
- * the PM code disable ANY interrupt that the GuC is expecting.
- * So for each ENABLED (0) bit in this register, we must SET the
- * bit in pm_intrmsk_mbz so that it's left enabled for the GuC.
- * GuC needs ARAT expired interrupt unmasked hence it is set in
- * pm_intrmsk_mbz.
- *
- * Here we CLEAR REDIRECT_TO_GUC bit in pm_intrmsk_mbz, which will
- * result in the register bit being left SET!
- */
- rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
- rps->pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+ /* Don't handle the ctx switch interrupt in GuC submission mode */
+ intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask, 0);
+ intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask, 0);
}
static void guc_interrupts_release(struct intel_gt *gt)
{
- struct intel_rps *rps = &gt->i915->gt_pm.rps;
struct intel_uncore *uncore = gt->uncore;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int irqs;
+ u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT;
+ u32 dmask = irqs << 16 | irqs;
- /*
- * tell all command streamers NOT to forward interrupts or vblank
- * to GuC.
- */
- irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER);
- irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING);
- for_each_engine(engine, gt, id)
- ENGINE_WRITE(engine, RING_MODE_GEN7, irqs);
-
- /* route all GT interrupts to the host */
- intel_uncore_write(uncore, GUC_BCS_RCS_IER, 0);
- intel_uncore_write(uncore, GUC_VCS2_VCS1_IER, 0);
- intel_uncore_write(uncore, GUC_WD_VECS_IER, 0);
-
- rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
- rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK;
+ GEM_BUG_ON(INTEL_GEN(gt->i915) < 11);
+
+ /* Handle ctx switch interrupts again */
+ intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0, dmask);
+ intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0, dmask);
}
static void guc_set_default_submission(struct intel_engine_cs *engine)
@@ -1101,11 +599,10 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
engine->park = engine->unpark = NULL;
engine->reset.prepare = guc_reset_prepare;
- engine->reset.reset = guc_reset;
+ engine->reset.rewind = guc_reset_rewind;
+ engine->reset.cancel = guc_reset_cancel;
engine->reset.finish = guc_reset_finish;
- engine->cancel_requests = guc_cancel_requests;
-
engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
@@ -1118,16 +615,11 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
GEM_BUG_ON(engine->irq_enable || engine->irq_disable);
}
-int intel_guc_submission_enable(struct intel_guc *guc)
+void intel_guc_submission_enable(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
struct intel_engine_cs *engine;
enum intel_engine_id id;
- int err;
-
- err = i915_inject_load_error(gt->i915, -ENXIO);
- if (err)
- return err;
/*
* We're using GuC work items for submitting work through GuC. Since
@@ -1142,11 +634,8 @@ int intel_guc_submission_enable(struct intel_guc *guc)
sizeof(struct guc_wq_item) *
I915_NUM_ENGINES > GUC_WQ_SIZE);
- GEM_BUG_ON(!guc->execbuf_client);
-
- err = guc_clients_enable(guc);
- if (err)
- return err;
+ guc_proc_desc_init(guc);
+ guc_stage_desc_init(guc);
/* Take over from manual control of ELSP (execlists) */
guc_interrupts_capture(gt);
@@ -1155,8 +644,6 @@ int intel_guc_submission_enable(struct intel_guc *guc)
engine->set_default_submission = guc_set_default_submission;
engine->set_default_submission(engine);
}
-
- return 0;
}
void intel_guc_submission_disable(struct intel_guc *guc)
@@ -1165,8 +652,12 @@ void intel_guc_submission_disable(struct intel_guc *guc)
GEM_BUG_ON(gt->awake); /* GT should be parked first */
+ /* Note: By the time we're here, GuC may have already been reset */
+
guc_interrupts_release(gt);
- guc_clients_disable(guc);
+
+ guc_stage_desc_fini(guc);
+ guc_proc_desc_fini(guc);
}
static bool __guc_submission_support(struct intel_guc *guc)
@@ -1185,6 +676,7 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
guc->submission_supported = __guc_submission_support(guc);
}
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftest_guc.c"
-#endif
+bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine)
+{
+ return engine->set_default_submission == guc_set_default_submission;
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
index 54d716828352..e402a2932592 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -6,62 +6,18 @@
#ifndef _INTEL_GUC_SUBMISSION_H_
#define _INTEL_GUC_SUBMISSION_H_
-#include <linux/spinlock.h>
+#include <linux/types.h>
-#include "gt/intel_engine_types.h"
-
-#include "i915_gem.h"
-#include "i915_selftest.h"
-
-struct drm_i915_private;
-
-/*
- * This structure primarily describes the GEM object shared with the GuC.
- * The specs sometimes refer to this object as a "GuC context", but we use
- * the term "client" to avoid confusion with hardware contexts. This
- * GEM object is held for the entire lifetime of our interaction with
- * the GuC, being allocated before the GuC is loaded with its firmware.
- * Because there's no way to update the address used by the GuC after
- * initialisation, the shared object must stay pinned into the GGTT as
- * long as the GuC is in use. We also keep the first page (only) mapped
- * into kernel address space, as it includes shared data that must be
- * updated on every request submission.
- *
- * The single GEM object described here is actually made up of several
- * separate areas, as far as the GuC is concerned. The first page (kept
- * kmap'd) includes the "process descriptor" which holds sequence data for
- * the doorbell, and one cacheline which actually *is* the doorbell; a
- * write to this will "ring the doorbell" (i.e. send an interrupt to the
- * GuC). The subsequent pages of the client object constitute the work
- * queue (a circular array of work items), again described in the process
- * descriptor. Work queue pages are mapped momentarily as required.
- */
-struct intel_guc_client {
- struct i915_vma *vma;
- void *vaddr;
- struct intel_guc *guc;
-
- /* bitmap of (host) engine ids */
- u32 priority;
- u32 stage_id;
- u32 proc_desc_offset;
-
- u16 doorbell_id;
- unsigned long doorbell_offset;
-
- /* Protects GuC client's WQ access */
- spinlock_t wq_lock;
-
- /* For testing purposes, use nop WQ items instead of real ones */
- I915_SELFTEST_DECLARE(bool use_nop_wqi);
-};
+struct intel_guc;
+struct intel_engine_cs;
void intel_guc_submission_init_early(struct intel_guc *guc);
int intel_guc_submission_init(struct intel_guc *guc);
-int intel_guc_submission_enable(struct intel_guc *guc);
+void intel_guc_submission_enable(struct intel_guc *guc);
void intel_guc_submission_disable(struct intel_guc *guc);
void intel_guc_submission_fini(struct intel_guc *guc);
int intel_guc_preempt_work_create(struct intel_guc *guc);
void intel_guc_preempt_work_destroy(struct intel_guc *guc);
+bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index 8be515c8d0f0..32a069841c14 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -63,7 +63,7 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc)
void *vaddr;
int err;
- err = i915_inject_load_error(gt->i915, -ENXIO);
+ err = i915_inject_probe_error(gt->i915, -ENXIO);
if (err)
return err;
@@ -161,7 +161,7 @@ int intel_huc_auth(struct intel_huc *huc)
if (!intel_uc_fw_is_loaded(&huc->fw))
return -ENOEXEC;
- ret = i915_inject_load_error(gt->i915, -ENXIO);
+ ret = i915_inject_probe_error(gt->i915, -ENXIO);
if (ret)
goto fail;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
index d654340d4d03..eee193bf2cc4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
@@ -39,5 +39,5 @@ void intel_huc_fw_init_early(struct intel_huc *huc)
int intel_huc_fw_upload(struct intel_huc *huc)
{
/* HW doesn't look at destination address for HuC, so set it to 0 */
- return intel_uc_fw_upload(&huc->fw, huc_to_gt(huc), 0, HUC_UKERNEL);
+ return intel_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 3fdbc935d155..3ffc6267f96e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -20,7 +20,7 @@ static int __intel_uc_reset_hw(struct intel_uc *uc)
int ret;
u32 guc_status;
- ret = i915_inject_load_error(gt->i915, -ENXIO);
+ ret = i915_inject_probe_error(gt->i915, -ENXIO);
if (ret)
return ret;
@@ -123,6 +123,11 @@ static void __uc_free_load_err_log(struct intel_uc *uc)
i915_gem_object_put(log);
}
+static inline bool guc_communication_enabled(struct intel_guc *guc)
+{
+ return intel_guc_ct_enabled(&guc->ct);
+}
+
/*
* Events triggered while CT buffers are disabled are logged in the SCRATCH_15
* register using the same bits used in the CT message payload. Since our
@@ -158,7 +163,7 @@ static void guc_handle_mmio_msg(struct intel_guc *guc)
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
/* we need communication to be enabled to reply to GuC */
- GEM_BUG_ON(guc->handler == intel_guc_to_host_event_handler_nop);
+ GEM_BUG_ON(!guc_communication_enabled(guc));
if (!guc->mmio_msg)
return;
@@ -185,11 +190,6 @@ static void guc_disable_interrupts(struct intel_guc *guc)
guc->interrupts.disable(guc);
}
-static inline bool guc_communication_enabled(struct intel_guc *guc)
-{
- return guc->send != intel_guc_send_nop;
-}
-
static int guc_enable_communication(struct intel_guc *guc)
{
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
@@ -197,7 +197,7 @@ static int guc_enable_communication(struct intel_guc *guc)
GEM_BUG_ON(guc_communication_enabled(guc));
- ret = i915_inject_load_error(i915, -ENXIO);
+ ret = i915_inject_probe_error(i915, -ENXIO);
if (ret)
return ret;
@@ -205,9 +205,6 @@ static int guc_enable_communication(struct intel_guc *guc)
if (ret)
return ret;
- guc->send = intel_guc_send_ct;
- guc->handler = intel_guc_to_host_event_handler_ct;
-
/* check for mmio messages received before/during the CT enable */
guc_get_mmio_msg(guc);
guc_handle_mmio_msg(guc);
@@ -216,7 +213,7 @@ static int guc_enable_communication(struct intel_guc *guc)
/* check for CT messages received before we enabled interrupts */
spin_lock_irq(&i915->irq_lock);
- intel_guc_to_host_event_handler_ct(guc);
+ intel_guc_ct_event_handler(&guc->ct);
spin_unlock_irq(&i915->irq_lock);
DRM_INFO("GuC communication enabled\n");
@@ -224,7 +221,7 @@ static int guc_enable_communication(struct intel_guc *guc)
return 0;
}
-static void __guc_stop_communication(struct intel_guc *guc)
+static void guc_disable_communication(struct intel_guc *guc)
{
/*
* Events generated during or after CT disable are logged by guc in
@@ -235,23 +232,6 @@ static void __guc_stop_communication(struct intel_guc *guc)
guc_disable_interrupts(guc);
- guc->send = intel_guc_send_nop;
- guc->handler = intel_guc_to_host_event_handler_nop;
-}
-
-static void guc_stop_communication(struct intel_guc *guc)
-{
- intel_guc_ct_stop(&guc->ct);
-
- __guc_stop_communication(guc);
-
- DRM_INFO("GuC communication stopped\n");
-}
-
-static void guc_disable_communication(struct intel_guc *guc)
-{
- __guc_stop_communication(guc);
-
intel_guc_ct_disable(&guc->ct);
/*
@@ -267,28 +247,22 @@ static void guc_disable_communication(struct intel_guc *guc)
void intel_uc_fetch_firmwares(struct intel_uc *uc)
{
- struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
int err;
if (!intel_uc_uses_guc(uc))
return;
- err = intel_uc_fw_fetch(&uc->guc.fw, i915);
+ err = intel_uc_fw_fetch(&uc->guc.fw);
if (err)
return;
if (intel_uc_uses_huc(uc))
- intel_uc_fw_fetch(&uc->huc.fw, i915);
+ intel_uc_fw_fetch(&uc->huc.fw);
}
void intel_uc_cleanup_firmwares(struct intel_uc *uc)
{
- if (!intel_uc_uses_guc(uc))
- return;
-
- if (intel_uc_uses_huc(uc))
- intel_uc_fw_cleanup_fetch(&uc->huc.fw);
-
+ intel_uc_fw_cleanup_fetch(&uc->huc.fw);
intel_uc_fw_cleanup_fetch(&uc->guc.fw);
}
@@ -316,15 +290,8 @@ void intel_uc_init(struct intel_uc *uc)
void intel_uc_fini(struct intel_uc *uc)
{
- struct intel_guc *guc = &uc->guc;
-
- if (!intel_uc_uses_guc(uc))
- return;
-
- if (intel_uc_uses_huc(uc))
- intel_huc_fini(&uc->huc);
-
- intel_guc_fini(guc);
+ intel_huc_fini(&uc->huc);
+ intel_guc_fini(&uc->guc);
__uc_free_load_err_log(uc);
}
@@ -372,7 +339,7 @@ static int uc_init_wopcm(struct intel_uc *uc)
GEM_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK));
GEM_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK);
- err = i915_inject_load_error(gt->i915, -ENXIO);
+ err = i915_inject_probe_error(gt->i915, -ENXIO);
if (err)
return err;
@@ -486,11 +453,8 @@ int intel_uc_init_hw(struct intel_uc *uc)
if (ret)
goto err_communication;
- if (intel_uc_supports_guc_submission(uc)) {
- ret = intel_guc_submission_enable(guc);
- if (ret)
- goto err_communication;
- }
+ if (intel_uc_supports_guc_submission(uc))
+ intel_guc_submission_enable(guc);
dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path,
@@ -560,7 +524,7 @@ void intel_uc_reset_prepare(struct intel_uc *uc)
if (!intel_guc_is_running(guc))
return;
- guc_stop_communication(guc);
+ guc_disable_communication(guc);
__uc_sanitize(uc);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index bb4889d2346d..8ee0a0c7f447 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -11,7 +11,6 @@
#include "intel_uc_fw_abi.h"
#include "i915_drv.h"
-#ifdef CONFIG_DRM_I915_DEBUG_GUC
static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw)
{
GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED);
@@ -22,6 +21,7 @@ static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw)
return container_of(uc_fw, struct intel_gt, uc.huc.fw);
}
+#ifdef CONFIG_DRM_I915_DEBUG_GUC
void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
enum intel_uc_fw_status status)
{
@@ -37,8 +37,13 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
/*
* List of required GuC and HuC binaries per-platform.
* Must be ordered based on platform + revid, from newer to older.
+ *
+ * TGL 35.2 is interface-compatible with 33.0 for previous Gens. The deltas
+ * between 33.0 and 35.2 are only related to new additions to support new Gen12
+ * features.
*/
#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
+ fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 3)) \
fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \
fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \
fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \
@@ -214,35 +219,36 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
INTEL_UC_FIRMWARE_NOT_SUPPORTED);
}
-static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw,
- struct drm_i915_private *i915,
- int e)
+static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, int e)
{
+ struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915;
bool user = e == -EINVAL;
- if (i915_inject_load_error(i915, e)) {
+ if (i915_inject_probe_error(i915, e)) {
/* non-existing blob */
uc_fw->path = "<invalid>";
uc_fw->user_overridden = user;
- } else if (i915_inject_load_error(i915, e)) {
+ } else if (i915_inject_probe_error(i915, e)) {
/* require next major version */
uc_fw->major_ver_wanted += 1;
uc_fw->minor_ver_wanted = 0;
uc_fw->user_overridden = user;
- } else if (i915_inject_load_error(i915, e)) {
+ } else if (i915_inject_probe_error(i915, e)) {
/* require next minor version */
uc_fw->minor_ver_wanted += 1;
uc_fw->user_overridden = user;
- } else if (uc_fw->major_ver_wanted && i915_inject_load_error(i915, e)) {
+ } else if (uc_fw->major_ver_wanted &&
+ i915_inject_probe_error(i915, e)) {
/* require prev major version */
uc_fw->major_ver_wanted -= 1;
uc_fw->minor_ver_wanted = 0;
uc_fw->user_overridden = user;
- } else if (uc_fw->minor_ver_wanted && i915_inject_load_error(i915, e)) {
+ } else if (uc_fw->minor_ver_wanted &&
+ i915_inject_probe_error(i915, e)) {
/* require prev minor version - hey, this should work! */
uc_fw->minor_ver_wanted -= 1;
uc_fw->user_overridden = user;
- } else if (user && i915_inject_load_error(i915, e)) {
+ } else if (user && i915_inject_probe_error(i915, e)) {
/* officially unsupported platform */
uc_fw->major_ver_wanted = 0;
uc_fw->minor_ver_wanted = 0;
@@ -253,14 +259,14 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw,
/**
* intel_uc_fw_fetch - fetch uC firmware
* @uc_fw: uC firmware
- * @i915: device private
*
* Fetch uC firmware into GEM obj.
*
* Return: 0 on success, a negative errno code on failure.
*/
-int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915)
+int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
{
+ struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915;
struct device *dev = i915->drm.dev;
struct drm_i915_gem_object *obj;
const struct firmware *fw = NULL;
@@ -271,12 +277,12 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915)
GEM_BUG_ON(!i915->wopcm.size);
GEM_BUG_ON(!intel_uc_fw_is_enabled(uc_fw));
- err = i915_inject_load_error(i915, -ENXIO);
+ err = i915_inject_probe_error(i915, -ENXIO);
if (err)
return err;
- __force_fw_fetch_failures(uc_fw, i915, -EINVAL);
- __force_fw_fetch_failures(uc_fw, i915, -ESTALE);
+ __force_fw_fetch_failures(uc_fw, -EINVAL);
+ __force_fw_fetch_failures(uc_fw, -ESTALE);
err = request_firmware(&fw, uc_fw->path, dev);
if (err)
@@ -383,8 +389,9 @@ fail:
return err;
}
-static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt)
+static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw)
{
+ struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
struct drm_mm_node *node = &ggtt->uc_fw;
GEM_BUG_ON(!drm_mm_node_allocated(node));
@@ -394,13 +401,12 @@ static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt)
return lower_32_bits(node->start);
}
-static void intel_uc_fw_ggtt_bind(struct intel_uc_fw *uc_fw,
- struct intel_gt *gt)
+static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
{
struct drm_i915_gem_object *obj = uc_fw->obj;
- struct i915_ggtt *ggtt = gt->ggtt;
+ struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
struct i915_vma dummy = {
- .node.start = uc_fw_ggtt_offset(uc_fw, ggtt),
+ .node.start = uc_fw_ggtt_offset(uc_fw),
.node.size = obj->base.size,
.pages = obj->mm.pages,
.vm = &ggtt->vm,
@@ -415,37 +421,36 @@ static void intel_uc_fw_ggtt_bind(struct intel_uc_fw *uc_fw,
ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0);
}
-static void intel_uc_fw_ggtt_unbind(struct intel_uc_fw *uc_fw,
- struct intel_gt *gt)
+static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw)
{
struct drm_i915_gem_object *obj = uc_fw->obj;
- struct i915_ggtt *ggtt = gt->ggtt;
- u64 start = uc_fw_ggtt_offset(uc_fw, ggtt);
+ struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
+ u64 start = uc_fw_ggtt_offset(uc_fw);
ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size);
}
-static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
- u32 wopcm_offset, u32 dma_flags)
+static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
{
+ struct intel_gt *gt = __uc_fw_to_gt(uc_fw);
struct intel_uncore *uncore = gt->uncore;
u64 offset;
int ret;
- ret = i915_inject_load_error(gt->i915, -ETIMEDOUT);
+ ret = i915_inject_probe_error(gt->i915, -ETIMEDOUT);
if (ret)
return ret;
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
/* Set the source address for the uCode */
- offset = uc_fw_ggtt_offset(uc_fw, gt->ggtt);
+ offset = uc_fw_ggtt_offset(uc_fw);
GEM_BUG_ON(upper_32_bits(offset) & 0xFFFF0000);
intel_uncore_write_fw(uncore, DMA_ADDR_0_LOW, lower_32_bits(offset));
intel_uncore_write_fw(uncore, DMA_ADDR_0_HIGH, upper_32_bits(offset));
/* Set the DMA destination */
- intel_uncore_write_fw(uncore, DMA_ADDR_1_LOW, wopcm_offset);
+ intel_uncore_write_fw(uncore, DMA_ADDR_1_LOW, dst_offset);
intel_uncore_write_fw(uncore, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
/*
@@ -477,23 +482,22 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
/**
* intel_uc_fw_upload - load uC firmware using custom loader
* @uc_fw: uC firmware
- * @gt: the intel_gt structure
- * @wopcm_offset: destination offset in wopcm
+ * @dst_offset: destination offset
* @dma_flags: flags for flags for dma ctrl
*
* Loads uC firmware and updates internal flags.
*
* Return: 0 on success, non-zero on failure.
*/
-int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
- u32 wopcm_offset, u32 dma_flags)
+int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
{
+ struct intel_gt *gt = __uc_fw_to_gt(uc_fw);
int err;
/* make sure the status was cleared the last time we reset the uc */
GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw));
- err = i915_inject_load_error(gt->i915, -ENOEXEC);
+ err = i915_inject_probe_error(gt->i915, -ENOEXEC);
if (err)
return err;
@@ -501,9 +505,9 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
return -ENOEXEC;
/* Call custom loader */
- intel_uc_fw_ggtt_bind(uc_fw, gt);
- err = uc_fw_xfer(uc_fw, gt, wopcm_offset, dma_flags);
- intel_uc_fw_ggtt_unbind(uc_fw, gt);
+ uc_fw_bind_ggtt(uc_fw);
+ err = uc_fw_xfer(uc_fw, dst_offset, dma_flags);
+ uc_fw_unbind_ggtt(uc_fw);
if (err)
goto fail;
@@ -540,10 +544,7 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
{
- if (!intel_uc_fw_is_available(uc_fw))
- return;
-
- i915_gem_object_unpin_pages(uc_fw->obj);
+ intel_uc_fw_cleanup_fetch(uc_fw);
}
/**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 7a0a5989afc9..1f30543d0d2d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -229,10 +229,9 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
enum intel_uc_fw_type type, bool supported,
enum intel_platform platform, u8 rev);
-int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915);
+int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw);
void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw);
-int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
- u32 wopcm_offset, u32 dma_flags);
+int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags);
int intel_uc_fw_init(struct intel_uc_fw *uc_fw);
void intel_uc_fw_fini(struct intel_uc_fw *uc_fw);
size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len);
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
deleted file mode 100644
index d8a80388bd31..000000000000
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ /dev/null
@@ -1,299 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2017 Intel Corporation
- */
-
-#include "i915_selftest.h"
-#include "gem/i915_gem_pm.h"
-
-/* max doorbell number + negative test for each client type */
-#define ATTEMPTS (GUC_NUM_DOORBELLS + GUC_CLIENT_PRIORITY_NUM)
-
-static struct intel_guc_client *clients[ATTEMPTS];
-
-static bool available_dbs(struct intel_guc *guc, u32 priority)
-{
- unsigned long offset;
- unsigned long end;
- u16 id;
-
- /* first half is used for normal priority, second half for high */
- offset = 0;
- end = GUC_NUM_DOORBELLS / 2;
- if (priority <= GUC_CLIENT_PRIORITY_HIGH) {
- offset = end;
- end += offset;
- }
-
- id = find_next_zero_bit(guc->doorbell_bitmap, end, offset);
- if (id < end)
- return true;
-
- return false;
-}
-
-static int check_all_doorbells(struct intel_guc *guc)
-{
- u16 db_id;
-
- pr_info_once("Max number of doorbells: %d", GUC_NUM_DOORBELLS);
- for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) {
- if (!doorbell_ok(guc, db_id)) {
- pr_err("doorbell %d, not ok\n", db_id);
- return -EIO;
- }
- }
-
- return 0;
-}
-
-static int ring_doorbell_nop(struct intel_guc_client *client)
-{
- struct guc_process_desc *desc = __get_process_desc(client);
- int err;
-
- client->use_nop_wqi = true;
-
- spin_lock_irq(&client->wq_lock);
-
- guc_wq_item_append(client, 0, 0, 0, 0);
- guc_ring_doorbell(client);
-
- spin_unlock_irq(&client->wq_lock);
-
- client->use_nop_wqi = false;
-
- /* if there are no issues GuC will update the WQ head and keep the
- * WQ in active status
- */
- err = wait_for(READ_ONCE(desc->head) == READ_ONCE(desc->tail), 10);
- if (err) {
- pr_err("doorbell %u ring failed!\n", client->doorbell_id);
- return -EIO;
- }
-
- if (desc->wq_status != WQ_STATUS_ACTIVE) {
- pr_err("doorbell %u ring put WQ in bad state (%u)!\n",
- client->doorbell_id, desc->wq_status);
- return -EIO;
- }
-
- return 0;
-}
-
-/*
- * Basic client sanity check, handy to validate create_clients.
- */
-static int validate_client(struct intel_guc_client *client, int client_priority)
-{
- if (client->priority != client_priority ||
- client->doorbell_id == GUC_DOORBELL_INVALID)
- return -EINVAL;
- else
- return 0;
-}
-
-static bool client_doorbell_in_sync(struct intel_guc_client *client)
-{
- return !client || doorbell_ok(client->guc, client->doorbell_id);
-}
-
-/*
- * Check that we're able to synchronize guc_clients with their doorbells
- *
- * We're creating clients and reserving doorbells once, at module load. During
- * module lifetime, GuC, doorbell HW, and i915 state may go out of sync due to
- * GuC being reset. In other words - GuC clients are still around, but the
- * status of their doorbells may be incorrect. This is the reason behind
- * validating that the doorbells status expected by the driver matches what the
- * GuC/HW have.
- */
-static int igt_guc_clients(void *arg)
-{
- struct intel_gt *gt = arg;
- struct intel_guc *guc = &gt->uc.guc;
- intel_wakeref_t wakeref;
- int err = 0;
-
- GEM_BUG_ON(!HAS_GT_UC(gt->i915));
- wakeref = intel_runtime_pm_get(gt->uncore->rpm);
-
- err = check_all_doorbells(guc);
- if (err)
- goto unlock;
-
- /*
- * Get rid of clients created during driver load because the test will
- * recreate them.
- */
- guc_clients_disable(guc);
- guc_clients_destroy(guc);
- if (guc->execbuf_client) {
- pr_err("guc_clients_destroy lied!\n");
- err = -EINVAL;
- goto unlock;
- }
-
- err = guc_clients_create(guc);
- if (err) {
- pr_err("Failed to create clients\n");
- goto unlock;
- }
- GEM_BUG_ON(!guc->execbuf_client);
-
- err = validate_client(guc->execbuf_client,
- GUC_CLIENT_PRIORITY_KMD_NORMAL);
- if (err) {
- pr_err("execbug client validation failed\n");
- goto out;
- }
-
- /* the client should now have reserved a doorbell */
- if (!has_doorbell(guc->execbuf_client)) {
- pr_err("guc_clients_create didn't reserve doorbells\n");
- err = -EINVAL;
- goto out;
- }
-
- /* Now enable the clients */
- guc_clients_enable(guc);
-
- /* each client should now have received a doorbell */
- if (!client_doorbell_in_sync(guc->execbuf_client)) {
- pr_err("failed to initialize the doorbells\n");
- err = -EINVAL;
- goto out;
- }
-
- /*
- * Basic test - an attempt to reallocate a valid doorbell to the
- * client it is currently assigned should not cause a failure.
- */
- err = create_doorbell(guc->execbuf_client);
-
-out:
- /*
- * Leave clean state for other test, plus the driver always destroy the
- * clients during unload.
- */
- guc_clients_disable(guc);
- guc_clients_destroy(guc);
- guc_clients_create(guc);
- guc_clients_enable(guc);
-unlock:
- intel_runtime_pm_put(gt->uncore->rpm, wakeref);
- return err;
-}
-
-/*
- * Create as many clients as number of doorbells. Note that there's already
- * client(s)/doorbell(s) created during driver load, but this test creates
- * its own and do not interact with the existing ones.
- */
-static int igt_guc_doorbells(void *arg)
-{
- struct intel_gt *gt = arg;
- struct intel_guc *guc = &gt->uc.guc;
- intel_wakeref_t wakeref;
- int i, err = 0;
- u16 db_id;
-
- GEM_BUG_ON(!HAS_GT_UC(gt->i915));
- wakeref = intel_runtime_pm_get(gt->uncore->rpm);
-
- err = check_all_doorbells(guc);
- if (err)
- goto unlock;
-
- for (i = 0; i < ATTEMPTS; i++) {
- clients[i] = guc_client_alloc(guc, i % GUC_CLIENT_PRIORITY_NUM);
-
- if (!clients[i]) {
- pr_err("[%d] No guc client\n", i);
- err = -EINVAL;
- goto out;
- }
-
- if (IS_ERR(clients[i])) {
- if (PTR_ERR(clients[i]) != -ENOSPC) {
- pr_err("[%d] unexpected error\n", i);
- err = PTR_ERR(clients[i]);
- goto out;
- }
-
- if (available_dbs(guc, i % GUC_CLIENT_PRIORITY_NUM)) {
- pr_err("[%d] non-db related alloc fail\n", i);
- err = -EINVAL;
- goto out;
- }
-
- /* expected, ran out of dbs for this client type */
- continue;
- }
-
- /*
- * The check below is only valid because we keep a doorbell
- * assigned during the whole life of the client.
- */
- if (clients[i]->stage_id >= GUC_NUM_DOORBELLS) {
- pr_err("[%d] more clients than doorbells (%d >= %d)\n",
- i, clients[i]->stage_id, GUC_NUM_DOORBELLS);
- err = -EINVAL;
- goto out;
- }
-
- err = validate_client(clients[i], i % GUC_CLIENT_PRIORITY_NUM);
- if (err) {
- pr_err("[%d] client_alloc sanity check failed!\n", i);
- err = -EINVAL;
- goto out;
- }
-
- db_id = clients[i]->doorbell_id;
-
- err = __guc_client_enable(clients[i]);
- if (err) {
- pr_err("[%d] Failed to create a doorbell\n", i);
- goto out;
- }
-
- /* doorbell id shouldn't change, we are holding the mutex */
- if (db_id != clients[i]->doorbell_id) {
- pr_err("[%d] doorbell id changed (%d != %d)\n",
- i, db_id, clients[i]->doorbell_id);
- err = -EINVAL;
- goto out;
- }
-
- err = check_all_doorbells(guc);
- if (err)
- goto out;
-
- err = ring_doorbell_nop(clients[i]);
- if (err)
- goto out;
- }
-
-out:
- for (i = 0; i < ATTEMPTS; i++)
- if (!IS_ERR_OR_NULL(clients[i])) {
- __guc_client_disable(clients[i]);
- guc_client_free(clients[i]);
- }
-unlock:
- intel_runtime_pm_put(gt->uncore->rpm, wakeref);
- return err;
-}
-
-int intel_guc_live_selftest(struct drm_i915_private *i915)
-{
- static const struct i915_subtest tests[] = {
- SUBTEST(igt_guc_clients),
- SUBTEST(igt_guc_doorbells),
- };
-
- if (!USES_GUC_SUBMISSION(i915))
- return 0;
-
- return intel_gt_live_subtests(tests, &i915->gt);
-}