summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_drv.h
diff options
context:
space:
mode:
authorRobert Bragg <robert@sixbynine.org>2017-06-13 14:23:03 +0300
committerBen Widawsky <ben@bwidawsk.net>2017-06-14 22:31:57 +0300
commit19f81df2859eb10e92d68991cefa39f826dea013 (patch)
tree9cc888e18aef9c9fedc69f2eabb91e1590761c9d /drivers/gpu/drm/i915/i915_drv.h
parent5182f646c7615ede9a9ba3ecd241f6cbe16829dc (diff)
downloadlinux-19f81df2859eb10e92d68991cefa39f826dea013.tar.xz
drm/i915/perf: Add OA unit support for Gen 8+
Enables access to OA unit metrics for BDW, CHV, SKL and BXT which all share (more-or-less) the same OA unit design. Of particular note in comparison to Haswell: some OA unit HW config state has become per-context state and as a consequence it is somewhat more complicated to manage synchronous state changes from the cpu while there's no guarantee of what context (if any) is currently actively running on the gpu. The periodic sampling frequency which can be particularly useful for system-wide analysis (as opposed to command stream synchronised MI_REPORT_PERF_COUNT commands) is perhaps the most surprising state to have become per-context save and restored (while the OABUFFER destination is still a shared, system-wide resource). This support for gen8+ takes care to consider a number of timing challenges involved in synchronously updating per-context state primarily by programming all config state from the cpu and updating all current and saved contexts synchronously while the OA unit is still disabled. The driver intentionally avoids depending on command streamer programming to update OA state considering the lack of synchronization between the automatic loading of OACTXCONTROL state (that includes the periodic sampling state and enable state) on context restore and the parsing of any general purpose BB the driver can control. I.e. this implementation is careful to avoid the possibility of a context restore temporarily enabling any out-of-date periodic sampling state. In addition to the risk of transiently-out-of-date state being loaded automatically; there are also internal HW latencies involved in the loading of MUX configurations which would be difficult to account for from the command streamer (and we only want to enable the unit when once the MUX configuration is complete). Since the Gen8+ OA unit design no longer supports clock gating the unit off for a single given context (which effectively stopped any progress of counters while any other context was running) and instead supports tagging OA reports with a context ID for filtering on the CPU, it means we can no longer hide the system-wide progress of counters from a non-privileged application only interested in metrics for its own context. Although we could theoretically try and subtract the progress of other contexts before forwarding reports via read() we aren't in a position to filter reports captured via MI_REPORT_PERF_COUNT commands. As a result, for Gen8+, we always require the dev.i915.perf_stream_paranoid to be unset for any access to OA metrics if not root. v5: Drain submitted requests when enabling metric set to ensure no lite-restore erases the context image we just updated (Lionel) v6: In addition to drain, switch to kernel context & update all context in place (Chris) v7: Add missing mutex_unlock() if switching to kernel context fails (Matthew) v8: Simplify OA period/flex-eu-counters programming by using the batchbuffer instead of modifying ctx-image (Lionel) v9: Back to updating the context image (due to erroneous testing, batchbuffer programming the OA unit doesn't actually work) (Lionel) Pin context before updating context image (Chris) Drop MMIO programming now that we switch to a kernel context with right values in initial context image (Chris) v10: Just pin_map the contexts we want to modify or let the configuration happen on first use (Chris) v11: Update kernel context OA config through the batchbuffer rather than on the fly ctx-image update (Lionel) v12: Rework OA context registers update again by swithing away from user contexts and reconfiguring the kernel context through the batchbuffer and updating all the other contexts' context image. Also take care to lock slice/subslice configuration when OA is on. (Lionel) v13: Request rpcs updates on all engine when updating the OA config (Lionel) v14: Drop any kind of rpcs management now that we monitor sseu configuration changes in a later patch (Lionel) Remove usleep after programming the NOA configs on Gen8+, this doesn't seem to be needed (Lionel) v15: Respect coding style for block comments (Chris) v16: Add missing i915_add_request() in case we fail to emit OA configuration (Matthew) Signed-off-by: Robert Bragg <robert@sixbynine.org> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> \o/ Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_drv.h')
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h46
1 files changed, 31 insertions, 15 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2faad94c9ef2..eefb35a5d27d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2018,9 +2018,17 @@ struct i915_oa_ops {
void (*init_oa_buffer)(struct drm_i915_private *dev_priv);
/**
- * @enable_metric_set: Applies any MUX configuration to set up the
- * Boolean and Custom (B/C) counters that are part of the counter
- * reports being sampled. May apply system constraints such as
+ * @select_metric_set: The auto generated code that checks whether a
+ * requested OA config is applicable to the system and if so sets up
+ * the mux, oa and flex eu register config pointers according to the
+ * current dev_priv->perf.oa.metrics_set.
+ */
+ int (*select_metric_set)(struct drm_i915_private *dev_priv);
+
+ /**
+ * @enable_metric_set: Selects and applies any MUX configuration to set
+ * up the Boolean and Custom (B/C) counters that are part of the
+ * counter reports being sampled. May apply system constraints such as
* disabling EU clock gating as required.
*/
int (*enable_metric_set)(struct drm_i915_private *dev_priv);
@@ -2051,20 +2059,13 @@ struct i915_oa_ops {
size_t *offset);
/**
- * @oa_buffer_check: Check for OA buffer data + update tail
- *
- * This is either called via fops or the poll check hrtimer (atomic
- * ctx) without any locks taken.
+ * @oa_hw_tail_read: read the OA tail pointer register
*
- * It's safe to read OA config state here unlocked, assuming that this
- * is only called while the stream is enabled, while the global OA
- * configuration can't be modified.
- *
- * Efficiency is more important than avoiding some false positives
- * here, which will be handled gracefully - likely resulting in an
- * %EAGAIN error for userspace.
+ * In particular this enables us to share all the fiddly code for
+ * handling the OA unit tail pointer race that affects multiple
+ * generations.
*/
- bool (*oa_buffer_check)(struct drm_i915_private *dev_priv);
+ u32 (*oa_hw_tail_read)(struct drm_i915_private *dev_priv);
};
struct intel_cdclk_state {
@@ -2429,6 +2430,7 @@ struct drm_i915_private {
struct {
struct i915_vma *vma;
u8 *vaddr;
+ u32 last_ctx_id;
int format;
int format_size;
@@ -2498,6 +2500,15 @@ struct drm_i915_private {
} oa_buffer;
u32 gen7_latched_oastatus1;
+ u32 ctx_oactxctrl_offset;
+ u32 ctx_flexeu0_offset;
+
+ /**
+ * The RPT_ID/reason field for Gen8+ includes a bit
+ * to determine if the CTX ID in the report is valid
+ * but the specific bit differs between Gen 8 and 9
+ */
+ u32 gen8_valid_ctx_bit;
struct i915_oa_ops ops;
const struct i915_oa_format *oa_formats;
@@ -2810,6 +2821,8 @@ intel_info(const struct drm_i915_private *dev_priv)
#define IS_KBL_ULX(dev_priv) (INTEL_DEVID(dev_priv) == 0x590E || \
INTEL_DEVID(dev_priv) == 0x5915 || \
INTEL_DEVID(dev_priv) == 0x591E)
+#define IS_SKL_GT2(dev_priv) (IS_SKYLAKE(dev_priv) && \
+ (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0010)
#define IS_SKL_GT3(dev_priv) (IS_SKYLAKE(dev_priv) && \
(INTEL_DEVID(dev_priv) & 0x00F0) == 0x0020)
#define IS_SKL_GT4(dev_priv) (IS_SKYLAKE(dev_priv) && \
@@ -3554,6 +3567,9 @@ i915_gem_context_lookup_timeline(struct i915_gem_context *ctx,
int i915_perf_open_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
+void i915_oa_init_reg_state(struct intel_engine_cs *engine,
+ struct i915_gem_context *ctx,
+ uint32_t *reg_state);
/* i915_gem_evict.c */
int __must_check i915_gem_evict_something(struct i915_address_space *vm,