From 11ecbdddf2f8b6cc2480aff6d877b7a4076e3b7f Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 17 Mar 2020 15:22:22 +0200 Subject: drm/i915/perf: introduce global sseu pinning On Gen11 powergating half the execution units is a functional requirement when using the VME samplers. Not fullfilling this requirement can lead to hangs. This unfortunately plays fairly poorly with the NOA requirements. NOA requires a stable power configuration to maintain its configuration. As a result using OA (and NOA feeding into it) so far has required us to use a power configuration that can work for all contexts. The only power configuration fullfilling this is powergating half the execution units. This makes performance analysis for 3D workloads somewhat pointless. Failing to find a solution that would work for everybody, this change introduces a new i915-perf stream open parameter that punts the decision off to userspace. If this parameter is omitted, the existing Gen11 behavior remains (half EU array powergating). This change takes the initiative to move all perf related sseu configuration into i915_perf.c v2: Make parameter priviliged if different from default v3: Fix context modifying its sseu config while i915-perf is enabled v4: Always consider global sseu a privileged operation (Tvrtko) Override req_sseu point in intel_sseu_make_rpcs() (Tvrtko) Remove unrelated changes (Tvrtko) v5: Some typos (Tvrtko) Process sseu param in read_properties_unlocked() (Tvrtko) v6: Actually commit the bits from v5... Fixup some checkpath warnings v7: Only compare engine uabi field (Chris) Signed-off-by: Lionel Landwerlin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200317132222.2638719-3-lionel.g.landwerlin@intel.com --- include/uapi/drm/i915_drm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 2813e579b480..db649d03ab52 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1969,6 +1969,17 @@ enum drm_i915_perf_property_id { */ DRM_I915_PERF_PROP_HOLD_PREEMPTION, + /** + * Specifying this pins all contexts to the specified SSEU power + * configuration for the duration of the recording. + * + * This parameter's value is a pointer to a struct + * drm_i915_gem_context_param_sseu. + * + * This property is available in perf revision 4. + */ + DRM_I915_PERF_PROP_GLOBAL_SSEU, + DRM_I915_PERF_PROP_MAX /* non-ABI */ }; -- cgit v1.2.3 From 4ef10fe05ba0b08ce7029c07878afe3c8d5754d8 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 24 Mar 2020 11:54:57 -0700 Subject: drm/i915/perf: add new open param to configure polling of OA buffer This new parameter let's the application choose how often the OA buffer should be checked on the CPU side for data availability. Longer polling period tend to reduce CPU overhead if the application does not care about somewhat real time data collection. v2: Allow disabling polling completely with 0 value (Lionel) v3: Version the new parameter (Joonas) v4: Rebase (Umesh) v5: Make poll delay value of 0 invalid (Umesh) v6: - Describe poll_oa_period (Ashutosh) - Fix comment for new poll parameter (Lionel) - Drop open_flags in read_properties_unlocked (Lionel) - Rename uapi parameter (Ashutosh) v7: Reword the comment in uapi (Ashutosh) Signed-off-by: Lionel Landwerlin Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Signed-off-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20200324185457.14635-4-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 32 +++++++++++++++++++++++++------- drivers/gpu/drm/i915/i915_perf_types.h | 6 ++++++ include/uapi/drm/i915_drm.h | 13 +++++++++++++ 3 files changed, 44 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 4cadf97f94bc..c74ebac50015 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -248,11 +248,11 @@ #define OA_TAIL_MARGIN_NSEC 100000ULL #define INVALID_TAIL_PTR 0xffffffff -/* frequency for checking whether the OA unit has written new reports to the - * circular OA buffer... +/* The default frequency for checking whether the OA unit has written new + * reports to the circular OA buffer... */ -#define POLL_FREQUENCY 200 -#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY) +#define DEFAULT_POLL_FREQUENCY_HZ 200 +#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) /* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */ static u32 i915_perf_stream_paranoid = true; @@ -339,6 +339,8 @@ static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = { * @sseu: internal SSEU configuration computed either from the userspace * specified configuration in the opening parameters or a default value * (see get_default_sseu_config()) + * @poll_oa_period: The period in nanoseconds at which the CPU will check for OA + * data availability * * As read_properties_unlocked() enumerates and validates the properties given * to open a stream of metrics the configuration is built up in the structure @@ -361,6 +363,8 @@ struct perf_open_properties { bool has_sseu; struct intel_sseu sseu; + + u64 poll_oa_period; }; struct i915_oa_config_bo { @@ -2600,7 +2604,7 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream) if (stream->periodic) hrtimer_start(&stream->poll_check_timer, - ns_to_ktime(POLL_PERIOD), + ns_to_ktime(stream->poll_oa_period), HRTIMER_MODE_REL_PINNED); } @@ -3035,7 +3039,8 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) wake_up(&stream->poll_wq); } - hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD)); + hrtimer_forward_now(hrtimer, + ns_to_ktime(stream->poll_oa_period)); return HRTIMER_RESTART; } @@ -3424,6 +3429,7 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, stream->perf = perf; stream->ctx = specific_ctx; + stream->poll_oa_period = props->poll_oa_period; ret = i915_oa_stream_init(stream, param, props); if (ret) @@ -3502,6 +3508,7 @@ static int read_properties_unlocked(struct i915_perf *perf, int ret; memset(props, 0, sizeof(struct perf_open_properties)); + props->poll_oa_period = DEFAULT_POLL_PERIOD_NS; if (!n_props) { DRM_DEBUG("No i915 perf properties given\n"); @@ -3634,6 +3641,14 @@ static int read_properties_unlocked(struct i915_perf *perf, props->has_sseu = true; break; } + case DRM_I915_PERF_PROP_POLL_OA_PERIOD: + if (value < 100000 /* 100us */) { + DRM_DEBUG("OA availability timer too small (%lluns < 100us)\n", + value); + return -EINVAL; + } + props->poll_oa_period = value; + break; case DRM_I915_PERF_PROP_MAX: MISSING_CASE(id); return -EINVAL; @@ -4416,8 +4431,11 @@ int i915_perf_ioctl_version(void) * 4: Add DRM_I915_PERF_PROP_ALLOWED_SSEU to limit what contexts can * be run for the duration of the performance recording based on * their SSEU configuration. + * + * 5: Add DRM_I915_PERF_PROP_POLL_OA_PERIOD parameter that controls the + * interval for the hrtimer used to check for OA data. */ - return 4; + return 5; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index c3ab184c604a..371dcf243622 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -304,6 +304,12 @@ struct i915_perf_stream { * reprogrammed. */ struct i915_vma *noa_wait; + + /** + * @poll_oa_period: The period in nanoseconds at which the OA + * buffer should be checked for available data. + */ + u64 poll_oa_period; }; /** diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index db649d03ab52..14b67cd6b54b 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1980,6 +1980,19 @@ enum drm_i915_perf_property_id { */ DRM_I915_PERF_PROP_GLOBAL_SSEU, + /** + * This optional parameter specifies the timer interval in nanoseconds + * at which the i915 driver will check the OA buffer for available data. + * Minimum allowed value is 100 microseconds. A default value is used by + * the driver if this parameter is not specified. Note that larger timer + * values will reduce cpu consumption during OA perf captures. However, + * excessively large values would potentially result in OA buffer + * overwrites as captures reach end of the OA buffer. + * + * This property is available in perf revision 5. + */ + DRM_I915_PERF_PROP_POLL_OA_PERIOD, + DRM_I915_PERF_PROP_MAX /* non-ABI */ }; -- cgit v1.2.3