summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_irq.c
diff options
context:
space:
mode:
authorDeepak S <deepak.s@linux.intel.com>2014-07-04 01:33:01 +0400
committerDaniel Vetter <daniel.vetter@ffwll.ch>2014-07-08 23:05:33 +0400
commit31685c258e0b0ad6aa486c5ec001382cf8a64212 (patch)
tree5dc4134e07533dcd4ac281bd870669a9e08991ad /drivers/gpu/drm/i915/i915_irq.c
parent8e09bf837f8c6b09784bf22c3a8c597df3c20b79 (diff)
downloadlinux-31685c258e0b0ad6aa486c5ec001382cf8a64212.tar.xz
drm/i915/vlv: WA for Turbo and RC6 to work together.
With RC6 enabled, BYT has a HW issue in determining the right Gfx busyness.
WA for Turbo + RC6: use SW-based Gfx busyness detection to decide on increasing/decreasing the frequency. This logic monitors the C0 counters of the render/media power wells over the EI period and takes the necessary action based on these values.
v2: Refactor duplicate code. (Ville)
v3: Reformat the comments. (Ville)
v4: Enable required counters and remove unwanted code. (Ville)
v5: Added frequency change acceleration support and removed kernel-doc style comments. (Ville)
v6: Updated comment section and fixed w/a comment. (Ville)
Signed-off-by: Deepak S <deepak.s@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_irq.c')
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c133
1 files changed, 132 insertions, 1 deletions
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d672053fdb10..0b4a8ed76a54 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1272,6 +1272,131 @@ static void notify_ring(struct drm_device *dev,
i915_queue_hangcheck(dev);
}
+/*
+ * Sample the CZ timestamp and the render/media C0 counters and report how
+ * busy the busier of the two was since the previous sample stored in
+ * @rps_ei, as a percentage of the elapsed time.
+ *
+ * @rps_ei is always refreshed with the new sample. On the first call
+ * (stored timestamp still zero) there is nothing to compare against, so the
+ * sample is only recorded and the current RPS frequency is returned instead
+ * of a percentage. Returns 0 when the scaled elapsed time is zero.
+ */
+static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
+			    struct intel_rps_ei_calc *rps_ei)
+{
+	u32 now, clk_khz;
+	u32 render_now, media_now;
+	u32 dt, render_busy, media_busy;
+
+	now = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+	clk_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
+
+	render_now = I915_READ(VLV_RENDER_C0_COUNT_REG);
+	media_now = I915_READ(VLV_MEDIA_C0_COUNT_REG);
+
+	if (!rps_ei->cz_ts_ei) {
+		/* No baseline yet: just seed the snapshot. */
+		rps_ei->cz_ts_ei = now;
+		rps_ei->render_ei_c0 = render_now;
+		rps_ei->media_ei_c0 = media_now;
+
+		return dev_priv->rps.cur_freq;
+	}
+
+	/* Deltas against the previous snapshot. */
+	dt = now - rps_ei->cz_ts_ei;
+	render_busy = render_now - rps_ei->render_ei_c0;
+	media_busy = media_now - rps_ei->media_ei_c0;
+
+	/* The new sample becomes the baseline for the next interval. */
+	rps_ei->cz_ts_ei = now;
+	rps_ei->render_ei_c0 = render_now;
+	rps_ei->media_ei_c0 = media_now;
+
+	/* Bring timestamp and counter deltas to a common unit (milli sec) */
+	dt /= VLV_CZ_CLOCK_TO_MILLI_SEC;
+	render_busy /= clk_khz;
+	media_busy /= clk_khz;
+
+	/* A percentage only makes sense over a non-zero elapsed time. */
+	if (!dt)
+		return 0;
+
+	return max(render_busy, media_busy) * 100 / dt;
+}
+
+
+/*
+ * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
+ * busy-ness calculated from C0 counters of render & media power wells
+ * @dev_priv: DRM device private
+ *
+ * Expects rps.hw_lock to be held (warns otherwise). Counts EI interrupts;
+ * the up residency is evaluated on every interrupt except the one that
+ * completes a run of VLV_INT_COUNT_FOR_DOWN_EI, on which the down residency
+ * is evaluated instead.
+ *
+ * Returns the frequency to switch to, which is the current frequency when
+ * neither threshold is crossed or when the EI snapshots were only seeded.
+ */
+static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+{
+	u32 residency_C0_up = 0, residency_C0_down = 0;
+	/*
+	 * Signed on purpose: the step is meant to go negative (see the
+	 * "adj < 0" / "adj = -1" handling below). With an unsigned 8-bit type
+	 * that test could never be true and a negative last_adj would be
+	 * treated as a large positive step.
+	 */
+	int cur_freq, new_delay, adj;
+	int min_freq, max_freq, rpe_freq;
+
+	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+	dev_priv->rps.ei_interrupt_count++;
+
+	/* First interrupt: seed both EI snapshots, nothing to compare yet */
+	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
+		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
+		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
+		return dev_priv->rps.cur_freq;
+	}
+
+	/*
+	 * To down throttle, C0 residency should be less than down threshold
+	 * for continuous EI intervals. So calculate down EI counters
+	 * once in VLV_INT_COUNT_FOR_DOWN_EI
+	 */
+	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
+		dev_priv->rps.ei_interrupt_count = 0;
+
+		residency_C0_down = vlv_c0_residency(dev_priv,
+						     &dev_priv->rps_down_ei);
+	} else {
+		residency_C0_up = vlv_c0_residency(dev_priv,
+						   &dev_priv->rps_up_ei);
+	}
+
+	/* Work on signed copies so mixed-sign comparisons behave */
+	cur_freq = dev_priv->rps.cur_freq;
+	min_freq = dev_priv->rps.min_freq_softlimit;
+	max_freq = dev_priv->rps.max_freq_softlimit;
+	rpe_freq = dev_priv->rps.efficient_freq;
+
+	new_delay = cur_freq;
+	adj = dev_priv->rps.last_adj;
+
+	/* C0 residency is greater than UP threshold. Increase Frequency */
+	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
+		/* Accelerate while we keep going up, else restart at +1 */
+		if (adj > 0)
+			adj *= 2;
+		else
+			adj = 1;
+
+		if (cur_freq < max_freq) {
+			new_delay = cur_freq + adj;
+			/* Do not let an accelerated step pass the limit */
+			if (new_delay > max_freq)
+				new_delay = max_freq;
+		}
+
+		/*
+		 * For better performance, jump directly
+		 * to RPe if we're below it.
+		 */
+		if (new_delay < rpe_freq)
+			new_delay = rpe_freq;
+
+	} else if (!dev_priv->rps.ei_interrupt_count &&
+		   (residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
+		/* Accelerate while we keep going down, else restart at -1 */
+		if (adj < 0)
+			adj *= 2;
+		else
+			adj = -1;
+
+		/*
+		 * This means, C0 residency is less than down threshold over
+		 * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
+		 */
+		if (cur_freq > min_freq) {
+			new_delay = cur_freq + adj;
+			/* Do not let an accelerated step pass the limit */
+			if (new_delay < min_freq)
+				new_delay = min_freq;
+		}
+	}
+
+	return new_delay;
+}
+
+
static void gen6_pm_rps_work(struct work_struct *work)
{
struct drm_i915_private *dev_priv =
@@ -1320,6 +1445,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
else
new_delay = dev_priv->rps.min_freq_softlimit;
adj = 0;
+ } else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+ new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
if (adj < 0)
adj *= 2;
@@ -4511,7 +4638,11 @@ void intel_irq_init(struct drm_device *dev)
INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
/* Let's track the enabled rps events */
- dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
+ if (IS_VALLEYVIEW(dev))
+ /* WaGsvRC0ResidenncyMethod:VLV */
+ dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
+ else
+ dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
setup_timer(&dev_priv->gpu_error.hangcheck_timer,
i915_hangcheck_elapsed,