drm/i915: Improved w/a for rps on Baytrail

Rewrite commit 31685c258e0b0ad6aa486c5ec001382cf8a64212 Author: Deepak S <deepak.s@linux.intel.com> Date: Thu Jul 3 17:33:01 2014 -0400 drm/i915/vlv: WA for Turbo and RC6 to work together. Other than code clarity, the major improvement is to disable the extra interrupts generated when idle. However, the reclocking remains rather slow under the new manual regime, in particular it fails to downclock as quickly as desired. The second major improvement is that for certain workloads, like games, we need to combine render+media activity counters as the work of displaying the frame is split across the engines and both need to be taken into account when deciding the global GPU frequency as memory cycles are shared. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Deepak S <deepak.s@linux.intel.com> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Deepak S<deepak.s@linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
author: Chris Wilson <chris@chris-wilson.co.uk> 2015-03-18 12:48:22 +0300
committer: Daniel Vetter <daniel.vetter@ffwll.ch> 2015-03-20 13:48:14 +0300
commit: 43cf3bf084ba097463d67e756ff821505bdaa69d (patch)
tree: 54529985290904a480f7830763f632db9e9b7d6e /drivers/gpu/drm/i915/i915_irq.c
parent: aed242ff7ebb697e4dff912bd4dc7ec7192f7581 (diff)
download: linux-43cf3bf084ba097463d67e756ff821505bdaa69d.tar.xz
1 files changed, 55 insertions, 100 deletions
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 49ad5fb82ace..8d8d33d068dd 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -997,129 +997,84 @@ static void notify_ring(struct drm_device *dev,
 	wake_up_all(&ring->irq_queue);
 }
 
-static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
-			    struct intel_rps_ei *rps_ei)
+static void vlv_c0_read(struct drm_i915_private *dev_priv,
+			struct intel_rps_ei *ei)
 {
-	u32 cz_ts, cz_freq_khz;
-	u32 render_count, media_count;
-	u32 elapsed_render, elapsed_media, elapsed_time;
-	u32 residency = 0;
-
-	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
-	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
-
-	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
-	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
-
-	if (rps_ei->cz_clock == 0) {
-		rps_ei->cz_clock = cz_ts;
-		rps_ei->render_c0 = render_count;
-		rps_ei->media_c0 = media_count;
-
-		return dev_priv->rps.cur_freq;
-	}
-
-	elapsed_time = cz_ts - rps_ei->cz_clock;
-	rps_ei->cz_clock = cz_ts;
+	ei->cz_clock = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+	ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
+	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
+}
 
-	elapsed_render = render_count - rps_ei->render_c0;
-	rps_ei->render_c0 = render_count;
+static bool vlv_c0_above(struct drm_i915_private *dev_priv,
+			 const struct intel_rps_ei *old,
+			 const struct intel_rps_ei *now,
+			 int threshold)
+{
+	u64 time, c0;
 
-	elapsed_media = media_count - rps_ei->media_c0;
-	rps_ei->media_c0 = media_count;
+	if (old->cz_clock == 0)
+		return false;
 
-	/* Convert all the counters into common unit of milli sec */
-	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
-	elapsed_render /=  cz_freq_khz;
-	elapsed_media /= cz_freq_khz;
+	time = now->cz_clock - old->cz_clock;
+	time *= threshold * dev_priv->mem_freq;
 
-	/*
-	 * Calculate overall C0 residency percentage
-	 * only if elapsed time is non zero
+	/* Workload can be split between render + media, e.g. SwapBuffers
+	 * being blitted in X after being rendered in mesa. To account for
+	 * this we need to combine both engines into our activity counter.
 	 */
-	if (elapsed_time) {
-		residency =
-			((max(elapsed_render, elapsed_media) * 100)
-				/ elapsed_time);
-	}
+	c0 = now->render_c0 - old->render_c0;
+	c0 += now->media_c0 - old->media_c0;
+	c0 *= 100 * VLV_CZ_CLOCK_TO_MILLI_SEC * 4 / 1000;
 
-	return residency;
+	return c0 >= time;
 }
 
-/**
- * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
- * busy-ness calculated from C0 counters of render & media power wells
- * @dev_priv: DRM device private
- *
- */
-static int vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
 {
-	u32 residency_C0_up = 0, residency_C0_down = 0;
-	int new_delay, adj;
-
-	dev_priv->rps.ei_interrupt_count++;
-
-	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+	vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
+	dev_priv->rps.up_ei = dev_priv->rps.down_ei;
+	dev_priv->rps.ei_interrupt_count = 0;
+}
 
+static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+	struct intel_rps_ei now;
+	u32 events = 0;
 
-	if (dev_priv->rps.up_ei.cz_clock == 0) {
-		vlv_c0_residency(dev_priv, &dev_priv->rps.up_ei);
-		vlv_c0_residency(dev_priv, &dev_priv->rps.down_ei);
-		return dev_priv->rps.cur_freq;
-	}
+	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+		return 0;
 
+	vlv_c0_read(dev_priv, &now);
+	if (now.cz_clock == 0)
+		return 0;
 
 	/*
 	 * To down throttle, C0 residency should be less than down threshold
 	 * for continous EI intervals. So calculate down EI counters
 	 * once in VLV_INT_COUNT_FOR_DOWN_EI
 	 */
-	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
-
+	if (++dev_priv->rps.ei_interrupt_count >= VLV_INT_COUNT_FOR_DOWN_EI) {
+		pm_iir |= GEN6_PM_RP_DOWN_EI_EXPIRED;
 		dev_priv->rps.ei_interrupt_count = 0;
-
-		residency_C0_down = vlv_c0_residency(dev_priv,
-						     &dev_priv->rps.down_ei);
-	} else {
-		residency_C0_up = vlv_c0_residency(dev_priv,
-						   &dev_priv->rps.up_ei);
 	}
 
-	new_delay = dev_priv->rps.cur_freq;
-
-	adj = dev_priv->rps.last_adj;
-	/* C0 residency is greater than UP threshold. Increase Frequency */
-	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
-		if (adj > 0)
-			adj *= 2;
-		else
-			adj = 1;
-
-		if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
-			new_delay = dev_priv->rps.cur_freq + adj;
-
-		/*
-		 * For better performance, jump directly
-		 * to RPe if we're below it.
-		 */
-		if (new_delay < dev_priv->rps.efficient_freq)
-			new_delay = dev_priv->rps.efficient_freq;
+	if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
+		if (!vlv_c0_above(dev_priv,
+				  &dev_priv->rps.down_ei, &now,
+				  VLV_RP_DOWN_EI_THRESHOLD))
+			events |= GEN6_PM_RP_DOWN_THRESHOLD;
+		dev_priv->rps.down_ei = now;
+	}
 
-	} else if (!dev_priv->rps.ei_interrupt_count &&
-			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
-		if (adj < 0)
-			adj *= 2;
-		else
-			adj = -1;
-		/*
-		 * This means, C0 residency is less than down threshold over
-		 * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
-		 */
-		if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit)
-			new_delay = dev_priv->rps.cur_freq + adj;
+	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+		if (vlv_c0_above(dev_priv,
+				 &dev_priv->rps.up_ei, &now,
+				 VLV_RP_UP_EI_THRESHOLD))
+			events |= GEN6_PM_RP_UP_THRESHOLD;
+		dev_priv->rps.up_ei = now;
 	}
 
-	return new_delay;
+	return events;
 }
 
 static void gen6_pm_rps_work(struct work_struct *work)
@@ -1149,6 +1104,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
 
 	mutex_lock(&dev_priv->rps.hw_lock);
 
+	pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
+
 	adj = dev_priv->rps.last_adj;
 	if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
 		if (adj > 0)
@@ -1171,8 +1128,6 @@ static void gen6_pm_rps_work(struct work_struct *work)
 		else
 			new_delay = dev_priv->rps.min_freq_softlimit;
 		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
-		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
 		if (adj < 0)
 			adj *= 2;
author	Chris Wilson <chris@chris-wilson.co.uk>	2015-03-18 12:48:22 +0300
committer	Daniel Vetter <daniel.vetter@ffwll.ch>	2015-03-20 13:48:14 +0300
commit	43cf3bf084ba097463d67e756ff821505bdaa69d (patch)
tree	54529985290904a480f7830763f632db9e9b7d6e /drivers/gpu/drm/i915/i915_irq.c
parent	aed242ff7ebb697e4dff912bd4dc7ec7192f7581 (diff)
download	linux-43cf3bf084ba097463d67e756ff821505bdaa69d.tar.xz