author     Changbin Du <changbin.du@intel.com>       2017-06-23 10:45:31 +0300
committer  Zhenyu Wang <zhenyuw@linux.intel.com>     2017-08-10 05:26:05 +0300
commit     f846c8de64ced9965e04cc9ae1922036175e395b (patch)
tree       848bf95e8bd279c2ee88159988bf8e6bc039f5b2 /drivers/gpu/drm/i915/gvt/render.c
parent     4cf196eb1ecb8b74c05fcd89266a70506ed4c5a6 (diff)
drm/i915/gvt: Make ring switch 2x faster by removing unnecessary POSTING_READs
There is a POSTING_READ alongside each MMIO write op, but these reads are
not actually necessary. They only add latency, because a PCIe read is a
non-posted transaction and is therefore very slow.
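As a rough, illustrative sketch only (mmio_list, NUM and value() are
placeholder names, not the actual render.c structures), the old pattern
looked like this:

	/* Old pattern: each write is immediately flushed by a posting
	 * read, so every iteration stalls on a full PCIe round trip.
	 */
	for (i = 0; i < NUM; i++) {
		I915_WRITE(mmio_list[i].reg, value(i));
		POSTING_READ(mmio_list[i].reg);	/* non-posted read: slow */
	}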
For a PCIe device, the strong ordering rules for memory transactions are:
 o MMIO writes are FIFO ordered: a posted request cannot pass a previous
   posted request.
 o An MMIO read will not complete ahead of a previous write.
Intel graphics does not support relaxed ordering (RO), so the above rules
apply. In our case a single POSTING_READ at the end is enough. This removes
half of the MMIO read ops, and the average ring switch performance is
nearly doubled.
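Sketched with the same placeholder names, the optimized pattern posts all
the writes back to back and flushes them with a single trailing read,
which by the ordering rules above cannot complete before any earlier write:

	/* New pattern: writes stay posted (FIFO ordered); one read at
	 * the end is enough to guarantee they all reached the device.
	 */
	i915_reg_t last_reg = _MMIO(0);

	for (i = 0; i < NUM; i++) {
		I915_WRITE(mmio_list[i].reg, value(i));
		last_reg = mmio_list[i].reg;
	}

	if (i915_mmio_reg_offset(last_reg))	/* skip if nothing was written */
		POSTING_READ(last_reg);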
            Before      After
  cycles    ~970000     ~550000
Signed-off-by: Changbin Du <changbin.du@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/gvt/render.c')
-rw-r--r--   drivers/gpu/drm/i915/gvt/render.c   19
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c
index 504e57c3bc23..5a08bcd436a1 100644
--- a/drivers/gpu/drm/i915/gvt/render.c
+++ b/drivers/gpu/drm/i915/gvt/render.c
@@ -209,7 +209,6 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
 	for (i = 0; i < 64; i++) {
 		gen9_render_mocs[ring_id][i] = I915_READ(offset);
 		I915_WRITE(offset, vgpu_vreg(vgpu, offset));
-		POSTING_READ(offset);
 		offset.reg += 4;
 	}
 
@@ -218,7 +217,6 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
 		for (i = 0; i < 32; i++) {
 			gen9_render_mocs_L3[i] = I915_READ(l3_offset);
 			I915_WRITE(l3_offset, vgpu_vreg(vgpu, l3_offset));
-			POSTING_READ(l3_offset);
 			l3_offset.reg += 4;
 		}
 	}
@@ -244,7 +242,6 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
 	for (i = 0; i < 64; i++) {
 		vgpu_vreg(vgpu, offset) = I915_READ(offset);
 		I915_WRITE(offset, gen9_render_mocs[ring_id][i]);
-		POSTING_READ(offset);
 		offset.reg += 4;
 	}
 
@@ -253,7 +250,6 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
 		for (i = 0; i < 32; i++) {
 			vgpu_vreg(vgpu, l3_offset) = I915_READ(l3_offset);
 			I915_WRITE(l3_offset, gen9_render_mocs_L3[i]);
-			POSTING_READ(l3_offset);
 			l3_offset.reg += 4;
 		}
 	}
@@ -272,6 +268,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
 	u32 ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL];
 	u32 inhibit_mask =
 		_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+	i915_reg_t last_reg = _MMIO(0);
 
 	if (IS_SKYLAKE(vgpu->gvt->dev_priv)
 		|| IS_KABYLAKE(vgpu->gvt->dev_priv)) {
@@ -305,12 +302,17 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
 			v = vgpu_vreg(vgpu, mmio->reg);
 
 		I915_WRITE(mmio->reg, v);
-		POSTING_READ(mmio->reg);
+		last_reg = mmio->reg;
 
 		trace_render_mmio(vgpu->id, "load",
 				  i915_mmio_reg_offset(mmio->reg),
 				  mmio->value, v);
 	}
+
+	/* Make sure the swiched MMIOs has taken effect. */
+	if (likely(INTEL_GVT_MMIO_OFFSET(last_reg)))
+		POSTING_READ(last_reg);
+
 	handle_tlb_pending_event(vgpu, ring_id);
 }
 
@@ -319,6 +321,7 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct render_mmio *mmio;
+	i915_reg_t last_reg = _MMIO(0);
 	u32 v;
 	int i, array_size;
 
@@ -347,12 +350,16 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
 			continue;
 
 		I915_WRITE(mmio->reg, v);
-		POSTING_READ(mmio->reg);
+		last_reg = mmio->reg;
 
 		trace_render_mmio(vgpu->id, "restore",
 				  i915_mmio_reg_offset(mmio->reg),
 				  mmio->value, v);
 	}
+
+	/* Make sure the swiched MMIOs has taken effect. */
+	if (likely(INTEL_GVT_MMIO_OFFSET(last_reg)))
+		POSTING_READ(last_reg);
 }
 
 /**