author     Changbin Du <changbin.du@intel.com>       2017-06-23 10:45:31 +0300
committer  Zhenyu Wang <zhenyuw@linux.intel.com>     2017-08-10 05:26:05 +0300
commit     f846c8de64ced9965e04cc9ae1922036175e395b (patch)
tree       848bf95e8bd279c2ee88159988bf8e6bc039f5b2 /drivers/gpu/drm/i915/gvt/render.c
parent     4cf196eb1ecb8b74c05fcd89266a70506ed4c5a6 (diff)
drm/i915/gvt: Make ring switch 2x faster by removing unnecessary POSTING_READs
There is a POSTING_READ alongside each MMIO write op, but these reads are
not actually necessary. They only add latency, because a PCIe read is a
non-posted transaction and is therefore very slow.
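As a rough, illustrative sketch only (mmio_list, NUM and value() are
placeholder names, not the actual render.c structures), the old pattern
looked like this:

	/* Old pattern: each write is immediately flushed by a posting
	 * read, so every iteration stalls on a full PCIe round trip.
	 */
	for (i = 0; i < NUM; i++) {
		I915_WRITE(mmio_list[i].reg, value(i));
		POSTING_READ(mmio_list[i].reg);	/* non-posted read: slow */
	}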
For a PCIe device, the strong ordering rules for memory transactions are:
 o MMIO writes are FIFO ordered: a posted request cannot pass a previous
   posted request.
 o An MMIO read will not complete ahead of a previous write.
Intel graphics does not support relaxed ordering (RO), so the above rules
apply. In our case a single POSTING_READ at the end is enough. This removes
half of the MMIO read ops, and the average ring switch performance is
nearly doubled.
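Sketched with the same placeholder names, the optimized pattern posts all
the writes back to back and flushes them with a single trailing read,
which by the ordering rules above cannot complete before any earlier write:

	/* New pattern: writes stay posted (FIFO ordered); one read at
	 * the end is enough to guarantee they all reached the device.
	 */
	i915_reg_t last_reg = _MMIO(0);

	for (i = 0; i < NUM; i++) {
		I915_WRITE(mmio_list[i].reg, value(i));
		last_reg = mmio_list[i].reg;
	}

	if (i915_mmio_reg_offset(last_reg))	/* skip if nothing was written */
		POSTING_READ(last_reg);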
            Before      After
  cycles    ~970000     ~550000
Signed-off-by: Changbin Du <changbin.du@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/gvt/render.c')
-rw-r--r--   drivers/gpu/drm/i915/gvt/render.c   19
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c
index 504e57c3bc23..5a08bcd436a1 100644
--- a/drivers/gpu/drm/i915/gvt/render.c
+++ b/drivers/gpu/drm/i915/gvt/render.c
@@ -209,7 +209,6 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
 	for (i = 0; i < 64; i++) {
 		gen9_render_mocs[ring_id][i] = I915_READ(offset);
 		I915_WRITE(offset, vgpu_vreg(vgpu, offset));
-		POSTING_READ(offset);
 		offset.reg += 4;
 	}
 
@@ -218,7 +217,6 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
 		for (i = 0; i < 32; i++) {
 			gen9_render_mocs_L3[i] = I915_READ(l3_offset);
 			I915_WRITE(l3_offset, vgpu_vreg(vgpu, l3_offset));
-			POSTING_READ(l3_offset);
 			l3_offset.reg += 4;
 		}
 	}
@@ -244,7 +242,6 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
 	for (i = 0; i < 64; i++) {
 		vgpu_vreg(vgpu, offset) = I915_READ(offset);
 		I915_WRITE(offset, gen9_render_mocs[ring_id][i]);
-		POSTING_READ(offset);
 		offset.reg += 4;
 	}
 
@@ -253,7 +250,6 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
 		for (i = 0; i < 32; i++) {
 			vgpu_vreg(vgpu, l3_offset) = I915_READ(l3_offset);
 			I915_WRITE(l3_offset, gen9_render_mocs_L3[i]);
-			POSTING_READ(l3_offset);
 			l3_offset.reg += 4;
 		}
 	}
@@ -272,6 +268,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
 	u32 ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL];
 	u32 inhibit_mask =
 		_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+	i915_reg_t last_reg = _MMIO(0);
 
 	if (IS_SKYLAKE(vgpu->gvt->dev_priv)
 		|| IS_KABYLAKE(vgpu->gvt->dev_priv)) {
@@ -305,12 +302,17 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
 			v = vgpu_vreg(vgpu, mmio->reg);
 
 		I915_WRITE(mmio->reg, v);
-		POSTING_READ(mmio->reg);
+		last_reg = mmio->reg;
 
 		trace_render_mmio(vgpu->id, "load",
 				  i915_mmio_reg_offset(mmio->reg),
 				  mmio->value, v);
 	}
+
+	/* Make sure the swiched MMIOs has taken effect. */
+	if (likely(INTEL_GVT_MMIO_OFFSET(last_reg)))
+		POSTING_READ(last_reg);
+
 	handle_tlb_pending_event(vgpu, ring_id);
 }
 
@@ -319,6 +321,7 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct render_mmio *mmio;
+	i915_reg_t last_reg = _MMIO(0);
 	u32 v;
 	int i, array_size;
 
@@ -347,12 +350,16 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
 			continue;
 
 		I915_WRITE(mmio->reg, v);
-		POSTING_READ(mmio->reg);
+		last_reg = mmio->reg;
 
 		trace_render_mmio(vgpu->id, "restore",
 				  i915_mmio_reg_offset(mmio->reg),
 				  mmio->value, v);
 	}
+
+	/* Make sure the swiched MMIOs has taken effect. */
+	if (likely(INTEL_GVT_MMIO_OFFSET(last_reg)))
+		POSTING_READ(last_reg);
 }
 
 /**