diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2017-09-13 16:35:34 +0300 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2017-09-13 19:24:32 +0300 |
commit | 6d2cb5aa383bf020ee95e33d9d107975f340ae1c (patch) | |
tree | 5812dcf736a75ebcf72859de19ca1223fbeede40 /drivers/gpu/drm/i915/intel_lrc.c | |
parent | 34a04e5e46cb984a6bab336484fa856574db332f (diff) | |
download | linux-6d2cb5aa383bf020ee95e33d9d107975f340ae1c.tar.xz |
drm/i915/execlists: Read the context-status buffer from the HWSP
The engine provides a mirror of the CSB in the HWSP. If we use the
cacheable reads from the HWSP, we can shave off a few mmio reads per
context-switch interrupt (which are quite frequent!). Just removing a
couple of mmio is not enough to actually reduce any latency, but a small
reduction in overall cpu usage.
Much appreciation for Ben dropping the bombshell that the CSB was in the
HWSP and for Michel in digging out the details.
v2: Don't be lazy, add the defines for the indices.
v3: Include the HWSP in debugfs/i915_engine_info
v4: Check for GVT-g, it currently depends on intercepting CSB mmio
v5: Fixup GVT-g mmio path
v6: Disable HWSP if VT-d is active as the iommu adds unpredictable
memory latency. (Mika)
v7: Also markup the CSB read with READ_ONCE() as it may still be an mmio
read and we want to stop the compiler from issuing a later (v.slow) reload.
Suggested-by: Ben Widawsky <benjamin.widawsky@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Acked-by: Michel Thierry <michel.thierry@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170913133534.26927-1-chris@chris-wilson.co.uk
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_lrc.c | 35 |
1 files changed, 30 insertions, 5 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8886e3b60e82..6d7bbb9e769a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -541,10 +541,17 @@ static void intel_lrc_irq_handler(unsigned long data) while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { u32 __iomem *csb_mmio = dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); - u32 __iomem *buf = - dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)); + /* The HWSP contains a (cacheable) mirror of the CSB */ + const u32 *buf = + &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; unsigned int head, tail; + /* However GVT emulation depends upon intercepting CSB mmio */ + if (unlikely(engine->csb_use_mmio)) { + buf = (u32 * __force) + (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); + } + /* The write will be ordered by the uncached read (itself * a memory barrier), so we do not need another in the form * of a locked instruction. The race between the interrupt @@ -584,13 +591,12 @@ static void intel_lrc_irq_handler(unsigned long data) * status notifier. */ - status = readl(buf + 2 * head); + status = READ_ONCE(buf[2 * head]); /* maybe mmio! */ if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; /* Check the context/desc id for this event matches */ - GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) != - port->context_id); + GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); rq = port_unpack(port, &count); GEM_BUG_ON(count == 0); @@ -1720,6 +1726,23 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } +static bool irq_handler_force_mmio(struct drm_i915_private *i915) +{ + /* GVT emulation depends upon intercepting CSB mmio */ + if (intel_vgpu_active(i915)) + return true; + + /* + * IOMMU adds unpredictable latency causing the CSB write (from the + * GPU into the HWSP) to only be visible some time after the interrupt + * (missed breadcrumb syndrome). + */ + if (intel_vtd_active()) + return true; + + return false; +} + static void logical_ring_setup(struct intel_engine_cs *engine) { @@ -1731,6 +1754,8 @@ logical_ring_setup(struct intel_engine_cs *engine) /* Intentionally left blank. */ engine->buffer = NULL; + engine->csb_use_mmio = irq_handler_force_mmio(dev_priv); + fw_domains = intel_uncore_forcewake_for_reg(dev_priv, RING_ELSP(engine), FW_REG_WRITE); |