summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/intel_engine_cs.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c')
-rw-r--r--drivers/gpu/drm/i915/intel_engine_cs.c288
1 files changed, 222 insertions, 66 deletions
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 1590375f31cb..2d1952849d69 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -25,7 +25,6 @@
#include <drm/drm_print.h>
#include "i915_drv.h"
-#include "i915_vgpu.h"
#include "intel_ringbuffer.h"
#include "intel_lrc.h"
@@ -230,6 +229,7 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
break;
default:
MISSING_CASE(class);
+ /* fall through */
case VIDEO_DECODE_CLASS:
case VIDEO_ENHANCEMENT_CLASS:
case COPY_ENGINE_CLASS:
@@ -302,6 +302,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
engine->class);
if (WARN_ON(engine->context_size > BIT(20)))
engine->context_size = 0;
+ if (engine->context_size)
+ DRIVER_CAPS(dev_priv)->has_logical_contexts = true;
/* Nothing to do here, execute in order of dependencies */
engine->schedule = NULL;
@@ -456,28 +458,16 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
i915_gem_batch_pool_init(&engine->batch_pool, engine);
}
-static bool csb_force_mmio(struct drm_i915_private *i915)
-{
- /* Older GVT emulation depends upon intercepting CSB mmio */
- if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
- return true;
-
- return false;
-}
-
static void intel_engine_init_execlist(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- execlists->csb_use_mmio = csb_force_mmio(engine->i915);
-
execlists->port_mask = 1;
BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
execlists->queue_priority = INT_MIN;
- execlists->queue = RB_ROOT;
- execlists->first = NULL;
+ execlists->queue = RB_ROOT_CACHED;
}
/**
@@ -492,6 +482,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
void intel_engine_setup_common(struct intel_engine_cs *engine)
{
i915_timeline_init(engine->i915, &engine->timeline, engine->name);
+ lockdep_set_subclass(&engine->timeline.lock, TIMELINE_ENGINE);
intel_engine_init_execlist(engine);
intel_engine_init_hangcheck(engine);
@@ -499,7 +490,8 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init_cmd_parser(engine);
}
-int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
+int intel_engine_create_scratch(struct intel_engine_cs *engine,
+ unsigned int size)
{
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
@@ -515,7 +507,7 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
return PTR_ERR(obj);
}
- vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
+ vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err_unref;
@@ -533,7 +525,7 @@ err_unref:
return ret;
}
-static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
+void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
{
i915_vma_unpin_and_release(&engine->scratch);
}
@@ -585,7 +577,7 @@ static int init_status_page(struct intel_engine_cs *engine)
if (ret)
goto err;
- vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
+ vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err;
@@ -645,6 +637,12 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
return 0;
}
+static void __intel_context_unpin(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine)
+{
+ intel_context_unpin(to_intel_context(ctx, engine));
+}
+
/**
* intel_engines_init_common - initialize cengine state which might require hw access
* @engine: Engine to initialize.
@@ -658,7 +656,8 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
*/
int intel_engine_init_common(struct intel_engine_cs *engine)
{
- struct intel_ring *ring;
+ struct drm_i915_private *i915 = engine->i915;
+ struct intel_context *ce;
int ret;
engine->set_default_submission(engine);
@@ -670,18 +669,18 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
* be available. To avoid this we always pin the default
* context.
*/
- ring = intel_context_pin(engine->i915->kernel_context, engine);
- if (IS_ERR(ring))
- return PTR_ERR(ring);
+ ce = intel_context_pin(i915->kernel_context, engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
/*
* Similarly the preempt context must always be available so that
* we can interrupt the engine at any time.
*/
- if (engine->i915->preempt_context) {
- ring = intel_context_pin(engine->i915->preempt_context, engine);
- if (IS_ERR(ring)) {
- ret = PTR_ERR(ring);
+ if (i915->preempt_context) {
+ ce = intel_context_pin(i915->preempt_context, engine);
+ if (IS_ERR(ce)) {
+ ret = PTR_ERR(ce);
goto err_unpin_kernel;
}
}
@@ -690,7 +689,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
if (ret)
goto err_unpin_preempt;
- if (HWS_NEEDS_PHYSICAL(engine->i915))
+ if (HWS_NEEDS_PHYSICAL(i915))
ret = init_phys_status_page(engine);
else
ret = init_status_page(engine);
@@ -702,10 +701,11 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
err_breadcrumbs:
intel_engine_fini_breadcrumbs(engine);
err_unpin_preempt:
- if (engine->i915->preempt_context)
- intel_context_unpin(engine->i915->preempt_context, engine);
+ if (i915->preempt_context)
+ __intel_context_unpin(i915->preempt_context, engine);
+
err_unpin_kernel:
- intel_context_unpin(engine->i915->kernel_context, engine);
+ __intel_context_unpin(i915->kernel_context, engine);
return ret;
}
@@ -718,6 +718,8 @@ err_unpin_kernel:
*/
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
+ struct drm_i915_private *i915 = engine->i915;
+
intel_engine_cleanup_scratch(engine);
if (HWS_NEEDS_PHYSICAL(engine->i915))
@@ -732,9 +734,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
if (engine->default_state)
i915_gem_object_put(engine->default_state);
- if (engine->i915->preempt_context)
- intel_context_unpin(engine->i915->preempt_context, engine);
- intel_context_unpin(engine->i915->kernel_context, engine);
+ if (i915->preempt_context)
+ __intel_context_unpin(i915->preempt_context, engine);
+ __intel_context_unpin(i915->kernel_context, engine);
i915_timeline_fini(&engine->timeline);
}
@@ -769,6 +771,35 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
return bbaddr;
}
+int intel_engine_stop_cs(struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *dev_priv = engine->i915;
+ const u32 base = engine->mmio_base;
+ const i915_reg_t mode = RING_MI_MODE(base);
+ int err;
+
+ if (INTEL_GEN(dev_priv) < 3)
+ return -ENODEV;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING));
+
+ err = 0;
+ if (__intel_wait_for_register_fw(dev_priv,
+ mode, MODE_IDLE, MODE_IDLE,
+ 1000, 0,
+ NULL)) {
+ GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
+ err = -ETIMEDOUT;
+ }
+
+ /* A final mmio read to let GPU writes be hopefully flushed to memory */
+ POSTING_READ_FW(mode);
+
+ return err;
+}
+
const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
{
switch (type) {
@@ -780,12 +811,32 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
}
}
+u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv)
+{
+ const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu);
+ u32 mcr_s_ss_select;
+ u32 slice = fls(sseu->slice_mask);
+ u32 subslice = fls(sseu->subslice_mask[slice]);
+
+ if (INTEL_GEN(dev_priv) == 10)
+ mcr_s_ss_select = GEN8_MCR_SLICE(slice) |
+ GEN8_MCR_SUBSLICE(subslice);
+ else if (INTEL_GEN(dev_priv) >= 11)
+ mcr_s_ss_select = GEN11_MCR_SLICE(slice) |
+ GEN11_MCR_SUBSLICE(subslice);
+ else
+ mcr_s_ss_select = 0;
+
+ return mcr_s_ss_select;
+}
+
static inline uint32_t
read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
int subslice, i915_reg_t reg)
{
uint32_t mcr_slice_subslice_mask;
uint32_t mcr_slice_subslice_select;
+ uint32_t default_mcr_s_ss_select;
uint32_t mcr;
uint32_t ret;
enum forcewake_domains fw_domains;
@@ -802,6 +853,8 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
GEN8_MCR_SUBSLICE(subslice);
}
+ default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(dev_priv);
+
fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg,
FW_REG_READ);
fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
@@ -812,11 +865,10 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
- /*
- * The HW expects the slice and sublice selectors to be reset to 0
- * after reading out the registers.
- */
- WARN_ON_ONCE(mcr & mcr_slice_subslice_mask);
+
+ WARN_ON_ONCE((mcr & mcr_slice_subslice_mask) !=
+ default_mcr_s_ss_select);
+
mcr &= ~mcr_slice_subslice_mask;
mcr |= mcr_slice_subslice_select;
I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
@@ -824,6 +876,8 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
ret = I915_READ_FW(reg);
mcr &= ~mcr_slice_subslice_mask;
+ mcr |= default_mcr_s_ss_select;
+
I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
@@ -934,11 +988,24 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
return true;
/* Waiting to drain ELSP? */
- if (READ_ONCE(engine->execlists.active))
- return false;
+ if (READ_ONCE(engine->execlists.active)) {
+ struct tasklet_struct *t = &engine->execlists.tasklet;
+
+ local_bh_disable();
+ if (tasklet_trylock(t)) {
+ /* Must wait for any GPU reset in progress. */
+ if (__tasklet_is_enabled(t))
+ t->func(t->data);
+ tasklet_unlock(t);
+ }
+ local_bh_enable();
- /* ELSP is empty, but there are ready requests? */
- if (READ_ONCE(engine->execlists.first))
+ if (READ_ONCE(engine->execlists.active))
+ return false;
+ }
+
+ /* ELSP is empty, but there are ready requests? E.g. after reset */
+ if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
return false;
/* Ring stopped? */
@@ -978,8 +1045,8 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
*/
bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
{
- const struct i915_gem_context * const kernel_context =
- engine->i915->kernel_context;
+ const struct intel_context *kernel_context =
+ to_intel_context(engine->i915->kernel_context, engine);
struct i915_request *rq;
lockdep_assert_held(&engine->i915->drm.struct_mutex);
@@ -991,7 +1058,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
*/
rq = __i915_gem_active_peek(&engine->timeline.last_request);
if (rq)
- return rq->ctx == kernel_context;
+ return rq->hw_context == kernel_context;
else
return engine->last_retired_context == kernel_context;
}
@@ -1006,6 +1073,28 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
}
/**
+ * intel_engines_sanitize: called after the GPU has lost power
+ * @i915: the i915 device
+ *
+ * Anytime we reset the GPU, either with an explicit GPU reset or through a
+ * PCI power cycle, the GPU loses state and we must reset our state tracking
+ * to match. Note that calling intel_engines_sanitize() if the GPU has not
+ * been reset results in much confusion!
+ */
+void intel_engines_sanitize(struct drm_i915_private *i915)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ GEM_TRACE("\n");
+
+ for_each_engine(engine, i915, id) {
+ if (engine->reset.reset)
+ engine->reset.reset(engine, NULL);
+ }
+}
+
+/**
* intel_engines_park: called when the GT is transitioning from busy->idle
* @i915: the i915 device
*
@@ -1043,6 +1132,11 @@ void intel_engines_park(struct drm_i915_private *i915)
if (engine->park)
engine->park(engine);
+ if (engine->pinned_default_state) {
+ i915_gem_object_unpin_map(engine->default_state);
+ engine->pinned_default_state = NULL;
+ }
+
i915_gem_batch_pool_fini(&engine->batch_pool);
engine->execlists.no_priolist = false;
}
@@ -1060,6 +1154,16 @@ void intel_engines_unpark(struct drm_i915_private *i915)
enum intel_engine_id id;
for_each_engine(engine, i915, id) {
+ void *map;
+
+ /* Pin the default state for fast resets from atomic context. */
+ map = NULL;
+ if (engine->default_state)
+ map = i915_gem_object_pin_map(engine->default_state,
+ I915_MAP_WB);
+ if (!IS_ERR_OR_NULL(map))
+ engine->pinned_default_state = map;
+
if (engine->unpark)
engine->unpark(engine);
@@ -1067,6 +1171,26 @@ void intel_engines_unpark(struct drm_i915_private *i915)
}
}
+/**
+ * intel_engine_lost_context: called when the GPU is reset into unknown state
+ * @engine: the engine
+ *
+ * We have either reset the GPU or otherwise about to lose state tracking of
+ * the current GPU logical state (e.g. suspend). On next use, it is therefore
+ * imperative that we make no presumptions about the current state and load
+ * from scratch.
+ */
+void intel_engine_lost_context(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+
+ lockdep_assert_held(&engine->i915->drm.struct_mutex);
+
+ ce = fetch_and_zero(&engine->last_retired_context);
+ if (ce)
+ intel_context_unpin(ce);
+}
+
bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
{
switch (INTEL_GEN(engine->i915)) {
@@ -1151,7 +1275,7 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len)
rowsize, sizeof(u32),
line, sizeof(line),
false) >= sizeof(line));
- drm_printf(m, "%08zx %s\n", pos, line);
+ drm_printf(m, "[%04zx] %s\n", pos, line);
prev = buf + pos;
skip = false;
@@ -1166,6 +1290,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
&engine->execlists;
u64 addr;
+ if (engine->id == RCS && IS_GEN(dev_priv, 4, 7))
+ drm_printf(m, "\tCCID: 0x%08x\n", I915_READ(CCID));
drm_printf(m, "\tRING_START: 0x%08x\n",
I915_READ(RING_START(engine->mmio_base)));
drm_printf(m, "\tRING_HEAD: 0x%08x\n",
@@ -1232,12 +1358,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
read = GEN8_CSB_READ_PTR(ptr);
write = GEN8_CSB_WRITE_PTR(ptr);
- drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n",
+ drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], tasklet queued? %s (%s)\n",
read, execlists->csb_head,
write,
intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
- yesno(test_bit(ENGINE_IRQ_EXECLIST,
- &engine->irq_posted)),
yesno(test_bit(TASKLET_STATE_SCHED,
&engine->execlists.tasklet.state)),
enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
@@ -1287,6 +1411,39 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
}
}
+static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
+{
+ void *ring;
+ int size;
+
+ drm_printf(m,
+ "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
+ rq->head, rq->postfix, rq->tail,
+ rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
+ rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
+
+ size = rq->tail - rq->head;
+ if (rq->tail < rq->head)
+ size += rq->ring->size;
+
+ ring = kmalloc(size, GFP_ATOMIC);
+ if (ring) {
+ const void *vaddr = rq->ring->vaddr;
+ unsigned int head = rq->head;
+ unsigned int len = 0;
+
+ if (rq->tail < head) {
+ len = rq->ring->size - head;
+ memcpy(ring, vaddr + head, len);
+ head = 0;
+ }
+ memcpy(ring + len, vaddr + head, size - len);
+
+ hexdump(m, ring, size);
+ kfree(ring);
+ }
+}
+
void intel_engine_dump(struct intel_engine_cs *engine,
struct drm_printer *m,
const char *header, ...)
@@ -1296,6 +1453,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
const struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_gpu_error * const error = &engine->i915->gpu_error;
struct i915_request *rq, *last;
+ unsigned long flags;
struct rb_node *rb;
int count;
@@ -1336,11 +1494,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
rq = i915_gem_find_active_request(engine);
if (rq) {
print_request(m, rq, "\t\tactive ");
- drm_printf(m,
- "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n",
- rq->head, rq->postfix, rq->tail,
- rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
- rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
+
drm_printf(m, "\t\tring->start: 0x%08x\n",
i915_ggtt_offset(rq->ring->vma));
drm_printf(m, "\t\tring->head: 0x%08x\n",
@@ -1351,6 +1505,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
rq->ring->emit);
drm_printf(m, "\t\tring->space: 0x%08x\n",
rq->ring->space);
+
+ print_request_ring(m, rq);
}
rcu_read_unlock();
@@ -1362,7 +1518,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
drm_printf(m, "\tDevice is asleep; skipping register dump\n");
}
- spin_lock_irq(&engine->timeline.lock);
+ local_irq_save(flags);
+ spin_lock(&engine->timeline.lock);
last = NULL;
count = 0;
@@ -1384,7 +1541,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
last = NULL;
count = 0;
drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
- for (rb = execlists->first; rb; rb = rb_next(rb)) {
+ for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
struct i915_priolist *p =
rb_entry(rb, typeof(*p), node);
@@ -1404,22 +1561,21 @@ void intel_engine_dump(struct intel_engine_cs *engine,
print_request(m, last, "\t\tQ ");
}
- spin_unlock_irq(&engine->timeline.lock);
+ spin_unlock(&engine->timeline.lock);
- spin_lock_irq(&b->rb_lock);
+ spin_lock(&b->rb_lock);
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
struct intel_wait *w = rb_entry(rb, typeof(*w), node);
drm_printf(m, "\t%s [%d] waiting for %x\n",
w->tsk->comm, w->tsk->pid, w->seqno);
}
- spin_unlock_irq(&b->rb_lock);
+ spin_unlock(&b->rb_lock);
+ local_irq_restore(flags);
- drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
+ drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
engine->irq_posted,
yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
- &engine->irq_posted)),
- yesno(test_bit(ENGINE_IRQ_EXECLIST,
&engine->irq_posted)));
drm_printf(m, "HWSP:\n");
@@ -1468,8 +1624,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
if (!intel_engine_supports_stats(engine))
return -ENODEV;
- tasklet_disable(&execlists->tasklet);
- write_seqlock_irqsave(&engine->stats.lock, flags);
+ spin_lock_irqsave(&engine->timeline.lock, flags);
+ write_seqlock(&engine->stats.lock);
if (unlikely(engine->stats.enabled == ~0)) {
err = -EBUSY;
@@ -1493,8 +1649,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
}
unlock:
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
- tasklet_enable(&execlists->tasklet);
+ write_sequnlock(&engine->stats.lock);
+ spin_unlock_irqrestore(&engine->timeline.lock, flags);
return err;
}