Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c | 731
1 file changed, 455 insertions(+), 276 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7ece2f061b9e..697af5add78b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -137,6 +137,7 @@
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_gem_render_state.h"
+#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#define RING_EXECLIST_QFULL (1 << 0x2)
@@ -156,60 +157,10 @@
#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
-#define CTX_LRI_HEADER_0 0x01
-#define CTX_CONTEXT_CONTROL 0x02
-#define CTX_RING_HEAD 0x04
-#define CTX_RING_TAIL 0x06
-#define CTX_RING_BUFFER_START 0x08
-#define CTX_RING_BUFFER_CONTROL 0x0a
-#define CTX_BB_HEAD_U 0x0c
-#define CTX_BB_HEAD_L 0x0e
-#define CTX_BB_STATE 0x10
-#define CTX_SECOND_BB_HEAD_U 0x12
-#define CTX_SECOND_BB_HEAD_L 0x14
-#define CTX_SECOND_BB_STATE 0x16
-#define CTX_BB_PER_CTX_PTR 0x18
-#define CTX_RCS_INDIRECT_CTX 0x1a
-#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c
-#define CTX_LRI_HEADER_1 0x21
-#define CTX_CTX_TIMESTAMP 0x22
-#define CTX_PDP3_UDW 0x24
-#define CTX_PDP3_LDW 0x26
-#define CTX_PDP2_UDW 0x28
-#define CTX_PDP2_LDW 0x2a
-#define CTX_PDP1_UDW 0x2c
-#define CTX_PDP1_LDW 0x2e
-#define CTX_PDP0_UDW 0x30
-#define CTX_PDP0_LDW 0x32
-#define CTX_LRI_HEADER_2 0x41
-#define CTX_R_PWR_CLK_STATE 0x42
-#define CTX_GPGPU_CSR_BASE_ADDRESS 0x44
-
-#define CTX_REG(reg_state, pos, reg, val) do { \
- (reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \
- (reg_state)[(pos)+1] = (val); \
-} while (0)
-
-#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
- const u64 _addr = i915_page_dir_dma_addr((ppgtt), (n)); \
- reg_state[CTX_PDP ## n ## _UDW+1] = upper_32_bits(_addr); \
- reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
-} while (0)
-
-#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
- reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \
- reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
-} while (0)
-
-#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
-#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26
-#define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19
-
/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
#define WA_TAIL_DWORDS 2
#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
-#define PREEMPT_ID 0x1
static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
struct intel_engine_cs *engine);
@@ -218,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state,
struct intel_engine_cs *engine,
struct intel_ring *ring);
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+ return rb_entry(rb, struct i915_priolist, node);
+}
+
+static inline int rq_prio(const struct i915_request *rq)
+{
+ return rq->priotree.priority;
+}
+
+static inline bool need_preempt(const struct intel_engine_cs *engine,
+ const struct i915_request *last,
+ int prio)
+{
+ return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+}
+
/**
* intel_lr_context_descriptor_update() - calculate & cache the descriptor
* descriptor for a pinned context
@@ -236,6 +204,18 @@ static void execlists_init_reg_state(u32 *reg_state,
* bits 32-52: ctx ID, a globally unique tag
* bits 53-54: mbz, reserved for use by hardware
* bits 55-63: group ID, currently unused and set to 0
+ *
+ * Starting from Gen11, the upper dword of the descriptor has a new format:
+ *
+ * bits 32-36: reserved
+ * bits 37-47: SW context ID
+ * bits 48:53: engine instance
+ * bit 54: mbz, reserved for use by hardware
+ * bits 55-60: SW counter
+ * bits 61-63: engine class
+ *
+ * engine info, SW context ID and SW counter need to form a unique number
+ * (Context ID) per lrc.
*/
static void
intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
@@ -244,12 +224,32 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
struct intel_context *ce = &ctx->engine[engine->id];
u64 desc;
- BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<<GEN8_CTX_ID_WIDTH));
+ BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
+ BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
desc = ctx->desc_template; /* bits 0-11 */
+ GEM_BUG_ON(desc & GENMASK_ULL(63, 12));
+
desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
/* bits 12-31 */
- desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */
+ GEM_BUG_ON(desc & GENMASK_ULL(63, 32));
+
+ if (INTEL_GEN(ctx->i915) >= 11) {
+ GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
+ desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
+ /* bits 37-47 */
+
+ desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
+ /* bits 48-53 */
+
+ /* TODO: decide what to do with SW counter (bits 55-60) */
+
+ desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
+ /* bits 61-63 */
+ } else {
+ GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
+ desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */
+ }
ce->lrc_desc = desc;
}
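
As a standalone illustration of the Gen11 packing above (a sketch with simplified types; the shift values are taken from the bit positions documented in the comment, and the real constants live in intel_lrc_reg.h):

#include <stdint.h>

/* Hypothetical model of the Gen11 descriptor layout described above:
 * bits 37-47 SW context ID, bits 48-53 engine instance, bits 61-63
 * engine class, on top of the template (bits 0-11) and the GGTT
 * offset of the context image (bits 12-31).
 */
static uint64_t gen11_lrc_desc_sketch(uint64_t template, uint32_t ggtt_offset,
				      uint32_t sw_ctx_id, uint32_t instance,
				      uint32_t class)
{
	uint64_t desc = template;		/* bits 0-11 */

	desc |= ggtt_offset;			/* bits 12-31 */
	desc |= (uint64_t)sw_ctx_id << 37;	/* SW context ID */
	desc |= (uint64_t)instance << 48;	/* engine instance */
	desc |= (uint64_t)class << 61;		/* engine class */

	return desc;
}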
@@ -273,7 +273,7 @@ find_priolist:
parent = &execlists->queue.rb_node;
while (*parent) {
rb = *parent;
- p = rb_entry(rb, typeof(*p), node);
+ p = to_priolist(rb);
if (prio > p->priority) {
parent = &rb->rb_left;
} else if (prio < p->priority) {
@@ -313,10 +313,10 @@ find_priolist:
if (first)
execlists->first = &p->node;
- return ptr_pack_bits(p, first, 1);
+ return p;
}
-static void unwind_wa_tail(struct drm_i915_gem_request *rq)
+static void unwind_wa_tail(struct i915_request *rq)
{
rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
assert_ring_tail_valid(rq->ring, rq->tail);
@@ -324,7 +324,7 @@ static void unwind_wa_tail(struct drm_i915_gem_request *rq)
static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_request *rq, *rn;
+ struct i915_request *rq, *rn;
struct i915_priolist *uninitialized_var(p);
int last_prio = I915_PRIORITY_INVALID;
@@ -333,20 +333,16 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_for_each_entry_safe_reverse(rq, rn,
&engine->timeline->requests,
link) {
- if (i915_gem_request_completed(rq))
+ if (i915_request_completed(rq))
return;
- __i915_gem_request_unsubmit(rq);
+ __i915_request_unsubmit(rq);
unwind_wa_tail(rq);
- GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID);
- if (rq->priotree.priority != last_prio) {
- p = lookup_priolist(engine,
- &rq->priotree,
- rq->priotree.priority);
- p = ptr_mask_bits(p, 1);
-
- last_prio = rq->priotree.priority;
+ GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+ if (rq_prio(rq) != last_prio) {
+ last_prio = rq_prio(rq);
+ p = lookup_priolist(engine, &rq->priotree, last_prio);
}
list_add(&rq->priotree.link, &p->requests);
@@ -365,8 +361,7 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
}
static inline void
-execlists_context_status_change(struct drm_i915_gem_request *rq,
- unsigned long status)
+execlists_context_status_change(struct i915_request *rq, unsigned long status)
{
/*
* Only used when GVT-g is enabled now. When GVT-g is disabled,
@@ -380,14 +375,14 @@ execlists_context_status_change(struct drm_i915_gem_request *rq,
}
static inline void
-execlists_context_schedule_in(struct drm_i915_gem_request *rq)
+execlists_context_schedule_in(struct i915_request *rq)
{
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
intel_engine_context_in(rq->engine);
}
static inline void
-execlists_context_schedule_out(struct drm_i915_gem_request *rq)
+execlists_context_schedule_out(struct i915_request *rq)
{
intel_engine_context_out(rq->engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
@@ -402,7 +397,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
}
-static u64 execlists_update_context(struct drm_i915_gem_request *rq)
+static u64 execlists_update_context(struct i915_request *rq)
{
struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
struct i915_hw_ppgtt *ppgtt =
@@ -422,19 +417,31 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
return ce->lrc_desc;
}
-static inline void elsp_write(u64 desc, u32 __iomem *elsp)
+static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
{
- writel(upper_32_bits(desc), elsp);
- writel(lower_32_bits(desc), elsp);
+ if (execlists->ctrl_reg) {
+ writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
+ writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
+ } else {
+ writel(upper_32_bits(desc), execlists->submit_reg);
+ writel(lower_32_bits(desc), execlists->submit_reg);
+ }
}
static void execlists_submit_ports(struct intel_engine_cs *engine)
{
- struct execlist_port *port = engine->execlists.port;
+ struct intel_engine_execlists *execlists = &engine->execlists;
+ struct execlist_port *port = execlists->port;
unsigned int n;
- for (n = execlists_num_ports(&engine->execlists); n--; ) {
- struct drm_i915_gem_request *rq;
+ /*
+ * ELSQ note: the submit queue is not cleared after being submitted
+ * to the HW so we need to make sure we always clean it up. This is
+ * currently ensured by the fact that we always write the same number
+ * of elsq entries, keep this in mind before changing the loop below.
+ */
+ for (n = execlists_num_ports(execlists); n--; ) {
+ struct i915_request *rq;
unsigned int count;
u64 desc;
@@ -447,18 +454,24 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
desc = execlists_update_context(rq);
GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
- GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n",
+ GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n",
engine->name, n,
port[n].context_id, count,
- rq->global_seqno);
+ rq->global_seqno,
+ rq_prio(rq));
} else {
GEM_BUG_ON(!n);
desc = 0;
}
- elsp_write(desc, engine->execlists.elsp);
+ write_desc(execlists, desc, n);
}
- execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
+
+ /* we need to manually load the submit queue */
+ if (execlists->ctrl_reg)
+ writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+
+ execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
}
static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
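
Taken together, write_desc() and execlists_submit_ports() reduce to the following condensed sketch of the two submission flavours (same helpers as the patch, bookkeeping elided): on pre-ELSQ hardware each descriptor write to the lone ELSP register is what submits, upper dword first; with ELSQ the descriptors are merely staged per port and nothing happens until the control register is loaded.

/* Condensed model of the submission flow above; not the driver code. */
static void submit_ports_sketch(struct intel_engine_execlists *execlists,
				const u64 *desc, unsigned int num_ports)
{
	unsigned int n;

	/* Always write every slot so stale ELSQ entries are overwritten. */
	for (n = num_ports; n--; )
		write_desc(execlists, desc[n], n);

	/* ELSQ only: the staged queue must be loaded explicitly. */
	if (execlists->ctrl_reg)
		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
}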
@@ -479,37 +492,47 @@ static bool can_merge_ctx(const struct i915_gem_context *prev,
return true;
}
-static void port_assign(struct execlist_port *port,
- struct drm_i915_gem_request *rq)
+static void port_assign(struct execlist_port *port, struct i915_request *rq)
{
GEM_BUG_ON(rq == port_request(port));
if (port_isset(port))
- i915_gem_request_put(port_request(port));
+ i915_request_put(port_request(port));
- port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
+ port_set(port, port_pack(i915_request_get(rq), port_count(port)));
}
static void inject_preempt_context(struct intel_engine_cs *engine)
{
+ struct intel_engine_execlists *execlists = &engine->execlists;
struct intel_context *ce =
&engine->i915->preempt_context->engine[engine->id];
unsigned int n;
- GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID);
- GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES));
-
- memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES);
- ce->ring->tail += WA_TAIL_BYTES;
- ce->ring->tail &= (ce->ring->size - 1);
- ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail;
+ GEM_BUG_ON(execlists->preempt_complete_status !=
+ upper_32_bits(ce->lrc_desc));
+ GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
GEM_TRACE("%s\n", engine->name);
- for (n = execlists_num_ports(&engine->execlists); --n; )
- elsp_write(0, engine->execlists.elsp);
+ for (n = execlists_num_ports(execlists); --n; )
+ write_desc(execlists, 0, n);
+
+ write_desc(execlists, ce->lrc_desc, n);
+
+ /* we need to manually load the submit queue */
+ if (execlists->ctrl_reg)
+ writel(EL_CTRL_LOAD, execlists->ctrl_reg);
- elsp_write(ce->lrc_desc, engine->execlists.elsp);
execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
+ execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
}
static void execlists_dequeue(struct intel_engine_cs *engine)
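
The preempt round trip closes in the CSB handler further down: completion of the empty context is recognised by comparing the event's context ID against execlists->preempt_complete_status, which logical_ring_init() caches as the upper half of the preempt context's descriptor. A sketch of that comparison (mirroring, not replacing, the check in the tasklet):

/* A CSB event is a (status, context ID) dword pair; preemption is
 * complete when a COMPLETE status carries the preempt context's ID.
 */
static bool preempt_complete_sketch(const struct intel_engine_execlists *el,
				    u32 status, u32 context_id)
{
	return (status & GEN8_CTX_STATUS_COMPLETE) &&
	       context_id == el->preempt_complete_status;
}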
@@ -518,7 +541,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
struct execlist_port *port = execlists->port;
const struct execlist_port * const last_port =
&execlists->port[execlists->port_mask];
- struct drm_i915_gem_request *last = port_request(port);
+ struct i915_request *last = port_request(port);
struct rb_node *rb;
bool submit = false;
@@ -546,8 +569,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
spin_lock_irq(&engine->timeline->lock);
rb = execlists->first;
GEM_BUG_ON(rb_first(&execlists->queue) != rb);
- if (!rb)
- goto unlock;
if (last) {
/*
@@ -570,55 +591,49 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
goto unlock;
- if (HAS_LOGICAL_RING_PREEMPTION(engine->i915) &&
- rb_entry(rb, struct i915_priolist, node)->priority >
- max(last->priotree.priority, 0)) {
- /*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
- */
+ if (need_preempt(engine, last, execlists->queue_priority)) {
inject_preempt_context(engine);
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_PREEMPT);
goto unlock;
- } else {
- /*
- * In theory, we could coalesce more requests onto
- * the second port (the first port is active, with
- * no preemptions pending). However, that means we
- * then have to deal with the possible lite-restore
- * of the second port (as we submit the ELSP, there
- * may be a context-switch) but also we may complete
- * the resubmission before the context-switch. Ergo,
- * coalescing onto the second port will cause a
- * preemption event, but we cannot predict whether
- * that will affect port[0] or port[1].
- *
- * If the second port is already active, we can wait
- * until the next context-switch before contemplating
- * new requests. The GPU will be busy and we should be
- * able to resubmit the new ELSP before it idles,
- * avoiding pipeline bubbles (momentary pauses where
- * the driver is unable to keep up the supply of new
- * work).
- */
- if (port_count(&port[1]))
- goto unlock;
-
- /* WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent
- * ring:HEAD == req:TAIL as we resubmit the
- * request. See gen8_emit_breadcrumb() for
- * where we prepare the padding after the
- * end of the request.
- */
- last->tail = last->wa_tail;
}
+
+ /*
+ * In theory, we could coalesce more requests onto
+ * the second port (the first port is active, with
+ * no preemptions pending). However, that means we
+ * then have to deal with the possible lite-restore
+ * of the second port (as we submit the ELSP, there
+ * may be a context-switch) but also we may complete
+ * the resubmission before the context-switch. Ergo,
+ * coalescing onto the second port will cause a
+ * preemption event, but we cannot predict whether
+ * that will affect port[0] or port[1].
+ *
+ * If the second port is already active, we can wait
+ * until the next context-switch before contemplating
+ * new requests. The GPU will be busy and we should be
+ * able to resubmit the new ELSP before it idles,
+ * avoiding pipeline bubbles (momentary pauses where
+ * the driver is unable to keep up the supply of new
+ * work). However, we have to double-check that the
+ * priorities of the ports haven't been switched.
+ */
+ if (port_count(&port[1]))
+ goto unlock;
+
+ /*
+ * WaIdleLiteRestore:bdw,skl
+ * Apply the wa NOOPs to prevent
+ * ring:HEAD == rq:TAIL as we resubmit the
+ * request. See gen8_emit_breadcrumb() for
+ * where we prepare the padding after the
+ * end of the request.
+ */
+ last->tail = last->wa_tail;
}
- do {
- struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
- struct drm_i915_gem_request *rq, *rn;
+ while (rb) {
+ struct i915_priolist *p = to_priolist(rb);
+ struct i915_request *rq, *rn;
list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
/*
@@ -668,8 +683,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
INIT_LIST_HEAD(&rq->priotree.link);
- __i915_gem_request_submit(rq);
- trace_i915_gem_request_in(rq, port_index(port, execlists));
+ __i915_request_submit(rq);
+ trace_i915_request_in(rq, port_index(port, execlists));
last = rq;
submit = true;
}
@@ -679,11 +694,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
- } while (rb);
+ }
done:
+ execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
execlists->first = rb;
if (submit)
port_assign(port, last);
+
+ /* We must always keep the beast fed if we have work piled up */
+ GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
+
unlock:
spin_unlock_irq(&engine->timeline->lock);
@@ -691,6 +711,9 @@ unlock:
execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
execlists_submit_ports(engine);
}
+
+ GEM_BUG_ON(port_isset(execlists->port) &&
+ !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
}
void
@@ -700,12 +723,17 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
unsigned int num_ports = execlists_num_ports(execlists);
while (num_ports-- && port_isset(port)) {
- struct drm_i915_gem_request *rq = port_request(port);
+ struct i915_request *rq = port_request(port);
GEM_BUG_ON(!execlists->active);
intel_engine_context_out(rq->engine);
- execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED);
- i915_gem_request_put(rq);
+
+ execlists_context_status_change(rq,
+ i915_request_completed(rq) ?
+ INTEL_CONTEXT_SCHEDULE_OUT :
+ INTEL_CONTEXT_SCHEDULE_PREEMPTED);
+
+ i915_request_put(rq);
memset(port, 0, sizeof(*port));
port++;
@@ -715,32 +743,50 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
static void execlists_cancel_requests(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- struct drm_i915_gem_request *rq, *rn;
+ struct i915_request *rq, *rn;
struct rb_node *rb;
unsigned long flags;
- spin_lock_irqsave(&engine->timeline->lock, flags);
+ GEM_TRACE("%s\n", engine->name);
+
+ /*
+ * Before we call engine->cancel_requests(), we should have exclusive
+ * access to the submission state. This is arranged for us by the
+ * caller disabling the interrupt generation, the tasklet and other
+ * threads that may then access the same state, giving us a free hand
+ * to reset state. However, we still need to let lockdep be aware that
+ * we know this state may be accessed in hardirq context, so we
+ * disable the irq around this manipulation and we want to keep
+ * the spinlock focused on its duties and not accidentally conflate
+ * coverage to the submission's irq state. (Similarly, although we
+ * shouldn't need to disable irq around the manipulation of the
+ * submission's irq state, we also wish to remind ourselves that
+ * it is irq state.)
+ */
+ local_irq_save(flags);
/* Cancel the requests on the HW and clear the ELSP tracker. */
execlists_cancel_port_requests(execlists);
+ spin_lock(&engine->timeline->lock);
+
/* Mark all executing requests as skipped. */
list_for_each_entry(rq, &engine->timeline->requests, link) {
GEM_BUG_ON(!rq->global_seqno);
- if (!i915_gem_request_completed(rq))
+ if (!i915_request_completed(rq))
dma_fence_set_error(&rq->fence, -EIO);
}
/* Flush the queued requests to the timeline list (for retiring). */
rb = execlists->first;
while (rb) {
- struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+ struct i915_priolist *p = to_priolist(rb);
list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
INIT_LIST_HEAD(&rq->priotree.link);
dma_fence_set_error(&rq->fence, -EIO);
- __i915_gem_request_submit(rq);
+ __i915_request_submit(rq);
}
rb = rb_next(rb);
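
The irq/spinlock split that the long comment above describes (and which reset_common_ring() reuses later in this patch) has this skeleton: interrupts are disabled around the whole manipulation, while the timeline lock is taken as a plain spin_lock inside, keeping the lock's lockdep coverage focused on its own duties.

	/* Skeleton of the locking shape used above; details elided. */
	unsigned long flags;

	local_irq_save(flags);			/* covers the port/ELSP state */

	execlists_cancel_port_requests(execlists);

	spin_lock(&engine->timeline->lock);	/* plain: irqs already off */
	/* ... mark and flush requests ... */
	spin_unlock(&engine->timeline->lock);

	local_irq_restore(flags);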
@@ -752,11 +798,13 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
/* Remaining _unready_ requests will be nop'ed when submitted */
-
+ execlists->queue_priority = INT_MIN;
execlists->queue = RB_ROOT;
execlists->first = NULL;
GEM_BUG_ON(port_isset(execlists->port));
+ spin_unlock(&engine->timeline->lock);
+
/*
* The port is checked prior to scheduling a tasklet, but
* just in case we have suspended the tasklet to do the
@@ -765,7 +813,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
*/
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
- spin_unlock_irqrestore(&engine->timeline->lock, flags);
+ /* Mark all CS interrupts as complete */
+ execlists->active = 0;
+
+ local_irq_restore(flags);
}
/*
@@ -778,8 +829,10 @@ static void execlists_submission_tasklet(unsigned long data)
struct intel_engine_execlists * const execlists = &engine->execlists;
struct execlist_port * const port = execlists->port;
struct drm_i915_private *dev_priv = engine->i915;
+ bool fw = false;
- /* We can skip acquiring intel_runtime_pm_get() here as it was taken
+ /*
+ * We can skip acquiring intel_runtime_pm_get() here as it was taken
* on our behalf by the request (see i915_gem_mark_busy()) and it will
* not be relinquished until the device is idle (see
* i915_gem_idle_work_handler()). As a precaution, we make sure
@@ -788,9 +841,8 @@ static void execlists_submission_tasklet(unsigned long data)
*/
GEM_BUG_ON(!dev_priv->gt.awake);
- intel_uncore_forcewake_get(dev_priv, execlists->fw_domains);
-
- /* Prefer doing test_and_clear_bit() as a two stage operation to avoid
+ /*
+ * Prefer doing test_and_clear_bit() as a two stage operation to avoid
* imposing the cost of a locked atomic transaction when submitting a
* new request (outside of the context-switch interrupt).
*/
@@ -806,18 +858,17 @@ static void execlists_submission_tasklet(unsigned long data)
execlists->csb_head = -1; /* force mmio read of CSB ptrs */
}
- /* The write will be ordered by the uncached read (itself
- * a memory barrier), so we do not need another in the form
- * of a locked instruction. The race between the interrupt
- * handler and the split test/clear is harmless as we order
- * our clear before the CSB read. If the interrupt arrived
- * first between the test and the clear, we read the updated
- * CSB and clear the bit. If the interrupt arrives as we read
- * the CSB or later (i.e. after we had cleared the bit) the bit
- * is set and we do a new loop.
- */
- __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+ /* Clear before reading to catch new interrupts */
+ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+ smp_mb__after_atomic();
+
if (unlikely(execlists->csb_head == -1)) { /* following a reset */
+ if (!fw) {
+ intel_uncore_forcewake_get(dev_priv,
+ execlists->fw_domains);
+ fw = true;
+ }
+
head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
tail = GEN8_CSB_WRITE_PTR(head);
head = GEN8_CSB_READ_PTR(head);
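
The handshake above depends on ordering: the irq_posted bit is cleared with a full barrier before the CSB pointers are read, so an interrupt arriving mid-processing either updates the write pointer we are about to read or re-sets the bit and forces another pass of the loop. In miniature (same primitives as the patch, event draining elided):

	/* Sketch of the two-stage test/clear used by the tasklet. */
	while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
		/* Clear before reading to catch new interrupts */
		clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
		smp_mb__after_atomic();

		/* ... read the CSB write pointer and drain events ... */
	}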
@@ -830,13 +881,13 @@ static void execlists_submission_tasklet(unsigned long data)
head = execlists->csb_head;
tail = READ_ONCE(buf[write_idx]);
}
- GEM_TRACE("%s cs-irq head=%d [%d], tail=%d [%d]\n",
+ GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
engine->name,
- head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))),
- tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))));
+ head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
+ tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
while (head != tail) {
- struct drm_i915_gem_request *rq;
+ struct i915_request *rq;
unsigned int status;
unsigned int count;
@@ -881,7 +932,7 @@ static void execlists_submission_tasklet(unsigned long data)
GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == PREEMPT_ID) {
+ buf[2*head + 1] == execlists->preempt_complete_status) {
GEM_TRACE("%s preempt-idle\n", engine->name);
execlists_cancel_port_requests(execlists);
@@ -902,23 +953,28 @@ static void execlists_submission_tasklet(unsigned long data)
GEM_BUG_ON(!execlists_is_active(execlists,
EXECLISTS_ACTIVE_USER));
- /* Check the context/desc id for this event matches */
- GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
-
rq = port_unpack(port, &count);
- GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n",
+ GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
engine->name,
port->context_id, count,
- rq ? rq->global_seqno : 0);
+ rq ? rq->global_seqno : 0,
+ rq ? rq_prio(rq) : 0);
+
+ /* Check the context/desc id for this event matches */
+ GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+
GEM_BUG_ON(count == 0);
if (--count == 0) {
GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
GEM_BUG_ON(port_isset(&port[1]) &&
!(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
- GEM_BUG_ON(!i915_gem_request_completed(rq));
+ GEM_BUG_ON(!i915_request_completed(rq));
execlists_context_schedule_out(rq);
- trace_i915_gem_request_out(rq);
- i915_gem_request_put(rq);
+ trace_i915_request_out(rq);
+ i915_request_put(rq);
+
+ GEM_TRACE("%s completed ctx=%d\n",
+ engine->name, port->context_id);
execlists_port_complete(execlists, port);
} else {
@@ -943,21 +999,26 @@ static void execlists_submission_tasklet(unsigned long data)
if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
execlists_dequeue(engine);
- intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
+ if (fw)
+ intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
}
-static void insert_request(struct intel_engine_cs *engine,
- struct i915_priotree *pt,
- int prio)
+static void queue_request(struct intel_engine_cs *engine,
+ struct i915_priotree *pt,
+ int prio)
{
- struct i915_priolist *p = lookup_priolist(engine, pt, prio);
+ list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
+}
- list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
- if (ptr_unmask_bits(p, 1))
+static void submit_queue(struct intel_engine_cs *engine, int prio)
+{
+ if (prio > engine->execlists.queue_priority) {
+ engine->execlists.queue_priority = prio;
tasklet_hi_schedule(&engine->execlists.tasklet);
+ }
}
-static void execlists_submit_request(struct drm_i915_gem_request *request)
+static void execlists_submit_request(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
unsigned long flags;
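
queue_priority, set at the done: label of execlists_dequeue() to the priority of the highest pending priolist (or INT_MIN once the queue drains), is what lets submit_queue() above kick the tasklet only when an incoming request could actually change what runs next. A minimal standalone model of that contract (hypothetical types):

#include <limits.h>
#include <stdbool.h>

struct queue_model { int queue_priority; };

/* True when a request of priority 'prio' must wake the tasklet:
 * an idle engine sits at INT_MIN and is always kicked, a busy one
 * is only disturbed by a strictly higher priority.
 */
static bool needs_kick(struct queue_model *q, int prio)
{
	if (prio <= q->queue_priority)
		return false;	/* dequeue will find it in order anyway */

	q->queue_priority = prio;
	return true;
}

/* After a dequeue that empties the queue: */
static void on_drained(struct queue_model *q)
{
	q->queue_priority = INT_MIN;
}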
@@ -965,7 +1026,8 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->timeline->lock, flags);
- insert_request(engine, &request->priotree, request->priotree.priority);
+ queue_request(engine, &request->priotree, rq_prio(request));
+ submit_queue(engine, rq_prio(request));
GEM_BUG_ON(!engine->execlists.first);
GEM_BUG_ON(list_empty(&request->priotree.link));
@@ -973,9 +1035,9 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
-static struct drm_i915_gem_request *pt_to_request(struct i915_priotree *pt)
+static struct i915_request *pt_to_request(struct i915_priotree *pt)
{
- return container_of(pt, struct drm_i915_gem_request, priotree);
+ return container_of(pt, struct i915_request, priotree);
}
static struct intel_engine_cs *
@@ -993,7 +1055,7 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
return engine;
}
-static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
+static void execlists_schedule(struct i915_request *request, int prio)
{
struct intel_engine_cs *engine;
struct i915_dependency *dep, *p;
@@ -1002,7 +1064,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
- if (i915_gem_request_completed(request))
+ if (i915_request_completed(request))
return;
if (prio <= READ_ONCE(request->priotree.priority))
@@ -1014,13 +1076,14 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
stack.signaler = &request->priotree;
list_add(&stack.dfs_link, &dfs);
- /* Recursively bump all dependent priorities to match the new request.
+ /*
+ * Recursively bump all dependent priorities to match the new request.
*
* A naive approach would be to use recursion:
* static void update_priorities(struct i915_priotree *pt, prio) {
* list_for_each_entry(dep, &pt->signalers_list, signal_link)
* update_priorities(dep->signal, prio)
- * insert_request(pt);
+ * queue_request(pt);
* }
* but that may have unlimited recursion depth and so runs a very
 * real risk of overrunning the kernel stack. Instead, we build
@@ -1031,27 +1094,29 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
* end result is a topological list of requests in reverse order, the
* last element in the list is the request we must execute first.
*/
- list_for_each_entry_safe(dep, p, &dfs, dfs_link) {
+ list_for_each_entry(dep, &dfs, dfs_link) {
struct i915_priotree *pt = dep->signaler;
- /* Within an engine, there can be no cycle, but we may
+ /*
+ * Within an engine, there can be no cycle, but we may
* refer to the same dependency chain multiple times
* (redundant dependencies are not eliminated) and across
* engines.
*/
list_for_each_entry(p, &pt->signalers_list, signal_link) {
- if (i915_gem_request_completed(pt_to_request(p->signaler)))
+ GEM_BUG_ON(p == dep); /* no cycles! */
+
+ if (i915_priotree_signaled(p->signaler))
continue;
GEM_BUG_ON(p->signaler->priority < pt->priority);
if (prio > READ_ONCE(p->signaler->priority))
list_move_tail(&p->dfs_link, &dfs);
}
-
- list_safe_reset_next(dep, p, dfs_link);
}
- /* If we didn't need to bump any existing priorities, and we haven't
+ /*
+ * If we didn't need to bump any existing priorities, and we haven't
* yet submitted this request (i.e. there is no potential race with
* execlists_submit_request()), we can set our own priority and skip
* acquiring the engine locks.
@@ -1081,8 +1146,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
pt->priority = prio;
if (!list_empty(&pt->link)) {
__list_del_entry(&pt->link);
- insert_request(engine, pt, prio);
+ queue_request(engine, pt, prio);
}
+ submit_queue(engine, prio);
}
spin_unlock_irq(&engine->timeline->lock);
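
Stripped of the i915 types and locking, the recursion-free walk that the big comment above motivates is a worklist DFS: pop a node, raise its priority, push any signalers that still need raising. The driver builds a full reverse-topological list first (so it can then take each engine lock once, in order), but the bounded-stack idea is the same. A simplified, hypothetical rendering:

#include <stddef.h>

/* Hypothetical node: each request lists the requests it waits on. */
struct node {
	int priority;
	struct node *const *signalers;	/* NULL-terminated array */
};

/* Raise 'request' and everything it transitively waits on to 'prio'
 * using an explicit worklist instead of recursion; the priority test
 * doubles as the visited check, since an already-raised node needs
 * no further processing. The cap guard keeps the sketch safe where
 * the real code relies on intrusive lists.
 */
static void bump_priorities(struct node *request, int prio,
			    struct node **worklist, size_t cap)
{
	size_t n = 0;

	worklist[n++] = request;
	while (n) {
		struct node *node = worklist[--n];
		struct node *const *s;

		if (node->priority >= prio)
			continue;
		node->priority = prio;

		for (s = node->signalers; *s && n < cap; s++)
			worklist[n++] = *s;
	}
}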
@@ -1125,11 +1191,9 @@ execlists_context_pin(struct intel_engine_cs *engine,
goto out;
GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
- if (!ce->state) {
- ret = execlists_context_deferred_alloc(ctx, engine);
- if (ret)
- goto err;
- }
+ ret = execlists_context_deferred_alloc(ctx, engine);
+ if (ret)
+ goto err;
GEM_BUG_ON(!ce->state);
ret = __context_pin(ctx, ce->state);
@@ -1186,7 +1250,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine,
i915_gem_context_put(ctx);
}
-static int execlists_request_alloc(struct drm_i915_gem_request *request)
+static int execlists_request_alloc(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
struct intel_context *ce = &request->ctx->engine[engine->id];
@@ -1363,6 +1427,40 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
return batch;
}
+static u32 *
+gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+ int i;
+
+ /*
+ * WaPipeControlBefore3DStateSamplePattern: cnl
+ *
+ * Ensure the engine is idle prior to programming a
+ * 3DSTATE_SAMPLE_PATTERN during a context restore.
+ */
+ batch = gen8_emit_pipe_control(batch,
+ PIPE_CONTROL_CS_STALL,
+ 0);
+ /*
+ * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
+ * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
+ * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
+ * confusing. Since gen8_emit_pipe_control() already advances the
+ * batch by 6 dwords, we advance the other 10 here, completing a
+ * cacheline. It's not clear if the workaround requires this padding
+ * before other commands, or if it's just the regular padding we would
+ * already have for the workaround bb, so leave it here for now.
+ */
+ for (i = 0; i < 10; i++)
+ *batch++ = MI_NOOP;
+
+ /* Pad to end of cacheline */
+ while ((unsigned long)batch % CACHELINE_BYTES)
+ *batch++ = MI_NOOP;
+
+ return batch;
+}
+
#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
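
The padding arithmetic in gen10_init_indirectctx_bb() checks out: gen8_emit_pipe_control() advances 6 dwords and the loop adds 10 MI_NOOPs, i.e. 16 dwords x 4 bytes = 64 bytes, exactly one CACHELINE_BYTES on these parts. A compile-time restatement of that invariant (hypothetical, not in the patch):

#include <assert.h>

/* 6 dwords of PIPE_CONTROL + 10 dwords of MI_NOOP = 16 dwords. */
static_assert((6 + 10) * 4 == 64,
	      "WaPipeControlBefore3DStateSamplePattern padding must fill one 64-byte cacheline");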
@@ -1411,12 +1509,14 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
unsigned int i;
int ret;
- if (WARN_ON(engine->id != RCS || !engine->scratch))
+ if (GEM_WARN_ON(engine->id != RCS))
return -EINVAL;
switch (INTEL_GEN(engine->i915)) {
case 10:
- return 0;
+ wa_bb_fn[0] = gen10_init_indirectctx_bb;
+ wa_bb_fn[1] = NULL;
+ break;
case 9:
wa_bb_fn[0] = gen9_init_indirectctx_bb;
wa_bb_fn[1] = NULL;
@@ -1446,7 +1546,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
*/
for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
wa_bb[i]->offset = batch_ptr - batch;
- if (WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, CACHELINE_BYTES))) {
+ if (GEM_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
+ CACHELINE_BYTES))) {
ret = -EINVAL;
break;
}
@@ -1472,47 +1573,48 @@ static u8 gtiir[] = {
[VECS] = 3,
};
-static int gen8_init_common_ring(struct intel_engine_cs *engine)
+static void enable_execlists(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
- struct intel_engine_execlists * const execlists = &engine->execlists;
- int ret;
- ret = intel_mocs_init_engine(engine);
- if (ret)
- return ret;
+ I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);
- intel_engine_reset_breadcrumbs(engine);
- intel_engine_init_hangcheck(engine);
+ /*
+ * Make sure we're not enabling the new 12-deep CSB
+ * FIFO as that requires a slightly updated handling
+ * in the ctx switch irq. Since we currently use only
+ * 2 elements of the enhanced execlists, the deeper FIFO
+ * is not needed and it's not worth adding more statements
+ * to the irq handler to support it.
+ */
+ if (INTEL_GEN(dev_priv) >= 11)
+ I915_WRITE(RING_MODE_GEN7(engine),
+ _MASKED_BIT_DISABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
+ else
+ I915_WRITE(RING_MODE_GEN7(engine),
+ _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
- I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);
- I915_WRITE(RING_MODE_GEN7(engine),
- _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
I915_WRITE(RING_HWS_PGA(engine->mmio_base),
engine->status_page.ggtt_offset);
POSTING_READ(RING_HWS_PGA(engine->mmio_base));
- DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
+ /* Following the reset, we need to reload the CSB read/write pointers */
+ engine->execlists.csb_head = -1;
+}
- GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
+static int gen8_init_common_ring(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ int ret;
- /*
- * Clear any pending interrupt state.
- *
- * We do it twice out of paranoia that some of the IIR are double
- * buffered, and if we only reset it once there may still be
- * an interrupt pending.
- */
- I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
- GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
- I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
- GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
- clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
- execlists->csb_head = -1;
- execlists->active = 0;
+ ret = intel_mocs_init_engine(engine);
+ if (ret)
+ return ret;
+
+ intel_engine_reset_breadcrumbs(engine);
+ intel_engine_init_hangcheck(engine);
- execlists->elsp =
- dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+ enable_execlists(engine);
/* After a GPU reset, we may have requests to replay */
if (execlists->first)
@@ -1554,8 +1656,33 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
return init_workarounds_ring(engine);
}
+static void reset_irq(struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *dev_priv = engine->i915;
+ int i;
+
+ GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
+
+ /*
+ * Clear any pending interrupt state.
+ *
+ * We do it twice out of paranoia that some of the IIR are double
+ * buffered, and if we only reset it once there may still be
+ * an interrupt pending.
+ */
+ for (i = 0; i < 2; i++) {
+ I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
+ GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
+ POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
+ }
+ GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
+ (GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift));
+
+ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+}
+
static void reset_common_ring(struct intel_engine_cs *engine,
- struct drm_i915_gem_request *request)
+ struct i915_request *request)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct intel_context *ce;
@@ -1563,7 +1690,11 @@ static void reset_common_ring(struct intel_engine_cs *engine,
GEM_TRACE("%s seqno=%x\n",
engine->name, request ? request->global_seqno : 0);
- spin_lock_irqsave(&engine->timeline->lock, flags);
+
+ /* See execlists_cancel_requests() for the irq/spinlock split. */
+ local_irq_save(flags);
+
+ reset_irq(engine);
/*
* Catch up with any missed context-switch interrupts.
@@ -1577,11 +1708,17 @@ static void reset_common_ring(struct intel_engine_cs *engine,
execlists_cancel_port_requests(execlists);
/* Push back any incomplete requests for replay after the reset. */
+ spin_lock(&engine->timeline->lock);
__unwind_incomplete_requests(engine);
+ spin_unlock(&engine->timeline->lock);
- spin_unlock_irqrestore(&engine->timeline->lock, flags);
+ /* Mark all CS interrupts as complete */
+ execlists->active = 0;
- /* If the request was innocent, we leave the request in the ELSP
+ local_irq_restore(flags);
+
+ /*
+ * If the request was innocent, we leave the request in the ELSP
* and will try to replay it on restarting. The context image may
* have been corrupted by the reset, in which case we may have
* to service a new GPU hang, but more likely we can continue on
@@ -1594,7 +1731,8 @@ static void reset_common_ring(struct intel_engine_cs *engine,
if (!request || request->fence.error != -EIO)
return;
- /* We want a simple context + ring to execute the breadcrumb update.
+ /*
+ * We want a simple context + ring to execute the breadcrumb update.
* We cannot rely on the context being intact across the GPU hang,
* so clear it and rebuild just what we need for the breadcrumb.
* All pending requests for this context will be zapped, and any
@@ -1617,15 +1755,15 @@ static void reset_common_ring(struct intel_engine_cs *engine,
unwind_wa_tail(request);
}
-static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
+static int intel_logical_ring_emit_pdps(struct i915_request *rq)
{
- struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
- struct intel_engine_cs *engine = req->engine;
+ struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
+ struct intel_engine_cs *engine = rq->engine;
const int num_lri_cmds = GEN8_3LVL_PDPES * 2;
u32 *cs;
int i;
- cs = intel_ring_begin(req, num_lri_cmds * 2 + 2);
+ cs = intel_ring_begin(rq, num_lri_cmds * 2 + 2);
if (IS_ERR(cs))
return PTR_ERR(cs);
@@ -1640,12 +1778,12 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
}
*cs++ = MI_NOOP;
- intel_ring_advance(req, cs);
+ intel_ring_advance(rq, cs);
return 0;
}
-static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
+static int gen8_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
{
@@ -1658,18 +1796,18 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
* it is unsafe in case of lite-restore (because the ctx is
* not idle). PML4 is allocated during ppgtt init so this is
 * not needed in 48-bit. */
- if (req->ctx->ppgtt &&
- (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings) &&
- !i915_vm_is_48bit(&req->ctx->ppgtt->base) &&
- !intel_vgpu_active(req->i915)) {
- ret = intel_logical_ring_emit_pdps(req);
+ if (rq->ctx->ppgtt &&
+ (intel_engine_flag(rq->engine) & rq->ctx->ppgtt->pd_dirty_rings) &&
+ !i915_vm_is_48bit(&rq->ctx->ppgtt->base) &&
+ !intel_vgpu_active(rq->i915)) {
+ ret = intel_logical_ring_emit_pdps(rq);
if (ret)
return ret;
- req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine);
+ rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
}
- cs = intel_ring_begin(req, 4);
+ cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs))
return PTR_ERR(cs);
@@ -1698,7 +1836,7 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
(flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
*cs++ = lower_32_bits(offset);
*cs++ = upper_32_bits(offset);
- intel_ring_advance(req, cs);
+ intel_ring_advance(rq, cs);
return 0;
}
@@ -1717,7 +1855,7 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
}
-static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode)
+static int gen8_emit_flush(struct i915_request *request, u32 mode)
{
u32 cmd, *cs;
@@ -1749,7 +1887,7 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode)
return 0;
}
-static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
+static int gen8_emit_flush_render(struct i915_request *request,
u32 mode)
{
struct intel_engine_cs *engine = request->engine;
@@ -1824,7 +1962,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
* used as a workaround for not being allowed to do lite
* restore with HEAD==TAIL (WaIdleLiteRestore).
*/
-static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs)
+static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
{
/* Ensure there's always at least one preemption point per-request. */
*cs++ = MI_ARB_CHECK;
@@ -1832,7 +1970,7 @@ static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs)
request->wa_tail = intel_ring_offset(request, cs);
}
-static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
+static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
{
/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
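
For reference, the tail bookkeeping shared by gen8_emit_wa_tail() above and unwind_wa_tail() near the top of the patch: WA_TAIL_DWORDS is 2 (one MI_ARB_CHECK plus one MI_NOOP), rq->wa_tail sits WA_TAIL_BYTES past rq->tail, and unwinding simply rewinds by those bytes so a replayed request never presents HEAD == TAIL to the hardware (WaIdleLiteRestore). A small model (hypothetical, mask-based wrap as in intel_ring_wrap()):

/* Sketch: recover the pre-padding tail from the recorded wa_tail. */
#define WA_TAIL_BYTES_SKETCH	(2 * sizeof(u32))	/* MI_ARB_CHECK + MI_NOOP */

static u32 unwind_tail_sketch(u32 wa_tail, u32 ring_size)
{
	return (wa_tail - WA_TAIL_BYTES_SKETCH) & (ring_size - 1);
}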
@@ -1848,8 +1986,7 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
}
static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
-static void gen8_emit_breadcrumb_rcs(struct drm_i915_gem_request *request,
- u32 *cs)
+static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
/* We're using qword write, seqno should be aligned to 8 bytes. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
@@ -1865,15 +2002,15 @@ static void gen8_emit_breadcrumb_rcs(struct drm_i915_gem_request *request,
}
static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS;
-static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
+static int gen8_init_rcs_context(struct i915_request *rq)
{
int ret;
- ret = intel_ring_workarounds_emit(req);
+ ret = intel_ring_workarounds_emit(rq);
if (ret)
return ret;
- ret = intel_rcs_context_init_mocs(req);
+ ret = intel_rcs_context_init_mocs(rq);
/*
 * Failing to program the MOCS is non-fatal. The system will not
* run at peak performance. So generate an error and carry on.
@@ -1881,7 +2018,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
if (ret)
DRM_ERROR("MOCS failed to program: expect performance issues.\n");
- return i915_gem_render_state_emit(req);
+ return i915_gem_render_state_emit(rq);
}
/**
@@ -1912,6 +2049,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
intel_engine_cleanup_common(engine);
lrc_destroy_wa_ctx(engine);
+
engine->i915 = NULL;
dev_priv->engine[engine->id] = NULL;
kfree(engine);
@@ -1928,6 +2066,12 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
engine->unpark = NULL;
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
+
+ engine->i915->caps.scheduler =
+ I915_SCHEDULER_CAP_ENABLED |
+ I915_SCHEDULER_CAP_PRIORITY;
+ if (engine->i915->preempt_context)
+ engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION;
}
static void
@@ -1948,8 +2092,17 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->set_default_submission = execlists_set_default_submission;
- engine->irq_enable = gen8_logical_ring_enable_irq;
- engine->irq_disable = gen8_logical_ring_disable_irq;
+ if (INTEL_GEN(engine->i915) < 11) {
+ engine->irq_enable = gen8_logical_ring_enable_irq;
+ engine->irq_disable = gen8_logical_ring_disable_irq;
+ } else {
+ /*
+ * TODO: On Gen11 interrupt masks need to be clear
+ * to allow C6 entry. Keep interrupts enabled at
+ * and take the hit of generating extra interrupts
+ * until a more refined solution exists.
+ */
+ }
engine->emit_bb_start = gen8_emit_bb_start;
}
@@ -2001,6 +2154,21 @@ static int logical_ring_init(struct intel_engine_cs *engine)
if (ret)
goto error;
+ if (HAS_LOGICAL_RING_ELSQ(engine->i915)) {
+ engine->execlists.submit_reg = engine->i915->regs +
+ i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine));
+ engine->execlists.ctrl_reg = engine->i915->regs +
+ i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine));
+ } else {
+ engine->execlists.submit_reg = engine->i915->regs +
+ i915_mmio_reg_offset(RING_ELSP(engine));
+ }
+
+ engine->execlists.preempt_complete_status = ~0u;
+ if (engine->i915->preempt_context)
+ engine->execlists.preempt_complete_status =
+ upper_32_bits(engine->i915->preempt_context->engine[engine->id].lrc_desc);
+
return 0;
error:
@@ -2080,7 +2248,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
- rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
+ rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) <<
GEN8_RPCS_SS_CNT_SHIFT;
rpcs |= GEN8_RPCS_ENABLE;
}
@@ -2104,6 +2272,10 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
default:
MISSING_CASE(INTEL_GEN(engine->i915));
/* fall through */
+ case 11:
+ indirect_ctx_offset =
+ GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ break;
case 10:
indirect_ctx_offset =
GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
@@ -2142,6 +2314,8 @@ static void execlists_init_reg_state(u32 *regs,
MI_LRI_FORCE_POSTED;
CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine),
+ _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT) |
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
(HAS_RESOURCE_STREAMER(dev_priv) ?
CTX_CTRL_RS_CTX_ENABLE : 0)));
@@ -2261,6 +2435,10 @@ populate_lr_context(struct i915_gem_context *ctx,
if (!engine->default_state)
regs[CTX_CONTEXT_CONTROL + 1] |=
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+ if (ctx == ctx->i915->preempt_context && INTEL_GEN(engine->i915) < 11)
+ regs[CTX_CONTEXT_CONTROL + 1] |=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
i915_gem_object_unpin_map(ctx_obj);
@@ -2277,7 +2455,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
struct intel_ring *ring;
int ret;
- WARN_ON(ce->state);
+ if (ce->state)
+ return 0;
context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);