Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_lrc.c | 731
1 file changed, 455 insertions, 276 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7ece2f061b9e..697af5add78b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -137,6 +137,7 @@ #include <drm/i915_drm.h> #include "i915_drv.h" #include "i915_gem_render_state.h" +#include "intel_lrc_reg.h" #include "intel_mocs.h" #define RING_EXECLIST_QFULL (1 << 0x2) @@ -156,60 +157,10 @@ #define GEN8_CTX_STATUS_COMPLETED_MASK \ (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) -#define CTX_LRI_HEADER_0 0x01 -#define CTX_CONTEXT_CONTROL 0x02 -#define CTX_RING_HEAD 0x04 -#define CTX_RING_TAIL 0x06 -#define CTX_RING_BUFFER_START 0x08 -#define CTX_RING_BUFFER_CONTROL 0x0a -#define CTX_BB_HEAD_U 0x0c -#define CTX_BB_HEAD_L 0x0e -#define CTX_BB_STATE 0x10 -#define CTX_SECOND_BB_HEAD_U 0x12 -#define CTX_SECOND_BB_HEAD_L 0x14 -#define CTX_SECOND_BB_STATE 0x16 -#define CTX_BB_PER_CTX_PTR 0x18 -#define CTX_RCS_INDIRECT_CTX 0x1a -#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c -#define CTX_LRI_HEADER_1 0x21 -#define CTX_CTX_TIMESTAMP 0x22 -#define CTX_PDP3_UDW 0x24 -#define CTX_PDP3_LDW 0x26 -#define CTX_PDP2_UDW 0x28 -#define CTX_PDP2_LDW 0x2a -#define CTX_PDP1_UDW 0x2c -#define CTX_PDP1_LDW 0x2e -#define CTX_PDP0_UDW 0x30 -#define CTX_PDP0_LDW 0x32 -#define CTX_LRI_HEADER_2 0x41 -#define CTX_R_PWR_CLK_STATE 0x42 -#define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 - -#define CTX_REG(reg_state, pos, reg, val) do { \ - (reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \ - (reg_state)[(pos)+1] = (val); \ -} while (0) - -#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \ - const u64 _addr = i915_page_dir_dma_addr((ppgtt), (n)); \ - reg_state[CTX_PDP ## n ## _UDW+1] = upper_32_bits(_addr); \ - reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \ -} while (0) - -#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \ - reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \ - reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \ -} while (0) - -#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 -#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26 -#define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19 - /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ #define WA_TAIL_DWORDS 2 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS) -#define PREEMPT_ID 0x1 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); @@ -218,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); +static inline struct i915_priolist *to_priolist(struct rb_node *rb) +{ + return rb_entry(rb, struct i915_priolist, node); +} + +static inline int rq_prio(const struct i915_request *rq) +{ + return rq->priotree.priority; +} + +static inline bool need_preempt(const struct intel_engine_cs *engine, + const struct i915_request *last, + int prio) +{ + return engine->i915->preempt_context && prio > max(rq_prio(last), 0); +} + /** * intel_lr_context_descriptor_update() - calculate & cache the descriptor * descriptor for a pinned context @@ -236,6 +204,18 @@ static void execlists_init_reg_state(u32 *reg_state, * bits 32-52: ctx ID, a globally unique tag * bits 53-54: mbz, reserved for use by hardware * bits 55-63: group ID, currently unused and set to 0 + * + * Starting from Gen11, the upper dword of the descriptor has a new format: + * + * bits 32-36: reserved + * bits 37-47: SW context ID + * bits 48:53: engine instance 
+ * bit 54: mbz, reserved for use by hardware + * bits 55-60: SW counter + * bits 61-63: engine class + * + * engine info, SW context ID and SW counter need to form a unique number + * (Context ID) per lrc. */ static void intel_lr_context_descriptor_update(struct i915_gem_context *ctx, @@ -244,12 +224,32 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, struct intel_context *ce = &ctx->engine[engine->id]; u64 desc; - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<<GEN8_CTX_ID_WIDTH)); + BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); + BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH))); desc = ctx->desc_template; /* bits 0-11 */ + GEM_BUG_ON(desc & GENMASK_ULL(63, 12)); + desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; /* bits 12-31 */ - desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ + GEM_BUG_ON(desc & GENMASK_ULL(63, 32)); + + if (INTEL_GEN(ctx->i915) >= 11) { + GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); + desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; + /* bits 37-47 */ + + desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; + /* bits 48-53 */ + + /* TODO: decide what to do with SW counter (bits 55-60) */ + + desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; + /* bits 61-63 */ + } else { + GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH)); + desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ + } ce->lrc_desc = desc; } @@ -273,7 +273,7 @@ find_priolist: parent = &execlists->queue.rb_node; while (*parent) { rb = *parent; - p = rb_entry(rb, typeof(*p), node); + p = to_priolist(rb); if (prio > p->priority) { parent = &rb->rb_left; } else if (prio < p->priority) { @@ -313,10 +313,10 @@ find_priolist: if (first) execlists->first = &p->node; - return ptr_pack_bits(p, first, 1); + return p; } -static void unwind_wa_tail(struct drm_i915_gem_request *rq) +static void unwind_wa_tail(struct i915_request *rq) { rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); assert_ring_tail_valid(rq->ring, rq->tail); @@ -324,7 +324,7 @@ static void unwind_wa_tail(struct drm_i915_gem_request *rq) static void __unwind_incomplete_requests(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *rq, *rn; + struct i915_request *rq, *rn; struct i915_priolist *uninitialized_var(p); int last_prio = I915_PRIORITY_INVALID; @@ -333,20 +333,16 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) list_for_each_entry_safe_reverse(rq, rn, &engine->timeline->requests, link) { - if (i915_gem_request_completed(rq)) + if (i915_request_completed(rq)) return; - __i915_gem_request_unsubmit(rq); + __i915_request_unsubmit(rq); unwind_wa_tail(rq); - GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID); - if (rq->priotree.priority != last_prio) { - p = lookup_priolist(engine, - &rq->priotree, - rq->priotree.priority); - p = ptr_mask_bits(p, 1); - - last_prio = rq->priotree.priority; + GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); + if (rq_prio(rq) != last_prio) { + last_prio = rq_prio(rq); + p = lookup_priolist(engine, &rq->priotree, last_prio); } list_add(&rq->priotree.link, &p->requests); @@ -365,8 +361,7 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) } static inline void -execlists_context_status_change(struct drm_i915_gem_request *rq, - unsigned long status) +execlists_context_status_change(struct i915_request *rq, unsigned long status) { /* * Only used when GVT-g is enabled now. 
When GVT-g is disabled, @@ -380,14 +375,14 @@ execlists_context_status_change(struct drm_i915_gem_request *rq, } static inline void -execlists_context_schedule_in(struct drm_i915_gem_request *rq) +execlists_context_schedule_in(struct i915_request *rq) { execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(rq->engine); } static inline void -execlists_context_schedule_out(struct drm_i915_gem_request *rq) +execlists_context_schedule_out(struct i915_request *rq) { intel_engine_context_out(rq->engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); @@ -402,7 +397,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) ASSIGN_CTX_PDP(ppgtt, reg_state, 0); } -static u64 execlists_update_context(struct drm_i915_gem_request *rq) +static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = &rq->ctx->engine[rq->engine->id]; struct i915_hw_ppgtt *ppgtt = @@ -422,19 +417,31 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) return ce->lrc_desc; } -static inline void elsp_write(u64 desc, u32 __iomem *elsp) +static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) { - writel(upper_32_bits(desc), elsp); - writel(lower_32_bits(desc), elsp); + if (execlists->ctrl_reg) { + writel(lower_32_bits(desc), execlists->submit_reg + port * 2); + writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1); + } else { + writel(upper_32_bits(desc), execlists->submit_reg); + writel(lower_32_bits(desc), execlists->submit_reg); + } } static void execlists_submit_ports(struct intel_engine_cs *engine) { - struct execlist_port *port = engine->execlists.port; + struct intel_engine_execlists *execlists = &engine->execlists; + struct execlist_port *port = execlists->port; unsigned int n; - for (n = execlists_num_ports(&engine->execlists); n--; ) { - struct drm_i915_gem_request *rq; + /* + * ELSQ note: the submit queue is not cleared after being submitted + * to the HW so we need to make sure we always clean it up. This is + * currently ensured by the fact that we always write the same number + * of elsq entries, keep this in mind before changing the loop below. 
+ */ + for (n = execlists_num_ports(execlists); n--; ) { + struct i915_request *rq; unsigned int count; u64 desc; @@ -447,18 +454,24 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n", engine->name, n, port[n].context_id, count, - rq->global_seqno); + rq->global_seqno, + rq_prio(rq)); } else { GEM_BUG_ON(!n); desc = 0; } - elsp_write(desc, engine->execlists.elsp); + write_desc(execlists, desc, n); } - execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); + + /* we need to manually load the submit queue */ + if (execlists->ctrl_reg) + writel(EL_CTRL_LOAD, execlists->ctrl_reg); + + execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); } static bool ctx_single_port_submission(const struct i915_gem_context *ctx) @@ -479,37 +492,47 @@ static bool can_merge_ctx(const struct i915_gem_context *prev, return true; } -static void port_assign(struct execlist_port *port, - struct drm_i915_gem_request *rq) +static void port_assign(struct execlist_port *port, struct i915_request *rq) { GEM_BUG_ON(rq == port_request(port)); if (port_isset(port)) - i915_gem_request_put(port_request(port)); + i915_request_put(port_request(port)); - port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); + port_set(port, port_pack(i915_request_get(rq), port_count(port))); } static void inject_preempt_context(struct intel_engine_cs *engine) { + struct intel_engine_execlists *execlists = &engine->execlists; struct intel_context *ce = &engine->i915->preempt_context->engine[engine->id]; unsigned int n; - GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID); - GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES)); - - memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES); - ce->ring->tail += WA_TAIL_BYTES; - ce->ring->tail &= (ce->ring->size - 1); - ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail; + GEM_BUG_ON(execlists->preempt_complete_status != + upper_32_bits(ce->lrc_desc)); + GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] & + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | + CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) != + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | + CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)); + /* + * Switch to our empty preempt context so + * the state of the GPU is known (idle). 
+ */ GEM_TRACE("%s\n", engine->name); - for (n = execlists_num_ports(&engine->execlists); --n; ) - elsp_write(0, engine->execlists.elsp); + for (n = execlists_num_ports(execlists); --n; ) + write_desc(execlists, 0, n); + + write_desc(execlists, ce->lrc_desc, n); + + /* we need to manually load the submit queue */ + if (execlists->ctrl_reg) + writel(EL_CTRL_LOAD, execlists->ctrl_reg); - elsp_write(ce->lrc_desc, engine->execlists.elsp); execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); + execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT); } static void execlists_dequeue(struct intel_engine_cs *engine) @@ -518,7 +541,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct execlist_port *port = execlists->port; const struct execlist_port * const last_port = &execlists->port[execlists->port_mask]; - struct drm_i915_gem_request *last = port_request(port); + struct i915_request *last = port_request(port); struct rb_node *rb; bool submit = false; @@ -546,8 +569,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) spin_lock_irq(&engine->timeline->lock); rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); - if (!rb) - goto unlock; if (last) { /* @@ -570,55 +591,49 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) goto unlock; - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915) && - rb_entry(rb, struct i915_priolist, node)->priority > - max(last->priotree.priority, 0)) { - /* - * Switch to our empty preempt context so - * the state of the GPU is known (idle). - */ + if (need_preempt(engine, last, execlists->queue_priority)) { inject_preempt_context(engine); - execlists_set_active(execlists, - EXECLISTS_ACTIVE_PREEMPT); goto unlock; - } else { - /* - * In theory, we could coalesce more requests onto - * the second port (the first port is active, with - * no preemptions pending). However, that means we - * then have to deal with the possible lite-restore - * of the second port (as we submit the ELSP, there - * may be a context-switch) but also we may complete - * the resubmission before the context-switch. Ergo, - * coalescing onto the second port will cause a - * preemption event, but we cannot predict whether - * that will affect port[0] or port[1]. - * - * If the second port is already active, we can wait - * until the next context-switch before contemplating - * new requests. The GPU will be busy and we should be - * able to resubmit the new ELSP before it idles, - * avoiding pipeline bubbles (momentary pauses where - * the driver is unable to keep up the supply of new - * work). - */ - if (port_count(&port[1])) - goto unlock; - - /* WaIdleLiteRestore:bdw,skl - * Apply the wa NOOPs to prevent - * ring:HEAD == req:TAIL as we resubmit the - * request. See gen8_emit_breadcrumb() for - * where we prepare the padding after the - * end of the request. - */ - last->tail = last->wa_tail; } + + /* + * In theory, we could coalesce more requests onto + * the second port (the first port is active, with + * no preemptions pending). However, that means we + * then have to deal with the possible lite-restore + * of the second port (as we submit the ELSP, there + * may be a context-switch) but also we may complete + * the resubmission before the context-switch. Ergo, + * coalescing onto the second port will cause a + * preemption event, but we cannot predict whether + * that will affect port[0] or port[1]. 
+ * + * If the second port is already active, we can wait + * until the next context-switch before contemplating + * new requests. The GPU will be busy and we should be + * able to resubmit the new ELSP before it idles, + * avoiding pipeline bubbles (momentary pauses where + * the driver is unable to keep up the supply of new + * work). However, we have to double check that the + * priorities of the ports haven't been switch. + */ + if (port_count(&port[1])) + goto unlock; + + /* + * WaIdleLiteRestore:bdw,skl + * Apply the wa NOOPs to prevent + * ring:HEAD == rq:TAIL as we resubmit the + * request. See gen8_emit_breadcrumb() for + * where we prepare the padding after the + * end of the request. + */ + last->tail = last->wa_tail; } - do { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - struct drm_i915_gem_request *rq, *rn; + while (rb) { + struct i915_priolist *p = to_priolist(rb); + struct i915_request *rq, *rn; list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { /* @@ -668,8 +683,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } INIT_LIST_HEAD(&rq->priotree.link); - __i915_gem_request_submit(rq); - trace_i915_gem_request_in(rq, port_index(port, execlists)); + __i915_request_submit(rq); + trace_i915_request_in(rq, port_index(port, execlists)); last = rq; submit = true; } @@ -679,11 +694,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); - } while (rb); + } done: + execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; execlists->first = rb; if (submit) port_assign(port, last); + + /* We must always keep the beast fed if we have work piled up */ + GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); + unlock: spin_unlock_irq(&engine->timeline->lock); @@ -691,6 +711,9 @@ unlock: execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); execlists_submit_ports(engine); } + + GEM_BUG_ON(port_isset(execlists->port) && + !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); } void @@ -700,12 +723,17 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) unsigned int num_ports = execlists_num_ports(execlists); while (num_ports-- && port_isset(port)) { - struct drm_i915_gem_request *rq = port_request(port); + struct i915_request *rq = port_request(port); GEM_BUG_ON(!execlists->active); intel_engine_context_out(rq->engine); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED); - i915_gem_request_put(rq); + + execlists_context_status_change(rq, + i915_request_completed(rq) ? + INTEL_CONTEXT_SCHEDULE_OUT : + INTEL_CONTEXT_SCHEDULE_PREEMPTED); + + i915_request_put(rq); memset(port, 0, sizeof(*port)); port++; @@ -715,32 +743,50 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) static void execlists_cancel_requests(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct drm_i915_gem_request *rq, *rn; + struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; - spin_lock_irqsave(&engine->timeline->lock, flags); + GEM_TRACE("%s\n", engine->name); + + /* + * Before we call engine->cancel_requests(), we should have exclusive + * access to the submission state. This is arranged for us by the + * caller disabling the interrupt generation, the tasklet and other + * threads that may then access the same state, giving us a free hand + * to reset state. 
However, we still need to let lockdep be aware that + * we know this state may be accessed in hardirq context, so we + * disable the irq around this manipulation and we want to keep + * the spinlock focused on its duties and not accidentally conflate + * coverage to the submission's irq state. (Similarly, although we + * shouldn't need to disable irq around the manipulation of the + * submission's irq state, we also wish to remind ourselves that + * it is irq state.) + */ + local_irq_save(flags); /* Cancel the requests on the HW and clear the ELSP tracker. */ execlists_cancel_port_requests(execlists); + spin_lock(&engine->timeline->lock); + /* Mark all executing requests as skipped. */ list_for_each_entry(rq, &engine->timeline->requests, link) { GEM_BUG_ON(!rq->global_seqno); - if (!i915_gem_request_completed(rq)) + if (!i915_request_completed(rq)) dma_fence_set_error(&rq->fence, -EIO); } /* Flush the queued requests to the timeline list (for retiring). */ rb = execlists->first; while (rb) { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + struct i915_priolist *p = to_priolist(rb); list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { INIT_LIST_HEAD(&rq->priotree.link); dma_fence_set_error(&rq->fence, -EIO); - __i915_gem_request_submit(rq); + __i915_request_submit(rq); } rb = rb_next(rb); @@ -752,11 +798,13 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Remaining _unready_ requests will be nop'ed when submitted */ - + execlists->queue_priority = INT_MIN; execlists->queue = RB_ROOT; execlists->first = NULL; GEM_BUG_ON(port_isset(execlists->port)); + spin_unlock(&engine->timeline->lock); + /* * The port is checked prior to scheduling a tasklet, but * just in case we have suspended the tasklet to do the @@ -765,7 +813,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) */ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + /* Mark all CS interrupts as complete */ + execlists->active = 0; + + local_irq_restore(flags); } /* @@ -778,8 +829,10 @@ static void execlists_submission_tasklet(unsigned long data) struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port * const port = execlists->port; struct drm_i915_private *dev_priv = engine->i915; + bool fw = false; - /* We can skip acquiring intel_runtime_pm_get() here as it was taken + /* + * We can skip acquiring intel_runtime_pm_get() here as it was taken * on our behalf by the request (see i915_gem_mark_busy()) and it will * not be relinquished until the device is idle (see * i915_gem_idle_work_handler()). As a precaution, we make sure @@ -788,9 +841,8 @@ static void execlists_submission_tasklet(unsigned long data) */ GEM_BUG_ON(!dev_priv->gt.awake); - intel_uncore_forcewake_get(dev_priv, execlists->fw_domains); - - /* Prefer doing test_and_clear_bit() as a two stage operation to avoid + /* + * Prefer doing test_and_clear_bit() as a two stage operation to avoid * imposing the cost of a locked atomic transaction when submitting a * new request (outside of the context-switch interrupt). */ @@ -806,18 +858,17 @@ static void execlists_submission_tasklet(unsigned long data) execlists->csb_head = -1; /* force mmio read of CSB ptrs */ } - /* The write will be ordered by the uncached read (itself - * a memory barrier), so we do not need another in the form - * of a locked instruction. 
The race between the interrupt - * handler and the split test/clear is harmless as we order - * our clear before the CSB read. If the interrupt arrived - * first between the test and the clear, we read the updated - * CSB and clear the bit. If the interrupt arrives as we read - * the CSB or later (i.e. after we had cleared the bit) the bit - * is set and we do a new loop. - */ - __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + /* Clear before reading to catch new interrupts */ + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + smp_mb__after_atomic(); + if (unlikely(execlists->csb_head == -1)) { /* following a reset */ + if (!fw) { + intel_uncore_forcewake_get(dev_priv, + execlists->fw_domains); + fw = true; + } + head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); tail = GEN8_CSB_WRITE_PTR(head); head = GEN8_CSB_READ_PTR(head); @@ -830,13 +881,13 @@ static void execlists_submission_tasklet(unsigned long data) head = execlists->csb_head; tail = READ_ONCE(buf[write_idx]); } - GEM_TRACE("%s cs-irq head=%d [%d], tail=%d [%d]\n", + GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", engine->name, - head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), - tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))))); + head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?", + tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?"); while (head != tail) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int status; unsigned int count; @@ -881,7 +932,7 @@ static void execlists_submission_tasklet(unsigned long data) GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); if (status & GEN8_CTX_STATUS_COMPLETE && - buf[2*head + 1] == PREEMPT_ID) { + buf[2*head + 1] == execlists->preempt_complete_status) { GEM_TRACE("%s preempt-idle\n", engine->name); execlists_cancel_port_requests(execlists); @@ -902,23 +953,28 @@ static void execlists_submission_tasklet(unsigned long data) GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); - /* Check the context/desc id for this event matches */ - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); - rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n", engine->name, port->context_id, count, - rq ? rq->global_seqno : 0); + rq ? rq->global_seqno : 0, + rq ? 
rq_prio(rq) : 0); + + /* Check the context/desc id for this event matches */ + GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); + GEM_BUG_ON(count == 0); if (--count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); GEM_BUG_ON(port_isset(&port[1]) && !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); - GEM_BUG_ON(!i915_gem_request_completed(rq)); + GEM_BUG_ON(!i915_request_completed(rq)); execlists_context_schedule_out(rq); - trace_i915_gem_request_out(rq); - i915_gem_request_put(rq); + trace_i915_request_out(rq); + i915_request_put(rq); + + GEM_TRACE("%s completed ctx=%d\n", + engine->name, port->context_id); execlists_port_complete(execlists, port); } else { @@ -943,21 +999,26 @@ static void execlists_submission_tasklet(unsigned long data) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) execlists_dequeue(engine); - intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); + if (fw) + intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); } -static void insert_request(struct intel_engine_cs *engine, - struct i915_priotree *pt, - int prio) +static void queue_request(struct intel_engine_cs *engine, + struct i915_priotree *pt, + int prio) { - struct i915_priolist *p = lookup_priolist(engine, pt, prio); + list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests); +} - list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests); - if (ptr_unmask_bits(p, 1)) +static void submit_queue(struct intel_engine_cs *engine, int prio) +{ + if (prio > engine->execlists.queue_priority) { + engine->execlists.queue_priority = prio; tasklet_hi_schedule(&engine->execlists.tasklet); + } } -static void execlists_submit_request(struct drm_i915_gem_request *request) +static void execlists_submit_request(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; unsigned long flags; @@ -965,7 +1026,8 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) /* Will be called from irq-context when using foreign fences. 
*/ spin_lock_irqsave(&engine->timeline->lock, flags); - insert_request(engine, &request->priotree, request->priotree.priority); + queue_request(engine, &request->priotree, rq_prio(request)); + submit_queue(engine, rq_prio(request)); GEM_BUG_ON(!engine->execlists.first); GEM_BUG_ON(list_empty(&request->priotree.link)); @@ -973,9 +1035,9 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&engine->timeline->lock, flags); } -static struct drm_i915_gem_request *pt_to_request(struct i915_priotree *pt) +static struct i915_request *pt_to_request(struct i915_priotree *pt) { - return container_of(pt, struct drm_i915_gem_request, priotree); + return container_of(pt, struct i915_request, priotree); } static struct intel_engine_cs * @@ -993,7 +1055,7 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) return engine; } -static void execlists_schedule(struct drm_i915_gem_request *request, int prio) +static void execlists_schedule(struct i915_request *request, int prio) { struct intel_engine_cs *engine; struct i915_dependency *dep, *p; @@ -1002,7 +1064,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) GEM_BUG_ON(prio == I915_PRIORITY_INVALID); - if (i915_gem_request_completed(request)) + if (i915_request_completed(request)) return; if (prio <= READ_ONCE(request->priotree.priority)) @@ -1014,13 +1076,14 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) stack.signaler = &request->priotree; list_add(&stack.dfs_link, &dfs); - /* Recursively bump all dependent priorities to match the new request. + /* + * Recursively bump all dependent priorities to match the new request. * * A naive approach would be to use recursion: * static void update_priorities(struct i915_priotree *pt, prio) { * list_for_each_entry(dep, &pt->signalers_list, signal_link) * update_priorities(dep->signal, prio) - * insert_request(pt); + * queue_request(pt); * } * but that may have unlimited recursion depth and so runs a very * real risk of overunning the kernel stack. Instead, we build @@ -1031,27 +1094,29 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) * end result is a topological list of requests in reverse order, the * last element in the list is the request we must execute first. */ - list_for_each_entry_safe(dep, p, &dfs, dfs_link) { + list_for_each_entry(dep, &dfs, dfs_link) { struct i915_priotree *pt = dep->signaler; - /* Within an engine, there can be no cycle, but we may + /* + * Within an engine, there can be no cycle, but we may * refer to the same dependency chain multiple times * (redundant dependencies are not eliminated) and across * engines. */ list_for_each_entry(p, &pt->signalers_list, signal_link) { - if (i915_gem_request_completed(pt_to_request(p->signaler))) + GEM_BUG_ON(p == dep); /* no cycles! */ + + if (i915_priotree_signaled(p->signaler)) continue; GEM_BUG_ON(p->signaler->priority < pt->priority); if (prio > READ_ONCE(p->signaler->priority)) list_move_tail(&p->dfs_link, &dfs); } - - list_safe_reset_next(dep, p, dfs_link); } - /* If we didn't need to bump any existing priorities, and we haven't + /* + * If we didn't need to bump any existing priorities, and we haven't * yet submitted this request (i.e. there is no potential race with * execlists_submit_request()), we can set our own priority and skip * acquiring the engine locks. 
@@ -1081,8 +1146,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) pt->priority = prio; if (!list_empty(&pt->link)) { __list_del_entry(&pt->link); - insert_request(engine, pt, prio); + queue_request(engine, pt, prio); } + submit_queue(engine, prio); } spin_unlock_irq(&engine->timeline->lock); @@ -1125,11 +1191,9 @@ execlists_context_pin(struct intel_engine_cs *engine, goto out; GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ - if (!ce->state) { - ret = execlists_context_deferred_alloc(ctx, engine); - if (ret) - goto err; - } + ret = execlists_context_deferred_alloc(ctx, engine); + if (ret) + goto err; GEM_BUG_ON(!ce->state); ret = __context_pin(ctx, ce->state); @@ -1186,7 +1250,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine, i915_gem_context_put(ctx); } -static int execlists_request_alloc(struct drm_i915_gem_request *request) +static int execlists_request_alloc(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; @@ -1363,6 +1427,40 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) return batch; } +static u32 * +gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) +{ + int i; + + /* + * WaPipeControlBefore3DStateSamplePattern: cnl + * + * Ensure the engine is idle prior to programming a + * 3DSTATE_SAMPLE_PATTERN during a context restore. + */ + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_CS_STALL, + 0); + /* + * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for + * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in + * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is + * confusing. Since gen8_emit_pipe_control() already advances the + * batch by 6 dwords, we advance the other 10 here, completing a + * cacheline. It's not clear if the workaround requires this padding + * before other commands, or if it's just the regular padding we would + * already have for the workaround bb, so leave it here for now. 
+ */ + for (i = 0; i < 10; i++) + *batch++ = MI_NOOP; + + /* Pad to end of cacheline */ + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; + + return batch; +} + #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE) static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) @@ -1411,12 +1509,14 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) unsigned int i; int ret; - if (WARN_ON(engine->id != RCS || !engine->scratch)) + if (GEM_WARN_ON(engine->id != RCS)) return -EINVAL; switch (INTEL_GEN(engine->i915)) { case 10: - return 0; + wa_bb_fn[0] = gen10_init_indirectctx_bb; + wa_bb_fn[1] = NULL; + break; case 9: wa_bb_fn[0] = gen9_init_indirectctx_bb; wa_bb_fn[1] = NULL; @@ -1446,7 +1546,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) */ for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) { wa_bb[i]->offset = batch_ptr - batch; - if (WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, CACHELINE_BYTES))) { + if (GEM_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, + CACHELINE_BYTES))) { ret = -EINVAL; break; } @@ -1472,47 +1573,48 @@ static u8 gtiir[] = { [VECS] = 3, }; -static int gen8_init_common_ring(struct intel_engine_cs *engine) +static void enable_execlists(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - struct intel_engine_execlists * const execlists = &engine->execlists; - int ret; - ret = intel_mocs_init_engine(engine); - if (ret) - return ret; + I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff); - intel_engine_reset_breadcrumbs(engine); - intel_engine_init_hangcheck(engine); + /* + * Make sure we're not enabling the new 12-deep CSB + * FIFO as that requires a slightly updated handling + * in the ctx switch irq. Since we're currently only + * using only 2 elements of the enhanced execlists the + * deeper FIFO it's not needed and it's not worth adding + * more statements to the irq handler to support it. + */ + if (INTEL_GEN(dev_priv) >= 11) + I915_WRITE(RING_MODE_GEN7(engine), + _MASKED_BIT_DISABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); + else + I915_WRITE(RING_MODE_GEN7(engine), + _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); - I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff); - I915_WRITE(RING_MODE_GEN7(engine), - _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); I915_WRITE(RING_HWS_PGA(engine->mmio_base), engine->status_page.ggtt_offset); POSTING_READ(RING_HWS_PGA(engine->mmio_base)); - DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name); + /* Following the reset, we need to reload the CSB read/write pointers */ + engine->execlists.csb_head = -1; +} - GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); +static int gen8_init_common_ring(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + int ret; - /* - * Clear any pending interrupt state. - * - * We do it twice out of paranoia that some of the IIR are double - * buffered, and if we only reset it once there may still be - * an interrupt pending. 
- */ - I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), - GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); - I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), - GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - execlists->csb_head = -1; - execlists->active = 0; + ret = intel_mocs_init_engine(engine); + if (ret) + return ret; + + intel_engine_reset_breadcrumbs(engine); + intel_engine_init_hangcheck(engine); - execlists->elsp = - dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + enable_execlists(engine); /* After a GPU reset, we may have requests to replay */ if (execlists->first) @@ -1554,8 +1656,33 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) return init_workarounds_ring(engine); } +static void reset_irq(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int i; + + GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); + + /* + * Clear any pending interrupt state. + * + * We do it twice out of paranoia that some of the IIR are double + * buffered, and if we only reset it once there may still be + * an interrupt pending. + */ + for (i = 0; i < 2; i++) { + I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), + GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); + POSTING_READ(GEN8_GT_IIR(gtiir[engine->id])); + } + GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) & + (GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift)); + + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); +} + static void reset_common_ring(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request) + struct i915_request *request) { struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_context *ce; @@ -1563,7 +1690,11 @@ static void reset_common_ring(struct intel_engine_cs *engine, GEM_TRACE("%s seqno=%x\n", engine->name, request ? request->global_seqno : 0); - spin_lock_irqsave(&engine->timeline->lock, flags); + + /* See execlists_cancel_requests() for the irq/spinlock split. */ + local_irq_save(flags); + + reset_irq(engine); /* * Catch up with any missed context-switch interrupts. @@ -1577,11 +1708,17 @@ static void reset_common_ring(struct intel_engine_cs *engine, execlists_cancel_port_requests(execlists); /* Push back any incomplete requests for replay after the reset. */ + spin_lock(&engine->timeline->lock); __unwind_incomplete_requests(engine); + spin_unlock(&engine->timeline->lock); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + /* Mark all CS interrupts as complete */ + execlists->active = 0; - /* If the request was innocent, we leave the request in the ELSP + local_irq_restore(flags); + + /* + * If the request was innocent, we leave the request in the ELSP * and will try to replay it on restarting. The context image may * have been corrupted by the reset, in which case we may have * to service a new GPU hang, but more likely we can continue on @@ -1594,7 +1731,8 @@ static void reset_common_ring(struct intel_engine_cs *engine, if (!request || request->fence.error != -EIO) return; - /* We want a simple context + ring to execute the breadcrumb update. + /* + * We want a simple context + ring to execute the breadcrumb update. * We cannot rely on the context being intact across the GPU hang, * so clear it and rebuild just what we need for the breadcrumb. 
* All pending requests for this context will be zapped, and any @@ -1617,15 +1755,15 @@ static void reset_common_ring(struct intel_engine_cs *engine, unwind_wa_tail(request); } -static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) +static int intel_logical_ring_emit_pdps(struct i915_request *rq) { - struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; - struct intel_engine_cs *engine = req->engine; + struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; + struct intel_engine_cs *engine = rq->engine; const int num_lri_cmds = GEN8_3LVL_PDPES * 2; u32 *cs; int i; - cs = intel_ring_begin(req, num_lri_cmds * 2 + 2); + cs = intel_ring_begin(rq, num_lri_cmds * 2 + 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1640,12 +1778,12 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } -static int gen8_emit_bb_start(struct drm_i915_gem_request *req, +static int gen8_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, const unsigned int flags) { @@ -1658,18 +1796,18 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, * it is unsafe in case of lite-restore (because the ctx is * not idle). PML4 is allocated during ppgtt init so this is * not needed in 48-bit.*/ - if (req->ctx->ppgtt && - (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings) && - !i915_vm_is_48bit(&req->ctx->ppgtt->base) && - !intel_vgpu_active(req->i915)) { - ret = intel_logical_ring_emit_pdps(req); + if (rq->ctx->ppgtt && + (intel_engine_flag(rq->engine) & rq->ctx->ppgtt->pd_dirty_rings) && + !i915_vm_is_48bit(&rq->ctx->ppgtt->base) && + !intel_vgpu_active(rq->i915)) { + ret = intel_logical_ring_emit_pdps(rq); if (ret) return ret; - req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine); + rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine); } - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1698,7 +1836,7 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -1717,7 +1855,7 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) I915_WRITE_IMR(engine, ~engine->irq_keep_mask); } -static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) +static int gen8_emit_flush(struct i915_request *request, u32 mode) { u32 cmd, *cs; @@ -1749,7 +1887,7 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) return 0; } -static int gen8_emit_flush_render(struct drm_i915_gem_request *request, +static int gen8_emit_flush_render(struct i915_request *request, u32 mode) { struct intel_engine_cs *engine = request->engine; @@ -1824,7 +1962,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). */ -static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) +static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs) { /* Ensure there's always at least one preemption point per-request. 
*/ *cs++ = MI_ARB_CHECK; @@ -1832,7 +1970,7 @@ static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) request->wa_tail = intel_ring_offset(request, cs); } -static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) +static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); @@ -1848,8 +1986,7 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) } static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; -static void gen8_emit_breadcrumb_rcs(struct drm_i915_gem_request *request, - u32 *cs) +static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) { /* We're using qword write, seqno should be aligned to 8 bytes. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); @@ -1865,15 +2002,15 @@ static void gen8_emit_breadcrumb_rcs(struct drm_i915_gem_request *request, } static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS; -static int gen8_init_rcs_context(struct drm_i915_gem_request *req) +static int gen8_init_rcs_context(struct i915_request *rq) { int ret; - ret = intel_ring_workarounds_emit(req); + ret = intel_ring_workarounds_emit(rq); if (ret) return ret; - ret = intel_rcs_context_init_mocs(req); + ret = intel_rcs_context_init_mocs(rq); /* * Failing to program the MOCS is non-fatal.The system will not * run at peak performance. So generate an error and carry on. @@ -1881,7 +2018,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) if (ret) DRM_ERROR("MOCS failed to program: expect performance issues.\n"); - return i915_gem_render_state_emit(req); + return i915_gem_render_state_emit(rq); } /** @@ -1912,6 +2049,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); lrc_destroy_wa_ctx(engine); + engine->i915 = NULL; dev_priv->engine[engine->id] = NULL; kfree(engine); @@ -1928,6 +2066,12 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) engine->unpark = NULL; engine->flags |= I915_ENGINE_SUPPORTS_STATS; + + engine->i915->caps.scheduler = + I915_SCHEDULER_CAP_ENABLED | + I915_SCHEDULER_CAP_PRIORITY; + if (engine->i915->preempt_context) + engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION; } static void @@ -1948,8 +2092,17 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->set_default_submission = execlists_set_default_submission; - engine->irq_enable = gen8_logical_ring_enable_irq; - engine->irq_disable = gen8_logical_ring_disable_irq; + if (INTEL_GEN(engine->i915) < 11) { + engine->irq_enable = gen8_logical_ring_enable_irq; + engine->irq_disable = gen8_logical_ring_disable_irq; + } else { + /* + * TODO: On Gen11 interrupt masks need to be clear + * to allow C6 entry. Keep interrupts enabled at + * and take the hit of generating extra interrupts + * until a more refined solution exists. 
+ */ + } engine->emit_bb_start = gen8_emit_bb_start; } @@ -2001,6 +2154,21 @@ static int logical_ring_init(struct intel_engine_cs *engine) if (ret) goto error; + if (HAS_LOGICAL_RING_ELSQ(engine->i915)) { + engine->execlists.submit_reg = engine->i915->regs + + i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine)); + engine->execlists.ctrl_reg = engine->i915->regs + + i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine)); + } else { + engine->execlists.submit_reg = engine->i915->regs + + i915_mmio_reg_offset(RING_ELSP(engine)); + } + + engine->execlists.preempt_complete_status = ~0u; + if (engine->i915->preempt_context) + engine->execlists.preempt_complete_status = + upper_32_bits(engine->i915->preempt_context->engine[engine->id].lrc_desc); + return 0; error: @@ -2080,7 +2248,7 @@ make_rpcs(struct drm_i915_private *dev_priv) if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { rpcs |= GEN8_RPCS_SS_CNT_ENABLE; - rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) << + rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) << GEN8_RPCS_SS_CNT_SHIFT; rpcs |= GEN8_RPCS_ENABLE; } @@ -2104,6 +2272,10 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) default: MISSING_CASE(INTEL_GEN(engine->i915)); /* fall through */ + case 11: + indirect_ctx_offset = + GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + break; case 10: indirect_ctx_offset = GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; @@ -2142,6 +2314,8 @@ static void execlists_init_reg_state(u32 *regs, MI_LRI_FORCE_POSTED; CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine), + _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | + CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT) | _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | (HAS_RESOURCE_STREAMER(dev_priv) ? CTX_CTRL_RS_CTX_ENABLE : 0))); @@ -2261,6 +2435,10 @@ populate_lr_context(struct i915_gem_context *ctx, if (!engine->default_state) regs[CTX_CONTEXT_CONTROL + 1] |= _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + if (ctx == ctx->i915->preempt_context && INTEL_GEN(engine->i915) < 11) + regs[CTX_CONTEXT_CONTROL + 1] |= + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | + CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT); i915_gem_object_unpin_map(ctx_obj); @@ -2277,7 +2455,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_ring *ring; int ret; - WARN_ON(ce->state); + if (ce->state) + return 0; context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); |
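For reference, the Gen11 context-descriptor layout documented in the comment near the top of this diff (SW context ID in bits 37-47, engine instance in bits 48-53, SW counter in bits 55-60, engine class in bits 61-63) can be illustrated with the small, self-contained sketch below. The constant and function names here are placeholders chosen for illustration only; the driver itself packs these fields with its GEN11_*_SHIFT macros inside intel_lr_context_descriptor_update().

/*
 * Illustrative sketch only: packs the Gen11 upper-dword descriptor fields
 * the way the comment in this diff describes. Placeholder names, not the
 * driver's actual macros.
 */
#include <stdint.h>

#define SW_CTX_ID_SHIFT		37	/* bits 37-47: SW context ID */
#define ENGINE_INSTANCE_SHIFT	48	/* bits 48-53: engine instance */
#define SW_COUNTER_SHIFT	55	/* bits 55-60: SW counter */
#define ENGINE_CLASS_SHIFT	61	/* bits 61-63: engine class */

static uint64_t pack_gen11_descriptor(uint64_t lower,	/* bits 0-31: flags + LRCA, as on Gen8 */
				      uint32_t sw_ctx_id,
				      uint32_t engine_instance,
				      uint32_t sw_counter,
				      uint32_t engine_class)
{
	uint64_t desc = lower;

	desc |= (uint64_t)sw_ctx_id << SW_CTX_ID_SHIFT;
	desc |= (uint64_t)engine_instance << ENGINE_INSTANCE_SHIFT;
	desc |= (uint64_t)sw_counter << SW_COUNTER_SHIFT;
	desc |= (uint64_t)engine_class << ENGINE_CLASS_SHIFT;

	/* SW context ID, engine info and SW counter together form the
	 * unique Context ID per lrc, as noted in the comment above. */
	return desc;
}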