88 files changed, 4768 insertions, 3429 deletions
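The KVMGT changes below expose a per-vGPU EDID through a VFIO device-specific region (VFIO_REGION_TYPE_GFX / VFIO_REGION_SUBTYPE_GFX_EDID): accesses below edid_offset hit the control registers handled by handle_edid_regs(), writes at edid_offset and beyond update the EDID blob, and flipping link_state drives intel_vgpu_emulate_hotplug(). The following userspace sketch is not part of the patch; it only illustrates the access sequence those handlers expect. The device fd and the region's file offset are assumed to have been discovered beforehand via VFIO_DEVICE_GET_REGION_INFO (not shown), edid/edid_len stand in for a caller-supplied 128-byte EDID block, and the struct and link-state constants come from <linux/vfio.h>.

/*
 * Illustrative userspace sketch, not part of the patch: push a new EDID
 * and toggle the virtual link so GVT-g raises a hotplug interrupt.
 * Assumes device_fd and region_offset were obtained elsewhere via
 * VFIO_DEVICE_GET_REGION_INFO; all register accesses are 4 bytes wide,
 * as required by the handle_edid_regs() hunk in kvmgt.c below.
 */
#include <linux/vfio.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>

static int vfio_gfx_set_edid(int device_fd, off_t region_offset,
			     const void *edid, uint32_t edid_len)
{
	uint32_t edid_offset, edid_max_size, state;

	/* Control registers sit at the start of the region. */
	if (pread(device_fd, &edid_max_size, 4, region_offset +
		  offsetof(struct vfio_region_gfx_edid, edid_max_size)) != 4)
		return -1;
	if (pread(device_fd, &edid_offset, 4, region_offset +
		  offsetof(struct vfio_region_gfx_edid, edid_offset)) != 4)
		return -1;
	if (edid_len > edid_max_size)
		return -1;

	/* Take the link down before touching the blob. */
	state = VFIO_DEVICE_GFX_LINK_STATE_DOWN;
	if (pwrite(device_fd, &state, 4, region_offset +
		   offsetof(struct vfio_region_gfx_edid, link_state)) != 4)
		return -1;

	/* Publish the new size, then write the blob at edid_offset. */
	if (pwrite(device_fd, &edid_len, 4, region_offset +
		   offsetof(struct vfio_region_gfx_edid, edid_size)) != 4)
		return -1;
	if (pwrite(device_fd, edid, edid_len,
		   region_offset + edid_offset) != (ssize_t)edid_len)
		return -1;

	/* Link up validates the blob and triggers the virtual hotplug. */
	state = VFIO_DEVICE_GFX_LINK_STATE_UP;
	return pwrite(device_fd, &state, 4, region_offset +
		      offsetof(struct vfio_region_gfx_edid, link_state)) == 4 ? 0 : -1;
}

This mirrors the order in which a VMM is expected to drive the region: link down, update size and blob, then link up.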
diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 968ca7c91ad8..d5d34d0c79c7 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -474,10 +474,9 @@ EXPORT_SYMBOL(drm_plane_create_color_properties);   *   * Returns 0 on success, -EINVAL on failure.   */ -int drm_color_lut_check(struct drm_property_blob *lut, -			uint32_t tests) +int drm_color_lut_check(const struct drm_property_blob *lut, u32 tests)  { -	struct drm_color_lut *entry; +	const struct drm_color_lut *entry;  	int i;  	if (!lut || !tests) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 8300efe60fe1..210d0e8777b6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -22,6 +22,7 @@ subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable)  subdir-ccflags-y += $(call cc-disable-warning, sign-compare)  subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized)  subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) +subdir-ccflags-y += $(call cc-disable-warning, uninitialized)  subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror  # Fine grained warnings disable diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 4f25b6b7728e..035479e273be 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -342,6 +342,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,  	port->dpcd->data_valid = true;  	port->dpcd->data[DPCD_SINK_COUNT] = 0x1;  	port->type = type; +	port->id = resolution;  	emulate_monitor_status_change(vgpu); @@ -445,6 +446,36 @@ void intel_gvt_emulate_vblank(struct intel_gvt *gvt)  }  /** + * intel_vgpu_emulate_hotplug - trigger hotplug event for vGPU + * @vgpu: a vGPU + * @conncted: link state + * + * This function is used to trigger hotplug interrupt for vGPU + * + */ +void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected) +{ +	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + +	/* TODO: add more platforms support */ +	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { +		if (connected) { +			vgpu_vreg_t(vgpu, SFUSE_STRAP) |= +				SFUSE_STRAP_DDID_DETECTED; +			vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; +		} else { +			vgpu_vreg_t(vgpu, SFUSE_STRAP) &= +				~SFUSE_STRAP_DDID_DETECTED; +			vgpu_vreg_t(vgpu, SDEISR) &= ~SDE_PORTD_HOTPLUG_CPT; +		} +		vgpu_vreg_t(vgpu, SDEIIR) |= SDE_PORTD_HOTPLUG_CPT; +		vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= +				PORTD_HOTPLUG_STATUS_MASK; +		intel_vgpu_trigger_virtual_event(vgpu, DP_D_HOTPLUG); +	} +} + +/**   * intel_vgpu_clean_display - clean vGPU virtual display emulation   * @vgpu: a vGPU   * diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index ea7c1c525b8c..a87f33e6a23c 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -146,18 +146,19 @@ enum intel_vgpu_port_type {  	GVT_PORT_MAX  }; +enum intel_vgpu_edid { +	GVT_EDID_1024_768, +	GVT_EDID_1920_1200, +	GVT_EDID_NUM, +}; +  struct intel_vgpu_port {  	/* per display EDID information */  	struct intel_vgpu_edid_data *edid;  	/* per display DPCD information */  	struct intel_vgpu_dpcd_data *dpcd;  	int type; -}; - -enum intel_vgpu_edid { -	GVT_EDID_1024_768, -	GVT_EDID_1920_1200, -	GVT_EDID_NUM, +	enum intel_vgpu_edid id;  };  static inline char *vgpu_edid_str(enum intel_vgpu_edid id) @@ -172,6 +173,30 @@ static inline char *vgpu_edid_str(enum 
intel_vgpu_edid id)  	}  } +static inline unsigned int vgpu_edid_xres(enum intel_vgpu_edid id) +{ +	switch (id) { +	case GVT_EDID_1024_768: +		return 1024; +	case GVT_EDID_1920_1200: +		return 1920; +	default: +		return 0; +	} +} + +static inline unsigned int vgpu_edid_yres(enum intel_vgpu_edid id) +{ +	switch (id) { +	case GVT_EDID_1024_768: +		return 768; +	case GVT_EDID_1920_1200: +		return 1200; +	default: +		return 0; +	} +} +  void intel_gvt_emulate_vblank(struct intel_gvt *gvt);  void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt); diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 4e8947f33bd0..43f4242062dd 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -185,6 +185,7 @@ static const struct intel_gvt_ops intel_gvt_ops = {  	.vgpu_query_plane = intel_vgpu_query_plane,  	.vgpu_get_dmabuf = intel_vgpu_get_dmabuf,  	.write_protect_handler = intel_vgpu_page_track_handler, +	.emulate_hotplug = intel_vgpu_emulate_hotplug,  };  static void init_device_info(struct intel_gvt *gvt) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index fb9cc980e120..8bce09de4b82 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -536,6 +536,8 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset,  int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,  		void *p_data, unsigned int bytes); +void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected); +  static inline u64 intel_vgpu_get_bar_gpa(struct intel_vgpu *vgpu, int bar)  {  	/* We are 64bit bar. */ @@ -577,6 +579,7 @@ struct intel_gvt_ops {  	int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int);  	int (*write_protect_handler)(struct intel_vgpu *, u64, void *,  				     unsigned int); +	void (*emulate_hotplug)(struct intel_vgpu *vgpu, bool connected);  }; diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index 50798868ab15..5e01cc8d9b16 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -67,6 +67,7 @@ struct intel_gvt_mpt {  	int (*set_trap_area)(unsigned long handle, u64 start, u64 end,  			     bool map);  	int (*set_opregion)(void *vgpu); +	int (*set_edid)(void *vgpu, int port_num);  	int (*get_vfio_device)(void *vgpu);  	void (*put_vfio_device)(void *vgpu);  	bool (*is_valid_gfn)(unsigned long handle, unsigned long gfn); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index f8d44e8f86a6..63eef86a2a85 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -57,6 +57,8 @@ static const struct intel_gvt_ops *intel_gvt_ops;  #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)  #define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) +#define EDID_BLOB_OFFSET (PAGE_SIZE/2) +  #define OPREGION_SIGNATURE "IntelGraphicsMem"  struct vfio_region; @@ -76,6 +78,11 @@ struct vfio_region {  	void				*data;  }; +struct vfio_edid_region { +	struct vfio_region_gfx_edid vfio_edid_regs; +	void *edid_blob; +}; +  struct kvmgt_pgfn {  	gfn_t gfn;  	struct hlist_node hnode; @@ -427,6 +434,111 @@ static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {  	.release = intel_vgpu_reg_release_opregion,  }; +static int handle_edid_regs(struct intel_vgpu *vgpu, +			struct vfio_edid_region *region, char *buf, +			size_t count, u16 offset, bool is_write) +{ +	struct 
vfio_region_gfx_edid *regs = &region->vfio_edid_regs; +	unsigned int data; + +	if (offset + count > sizeof(*regs)) +		return -EINVAL; + +	if (count != 4) +		return -EINVAL; + +	if (is_write) { +		data = *((unsigned int *)buf); +		switch (offset) { +		case offsetof(struct vfio_region_gfx_edid, link_state): +			if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) { +				if (!drm_edid_block_valid( +					(u8 *)region->edid_blob, +					0, +					true, +					NULL)) { +					gvt_vgpu_err("invalid EDID blob\n"); +					return -EINVAL; +				} +				intel_gvt_ops->emulate_hotplug(vgpu, true); +			} else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN) +				intel_gvt_ops->emulate_hotplug(vgpu, false); +			else { +				gvt_vgpu_err("invalid EDID link state %d\n", +					regs->link_state); +				return -EINVAL; +			} +			regs->link_state = data; +			break; +		case offsetof(struct vfio_region_gfx_edid, edid_size): +			if (data > regs->edid_max_size) { +				gvt_vgpu_err("EDID size is bigger than %d!\n", +					regs->edid_max_size); +				return -EINVAL; +			} +			regs->edid_size = data; +			break; +		default: +			/* read-only regs */ +			gvt_vgpu_err("write read-only EDID region at offset %d\n", +				offset); +			return -EPERM; +		} +	} else { +		memcpy(buf, (char *)regs + offset, count); +	} + +	return count; +} + +static int handle_edid_blob(struct vfio_edid_region *region, char *buf, +			size_t count, u16 offset, bool is_write) +{ +	if (offset + count > region->vfio_edid_regs.edid_size) +		return -EINVAL; + +	if (is_write) +		memcpy(region->edid_blob + offset, buf, count); +	else +		memcpy(buf, region->edid_blob + offset, count); + +	return count; +} + +static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf, +		size_t count, loff_t *ppos, bool iswrite) +{ +	int ret; +	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - +			VFIO_PCI_NUM_REGIONS; +	struct vfio_edid_region *region = +		(struct vfio_edid_region *)vgpu->vdev.region[i].data; +	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + +	if (pos < region->vfio_edid_regs.edid_offset) { +		ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite); +	} else { +		pos -= EDID_BLOB_OFFSET; +		ret = handle_edid_blob(region, buf, count, pos, iswrite); +	} + +	if (ret < 0) +		gvt_vgpu_err("failed to access EDID region\n"); + +	return ret; +} + +static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu, +					struct vfio_region *region) +{ +	kfree(region->data); +} + +static const struct intel_vgpu_regops intel_vgpu_regops_edid = { +	.rw = intel_vgpu_reg_rw_edid, +	.release = intel_vgpu_reg_release_edid, +}; +  static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,  		unsigned int type, unsigned int subtype,  		const struct intel_vgpu_regops *ops, @@ -493,6 +605,36 @@ static int kvmgt_set_opregion(void *p_vgpu)  	return ret;  } +static int kvmgt_set_edid(void *p_vgpu, int port_num) +{ +	struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; +	struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); +	struct vfio_edid_region *base; +	int ret; + +	base = kzalloc(sizeof(*base), GFP_KERNEL); +	if (!base) +		return -ENOMEM; + +	/* TODO: Add multi-port and EDID extension block support */ +	base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET; +	base->vfio_edid_regs.edid_max_size = EDID_SIZE; +	base->vfio_edid_regs.edid_size = EDID_SIZE; +	base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id); +	base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id); +	base->edid_blob = port->edid->edid_block; + +	ret = intel_vgpu_register_reg(vgpu, +			
VFIO_REGION_TYPE_GFX, +			VFIO_REGION_SUBTYPE_GFX_EDID, +			&intel_vgpu_regops_edid, EDID_SIZE, +			VFIO_REGION_INFO_FLAG_READ | +			VFIO_REGION_INFO_FLAG_WRITE | +			VFIO_REGION_INFO_FLAG_CAPS, base); + +	return ret; +} +  static void kvmgt_put_vfio_device(void *vgpu)  {  	if (WARN_ON(!((struct intel_vgpu *)vgpu)->vdev.vfio_device)) @@ -1874,6 +2016,7 @@ static struct intel_gvt_mpt kvmgt_mpt = {  	.dma_map_guest_page = kvmgt_dma_map_guest_page,  	.dma_unmap_guest_page = kvmgt_dma_unmap_guest_page,  	.set_opregion = kvmgt_set_opregion, +	.set_edid = kvmgt_set_edid,  	.get_vfio_device = kvmgt_get_vfio_device,  	.put_vfio_device = kvmgt_put_vfio_device,  	.is_valid_gfn = kvmgt_is_valid_gfn, diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 9b4225d44243..5d8b8f228d8f 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -314,6 +314,23 @@ static inline int intel_gvt_hypervisor_set_opregion(struct intel_vgpu *vgpu)  }  /** + * intel_gvt_hypervisor_set_edid - Set EDID region for guest + * @vgpu: a vGPU + * @port_num: display port number + * + * Returns: + * Zero on success, negative error code if failed. + */ +static inline int intel_gvt_hypervisor_set_edid(struct intel_vgpu *vgpu, +						int port_num) +{ +	if (!intel_gvt_host.mpt->set_edid) +		return 0; + +	return intel_gvt_host.mpt->set_edid(vgpu, port_num); +} + +/**   * intel_gvt_hypervisor_get_vfio_device - increase vfio device ref count   * @vgpu: a vGPU   * diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index e1c860f80eb0..720e2b10adaa 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -428,6 +428,12 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,  	if (ret)  		goto out_clean_sched_policy; +	/*TODO: add more platforms support */ +	if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) +		ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D); +	if (ret) +		goto out_clean_sched_policy; +  	return vgpu;  out_clean_sched_policy: diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e80903114ca8..fa2c226fc779 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -160,14 +160,14 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)  		   obj->mm.madv == I915_MADV_DONTNEED ? 
" purgeable" : "");  	if (obj->base.name)  		seq_printf(m, " (name: %d)", obj->base.name); -	list_for_each_entry(vma, &obj->vma_list, obj_link) { +	list_for_each_entry(vma, &obj->vma.list, obj_link) {  		if (i915_vma_is_pinned(vma))  			pin_count++;  	}  	seq_printf(m, " (pinned x %d)", pin_count);  	if (obj->pin_global)  		seq_printf(m, " (global)"); -	list_for_each_entry(vma, &obj->vma_list, obj_link) { +	list_for_each_entry(vma, &obj->vma.list, obj_link) {  		if (!drm_mm_node_allocated(&vma->node))  			continue; @@ -323,7 +323,7 @@ static int per_file_stats(int id, void *ptr, void *data)  	if (obj->base.name || obj->base.dma_buf)  		stats->shared += obj->base.size; -	list_for_each_entry(vma, &obj->vma_list, obj_link) { +	list_for_each_entry(vma, &obj->vma.list, obj_link) {  		if (!drm_mm_node_allocated(&vma->node))  			continue; @@ -1285,8 +1285,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)  		seq_puts(m, "Wedged\n");  	if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))  		seq_puts(m, "Reset in progress: struct_mutex backoff\n"); -	if (test_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags)) -		seq_puts(m, "Reset in progress: reset handoff to waiter\n");  	if (waitqueue_active(&dev_priv->gpu_error.wait_queue))  		seq_puts(m, "Waiter holding struct mutex\n");  	if (waitqueue_active(&dev_priv->gpu_error.reset_queue)) @@ -1318,37 +1316,16 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)  	seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake));  	for_each_engine(engine, dev_priv, id) { -		struct intel_breadcrumbs *b = &engine->breadcrumbs; -		struct rb_node *rb; -  		seq_printf(m, "%s:\n", engine->name); -		seq_printf(m, "\tseqno = %x [current %x, last %x]\n", +		seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n",  			   engine->hangcheck.seqno, seqno[id], -			   intel_engine_last_submit(engine)); -		seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s, wedged? 
%s\n", -			   yesno(intel_engine_has_waiter(engine)), -			   yesno(test_bit(engine->id, -					  &dev_priv->gpu_error.missed_irq_rings)), -			   yesno(engine->hangcheck.stalled), -			   yesno(engine->hangcheck.wedged)); - -		spin_lock_irq(&b->rb_lock); -		for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { -			struct intel_wait *w = rb_entry(rb, typeof(*w), node); - -			seq_printf(m, "\t%s [%d] waiting for %x\n", -				   w->tsk->comm, w->tsk->pid, w->seqno); -		} -		spin_unlock_irq(&b->rb_lock); +			   intel_engine_last_submit(engine), +			   jiffies_to_msecs(jiffies - +					    engine->hangcheck.action_timestamp));  		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",  			   (long long)engine->hangcheck.acthd,  			   (long long)acthd[id]); -		seq_printf(m, "\taction = %s(%d) %d ms ago\n", -			   hangcheck_action_to_str(engine->hangcheck.action), -			   engine->hangcheck.action, -			   jiffies_to_msecs(jiffies - -					    engine->hangcheck.action_timestamp));  		if (engine->id == RCS) {  			seq_puts(m, "\tinstdone read =\n"); @@ -2029,18 +2006,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data)  	return 0;  } -static int count_irq_waiters(struct drm_i915_private *i915) -{ -	struct intel_engine_cs *engine; -	enum intel_engine_id id; -	int count = 0; - -	for_each_engine(engine, i915, id) -		count += intel_engine_has_waiter(engine); - -	return count; -} -  static const char *rps_power_to_str(unsigned int power)  {  	static const char * const strings[] = { @@ -2080,7 +2045,6 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)  	seq_printf(m, "RPS enabled? %d\n", rps->enabled);  	seq_printf(m, "GPU busy? %s [%d requests]\n",  		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests); -	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));  	seq_printf(m, "Boosts outstanding? %d\n",  		   atomic_read(&rps->num_waiters));  	seq_printf(m, "Interactive? 
%d\n", READ_ONCE(rps->power.interactive)); @@ -3912,8 +3876,6 @@ static int  i915_wedged_set(void *data, u64 val)  {  	struct drm_i915_private *i915 = data; -	struct intel_engine_cs *engine; -	unsigned int tmp;  	/*  	 * There is no safeguard against this debugfs entry colliding @@ -3926,18 +3888,8 @@ i915_wedged_set(void *data, u64 val)  	if (i915_reset_backoff(&i915->gpu_error))  		return -EAGAIN; -	for_each_engine_masked(engine, i915, val, tmp) { -		engine->hangcheck.seqno = intel_engine_get_seqno(engine); -		engine->hangcheck.stalled = true; -	} -  	i915_handle_error(i915, val, I915_ERROR_CAPTURE,  			  "Manually set wedged engine mask = %llx", val); - -	wait_on_bit(&i915->gpu_error.flags, -		    I915_RESET_HANDOFF, -		    TASK_UNINTERRUPTIBLE); -  	return 0;  } @@ -3945,94 +3897,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,  			i915_wedged_get, i915_wedged_set,  			"%llu\n"); -static int -fault_irq_set(struct drm_i915_private *i915, -	      unsigned long *irq, -	      unsigned long val) -{ -	int err; - -	err = mutex_lock_interruptible(&i915->drm.struct_mutex); -	if (err) -		return err; - -	err = i915_gem_wait_for_idle(i915, -				     I915_WAIT_LOCKED | -				     I915_WAIT_INTERRUPTIBLE, -				     MAX_SCHEDULE_TIMEOUT); -	if (err) -		goto err_unlock; - -	*irq = val; -	mutex_unlock(&i915->drm.struct_mutex); - -	/* Flush idle worker to disarm irq */ -	drain_delayed_work(&i915->gt.idle_work); - -	return 0; - -err_unlock: -	mutex_unlock(&i915->drm.struct_mutex); -	return err; -} - -static int -i915_ring_missed_irq_get(void *data, u64 *val) -{ -	struct drm_i915_private *dev_priv = data; - -	*val = dev_priv->gpu_error.missed_irq_rings; -	return 0; -} - -static int -i915_ring_missed_irq_set(void *data, u64 val) -{ -	struct drm_i915_private *i915 = data; - -	return fault_irq_set(i915, &i915->gpu_error.missed_irq_rings, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_ring_missed_irq_fops, -			i915_ring_missed_irq_get, i915_ring_missed_irq_set, -			"0x%08llx\n"); - -static int -i915_ring_test_irq_get(void *data, u64 *val) -{ -	struct drm_i915_private *dev_priv = data; - -	*val = dev_priv->gpu_error.test_irq_rings; - -	return 0; -} - -static int -i915_ring_test_irq_set(void *data, u64 val) -{ -	struct drm_i915_private *i915 = data; - -	/* GuC keeps the user interrupt permanently enabled for submission */ -	if (USES_GUC_SUBMISSION(i915)) -		return -ENODEV; - -	/* -	 * From icl, we can no longer individually mask interrupt generation -	 * from each engine. 
-	 */ -	if (INTEL_GEN(i915) >= 11) -		return -ENODEV; - -	val &= INTEL_INFO(i915)->ring_mask; -	DRM_DEBUG_DRIVER("Masking interrupts on rings 0x%08llx\n", val); - -	return fault_irq_set(i915, &i915->gpu_error.test_irq_rings, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops, -			i915_ring_test_irq_get, i915_ring_test_irq_set, -			"0x%08llx\n"); -  #define DROP_UNBOUND	BIT(0)  #define DROP_BOUND	BIT(1)  #define DROP_RETIRE	BIT(2) @@ -4070,7 +3934,8 @@ i915_drop_caches_set(void *data, u64 val)  		  val, val & DROP_ALL);  	wakeref = intel_runtime_pm_get(i915); -	if (val & DROP_RESET_ACTIVE && !intel_engines_are_idle(i915)) +	if (val & DROP_RESET_ACTIVE && +	    wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT))  		i915_gem_set_wedged(i915);  	/* No need to check and wait for gpu resets, only libdrm auto-restarts @@ -4092,13 +3957,8 @@ i915_drop_caches_set(void *data, u64 val)  		mutex_unlock(&i915->drm.struct_mutex);  	} -	if (val & DROP_RESET_ACTIVE && -	    i915_terminally_wedged(&i915->gpu_error)) { +	if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(&i915->gpu_error))  		i915_handle_error(i915, ALL_ENGINES, 0, NULL); -		wait_on_bit(&i915->gpu_error.flags, -			    I915_RESET_HANDOFF, -			    TASK_UNINTERRUPTIBLE); -	}  	fs_reclaim_acquire(GFP_KERNEL);  	if (val & DROP_BOUND) @@ -4800,8 +4660,6 @@ static const struct i915_debugfs_files {  } i915_debugfs_files[] = {  	{"i915_wedged", &i915_wedged_fops},  	{"i915_cache_sharing", &i915_cache_sharing_fops}, -	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops}, -	{"i915_ring_test_irq", &i915_ring_test_irq_fops},  	{"i915_gem_drop_caches", &i915_drop_caches_fops},  #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)  	{"i915_error_state", &i915_error_state_fops}, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3c111ad09922..534e52e3a8da 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -91,8 +91,8 @@  #define DRIVER_NAME		"i915"  #define DRIVER_DESC		"Intel Graphics" -#define DRIVER_DATE		"20190124" -#define DRIVER_TIMESTAMP	1548370857 +#define DRIVER_DATE		"20190202" +#define DRIVER_TIMESTAMP	1549095268  /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and   * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -1114,6 +1114,7 @@ struct skl_ddb_values {  };  struct skl_wm_level { +	u16 min_ddb_alloc;  	u16 plane_res_b;  	u8 plane_res_l;  	bool plane_en; @@ -1975,7 +1976,14 @@ struct drm_i915_private {  		void (*resume)(struct drm_i915_private *);  		void (*cleanup_engine)(struct intel_engine_cs *engine); -		struct list_head timelines; +		struct i915_gt_timelines { +			struct mutex mutex; /* protects list, tainted by GPU */ +			struct list_head active_list; + +			/* Pack multiple timelines' seqnos into the same page */ +			spinlock_t hwsp_lock; +			struct list_head hwsp_free_list; +		} timelines;  		struct list_head active_rings;  		struct list_head closed_vma; @@ -2345,6 +2353,8 @@ static inline unsigned int i915_sg_segment_size(void)  				 INTEL_INFO(dev_priv)->gt == 3)  #define IS_CNL_WITH_PORT_F(dev_priv)   (IS_CANNONLAKE(dev_priv) && \  					(INTEL_DEVID(dev_priv) & 0x0004) == 0x0004) +#define IS_ICL_WITH_PORT_F(dev_priv)   (IS_ICELAKE(dev_priv) && \ +					INTEL_DEVID(dev_priv) != 0x8A51)  #define IS_ALPHA_SUPPORT(intel_info) ((intel_info)->is_alpha_support) @@ -3001,11 +3011,6 @@ static inline bool i915_reset_backoff(struct i915_gpu_error *error)  	return unlikely(test_bit(I915_RESET_BACKOFF, 
&error->flags));  } -static inline bool i915_reset_handoff(struct i915_gpu_error *error) -{ -	return unlikely(test_bit(I915_RESET_HANDOFF, &error->flags)); -} -  static inline bool i915_terminally_wedged(struct i915_gpu_error *error)  {  	return unlikely(test_bit(I915_WEDGED, &error->flags)); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 44c82a6b9934..e802af64d628 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -247,21 +247,19 @@ int  i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,  			    struct drm_file *file)  { -	struct drm_i915_private *dev_priv = to_i915(dev); -	struct i915_ggtt *ggtt = &dev_priv->ggtt; +	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;  	struct drm_i915_gem_get_aperture *args = data;  	struct i915_vma *vma;  	u64 pinned; +	mutex_lock(&ggtt->vm.mutex); +  	pinned = ggtt->vm.reserved; -	mutex_lock(&dev->struct_mutex); -	list_for_each_entry(vma, &ggtt->vm.active_list, vm_link) +	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)  		if (i915_vma_is_pinned(vma))  			pinned += vma->node.size; -	list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link) -		if (i915_vma_is_pinned(vma)) -			pinned += vma->node.size; -	mutex_unlock(&dev->struct_mutex); + +	mutex_unlock(&ggtt->vm.mutex);  	args->aper_size = ggtt->vm.total;  	args->aper_available_size = args->aper_size - pinned; @@ -441,15 +439,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)  	if (ret)  		return ret; -	while ((vma = list_first_entry_or_null(&obj->vma_list, -					       struct i915_vma, -					       obj_link))) { +	spin_lock(&obj->vma.lock); +	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, +						       struct i915_vma, +						       obj_link))) {  		list_move_tail(&vma->obj_link, &still_in_list); +		spin_unlock(&obj->vma.lock); +  		ret = i915_vma_unbind(vma); -		if (ret) -			break; + +		spin_lock(&obj->vma.lock);  	} -	list_splice(&still_in_list, &obj->vma_list); +	list_splice(&still_in_list, &obj->vma.list); +	spin_unlock(&obj->vma.lock);  	return ret;  } @@ -659,11 +661,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj,  		     struct intel_rps_client *rps_client)  {  	might_sleep(); -#if IS_ENABLED(CONFIG_LOCKDEP) -	GEM_BUG_ON(debug_locks && -		   !!lockdep_is_held(&obj->base.dev->struct_mutex) != -		   !!(flags & I915_WAIT_LOCKED)); -#endif  	GEM_BUG_ON(timeout < 0);  	timeout = i915_gem_object_wait_reservation(obj->resv, @@ -1539,23 +1536,21 @@ err:  static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)  { -	struct drm_i915_private *i915; +	struct drm_i915_private *i915 = to_i915(obj->base.dev);  	struct list_head *list;  	struct i915_vma *vma;  	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); +	mutex_lock(&i915->ggtt.vm.mutex);  	for_each_ggtt_vma(vma, obj) { -		if (i915_vma_is_active(vma)) -			continue; -  		if (!drm_mm_node_allocated(&vma->node))  			continue; -		list_move_tail(&vma->vm_link, &vma->vm->inactive_list); +		list_move_tail(&vma->vm_link, &vma->vm->bound_list);  	} +	mutex_unlock(&i915->ggtt.vm.mutex); -	i915 = to_i915(obj->base.dev);  	spin_lock(&i915->mm.obj_lock);  	list = obj->bind_count ? 
&i915->mm.bound_list : &i915->mm.unbound_list;  	list_move_tail(&obj->mm.link, list); @@ -2878,6 +2873,14 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,  	return 0;  } +static bool match_ring(struct i915_request *rq) +{ +	struct drm_i915_private *dev_priv = rq->i915; +	u32 ring = I915_READ(RING_START(rq->engine->mmio_base)); + +	return ring == i915_ggtt_offset(rq->ring->vma); +} +  struct i915_request *  i915_gem_find_active_request(struct intel_engine_cs *engine)  { @@ -2897,9 +2900,16 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)  	 */  	spin_lock_irqsave(&engine->timeline.lock, flags);  	list_for_each_entry(request, &engine->timeline.requests, link) { -		if (__i915_request_completed(request, request->global_seqno)) +		if (i915_request_completed(request))  			continue; +		if (!i915_request_started(request)) +			break; + +		/* More than one preemptible request may match! */ +		if (!match_ring(request)) +			break; +  		active = request;  		break;  	} @@ -3229,33 +3239,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)  	return ret;  } -static long wait_for_timeline(struct i915_timeline *tl, -			      unsigned int flags, long timeout) -{ -	struct i915_request *rq; - -	rq = i915_gem_active_get_unlocked(&tl->last_request); -	if (!rq) -		return timeout; - -	/* -	 * "Race-to-idle". -	 * -	 * Switching to the kernel context is often used a synchronous -	 * step prior to idling, e.g. in suspend for flushing all -	 * current operations to memory before sleeping. These we -	 * want to complete as quickly as possible to avoid prolonged -	 * stalls, so allow the gpu to boost to maximum clocks. -	 */ -	if (flags & I915_WAIT_FOR_IDLE_BOOST) -		gen6_rps_boost(rq, NULL); - -	timeout = i915_request_wait(rq, flags, timeout); -	i915_request_put(rq); - -	return timeout; -} -  static int wait_for_engines(struct drm_i915_private *i915)  {  	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { @@ -3269,6 +3252,52 @@ static int wait_for_engines(struct drm_i915_private *i915)  	return 0;  } +static long +wait_for_timelines(struct drm_i915_private *i915, +		   unsigned int flags, long timeout) +{ +	struct i915_gt_timelines *gt = &i915->gt.timelines; +	struct i915_timeline *tl; + +	if (!READ_ONCE(i915->gt.active_requests)) +		return timeout; + +	mutex_lock(&gt->mutex); +	list_for_each_entry(tl, &gt->active_list, link) { +		struct i915_request *rq; + +		rq = i915_gem_active_get_unlocked(&tl->last_request); +		if (!rq) +			continue; + +		mutex_unlock(&gt->mutex); + +		/* +		 * "Race-to-idle". +		 * +		 * Switching to the kernel context is often used a synchronous +		 * step prior to idling, e.g. in suspend for flushing all +		 * current operations to memory before sleeping. These we +		 * want to complete as quickly as possible to avoid prolonged +		 * stalls, so allow the gpu to boost to maximum clocks. 
+		 */ +		if (flags & I915_WAIT_FOR_IDLE_BOOST) +			gen6_rps_boost(rq, NULL); + +		timeout = i915_request_wait(rq, flags, timeout); +		i915_request_put(rq); +		if (timeout < 0) +			return timeout; + +		/* restart after reacquiring the lock */ +		mutex_lock(&gt->mutex); +		tl = list_entry(&gt->active_list, typeof(*tl), link); +	} +	mutex_unlock(&gt->mutex); + +	return timeout; +} +  int i915_gem_wait_for_idle(struct drm_i915_private *i915,  			   unsigned int flags, long timeout)  { @@ -3280,17 +3309,15 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,  	if (!READ_ONCE(i915->gt.awake))  		return 0; +	timeout = wait_for_timelines(i915, flags, timeout); +	if (timeout < 0) +		return timeout; +  	if (flags & I915_WAIT_LOCKED) { -		struct i915_timeline *tl;  		int err;  		lockdep_assert_held(&i915->drm.struct_mutex); -		list_for_each_entry(tl, &i915->gt.timelines, link) { -			timeout = wait_for_timeline(tl, flags, timeout); -			if (timeout < 0) -				return timeout; -		}  		if (GEM_SHOW_DEBUG() && !timeout) {  			/* Presume that timeout was non-zero to begin with! */  			dev_warn(&i915->drm.pdev->dev, @@ -3304,17 +3331,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,  		i915_retire_requests(i915);  		GEM_BUG_ON(i915->gt.active_requests); -	} else { -		struct intel_engine_cs *engine; -		enum intel_engine_id id; - -		for_each_engine(engine, i915, id) { -			struct i915_timeline *tl = &engine->timeline; - -			timeout = wait_for_timeline(tl, flags, timeout); -			if (timeout < 0) -				return timeout; -		}  	}  	return 0; @@ -3500,7 +3516,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,  	 * reading an invalid PTE on older architectures.  	 */  restart: -	list_for_each_entry(vma, &obj->vma_list, obj_link) { +	list_for_each_entry(vma, &obj->vma.list, obj_link) {  		if (!drm_mm_node_allocated(&vma->node))  			continue; @@ -3578,7 +3594,7 @@ restart:  			 */  		} -		list_for_each_entry(vma, &obj->vma_list, obj_link) { +		list_for_each_entry(vma, &obj->vma.list, obj_link) {  			if (!drm_mm_node_allocated(&vma->node))  				continue; @@ -3588,7 +3604,7 @@ restart:  		}  	} -	list_for_each_entry(vma, &obj->vma_list, obj_link) +	list_for_each_entry(vma, &obj->vma.list, obj_link)  		vma->node.color = cache_level;  	i915_gem_object_set_cache_coherency(obj, cache_level);  	obj->cache_dirty = true; /* Always invalidate stale cachelines */ @@ -4164,7 +4180,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,  {  	mutex_init(&obj->mm.lock); -	INIT_LIST_HEAD(&obj->vma_list); +	spin_lock_init(&obj->vma.lock); +	INIT_LIST_HEAD(&obj->vma.list); +  	INIT_LIST_HEAD(&obj->lut_list);  	INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4330,14 +4348,13 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,  		mutex_lock(&i915->drm.struct_mutex);  		GEM_BUG_ON(i915_gem_object_is_active(obj)); -		list_for_each_entry_safe(vma, vn, -					 &obj->vma_list, obj_link) { +		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {  			GEM_BUG_ON(i915_vma_is_active(vma));  			vma->flags &= ~I915_VMA_PIN_MASK;  			i915_vma_destroy(vma);  		} -		GEM_BUG_ON(!list_empty(&obj->vma_list)); -		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); +		GEM_BUG_ON(!list_empty(&obj->vma.list)); +		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));  		/* This serializes freeing with the shrinker. 
Since the free  		 * is delayed, first by RCU then by the workqueue, we want the @@ -4495,8 +4512,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915)  	GEM_TRACE("\n"); -	mutex_lock(&i915->drm.struct_mutex); -  	wakeref = intel_runtime_pm_get(i915);  	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); @@ -4522,6 +4537,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)  	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);  	intel_runtime_pm_put(i915, wakeref); +	mutex_lock(&i915->drm.struct_mutex);  	i915_gem_contexts_lost(i915);  	mutex_unlock(&i915->drm.struct_mutex);  } @@ -4536,6 +4552,8 @@ int i915_gem_suspend(struct drm_i915_private *i915)  	wakeref = intel_runtime_pm_get(i915);  	intel_suspend_gt_powersave(i915); +	flush_workqueue(i915->wq); +  	mutex_lock(&i915->drm.struct_mutex);  	/* @@ -4565,11 +4583,9 @@ int i915_gem_suspend(struct drm_i915_private *i915)  	i915_retire_requests(i915); /* ensure we flush after wedging */  	mutex_unlock(&i915->drm.struct_mutex); +	i915_reset_flush(i915); -	intel_uc_suspend(i915); - -	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); -	cancel_delayed_work_sync(&i915->gt.retire_work); +	drain_delayed_work(&i915->gt.retire_work);  	/*  	 * As the idle_work is rearming if it detects a race, play safe and @@ -4577,6 +4593,8 @@ int i915_gem_suspend(struct drm_i915_private *i915)  	 */  	drain_delayed_work(&i915->gt.idle_work); +	intel_uc_suspend(i915); +  	/*  	 * Assert that we successfully flushed all the work and  	 * reset the GPU back to its idle, low power state. @@ -5013,6 +5031,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)  		dev_priv->gt.cleanup_engine = intel_engine_cleanup;  	} +	i915_timelines_init(dev_priv); +  	ret = i915_gem_init_userptr(dev_priv);  	if (ret)  		return ret; @@ -5135,8 +5155,10 @@ err_unlock:  err_uc_misc:  	intel_uc_fini_misc(dev_priv); -	if (ret != -EIO) +	if (ret != -EIO) {  		i915_gem_cleanup_userptr(dev_priv); +		i915_timelines_fini(dev_priv); +	}  	if (ret == -EIO) {  		mutex_lock(&dev_priv->drm.struct_mutex); @@ -5187,6 +5209,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)  	intel_uc_fini_misc(dev_priv);  	i915_gem_cleanup_userptr(dev_priv); +	i915_timelines_fini(dev_priv);  	i915_gem_drain_freed_objects(dev_priv); @@ -5289,7 +5312,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)  	if (!dev_priv->priorities)  		goto err_dependencies; -	INIT_LIST_HEAD(&dev_priv->gt.timelines);  	INIT_LIST_HEAD(&dev_priv->gt.active_rings);  	INIT_LIST_HEAD(&dev_priv->gt.closed_vma); @@ -5333,7 +5355,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)  	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));  	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));  	WARN_ON(dev_priv->mm.object_count); -	WARN_ON(!list_empty(&dev_priv->gt.timelines));  	kmem_cache_destroy(dev_priv->priorities);  	kmem_cache_destroy(dev_priv->dependencies); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 93e84751370f..6faf1f6faab5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -327,6 +327,9 @@ intel_context_init(struct intel_context *ce,  		   struct intel_engine_cs *engine)  {  	ce->gem_context = ctx; + +	INIT_LIST_HEAD(&ce->signal_link); +	INIT_LIST_HEAD(&ce->signals);  }  static struct i915_gem_context * diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 3769438228f6..6ba40ff6b91f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h 
+++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -164,6 +164,8 @@ struct i915_gem_context {  	struct intel_context {  		struct i915_gem_context *gem_context;  		struct intel_engine_cs *active; +		struct list_head signal_link; +		struct list_head signals;  		struct i915_vma *state;  		struct intel_ring *ring;  		u32 *lrc_reg_state; diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index f6855401f247..68d74c50ac39 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -126,31 +126,25 @@ i915_gem_evict_something(struct i915_address_space *vm,  	struct drm_i915_private *dev_priv = vm->i915;  	struct drm_mm_scan scan;  	struct list_head eviction_list; -	struct list_head *phases[] = { -		&vm->inactive_list, -		&vm->active_list, -		NULL, -	}, **phase;  	struct i915_vma *vma, *next;  	struct drm_mm_node *node;  	enum drm_mm_insert_mode mode; +	struct i915_vma *active;  	int ret;  	lockdep_assert_held(&vm->i915->drm.struct_mutex);  	trace_i915_gem_evict(vm, min_size, alignment, flags);  	/* -	 * The goal is to evict objects and amalgamate space in LRU order. -	 * The oldest idle objects reside on the inactive list, which is in -	 * retirement order. The next objects to retire are those in flight, -	 * on the active list, again in retirement order. +	 * The goal is to evict objects and amalgamate space in rough LRU order. +	 * Since both active and inactive objects reside on the same list, +	 * in a mix of creation and last scanned order, as we process the list +	 * we sort it into inactive/active, which keeps the active portion +	 * in a rough MRU order.  	 *  	 * The retirement sequence is thus: -	 *   1. Inactive objects (already retired) -	 *   2. Active objects (will stall on unbinding) -	 * -	 * On each list, the oldest objects lie at the HEAD with the freshest -	 * object on the TAIL. +	 *   1. Inactive objects (already retired, random order) +	 *   2. Active objects (will stall on unbinding, oldest scanned first)  	 */  	mode = DRM_MM_INSERT_BEST;  	if (flags & PIN_HIGH) @@ -169,17 +163,46 @@ i915_gem_evict_something(struct i915_address_space *vm,  	 */  	if (!(flags & PIN_NONBLOCK))  		i915_retire_requests(dev_priv); -	else -		phases[1] = NULL;  search_again: +	active = NULL;  	INIT_LIST_HEAD(&eviction_list); -	phase = phases; -	do { -		list_for_each_entry(vma, *phase, vm_link) -			if (mark_free(&scan, vma, flags, &eviction_list)) -				goto found; -	} while (*++phase); +	list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) { +		/* +		 * We keep this list in a rough least-recently scanned order +		 * of active elements (inactive elements are cheap to reap). +		 * New entries are added to the end, and we move anything we +		 * scan to the end. The assumption is that the working set +		 * of applications is either steady state (and thanks to the +		 * userspace bo cache it almost always is) or volatile and +		 * frequently replaced after a frame, which are self-evicting! +		 * Given that assumption, the MRU order of the scan list is +		 * fairly static, and keeping it in least-recently scan order +		 * is suitable. +		 * +		 * To notice when we complete one full cycle, we record the +		 * first active element seen, before moving it to the tail. 
+		 */ +		if (i915_vma_is_active(vma)) { +			if (vma == active) { +				if (flags & PIN_NONBLOCK) +					break; + +				active = ERR_PTR(-EAGAIN); +			} + +			if (active != ERR_PTR(-EAGAIN)) { +				if (!active) +					active = vma; + +				list_move_tail(&vma->vm_link, &vm->bound_list); +				continue; +			} +		} + +		if (mark_free(&scan, vma, flags, &eviction_list)) +			goto found; +	}  	/* Nothing found, clean up and bail out! */  	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { @@ -388,11 +411,6 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,   */  int i915_gem_evict_vm(struct i915_address_space *vm)  { -	struct list_head *phases[] = { -		&vm->inactive_list, -		&vm->active_list, -		NULL -	}, **phase;  	struct list_head eviction_list;  	struct i915_vma *vma, *next;  	int ret; @@ -412,16 +430,15 @@ int i915_gem_evict_vm(struct i915_address_space *vm)  	}  	INIT_LIST_HEAD(&eviction_list); -	phase = phases; -	do { -		list_for_each_entry(vma, *phase, vm_link) { -			if (i915_vma_is_pinned(vma)) -				continue; +	mutex_lock(&vm->mutex); +	list_for_each_entry(vma, &vm->bound_list, vm_link) { +		if (i915_vma_is_pinned(vma)) +			continue; -			__i915_vma_pin(vma); -			list_add(&vma->evict_link, &eviction_list); -		} -	} while (*++phase); +		__i915_vma_pin(vma); +		list_add(&vma->evict_link, &eviction_list); +	} +	mutex_unlock(&vm->mutex);  	ret = 0;  	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f250109e1f66..8eedf7cac493 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1976,6 +1976,18 @@ static int eb_submit(struct i915_execbuffer *eb)  			return err;  	} +	/* +	 * After we completed waiting for other engines (using HW semaphores) +	 * then we can signal that this request/batch is ready to run. This +	 * allows us to determine if the batch is still waiting on the GPU +	 * or actually running by checking the breadcrumb. 
+	 */ +	if (eb->engine->emit_init_breadcrumb) { +		err = eb->engine->emit_init_breadcrumb(eb->request); +		if (err) +			return err; +	} +  	err = eb->engine->emit_bb_start(eb->request,  					eb->batch->node.start +  					eb->batch_start_offset, diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h index 99a31ded4dfd..09dcaf14121b 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h @@ -50,4 +50,3 @@ struct drm_i915_fence_reg {  };  #endif - diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9081e3bc5a59..49b00996a15e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -491,9 +491,8 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)  	stash_init(&vm->free_pages); -	INIT_LIST_HEAD(&vm->active_list); -	INIT_LIST_HEAD(&vm->inactive_list);  	INIT_LIST_HEAD(&vm->unbound_list); +	INIT_LIST_HEAD(&vm->bound_list);  }  static void i915_address_space_fini(struct i915_address_space *vm) @@ -1932,7 +1931,10 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)  	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */  	INIT_LIST_HEAD(&vma->obj_link); + +	mutex_lock(&vma->vm->mutex);  	list_add(&vma->vm_link, &vma->vm->unbound_list); +	mutex_unlock(&vma->vm->mutex);  	return vma;  } @@ -2111,8 +2113,7 @@ void i915_ppgtt_close(struct i915_address_space *vm)  static void ppgtt_destroy_vma(struct i915_address_space *vm)  {  	struct list_head *phases[] = { -		&vm->active_list, -		&vm->inactive_list, +		&vm->bound_list,  		&vm->unbound_list,  		NULL,  	}, **phase; @@ -2135,8 +2136,7 @@ void i915_ppgtt_release(struct kref *kref)  	ppgtt_destroy_vma(&ppgtt->vm); -	GEM_BUG_ON(!list_empty(&ppgtt->vm.active_list)); -	GEM_BUG_ON(!list_empty(&ppgtt->vm.inactive_list)); +	GEM_BUG_ON(!list_empty(&ppgtt->vm.bound_list));  	GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list));  	ppgtt->vm.cleanup(&ppgtt->vm); @@ -2801,8 +2801,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)  	mutex_lock(&dev_priv->drm.struct_mutex);  	i915_gem_fini_aliasing_ppgtt(dev_priv); -	GEM_BUG_ON(!list_empty(&ggtt->vm.active_list)); -	list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) +	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)  		WARN_ON(i915_vma_unbind(vma));  	if (drm_mm_node_allocated(&ggtt->error_capture)) @@ -3508,32 +3507,39 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)  	i915_check_and_clear_faults(dev_priv); +	mutex_lock(&ggtt->vm.mutex); +  	/* First fill our portion of the GTT with scratch pages */  	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); -  	ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */  	/* clflush objects bound into the GGTT and rebind them. */ -	GEM_BUG_ON(!list_empty(&ggtt->vm.active_list)); -	list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) { +	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {  		struct drm_i915_gem_object *obj = vma->obj;  		if (!(vma->flags & I915_VMA_GLOBAL_BIND))  			continue; +		mutex_unlock(&ggtt->vm.mutex); +  		if (!i915_vma_unbind(vma)) -			continue; +			goto lock;  		WARN_ON(i915_vma_bind(vma,  				      obj ? 
obj->cache_level : 0,  				      PIN_UPDATE));  		if (obj)  			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); + +lock: +		mutex_lock(&ggtt->vm.mutex);  	}  	ggtt->vm.closed = false;  	i915_ggtt_invalidate(dev_priv); +	mutex_unlock(&ggtt->vm.mutex); +  	if (INTEL_GEN(dev_priv) >= 8) {  		struct intel_ppat *ppat = &dev_priv->ppat; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 9229b03d629b..03ade71b8d9a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -39,6 +39,7 @@  #include <linux/pagevec.h>  #include "i915_request.h" +#include "i915_reset.h"  #include "i915_selftest.h"  #include "i915_timeline.h" @@ -298,32 +299,12 @@ struct i915_address_space {  	struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */  	/** -	 * List of objects currently involved in rendering. -	 * -	 * Includes buffers having the contents of their GPU caches -	 * flushed, not necessarily primitives. last_read_req -	 * represents when the rendering involved will be completed. -	 * -	 * A reference is held on the buffer while on this list. +	 * List of vma currently bound.  	 */ -	struct list_head active_list; +	struct list_head bound_list;  	/** -	 * LRU list of objects which are not in the ringbuffer and -	 * are ready to unbind, but are still in the GTT. -	 * -	 * last_read_req is NULL while an object is in this list. -	 * -	 * A reference is not held on the buffer while on this list, -	 * as merely being GTT-bound shouldn't prevent its being -	 * freed, and we'll pull it off the list in the free path. -	 */ -	struct list_head inactive_list; - -	/** -	 * List of vma that have been unbound. -	 * -	 * A reference is not held on the buffer while on this list. +	 * List of vma that are not unbound.  	 */  	struct list_head unbound_list; @@ -661,19 +642,19 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,  /* Flags used by pin/bind&friends. */  #define PIN_NONBLOCK		BIT_ULL(0) -#define PIN_MAPPABLE		BIT_ULL(1) -#define PIN_ZONE_4G		BIT_ULL(2) -#define PIN_NONFAULT		BIT_ULL(3) -#define PIN_NOEVICT		BIT_ULL(4) - -#define PIN_MBZ			BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */ -#define PIN_GLOBAL		BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */ -#define PIN_USER		BIT_ULL(7) /* I915_VMA_LOCAL_BIND */ -#define PIN_UPDATE		BIT_ULL(8) - -#define PIN_HIGH		BIT_ULL(9) -#define PIN_OFFSET_BIAS		BIT_ULL(10) -#define PIN_OFFSET_FIXED	BIT_ULL(11) +#define PIN_NONFAULT		BIT_ULL(1) +#define PIN_NOEVICT		BIT_ULL(2) +#define PIN_MAPPABLE		BIT_ULL(3) +#define PIN_ZONE_4G		BIT_ULL(4) +#define PIN_HIGH		BIT_ULL(5) +#define PIN_OFFSET_BIAS		BIT_ULL(6) +#define PIN_OFFSET_FIXED	BIT_ULL(7) + +#define PIN_MBZ			BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */ +#define PIN_GLOBAL		BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */ +#define PIN_USER		BIT_ULL(10) /* I915_VMA_LOCAL_BIND */ +#define PIN_UPDATE		BIT_ULL(11) +  #define PIN_OFFSET_MASK		(-I915_GTT_PAGE_SIZE)  #endif diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index cb1b0144d274..73fec917d097 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -87,24 +87,33 @@ struct drm_i915_gem_object {  	const struct drm_i915_gem_object_ops *ops; -	/** -	 * @vma_list: List of VMAs backed by this object -	 * -	 * The VMA on this list are ordered by type, all GGTT vma are placed -	 * at the head and all ppGTT vma are placed at the tail. 
The different -	 * types of GGTT vma are unordered between themselves, use the -	 * @vma_tree (which has a defined order between all VMA) to find an -	 * exact match. -	 */ -	struct list_head vma_list; -	/** -	 * @vma_tree: Ordered tree of VMAs backed by this object -	 * -	 * All VMA created for this object are placed in the @vma_tree for -	 * fast retrieval via a binary search in i915_vma_instance(). -	 * They are also added to @vma_list for easy iteration. -	 */ -	struct rb_root vma_tree; +	struct { +		/** +		 * @vma.lock: protect the list/tree of vmas +		 */ +		spinlock_t lock; + +		/** +		 * @vma.list: List of VMAs backed by this object +		 * +		 * The VMA on this list are ordered by type, all GGTT vma are +		 * placed at the head and all ppGTT vma are placed at the tail. +		 * The different types of GGTT vma are unordered between +		 * themselves, use the @vma.tree (which has a defined order +		 * between all VMA) to quickly find an exact match. +		 */ +		struct list_head list; + +		/** +		 * @vma.tree: Ordered tree of VMAs backed by this object +		 * +		 * All VMA created for this object are placed in the @vma.tree +		 * for fast retrieval via a binary search in +		 * i915_vma_instance(). They are also added to @vma.list for +		 * easy iteration. +		 */ +		struct rb_root tree; +	} vma;  	/**  	 * @lut_list: List of vma lookup entries in use for this object. diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 8ceecb026910..6da795c7e62e 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -461,12 +461,20 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr  					       I915_SHRINK_VMAPS);  	/* We also want to clear any cached iomaps as they wrap vmap */ +	mutex_lock(&i915->ggtt.vm.mutex);  	list_for_each_entry_safe(vma, next, -				 &i915->ggtt.vm.inactive_list, vm_link) { +				 &i915->ggtt.vm.bound_list, vm_link) {  		unsigned long count = vma->node.size >> PAGE_SHIFT; -		if (vma->iomap && i915_vma_unbind(vma) == 0) + +		if (!vma->iomap || i915_vma_is_active(vma)) +			continue; + +		mutex_unlock(&i915->ggtt.vm.mutex); +		if (i915_vma_unbind(vma) == 0)  			freed_pages += count; +		mutex_lock(&i915->ggtt.vm.mutex);  	} +	mutex_unlock(&i915->ggtt.vm.mutex);  out:  	shrinker_unlock(i915, unlock); diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 9df615eea2d8..74a9661479ca 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -701,7 +701,10 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv  	vma->pages = obj->mm.pages;  	vma->flags |= I915_VMA_GLOBAL_BIND;  	__i915_vma_set_map_and_fenceable(vma); -	list_move_tail(&vma->vm_link, &ggtt->vm.inactive_list); + +	mutex_lock(&ggtt->vm.mutex); +	list_move_tail(&vma->vm_link, &ggtt->vm.bound_list); +	mutex_unlock(&ggtt->vm.mutex);  	spin_lock(&dev_priv->mm.obj_lock);  	list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 1f8e80e31b49..6e2e5ed2bd0a 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -447,9 +447,14 @@ static void error_print_request(struct drm_i915_error_state_buf *m,  	if (!erq->seqno)  		return; -	err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", +	err_printf(m, "%s 
pid %d, ban score %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",  		   prefix, erq->pid, erq->ban_score, -		   erq->context, erq->seqno, erq->sched_attr.priority, +		   erq->context, erq->seqno, +		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, +			    &erq->flags) ? "!" : "", +		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, +			    &erq->flags) ? "+" : "", +		   erq->sched_attr.priority,  		   jiffies_to_msecs(erq->jiffies - epoch),  		   erq->start, erq->head, erq->tail);  } @@ -530,13 +535,9 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,  	}  	err_printf(m, "  seqno: 0x%08x\n", ee->seqno);  	err_printf(m, "  last_seqno: 0x%08x\n", ee->last_seqno); -	err_printf(m, "  waiting: %s\n", yesno(ee->waiting));  	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);  	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail); -	err_printf(m, "  hangcheck stall: %s\n", yesno(ee->hangcheck_stalled)); -	err_printf(m, "  hangcheck action: %s\n", -		   hangcheck_action_to_str(ee->hangcheck_action)); -	err_printf(m, "  hangcheck action timestamp: %dms (%lu%s)\n", +	err_printf(m, "  hangcheck timestamp: %dms (%lu%s)\n",  		   jiffies_to_msecs(ee->hangcheck_timestamp - epoch),  		   ee->hangcheck_timestamp,  		   ee->hangcheck_timestamp == epoch ? "; epoch" : ""); @@ -684,15 +685,15 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,  		   jiffies_to_msecs(error->capture - error->epoch));  	for (i = 0; i < ARRAY_SIZE(error->engine); i++) { -		if (error->engine[i].hangcheck_stalled && -		    error->engine[i].context.pid) { -			err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n", -				   engine_name(m->i915, i), -				   error->engine[i].context.comm, -				   error->engine[i].context.pid, -				   error->engine[i].context.ban_score, -				   bannable(&error->engine[i].context)); -		} +		if (!error->engine[i].context.pid) +			continue; + +		err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n", +			   engine_name(m->i915, i), +			   error->engine[i].context.comm, +			   error->engine[i].context.pid, +			   error->engine[i].context.ban_score, +			   bannable(&error->engine[i].context));  	}  	err_printf(m, "Reset count: %u\n", error->reset_count);  	err_printf(m, "Suspend count: %u\n", error->suspend_count); @@ -722,8 +723,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,  	err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);  	err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);  	err_printf(m, "CCID: 0x%08x\n", error->ccid); -	err_printf(m, "Missed interrupts: 0x%08lx\n", -		   m->i915->gpu_error.missed_irq_rings);  	for (i = 0; i < error->nfence; i++)  		err_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]); @@ -807,21 +806,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,  						    error->epoch);  		} -		if (IS_ERR(ee->waiters)) { -			err_printf(m, "%s --- ? 
waiters [unable to acquire spinlock]\n", -				   m->i915->engine[i]->name); -		} else if (ee->num_waiters) { -			err_printf(m, "%s --- %d waiters\n", -				   m->i915->engine[i]->name, -				   ee->num_waiters); -			for (j = 0; j < ee->num_waiters; j++) { -				err_printf(m, " seqno 0x%08x for %s [%d]\n", -					   ee->waiters[j].seqno, -					   ee->waiters[j].comm, -					   ee->waiters[j].pid); -			} -		} -  		print_error_obj(m, m->i915->engine[i],  				"ringbuffer", ee->ringbuffer); @@ -1003,8 +987,6 @@ void __i915_gpu_state_free(struct kref *error_ref)  		i915_error_object_free(ee->wa_ctx);  		kfree(ee->requests); -		if (!IS_ERR_OR_NULL(ee->waiters)) -			kfree(ee->waiters);  	}  	for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) @@ -1124,7 +1106,9 @@ static void capture_bo(struct drm_i915_error_buffer *err,  static u32 capture_error_bo(struct drm_i915_error_buffer *err,  			    int count, struct list_head *head, -			    bool pinned_only) +			    unsigned int flags) +#define ACTIVE_ONLY BIT(0) +#define PINNED_ONLY BIT(1)  {  	struct i915_vma *vma;  	int i = 0; @@ -1133,7 +1117,10 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err,  		if (!vma->obj)  			continue; -		if (pinned_only && !i915_vma_is_pinned(vma)) +		if (flags & ACTIVE_ONLY && !i915_vma_is_active(vma)) +			continue; + +		if (flags & PINNED_ONLY && !i915_vma_is_pinned(vma))  			continue;  		capture_bo(err++, vma); @@ -1144,7 +1131,8 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err,  	return i;  } -/* Generate a semi-unique error code. The code is not meant to have meaning, The +/* + * Generate a semi-unique error code. The code is not meant to have meaning, The   * code's only purpose is to try to prevent false duplicated bug reports by   * grossly estimating a GPU error state.   * @@ -1153,29 +1141,23 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err,   *   * It's only a small step better than a random number in its current form.   */ -static u32 i915_error_generate_code(struct drm_i915_private *dev_priv, -				    struct i915_gpu_state *error, -				    int *engine_id) +static u32 i915_error_generate_code(struct i915_gpu_state *error, +				    unsigned long engine_mask)  { -	u32 error_code = 0; -	int i; - -	/* IPEHR would be an ideal way to detect errors, as it's the gross +	/* +	 * IPEHR would be an ideal way to detect errors, as it's the gross  	 * measure of "the command that hung." However, has some very common  	 * synchronization commands which almost always appear in the case  	 * strictly a client bug. Use instdone to differentiate those some.  	 
*/ -	for (i = 0; i < I915_NUM_ENGINES; i++) { -		if (error->engine[i].hangcheck_stalled) { -			if (engine_id) -				*engine_id = i; +	if (engine_mask) { +		struct drm_i915_error_engine *ee = +			&error->engine[ffs(engine_mask)]; -			return error->engine[i].ipehr ^ -			       error->engine[i].instdone.instdone; -		} +		return ee->ipehr ^ ee->instdone.instdone;  	} -	return error_code; +	return 0;  }  static void gem_record_fences(struct i915_gpu_state *error) @@ -1208,59 +1190,6 @@ static void gen6_record_semaphore_state(struct intel_engine_cs *engine,  			I915_READ(RING_SYNC_2(engine->mmio_base));  } -static void error_record_engine_waiters(struct intel_engine_cs *engine, -					struct drm_i915_error_engine *ee) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	struct drm_i915_error_waiter *waiter; -	struct rb_node *rb; -	int count; - -	ee->num_waiters = 0; -	ee->waiters = NULL; - -	if (RB_EMPTY_ROOT(&b->waiters)) -		return; - -	if (!spin_trylock_irq(&b->rb_lock)) { -		ee->waiters = ERR_PTR(-EDEADLK); -		return; -	} - -	count = 0; -	for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb)) -		count++; -	spin_unlock_irq(&b->rb_lock); - -	waiter = NULL; -	if (count) -		waiter = kmalloc_array(count, -				       sizeof(struct drm_i915_error_waiter), -				       GFP_ATOMIC); -	if (!waiter) -		return; - -	if (!spin_trylock_irq(&b->rb_lock)) { -		kfree(waiter); -		ee->waiters = ERR_PTR(-EDEADLK); -		return; -	} - -	ee->waiters = waiter; -	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { -		struct intel_wait *w = rb_entry(rb, typeof(*w), node); - -		strcpy(waiter->comm, w->tsk->comm); -		waiter->pid = w->tsk->pid; -		waiter->seqno = w->seqno; -		waiter++; - -		if (++ee->num_waiters == count) -			break; -	} -	spin_unlock_irq(&b->rb_lock); -} -  static void error_record_engine_registers(struct i915_gpu_state *error,  					  struct intel_engine_cs *engine,  					  struct drm_i915_error_engine *ee) @@ -1296,7 +1225,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error,  	intel_engine_get_instdone(engine, &ee->instdone); -	ee->waiting = intel_engine_has_waiter(engine);  	ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base));  	ee->acthd = intel_engine_get_active_head(engine);  	ee->seqno = intel_engine_get_seqno(engine); @@ -1338,9 +1266,8 @@ static void error_record_engine_registers(struct i915_gpu_state *error,  	}  	ee->idle = intel_engine_is_idle(engine); -	ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; -	ee->hangcheck_action = engine->hangcheck.action; -	ee->hangcheck_stalled = engine->hangcheck.stalled; +	if (!ee->idle) +		ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;  	ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,  						  engine); @@ -1371,6 +1298,7 @@ static void record_request(struct i915_request *request,  {  	struct i915_gem_context *ctx = request->gem_context; +	erq->flags = request->fence.flags;  	erq->context = ctx->hw_id;  	erq->sched_attr = request->sched.attr;  	erq->ban_score = atomic_read(&ctx->ban_score); @@ -1546,7 +1474,6 @@ static void gem_record_rings(struct i915_gpu_state *error)  		ee->engine_id = i;  		error_record_engine_registers(error, engine, ee); -		error_record_engine_waiters(engine, ee);  		error_record_engine_execlists(engine, ee);  		request = i915_gem_find_active_request(engine); @@ -1610,14 +1537,17 @@ static void gem_capture_vm(struct i915_gpu_state *error,  	int count;  	count = 0; -	list_for_each_entry(vma, &vm->active_list, vm_link) -		count++; +	
list_for_each_entry(vma, &vm->bound_list, vm_link) +		if (i915_vma_is_active(vma)) +			count++;  	active_bo = NULL;  	if (count)  		active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);  	if (active_bo) -		count = capture_error_bo(active_bo, count, &vm->active_list, false); +		count = capture_error_bo(active_bo, +					 count, &vm->bound_list, +					 ACTIVE_ONLY);  	else  		count = 0; @@ -1655,28 +1585,20 @@ static void capture_pinned_buffers(struct i915_gpu_state *error)  	struct i915_address_space *vm = &error->i915->ggtt.vm;  	struct drm_i915_error_buffer *bo;  	struct i915_vma *vma; -	int count_inactive, count_active; - -	count_inactive = 0; -	list_for_each_entry(vma, &vm->inactive_list, vm_link) -		count_inactive++; +	int count; -	count_active = 0; -	list_for_each_entry(vma, &vm->active_list, vm_link) -		count_active++; +	count = 0; +	list_for_each_entry(vma, &vm->bound_list, vm_link) +		count++;  	bo = NULL; -	if (count_inactive + count_active) -		bo = kcalloc(count_inactive + count_active, -			     sizeof(*bo), GFP_ATOMIC); +	if (count) +		bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);  	if (!bo)  		return; -	count_inactive = capture_error_bo(bo, count_inactive, -					  &vm->active_list, true); -	count_active = capture_error_bo(bo + count_inactive, count_active, -					&vm->inactive_list, true); -	error->pinned_bo_count = count_inactive + count_active; +	error->pinned_bo_count = +		capture_error_bo(bo, count, &vm->bound_list, PINNED_ONLY);  	error->pinned_bo = bo;  } @@ -1783,31 +1705,35 @@ static void capture_reg_state(struct i915_gpu_state *error)  	error->pgtbl_er = I915_READ(PGTBL_ER);  } -static void i915_error_capture_msg(struct drm_i915_private *dev_priv, -				   struct i915_gpu_state *error, -				   u32 engine_mask, -				   const char *error_msg) +static const char * +error_msg(struct i915_gpu_state *error, unsigned long engines, const char *msg)  { -	u32 ecode; -	int engine_id = -1, len; +	int len; +	int i; -	ecode = i915_error_generate_code(dev_priv, error, &engine_id); +	for (i = 0; i < ARRAY_SIZE(error->engine); i++) +		if (!error->engine[i].context.pid) +			engines &= ~BIT(i);  	len = scnprintf(error->error_msg, sizeof(error->error_msg), -			"GPU HANG: ecode %d:%d:0x%08x", -			INTEL_GEN(dev_priv), engine_id, ecode); - -	if (engine_id != -1 && error->engine[engine_id].context.pid) +			"GPU HANG: ecode %d:%lx:0x%08x", +			INTEL_GEN(error->i915), engines, +			i915_error_generate_code(error, engines)); +	if (engines) { +		/* Just show the first executing process, more is confusing */ +		i = ffs(engines);  		len += scnprintf(error->error_msg + len,  				 sizeof(error->error_msg) - len,  				 ", in %s [%d]", -				 error->engine[engine_id].context.comm, -				 error->engine[engine_id].context.pid); +				 error->engine[i].context.comm, +				 error->engine[i].context.pid); +	} +	if (msg) +		len += scnprintf(error->error_msg + len, +				 sizeof(error->error_msg) - len, +				 ", %s", msg); -	scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, -		  ", reason: %s, action: %s", -		  error_msg, -		  engine_mask ? 
"reset" : "continue"); +	return error->error_msg;  }  static void capture_gen_state(struct i915_gpu_state *error) @@ -1847,7 +1773,7 @@ static unsigned long capture_find_epoch(const struct i915_gpu_state *error)  	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {  		const struct drm_i915_error_engine *ee = &error->engine[i]; -		if (ee->hangcheck_stalled && +		if (ee->hangcheck_timestamp &&  		    time_before(ee->hangcheck_timestamp, epoch))  			epoch = ee->hangcheck_timestamp;  	} @@ -1921,7 +1847,7 @@ i915_capture_gpu_state(struct drm_i915_private *i915)   * i915_capture_error_state - capture an error record for later analysis   * @i915: i915 device   * @engine_mask: the mask of engines triggering the hang - * @error_msg: a message to insert into the error capture header + * @msg: a message to insert into the error capture header   *   * Should be called when an error is detected (either a hang or an error   * interrupt) to capture error state from the time of the error.  Fills @@ -1929,8 +1855,8 @@ i915_capture_gpu_state(struct drm_i915_private *i915)   * to pick up.   */  void i915_capture_error_state(struct drm_i915_private *i915, -			      u32 engine_mask, -			      const char *error_msg) +			      unsigned long engine_mask, +			      const char *msg)  {  	static bool warned;  	struct i915_gpu_state *error; @@ -1946,8 +1872,7 @@ void i915_capture_error_state(struct drm_i915_private *i915,  	if (IS_ERR(error))  		return; -	i915_error_capture_msg(i915, error, engine_mask, error_msg); -	DRM_INFO("%s\n", error->error_msg); +	dev_info(i915->drm.dev, "%s\n", error_msg(error, engine_mask, msg));  	if (!error->simulated) {  		spin_lock_irqsave(&i915->gpu_error.lock, flags); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 604291f7762d..53b1f22dd365 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -82,11 +82,7 @@ struct i915_gpu_state {  		int engine_id;  		/* Software tracked state */  		bool idle; -		bool waiting; -		int num_waiters;  		unsigned long hangcheck_timestamp; -		bool hangcheck_stalled; -		enum intel_engine_hangcheck_action hangcheck_action;  		struct i915_address_space *vm;  		int num_requests;  		u32 reset_count; @@ -149,6 +145,7 @@ struct i915_gpu_state {  		struct drm_i915_error_object *default_state;  		struct drm_i915_error_request { +			unsigned long flags;  			long jiffies;  			pid_t pid;  			u32 context; @@ -161,12 +158,6 @@ struct i915_gpu_state {  		} *requests, execlist[EXECLIST_MAX_PORTS];  		unsigned int num_ports; -		struct drm_i915_error_waiter { -			char comm[TASK_COMM_LEN]; -			pid_t pid; -			u32 seqno; -		} *waiters; -  		struct {  			u32 gfx_mode;  			union { @@ -197,6 +188,8 @@ struct i915_gpu_state {  	struct scatterlist *sgl, *fit;  }; +struct i915_gpu_restart; +  struct i915_gpu_error {  	/* For hangcheck timer */  #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ @@ -211,8 +204,6 @@ struct i915_gpu_error {  	atomic_t pending_fb_pin; -	unsigned long missed_irq_rings; -  	/**  	 * State variable controlling the reset flow and count  	 * @@ -247,15 +238,6 @@ struct i915_gpu_error {  	 * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a  	 * secondary role in preventing two concurrent global reset attempts.  	 * -	 * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the -	 * struct_mutex. 
We try to acquire the struct_mutex in the reset worker, -	 * but it may be held by some long running waiter (that we cannot -	 * interrupt without causing trouble). Once we are ready to do the GPU -	 * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If -	 * they already hold the struct_mutex and want to participate they can -	 * inspect the bit and do the reset directly, otherwise the worker -	 * waits for the struct_mutex. -	 *  	 * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to  	 * acquire the struct_mutex to reset an engine, we need an explicit  	 * flag to prevent two concurrent reset attempts in the same engine. @@ -269,20 +251,13 @@ struct i915_gpu_error {  	 */  	unsigned long flags;  #define I915_RESET_BACKOFF	0 -#define I915_RESET_HANDOFF	1 -#define I915_RESET_MODESET	2 -#define I915_RESET_ENGINE	3 +#define I915_RESET_MODESET	1 +#define I915_RESET_ENGINE	2  #define I915_WEDGED		(BITS_PER_LONG - 1)  	/** Number of times an engine has been reset */  	u32 reset_engine_count[I915_NUM_ENGINES]; -	/** Set of stalled engines with guilty requests, in the current reset */ -	u32 stalled_mask; - -	/** Reason for the current *global* reset */ -	const char *reason; -  	struct mutex wedge_mutex; /* serialises wedging/unwedging */  	/** @@ -297,8 +272,7 @@ struct i915_gpu_error {  	 */  	wait_queue_head_t reset_queue; -	/* For missed irq/seqno simulation. */ -	unsigned long test_irq_rings; +	struct i915_gpu_restart *restart;  };  struct drm_i915_error_state_buf { @@ -320,7 +294,7 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);  struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);  void i915_capture_error_state(struct drm_i915_private *dev_priv, -			      u32 engine_mask, +			      unsigned long engine_mask,  			      const char *error_msg);  static inline struct i915_gpu_state * diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7e56611b3d60..441d2674b272 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -823,11 +823,26 @@ static void i915_enable_asle_pipestat(struct drm_i915_private *dev_priv)  static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe)  {  	struct drm_i915_private *dev_priv = to_i915(dev); +	struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; +	const struct drm_display_mode *mode = &vblank->hwmode;  	i915_reg_t high_frame, low_frame;  	u32 high1, high2, low, pixel, vbl_start, hsync_start, htotal; -	const struct drm_display_mode *mode = &dev->vblank[pipe].hwmode;  	unsigned long irqflags; +	/* +	 * On i965gm TV output the frame counter only works up to +	 * the point when we enable the TV encoder. After that the +	 * frame counter ceases to work and reads zero. We need a +	 * vblank wait before enabling the TV encoder and so we +	 * have to enable vblank interrupts while the frame counter +	 * is still in a working state. However the core vblank code +	 * does not like us returning non-zero frame counter values +	 * when we've told it that we don't have a working frame +	 * counter. Thus we must stop non-zero values leaking out. 
+	 */ +	if (!vblank->max_vblank_count) +		return 0; +  	htotal = mode->crtc_htotal;  	hsync_start = mode->crtc_hsync_start;  	vbl_start = mode->crtc_vblank_start; @@ -999,6 +1014,9 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,  	int position;  	int vbl_start, vbl_end, hsync_start, htotal, vtotal;  	unsigned long irqflags; +	bool use_scanline_counter = INTEL_GEN(dev_priv) >= 5 || +		IS_G4X(dev_priv) || IS_GEN(dev_priv, 2) || +		mode->private_flags & I915_MODE_FLAG_USE_SCANLINE_COUNTER;  	if (WARN_ON(!mode->crtc_clock)) {  		DRM_DEBUG_DRIVER("trying to get scanoutpos for disabled " @@ -1031,7 +1049,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,  	if (stime)  		*stime = ktime_get(); -	if (IS_GEN(dev_priv, 2) || IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { +	if (use_scanline_counter) {  		/* No obvious pixelcount register. Only query vertical  		 * scanout position from Display scan line register.  		 */ @@ -1091,7 +1109,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,  	else  		position += vtotal - vbl_end; -	if (IS_GEN(dev_priv, 2) || IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { +	if (use_scanline_counter) {  		*vpos = position;  		*hpos = 0;  	} else { @@ -1153,68 +1171,6 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)  	return;  } -static void notify_ring(struct intel_engine_cs *engine) -{ -	const u32 seqno = intel_engine_get_seqno(engine); -	struct i915_request *rq = NULL; -	struct task_struct *tsk = NULL; -	struct intel_wait *wait; - -	if (unlikely(!engine->breadcrumbs.irq_armed)) -		return; - -	rcu_read_lock(); - -	spin_lock(&engine->breadcrumbs.irq_lock); -	wait = engine->breadcrumbs.irq_wait; -	if (wait) { -		/* -		 * We use a callback from the dma-fence to submit -		 * requests after waiting on our own requests. To -		 * ensure minimum delay in queuing the next request to -		 * hardware, signal the fence now rather than wait for -		 * the signaler to be woken up. We still wake up the -		 * waiter in order to handle the irq-seqno coherency -		 * issues (we may receive the interrupt before the -		 * seqno is written, see __i915_request_irq_complete()) -		 * and to handle coalescing of multiple seqno updates -		 * and many waiters. 
-		 */ -		if (i915_seqno_passed(seqno, wait->seqno)) { -			struct i915_request *waiter = wait->request; - -			if (waiter && -			    !i915_request_signaled(waiter) && -			    intel_wait_check_request(wait, waiter)) -				rq = i915_request_get(waiter); - -			tsk = wait->tsk; -		} - -		engine->breadcrumbs.irq_count++; -	} else { -		if (engine->breadcrumbs.irq_armed) -			__intel_engine_disarm_breadcrumbs(engine); -	} -	spin_unlock(&engine->breadcrumbs.irq_lock); - -	if (rq) { -		spin_lock(&rq->lock); -		dma_fence_signal_locked(&rq->fence); -		GEM_BUG_ON(!i915_request_completed(rq)); -		spin_unlock(&rq->lock); - -		i915_request_put(rq); -	} - -	if (tsk && tsk->state & TASK_NORMAL) -		wake_up_process(tsk); - -	rcu_read_unlock(); - -	trace_intel_engine_notify(engine, wait); -} -  static void vlv_c0_read(struct drm_i915_private *dev_priv,  			struct intel_rps_ei *ei)  { @@ -1459,20 +1415,20 @@ static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv,  			       u32 gt_iir)  {  	if (gt_iir & GT_RENDER_USER_INTERRUPT) -		notify_ring(dev_priv->engine[RCS]); +		intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);  	if (gt_iir & ILK_BSD_USER_INTERRUPT) -		notify_ring(dev_priv->engine[VCS]); +		intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);  }  static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,  			       u32 gt_iir)  {  	if (gt_iir & GT_RENDER_USER_INTERRUPT) -		notify_ring(dev_priv->engine[RCS]); +		intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);  	if (gt_iir & GT_BSD_USER_INTERRUPT) -		notify_ring(dev_priv->engine[VCS]); +		intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);  	if (gt_iir & GT_BLT_USER_INTERRUPT) -		notify_ring(dev_priv->engine[BCS]); +		intel_engine_breadcrumbs_irq(dev_priv->engine[BCS]);  	if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |  		      GT_BSD_CS_ERROR_INTERRUPT | @@ -1492,7 +1448,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)  		tasklet = true;  	if (iir & GT_RENDER_USER_INTERRUPT) { -		notify_ring(engine); +		intel_engine_breadcrumbs_irq(engine);  		tasklet |= USES_GUC_SUBMISSION(engine->i915);  	} @@ -1838,7 +1794,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)  	if (HAS_VEBOX(dev_priv)) {  		if (pm_iir & PM_VEBOX_USER_INTERRUPT) -			notify_ring(dev_priv->engine[VECS]); +			intel_engine_breadcrumbs_irq(dev_priv->engine[VECS]);  		if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)  			DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); @@ -4262,7 +4218,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)  		I915_WRITE16(IIR, iir);  		if (iir & I915_USER_INTERRUPT) -			notify_ring(dev_priv->engine[RCS]); +			intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);  		if (iir & I915_MASTER_ERROR_INTERRUPT)  			i8xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4370,7 +4326,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)  		I915_WRITE(IIR, iir);  		if (iir & I915_USER_INTERRUPT) -			notify_ring(dev_priv->engine[RCS]); +			intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);  		if (iir & I915_MASTER_ERROR_INTERRUPT)  			i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4515,10 +4471,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)  		I915_WRITE(IIR, iir);  		if (iir & I915_USER_INTERRUPT) -			notify_ring(dev_priv->engine[RCS]); +			intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);  		if (iir & I915_BSD_USER_INTERRUPT) -			notify_ring(dev_priv->engine[VCS]); +			intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);  		if (iir & I915_MASTER_ERROR_INTERRUPT)  
			i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4581,16 +4537,10 @@ void intel_irq_init(struct drm_i915_private *dev_priv)  	if (INTEL_GEN(dev_priv) >= 8)  		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; -	if (IS_GEN(dev_priv, 2)) { -		/* Gen2 doesn't have a hardware frame counter */ -		dev->max_vblank_count = 0; -	} else if (IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { -		dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */ +	if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv))  		dev->driver->get_vblank_counter = g4x_get_vblank_counter; -	} else { +	else if (INTEL_GEN(dev_priv) >= 3)  		dev->driver->get_vblank_counter = i915_get_vblank_counter; -		dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */ -	}  	/*  	 * Opt out of the vblank disable timer on everything except gen2. diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 9f0539bdaa39..b5be0abbba35 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -97,8 +97,10 @@ i915_param_named_unsafe(disable_power_well, int, 0400,  i915_param_named_unsafe(enable_ips, int, 0600, "Enable IPS (default: true)"); -i915_param_named(fastboot, bool, 0600, -	"Try to skip unnecessary mode sets at boot time (default: false)"); +i915_param_named(fastboot, int, 0600, +	"Try to skip unnecessary mode sets at boot time " +	"(0=disabled, 1=enabled) " +	"Default: -1 (use per-chip default)");  i915_param_named_unsafe(prefault_disable, bool, 0600,  	"Disable page prefaulting for pread/pwrite/reloc (default:false). " diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 6efcf330bdab..3f14e9881a0d 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -63,10 +63,10 @@ struct drm_printer;  	param(int, edp_vswing, 0) \  	param(int, reset, 2) \  	param(unsigned int, inject_load_failure, 0) \ +	param(int, fastboot, -1) \  	/* leave bools at the end to not create holes */ \  	param(bool, alpha_support, IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT)) \  	param(bool, enable_hangcheck, true) \ -	param(bool, fastboot, false) \  	param(bool, prefault_disable, false) \  	param(bool, load_detect_test, false) \  	param(bool, force_reset_modeset_test, false) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 44c23ac60347..5d05572c9ff4 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -69,9 +69,15 @@  #define BDW_COLORS \  	.color = { .degamma_lut_size = 512, .gamma_lut_size = 512 }  #define CHV_COLORS \ -	.color = { .degamma_lut_size = 65, .gamma_lut_size = 257 } +	.color = { .degamma_lut_size = 65, .gamma_lut_size = 257, \ +		   .degamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING, \ +		   .gamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING, \ +	}  #define GLK_COLORS \ -	.color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } +	.color = { .degamma_lut_size = 0, .gamma_lut_size = 1024, \ +		   .degamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING | \ +					DRM_COLOR_LUT_EQUAL_CHANNELS, \ +	}  /* Keep in gen based order, and chronological order within a gen */ @@ -707,6 +713,7 @@ static const struct pci_device_id pciidlist[] = {  	INTEL_AML_KBL_GT2_IDS(&intel_kabylake_gt2_info),  	INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info),  	INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info), +	INTEL_CFL_H_GT1_IDS(&intel_coffeelake_gt1_info),  	INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info),  	
INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info),  	INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info), diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f4e447437d75..ede54fdc1676 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2801,6 +2801,9 @@ enum i915_power_well_id {  #define GEN6_RCS_PWR_FSM _MMIO(0x22ac)  #define GEN9_RCS_FE_FSM2 _MMIO(0x22a4) +#define GEN10_CACHE_MODE_SS			_MMIO(0xe420) +#define   FLOAT_BLEND_OPTIMIZATION_ENABLE	(1 << 4) +  /* Fuse readout registers for GT */  #define HSW_PAVP_FUSE1			_MMIO(0x911C)  #define   HSW_F1_EU_DIS_SHIFT		16 @@ -4895,6 +4898,7 @@ enum {  # define TV_OVERSAMPLE_NONE		(2 << 18)  /* Selects 8x oversampling */  # define TV_OVERSAMPLE_8X		(3 << 18) +# define TV_OVERSAMPLE_MASK		(3 << 18)  /* Selects progressive mode rather than interlaced */  # define TV_PROGRESSIVE			(1 << 17)  /* Sets the colorburst to PAL mode.  Required for non-M PAL modes. */ @@ -5709,6 +5713,12 @@ enum {  #define   PIPEMISC_DITHER_TYPE_SP	(0 << 2)  #define PIPEMISC(pipe)			_MMIO_PIPE2(pipe, _PIPE_MISC_A) +/* Skylake+ pipe bottom (background) color */ +#define _SKL_BOTTOM_COLOR_A		0x70034 +#define   SKL_BOTTOM_COLOR_GAMMA_ENABLE	(1 << 31) +#define   SKL_BOTTOM_COLOR_CSC_ENABLE	(1 << 30) +#define SKL_BOTTOM_COLOR(pipe)		_MMIO_PIPE2(pipe, _SKL_BOTTOM_COLOR_A) +  #define VLV_DPFLIPSTAT				_MMIO(VLV_DISPLAY_BASE + 0x70028)  #define   PIPEB_LINE_COMPARE_INT_EN		(1 << 29)  #define   PIPEB_HLINE_INT_EN			(1 << 28) @@ -9553,7 +9563,7 @@ enum skl_power_gate {  #define _MG_PLL3_ENABLE		0x46038  #define _MG_PLL4_ENABLE		0x4603C  /* Bits are the same as DPLL0_ENABLE */ -#define MG_PLL_ENABLE(port)	_MMIO_PORT((port) - PORT_C, _MG_PLL1_ENABLE, \ +#define MG_PLL_ENABLE(tc_port)	_MMIO_PORT((tc_port), _MG_PLL1_ENABLE, \  					   _MG_PLL2_ENABLE)  #define _MG_REFCLKIN_CTL_PORT1				0x16892C @@ -9562,9 +9572,9 @@ enum skl_power_gate {  #define _MG_REFCLKIN_CTL_PORT4				0x16B92C  #define   MG_REFCLKIN_CTL_OD_2_MUX(x)			((x) << 8)  #define   MG_REFCLKIN_CTL_OD_2_MUX_MASK			(0x7 << 8) -#define MG_REFCLKIN_CTL(port) _MMIO_PORT((port) - PORT_C, \ -					 _MG_REFCLKIN_CTL_PORT1, \ -					 _MG_REFCLKIN_CTL_PORT2) +#define MG_REFCLKIN_CTL(tc_port) _MMIO_PORT((tc_port), \ +					    _MG_REFCLKIN_CTL_PORT1, \ +					    _MG_REFCLKIN_CTL_PORT2)  #define _MG_CLKTOP2_CORECLKCTL1_PORT1			0x1688D8  #define _MG_CLKTOP2_CORECLKCTL1_PORT2			0x1698D8 @@ -9574,9 +9584,9 @@ enum skl_power_gate {  #define   MG_CLKTOP2_CORECLKCTL1_B_DIVRATIO_MASK	(0xff << 16)  #define   MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(x)		((x) << 8)  #define   MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK	(0xff << 8) -#define MG_CLKTOP2_CORECLKCTL1(port) _MMIO_PORT((port) - PORT_C, \ -						_MG_CLKTOP2_CORECLKCTL1_PORT1, \ -						_MG_CLKTOP2_CORECLKCTL1_PORT2) +#define MG_CLKTOP2_CORECLKCTL1(tc_port) _MMIO_PORT((tc_port), \ +						   _MG_CLKTOP2_CORECLKCTL1_PORT1, \ +						   _MG_CLKTOP2_CORECLKCTL1_PORT2)  #define _MG_CLKTOP2_HSCLKCTL_PORT1			0x1688D4  #define _MG_CLKTOP2_HSCLKCTL_PORT2			0x1698D4 @@ -9594,9 +9604,9 @@ enum skl_power_gate {  #define   MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(x)		((x) << 8)  #define   MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_SHIFT		8  #define   MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK		(0xf << 8) -#define MG_CLKTOP2_HSCLKCTL(port) _MMIO_PORT((port) - PORT_C, \ -					     _MG_CLKTOP2_HSCLKCTL_PORT1, \ -					     _MG_CLKTOP2_HSCLKCTL_PORT2) +#define MG_CLKTOP2_HSCLKCTL(tc_port) _MMIO_PORT((tc_port), \ +						_MG_CLKTOP2_HSCLKCTL_PORT1, \ +						_MG_CLKTOP2_HSCLKCTL_PORT2)  
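
For reference, an illustrative sketch (not part of the patch) of the addressing pattern the MG_* PLL macro hunks in this file rely on: each hunk switches the index passed to _MMIO_PORT() from "(port) - PORT_C" to an already zero-based tc_port, so callers index Type-C ports directly instead of every call site carrying the "- PORT_C" offset. Assuming _MMIO_PORT() resolves addresses in the usual two-anchor _PICK_EVEN style (base0 + index * (base1 - base0)), the standalone C sketch below shows the resulting address arithmetic. The helper names (PICK_EVEN, mg_pll_enable_addr) and the 0x46030/0x46034 anchor values (inferred from the _MG_PLL3/4_ENABLE offsets shown above) are assumptions for illustration, not the driver's actual definitions.

#include <stdio.h>

/* Two-anchor register picking, assumed to mirror _PICK_EVEN()/_MMIO_PORT(). */
#define PICK_EVEN(idx, a, b)	((a) + (idx) * ((b) - (a)))

#define MG_PLL1_ENABLE_ADDR	0x46030u	/* assumed; PLL3/PLL4 above are 0x46038/0x4603C */
#define MG_PLL2_ENABLE_ADDR	0x46034u	/* assumed */

/* Address of MG_PLL_ENABLE for a zero-based Type-C port index. */
static unsigned int mg_pll_enable_addr(unsigned int tc_port)
{
	return PICK_EVEN(tc_port, MG_PLL1_ENABLE_ADDR, MG_PLL2_ENABLE_ADDR);
}

int main(void)
{
	unsigned int tc_port;

	/* tc_port 0..3 -> 0x46030, 0x46034, 0x46038, 0x4603C */
	for (tc_port = 0; tc_port < 4; tc_port++)
		printf("MG_PLL_ENABLE(%u) = 0x%05x\n",
		       tc_port, mg_pll_enable_addr(tc_port));
	return 0;
}
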
#define _MG_PLL_DIV0_PORT1				0x168A00  #define _MG_PLL_DIV0_PORT2				0x169A00 @@ -9608,8 +9618,8 @@ enum skl_power_gate {  #define   MG_PLL_DIV0_FBDIV_FRAC(x)			((x) << 8)  #define   MG_PLL_DIV0_FBDIV_INT_MASK			(0xff << 0)  #define   MG_PLL_DIV0_FBDIV_INT(x)			((x) << 0) -#define MG_PLL_DIV0(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV0_PORT1, \ -				     _MG_PLL_DIV0_PORT2) +#define MG_PLL_DIV0(tc_port) _MMIO_PORT((tc_port), _MG_PLL_DIV0_PORT1, \ +					_MG_PLL_DIV0_PORT2)  #define _MG_PLL_DIV1_PORT1				0x168A04  #define _MG_PLL_DIV1_PORT2				0x169A04 @@ -9623,8 +9633,8 @@ enum skl_power_gate {  #define   MG_PLL_DIV1_NDIVRATIO(x)			((x) << 4)  #define   MG_PLL_DIV1_FBPREDIV_MASK			(0xf << 0)  #define   MG_PLL_DIV1_FBPREDIV(x)			((x) << 0) -#define MG_PLL_DIV1(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV1_PORT1, \ -				     _MG_PLL_DIV1_PORT2) +#define MG_PLL_DIV1(tc_port) _MMIO_PORT((tc_port), _MG_PLL_DIV1_PORT1, \ +					_MG_PLL_DIV1_PORT2)  #define _MG_PLL_LF_PORT1				0x168A08  #define _MG_PLL_LF_PORT2				0x169A08 @@ -9636,8 +9646,8 @@ enum skl_power_gate {  #define   MG_PLL_LF_GAINCTRL(x)				((x) << 16)  #define   MG_PLL_LF_INT_COEFF(x)			((x) << 8)  #define   MG_PLL_LF_PROP_COEFF(x)			((x) << 0) -#define MG_PLL_LF(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_LF_PORT1, \ -				   _MG_PLL_LF_PORT2) +#define MG_PLL_LF(tc_port) _MMIO_PORT((tc_port), _MG_PLL_LF_PORT1, \ +				      _MG_PLL_LF_PORT2)  #define _MG_PLL_FRAC_LOCK_PORT1				0x168A0C  #define _MG_PLL_FRAC_LOCK_PORT2				0x169A0C @@ -9649,9 +9659,9 @@ enum skl_power_gate {  #define   MG_PLL_FRAC_LOCK_DCODITHEREN			(1 << 10)  #define   MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN		(1 << 8)  #define   MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(x)		((x) << 0) -#define MG_PLL_FRAC_LOCK(port) _MMIO_PORT((port) - PORT_C, \ -					  _MG_PLL_FRAC_LOCK_PORT1, \ -					  _MG_PLL_FRAC_LOCK_PORT2) +#define MG_PLL_FRAC_LOCK(tc_port) _MMIO_PORT((tc_port), \ +					     _MG_PLL_FRAC_LOCK_PORT1, \ +					     _MG_PLL_FRAC_LOCK_PORT2)  #define _MG_PLL_SSC_PORT1				0x168A10  #define _MG_PLL_SSC_PORT2				0x169A10 @@ -9663,8 +9673,8 @@ enum skl_power_gate {  #define   MG_PLL_SSC_STEPNUM(x)				((x) << 10)  #define   MG_PLL_SSC_FLLEN				(1 << 9)  #define   MG_PLL_SSC_STEPSIZE(x)			((x) << 0) -#define MG_PLL_SSC(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_SSC_PORT1, \ -				    _MG_PLL_SSC_PORT2) +#define MG_PLL_SSC(tc_port) _MMIO_PORT((tc_port), _MG_PLL_SSC_PORT1, \ +				       _MG_PLL_SSC_PORT2)  #define _MG_PLL_BIAS_PORT1				0x168A14  #define _MG_PLL_BIAS_PORT2				0x169A14 @@ -9683,8 +9693,8 @@ enum skl_power_gate {  #define   MG_PLL_BIAS_VREF_RDAC_MASK			(0x7 << 5)  #define   MG_PLL_BIAS_IREFTRIM(x)			((x) << 0)  #define   MG_PLL_BIAS_IREFTRIM_MASK			(0x1f << 0) -#define MG_PLL_BIAS(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_BIAS_PORT1, \ -				     _MG_PLL_BIAS_PORT2) +#define MG_PLL_BIAS(tc_port) _MMIO_PORT((tc_port), _MG_PLL_BIAS_PORT1, \ +					_MG_PLL_BIAS_PORT2)  #define _MG_PLL_TDC_COLDST_BIAS_PORT1			0x168A18  #define _MG_PLL_TDC_COLDST_BIAS_PORT2			0x169A18 @@ -9695,9 +9705,9 @@ enum skl_power_gate {  #define   MG_PLL_TDC_COLDST_COLDSTART			(1 << 16)  #define   MG_PLL_TDC_TDCOVCCORR_EN			(1 << 2)  #define   MG_PLL_TDC_TDCSEL(x)				((x) << 0) -#define MG_PLL_TDC_COLDST_BIAS(port) _MMIO_PORT((port) - PORT_C, \ -						_MG_PLL_TDC_COLDST_BIAS_PORT1, \ -						_MG_PLL_TDC_COLDST_BIAS_PORT2) +#define MG_PLL_TDC_COLDST_BIAS(tc_port) _MMIO_PORT((tc_port), \ +						   _MG_PLL_TDC_COLDST_BIAS_PORT1, \ +						   _MG_PLL_TDC_COLDST_BIAS_PORT2)  #define _CNL_DPLL0_CFGCR0		0x6C000  #define 
_CNL_DPLL1_CFGCR0		0x6C080 diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f941e40fd373..9ed5baf157a3 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -60,7 +60,7 @@ static bool i915_fence_signaled(struct dma_fence *fence)  static bool i915_fence_enable_signaling(struct dma_fence *fence)  { -	return intel_engine_enable_signaling(to_request(fence), true); +	return i915_request_enable_breadcrumb(to_request(fence));  }  static signed long i915_fence_wait(struct dma_fence *fence, @@ -182,10 +182,11 @@ static void free_capture_list(struct i915_request *request)  static void __retire_engine_request(struct intel_engine_cs *engine,  				    struct i915_request *rq)  { -	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n", +	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n",  		  __func__, engine->name,  		  rq->fence.context, rq->fence.seqno,  		  rq->global_seqno, +		  hwsp_seqno(rq),  		  intel_engine_get_seqno(engine));  	GEM_BUG_ON(!i915_request_completed(rq)); @@ -198,10 +199,11 @@ static void __retire_engine_request(struct intel_engine_cs *engine,  	spin_unlock(&engine->timeline.lock);  	spin_lock(&rq->lock); +	i915_request_mark_complete(rq);  	if (!i915_request_signaled(rq))  		dma_fence_signal_locked(&rq->fence);  	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) -		intel_engine_cancel_signaling(rq); +		i915_request_cancel_breadcrumb(rq);  	if (rq->waitboost) {  		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));  		atomic_dec(&rq->i915->gt_pm.rps.num_waiters); @@ -244,10 +246,11 @@ static void i915_request_retire(struct i915_request *request)  {  	struct i915_gem_active *active, *next; -	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", +	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",  		  request->engine->name,  		  request->fence.context, request->fence.seqno,  		  request->global_seqno, +		  hwsp_seqno(request),  		  intel_engine_get_seqno(request->engine));  	lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -307,10 +310,11 @@ void i915_request_retire_upto(struct i915_request *rq)  	struct intel_ring *ring = rq->ring;  	struct i915_request *tmp; -	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", +	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",  		  rq->engine->name,  		  rq->fence.context, rq->fence.seqno,  		  rq->global_seqno, +		  hwsp_seqno(rq),  		  intel_engine_get_seqno(rq->engine));  	lockdep_assert_held(&rq->i915->drm.struct_mutex); @@ -329,7 +333,7 @@ void i915_request_retire_upto(struct i915_request *rq)  static u32 timeline_get_seqno(struct i915_timeline *tl)  { -	return ++tl->seqno; +	return tl->seqno += 1 + tl->has_initial_breadcrumb;  }  static void move_to_timeline(struct i915_request *request, @@ -355,10 +359,11 @@ void __i915_request_submit(struct i915_request *request)  	struct intel_engine_cs *engine = request->engine;  	u32 seqno; -	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n", +	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n",  		  engine->name,  		  request->fence.context, request->fence.seqno,  		  engine->timeline.seqno + 1, +		  hwsp_seqno(request),  		  intel_engine_get_seqno(engine));  	GEM_BUG_ON(!irqs_disabled()); @@ -372,20 +377,21 @@ void __i915_request_submit(struct i915_request *request)  	/* We may be recursing from the signal callback of another i915 fence */  	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); +	
GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); +	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);  	request->global_seqno = seqno; -	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) -		intel_engine_enable_signaling(request, false); +	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && +	    !i915_request_enable_breadcrumb(request)) +		intel_engine_queue_breadcrumbs(engine);  	spin_unlock(&request->lock); -	engine->emit_breadcrumb(request, -				request->ring->vaddr + request->postfix); +	engine->emit_fini_breadcrumb(request, +				     request->ring->vaddr + request->postfix);  	/* Transfer from per-context onto the global per-engine timeline */  	move_to_timeline(request, &engine->timeline);  	trace_i915_request_execute(request); - -	wake_up_all(&request->execute);  }  void i915_request_submit(struct i915_request *request) @@ -405,10 +411,11 @@ void __i915_request_unsubmit(struct i915_request *request)  {  	struct intel_engine_cs *engine = request->engine; -	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n", +	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n",  		  engine->name,  		  request->fence.context, request->fence.seqno,  		  request->global_seqno, +		  hwsp_seqno(request),  		  intel_engine_get_seqno(engine));  	GEM_BUG_ON(!irqs_disabled()); @@ -427,7 +434,9 @@ void __i915_request_unsubmit(struct i915_request *request)  	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);  	request->global_seqno = 0;  	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) -		intel_engine_cancel_signaling(request); +		i915_request_cancel_breadcrumb(request); +	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); +	clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);  	spin_unlock(&request->lock);  	/* Transfer back from the global per-engine timeline to per-context */ @@ -616,6 +625,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)  	rq->ring = ce->ring;  	rq->timeline = ce->ring->timeline;  	GEM_BUG_ON(rq->timeline == &engine->timeline); +	rq->hwsp_seqno = rq->timeline->hwsp_seqno;  	spin_lock_init(&rq->lock);  	dma_fence_init(&rq->fence, @@ -626,13 +636,11 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)  	/* We bump the ref for the fence chain */  	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); -	init_waitqueue_head(&rq->execute);  	i915_sched_node_init(&rq->sched);  	/* No zalloc, must clear what we need by hand */  	rq->global_seqno = 0; -	rq->signaling.wait.seqno = 0;  	rq->file_priv = NULL;  	rq->batch = NULL;  	rq->capture_list = NULL; @@ -650,7 +658,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)  	 * around inside i915_request_add() there is sufficient space at  	 * the beginning of the ring as well.  	 */ -	rq->reserved_space = 2 * engine->emit_breadcrumb_sz * sizeof(u32); +	rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32);  	/*  	 * Record the position of the start of the request so that @@ -901,7 +909,7 @@ void i915_request_add(struct i915_request *request)  	 * GPU processing the request, we never over-estimate the  	 * position of the ring's HEAD.  	 
*/ -	cs = intel_ring_begin(request, engine->emit_breadcrumb_sz); +	cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw);  	GEM_BUG_ON(IS_ERR(cs));  	request->postfix = intel_ring_offset(request, cs); @@ -1023,13 +1031,10 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)  	return this_cpu != cpu;  } -static bool __i915_spin_request(const struct i915_request *rq, -				u32 seqno, int state, unsigned long timeout_us) +static bool __i915_spin_request(const struct i915_request * const rq, +				int state, unsigned long timeout_us)  { -	struct intel_engine_cs *engine = rq->engine; -	unsigned int irq, cpu; - -	GEM_BUG_ON(!seqno); +	unsigned int cpu;  	/*  	 * Only wait for the request if we know it is likely to complete. @@ -1037,12 +1042,12 @@ static bool __i915_spin_request(const struct i915_request *rq,  	 * We don't track the timestamps around requests, nor the average  	 * request length, so we do not have a good indicator that this  	 * request will complete within the timeout. What we do know is the -	 * order in which requests are executed by the engine and so we can -	 * tell if the request has started. If the request hasn't started yet, -	 * it is a fair assumption that it will not complete within our -	 * relatively short timeout. +	 * order in which requests are executed by the context and so we can +	 * tell if the request has been started. If the request is not even +	 * running yet, it is a fair assumption that it will not complete +	 * within our relatively short timeout.  	 */ -	if (!intel_engine_has_started(engine, seqno)) +	if (!i915_request_is_running(rq))  		return false;  	/* @@ -1056,20 +1061,10 @@ static bool __i915_spin_request(const struct i915_request *rq,  	 * takes to sleep on a request, on the order of a microsecond.  	 */ -	irq = READ_ONCE(engine->breadcrumbs.irq_count);  	timeout_us += local_clock_us(&cpu);  	do { -		if (intel_engine_has_completed(engine, seqno)) -			return seqno == i915_request_global_seqno(rq); - -		/* -		 * Seqno are meant to be ordered *before* the interrupt. If -		 * we see an interrupt without a corresponding seqno advance, -		 * assume we won't see one in the near future but require -		 * the engine->seqno_barrier() to fixup coherency. -		 */ -		if (READ_ONCE(engine->breadcrumbs.irq_count) != irq) -			break; +		if (i915_request_completed(rq)) +			return true;  		if (signal_pending_state(state, current))  			break; @@ -1083,16 +1078,16 @@ static bool __i915_spin_request(const struct i915_request *rq,  	return false;  } -static bool __i915_wait_request_check_and_reset(struct i915_request *request) -{ -	struct i915_gpu_error *error = &request->i915->gpu_error; +struct request_wait { +	struct dma_fence_cb cb; +	struct task_struct *tsk; +}; -	if (likely(!i915_reset_handoff(error))) -		return false; +static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb) +{ +	struct request_wait *wait = container_of(cb, typeof(*wait), cb); -	__set_current_state(TASK_RUNNING); -	i915_reset(request->i915, error->stalled_mask, error->reason); -	return true; +	wake_up_process(wait->tsk);  }  /** @@ -1120,17 +1115,9 @@ long i915_request_wait(struct i915_request *rq,  {  	const int state = flags & I915_WAIT_INTERRUPTIBLE ?  		
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; -	wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue; -	DEFINE_WAIT_FUNC(reset, default_wake_function); -	DEFINE_WAIT_FUNC(exec, default_wake_function); -	struct intel_wait wait; +	struct request_wait wait;  	might_sleep(); -#if IS_ENABLED(CONFIG_LOCKDEP) -	GEM_BUG_ON(debug_locks && -		   !!lockdep_is_held(&rq->i915->drm.struct_mutex) != -		   !!(flags & I915_WAIT_LOCKED)); -#endif  	GEM_BUG_ON(timeout < 0);  	if (i915_request_completed(rq)) @@ -1141,57 +1128,23 @@ long i915_request_wait(struct i915_request *rq,  	trace_i915_request_wait_begin(rq, flags); -	add_wait_queue(&rq->execute, &exec); -	if (flags & I915_WAIT_LOCKED) -		add_wait_queue(errq, &reset); +	/* Optimistic short spin before touching IRQs */ +	if (__i915_spin_request(rq, state, 5)) +		goto out; -	intel_wait_init(&wait);  	if (flags & I915_WAIT_PRIORITY)  		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); -restart: -	do { -		set_current_state(state); -		if (intel_wait_update_request(&wait, rq)) -			break; - -		if (flags & I915_WAIT_LOCKED && -		    __i915_wait_request_check_and_reset(rq)) -			continue; - -		if (signal_pending_state(state, current)) { -			timeout = -ERESTARTSYS; -			goto complete; -		} - -		if (!timeout) { -			timeout = -ETIME; -			goto complete; -		} - -		timeout = io_schedule_timeout(timeout); -	} while (1); - -	GEM_BUG_ON(!intel_wait_has_seqno(&wait)); -	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); - -	/* Optimistic short spin before touching IRQs */ -	if (__i915_spin_request(rq, wait.seqno, state, 5)) -		goto complete; +	wait.tsk = current; +	if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake)) +		goto out; -	set_current_state(state); -	if (intel_engine_add_wait(rq->engine, &wait)) -		/* -		 * In order to check that we haven't missed the interrupt -		 * as we enabled it, we need to kick ourselves to do a -		 * coherent check on the seqno before we sleep. -		 */ -		goto wakeup; +	for (;;) { +		set_current_state(state); -	if (flags & I915_WAIT_LOCKED) -		__i915_wait_request_check_and_reset(rq); +		if (i915_request_completed(rq)) +			break; -	for (;;) {  		if (signal_pending_state(state, current)) {  			timeout = -ERESTARTSYS;  			break; @@ -1203,50 +1156,13 @@ restart:  		}  		timeout = io_schedule_timeout(timeout); - -		if (intel_wait_complete(&wait) && -		    intel_wait_check_request(&wait, rq)) -			break; - -		set_current_state(state); - -wakeup: -		if (i915_request_completed(rq)) -			break; - -		/* -		 * If the GPU is hung, and we hold the lock, reset the GPU -		 * and then check for completion. On a full reset, the engine's -		 * HW seqno will be advanced passed us and we are complete. -		 * If we do a partial reset, we have to wait for the GPU to -		 * resume and update the breadcrumb. -		 * -		 * If we don't hold the mutex, we can just wait for the worker -		 * to come along and update the breadcrumb (either directly -		 * itself, or indirectly by recovering the GPU). 
-		 */ -		if (flags & I915_WAIT_LOCKED && -		    __i915_wait_request_check_and_reset(rq)) -			continue; - -		/* Only spin if we know the GPU is processing this request */ -		if (__i915_spin_request(rq, wait.seqno, state, 2)) -			break; - -		if (!intel_wait_check_request(&wait, rq)) { -			intel_engine_remove_wait(rq->engine, &wait); -			goto restart; -		}  	} - -	intel_engine_remove_wait(rq->engine, &wait); -complete:  	__set_current_state(TASK_RUNNING); -	if (flags & I915_WAIT_LOCKED) -		remove_wait_queue(errq, &reset); -	remove_wait_queue(&rq->execute, &exec); -	trace_i915_request_wait_end(rq); +	dma_fence_remove_callback(&rq->fence, &wait.cb); + +out: +	trace_i915_request_wait_end(rq);  	return timeout;  } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index c0f084ca4f29..3cffb96203b9 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -38,23 +38,34 @@ struct drm_i915_gem_object;  struct i915_request;  struct i915_timeline; -struct intel_wait { -	struct rb_node node; -	struct task_struct *tsk; -	struct i915_request *request; -	u32 seqno; -}; - -struct intel_signal_node { -	struct intel_wait wait; -	struct list_head link; -}; -  struct i915_capture_list {  	struct i915_capture_list *next;  	struct i915_vma *vma;  }; +enum { +	/* +	 * I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW. +	 * +	 * Set by __i915_request_submit() on handing over to HW, and cleared +	 * by __i915_request_unsubmit() if we preempt this request. +	 * +	 * Finally cleared for consistency on retiring the request, when +	 * we know the HW is no longer running this request. +	 * +	 * See i915_request_is_active() +	 */ +	I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS, + +	/* +	 * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list +	 * +	 * Internal bookkeeping used by the breadcrumb code to track when +	 * a request is on the various signal_list. +	 */ +	I915_FENCE_FLAG_SIGNAL, +}; +  /**   * Request queue structure.   * @@ -97,7 +108,7 @@ struct i915_request {  	struct intel_context *hw_context;  	struct intel_ring *ring;  	struct i915_timeline *timeline; -	struct intel_signal_node signaling; +	struct list_head signal_link;  	/*  	 * The rcu epoch of when this request was allocated. Used to judiciously @@ -116,7 +127,6 @@ struct i915_request {  	 */  	struct i915_sw_fence submit;  	wait_queue_entry_t submitq; -	wait_queue_head_t execute;  	/*  	 * A list of everyone we wait upon, and everyone who waits upon us. @@ -130,6 +140,13 @@ struct i915_request {  	struct i915_sched_node sched;  	struct i915_dependency dep; +	/* +	 * A convenience pointer to the current breadcrumb value stored in +	 * the HW status page (or our timeline's local equivalent). The full +	 * path would be rq->hw_context->ring->timeline->hwsp_seqno. +	 */ +	const u32 *hwsp_seqno; +  	/**  	 * GEM sequence number associated with this request on the  	 * global execution timeline. It is zero when the request is not @@ -248,7 +265,7 @@ i915_request_put(struct i915_request *rq)   * that it has passed the global seqno and the global seqno is unchanged   * after the read, it is indeed complete).   
*/ -static u32 +static inline u32  i915_request_global_seqno(const struct i915_request *request)  {  	return READ_ONCE(request->global_seqno); @@ -270,6 +287,10 @@ void i915_request_skip(struct i915_request *request, int error);  void __i915_request_unsubmit(struct i915_request *request);  void i915_request_unsubmit(struct i915_request *request); +/* Note: part of the intel_breadcrumbs family */ +bool i915_request_enable_breadcrumb(struct i915_request *request); +void i915_request_cancel_breadcrumb(struct i915_request *request); +  long i915_request_wait(struct i915_request *rq,  		       unsigned int flags,  		       long timeout) @@ -282,13 +303,14 @@ long i915_request_wait(struct i915_request *rq,  static inline bool i915_request_signaled(const struct i915_request *rq)  { +	/* The request may live longer than its HWSP, so check flags first! */  	return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags);  } -static inline bool intel_engine_has_started(struct intel_engine_cs *engine, -					    u32 seqno); -static inline bool intel_engine_has_completed(struct intel_engine_cs *engine, -					      u32 seqno); +static inline bool i915_request_is_active(const struct i915_request *rq) +{ +	return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); +}  /**   * Returns true if seq1 is later than seq2. @@ -298,6 +320,40 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)  	return (s32)(seq1 - seq2) >= 0;  } +static inline u32 __hwsp_seqno(const struct i915_request *rq) +{ +	return READ_ONCE(*rq->hwsp_seqno); +} + +/** + * hwsp_seqno - the current breadcrumb value in the HW status page + * @rq: the request, to chase the relevant HW status page + * + * The emphasis in naming here is that hwsp_seqno() is not a property of the + * request, but an indication of the current HW state (associated with this + * request). Its value will change as the GPU executes more requests. + * + * Returns the current breadcrumb value in the associated HW status page (or + * the local timeline's equivalent) for this request. The request itself + * has the associated breadcrumb value of rq->fence.seqno, when the HW + * status page has that breadcrumb or later, this request is complete. + */ +static inline u32 hwsp_seqno(const struct i915_request *rq) +{ +	u32 seqno; + +	rcu_read_lock(); /* the HWSP may be freed at runtime */ +	seqno = __hwsp_seqno(rq); +	rcu_read_unlock(); + +	return seqno; +} + +static inline bool __i915_request_has_started(const struct i915_request *rq) +{ +	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); +} +  /**   * i915_request_started - check if the request has begun being executed   * @rq: the request @@ -309,32 +365,40 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)   */  static inline bool i915_request_started(const struct i915_request *rq)  { -	u32 seqno; +	if (i915_request_signaled(rq)) +		return true; -	seqno = i915_request_global_seqno(rq); -	if (!seqno) /* not yet submitted to HW */ -		return false; - -	return intel_engine_has_started(rq->engine, seqno); +	/* Remember: started but may have since been preempted! */ +	return __i915_request_has_started(rq);  } -static inline bool -__i915_request_completed(const struct i915_request *rq, u32 seqno) +/** + * i915_request_is_running - check if the request may actually be executing + * @rq: the request + * + * Returns true if the request is currently submitted to hardware, has passed + * its start point (i.e. the context is setup and not busywaiting). 
Note that + * it may no longer be running by the time the function returns! + */ +static inline bool i915_request_is_running(const struct i915_request *rq)  { -	GEM_BUG_ON(!seqno); -	return intel_engine_has_completed(rq->engine, seqno) && -		seqno == i915_request_global_seqno(rq); +	if (!i915_request_is_active(rq)) +		return false; + +	return __i915_request_has_started(rq);  }  static inline bool i915_request_completed(const struct i915_request *rq)  { -	u32 seqno; +	if (i915_request_signaled(rq)) +		return true; -	seqno = i915_request_global_seqno(rq); -	if (!seqno) -		return false; +	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno); +} -	return __i915_request_completed(rq, seqno); +static inline void i915_request_mark_complete(struct i915_request *rq) +{ +	rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */  }  void i915_retire_requests(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 342d9ee42601..4462007a681c 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -5,6 +5,7 @@   */  #include <linux/sched/mm.h> +#include <linux/stop_machine.h>  #include "i915_drv.h"  #include "i915_gpu_error.h" @@ -12,27 +13,33 @@  #include "intel_guc.h" +#define RESET_MAX_RETRIES 3 + +/* XXX How to handle concurrent GGTT updates using tiling registers? */ +#define RESET_UNDER_STOP_MACHINE 0 +  static void engine_skip_context(struct i915_request *rq)  {  	struct intel_engine_cs *engine = rq->engine;  	struct i915_gem_context *hung_ctx = rq->gem_context;  	struct i915_timeline *timeline = rq->timeline; -	unsigned long flags; +	lockdep_assert_held(&engine->timeline.lock);  	GEM_BUG_ON(timeline == &engine->timeline); -	spin_lock_irqsave(&engine->timeline.lock, flags);  	spin_lock(&timeline->lock); -	list_for_each_entry_continue(rq, &engine->timeline.requests, link) -		if (rq->gem_context == hung_ctx) -			i915_request_skip(rq, -EIO); +	if (i915_request_is_active(rq)) { +		list_for_each_entry_continue(rq, +					     &engine->timeline.requests, link) +			if (rq->gem_context == hung_ctx) +				i915_request_skip(rq, -EIO); +	}  	list_for_each_entry(rq, &timeline->requests, link)  		i915_request_skip(rq, -EIO);  	spin_unlock(&timeline->lock); -	spin_unlock_irqrestore(&engine->timeline.lock, flags);  }  static void client_mark_guilty(struct drm_i915_file_private *file_priv, @@ -59,7 +66,7 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv,  	}  } -static void context_mark_guilty(struct i915_gem_context *ctx) +static bool context_mark_guilty(struct i915_gem_context *ctx)  {  	unsigned int score;  	bool banned, bannable; @@ -72,7 +79,7 @@ static void context_mark_guilty(struct i915_gem_context *ctx)  	/* Cool contexts don't accumulate client ban score */  	if (!bannable) -		return; +		return false;  	if (banned) {  		DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", @@ -83,6 +90,8 @@ static void context_mark_guilty(struct i915_gem_context *ctx)  	if (!IS_ERR_OR_NULL(ctx->file_priv))  		client_mark_guilty(ctx->file_priv, ctx); + +	return banned;  }  static void context_mark_innocent(struct i915_gem_context *ctx) @@ -90,6 +99,21 @@ static void context_mark_innocent(struct i915_gem_context *ctx)  	atomic_inc(&ctx->active_count);  } +void i915_reset_request(struct i915_request *rq, bool guilty) +{ +	lockdep_assert_held(&rq->engine->timeline.lock); +	GEM_BUG_ON(i915_request_completed(rq)); + +	if (guilty) { +		i915_request_skip(rq, -EIO); +		if 
(context_mark_guilty(rq->gem_context)) +			engine_skip_context(rq); +	} else { +		dma_fence_set_error(&rq->fence, -EAGAIN); +		context_mark_innocent(rq->gem_context); +	} +} +  static void gen3_stop_engine(struct intel_engine_cs *engine)  {  	struct drm_i915_private *dev_priv = engine->i915; @@ -144,14 +168,14 @@ static int i915_do_reset(struct drm_i915_private *i915,  	/* Assert reset for at least 20 usec, and wait for acknowledgement. */  	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); -	usleep_range(50, 200); -	err = wait_for(i915_in_reset(pdev), 500); +	udelay(50); +	err = wait_for_atomic(i915_in_reset(pdev), 50);  	/* Clear the reset request. */  	pci_write_config_byte(pdev, I915_GDRST, 0); -	usleep_range(50, 200); +	udelay(50);  	if (!err) -		err = wait_for(!i915_in_reset(pdev), 500); +		err = wait_for_atomic(!i915_in_reset(pdev), 50);  	return err;  } @@ -171,7 +195,7 @@ static int g33_do_reset(struct drm_i915_private *i915,  	struct pci_dev *pdev = i915->drm.pdev;  	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); -	return wait_for(g4x_reset_complete(pdev), 500); +	return wait_for_atomic(g4x_reset_complete(pdev), 50);  }  static int g4x_do_reset(struct drm_i915_private *dev_priv, @@ -182,13 +206,13 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv,  	int ret;  	/* WaVcpClkGateDisableForMediaReset:ctg,elk */ -	I915_WRITE(VDECCLK_GATE_D, -		   I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); -	POSTING_READ(VDECCLK_GATE_D); +	I915_WRITE_FW(VDECCLK_GATE_D, +		      I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); +	POSTING_READ_FW(VDECCLK_GATE_D);  	pci_write_config_byte(pdev, I915_GDRST,  			      GRDOM_MEDIA | GRDOM_RESET_ENABLE); -	ret =  wait_for(g4x_reset_complete(pdev), 500); +	ret =  wait_for_atomic(g4x_reset_complete(pdev), 50);  	if (ret) {  		DRM_DEBUG_DRIVER("Wait for media reset failed\n");  		goto out; @@ -196,7 +220,7 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv,  	pci_write_config_byte(pdev, I915_GDRST,  			      GRDOM_RENDER | GRDOM_RESET_ENABLE); -	ret =  wait_for(g4x_reset_complete(pdev), 500); +	ret =  wait_for_atomic(g4x_reset_complete(pdev), 50);  	if (ret) {  		DRM_DEBUG_DRIVER("Wait for render reset failed\n");  		goto out; @@ -205,9 +229,9 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv,  out:  	pci_write_config_byte(pdev, I915_GDRST, 0); -	I915_WRITE(VDECCLK_GATE_D, -		   I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); -	POSTING_READ(VDECCLK_GATE_D); +	I915_WRITE_FW(VDECCLK_GATE_D, +		      I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); +	POSTING_READ_FW(VDECCLK_GATE_D);  	return ret;  } @@ -218,27 +242,29 @@ static int ironlake_do_reset(struct drm_i915_private *dev_priv,  {  	int ret; -	I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); -	ret = intel_wait_for_register(dev_priv, -				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, -				      500); +	I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); +	ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, +					   ILK_GRDOM_RESET_ENABLE, 0, +					   5000, 0, +					   NULL);  	if (ret) {  		DRM_DEBUG_DRIVER("Wait for render reset failed\n");  		goto out;  	} -	I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); -	ret = intel_wait_for_register(dev_priv, -				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, -				      500); +	I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); +	ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, +					   
ILK_GRDOM_RESET_ENABLE, 0, +					   5000, 0, +					   NULL);  	if (ret) {  		DRM_DEBUG_DRIVER("Wait for media reset failed\n");  		goto out;  	}  out: -	I915_WRITE(ILK_GDSR, 0); -	POSTING_READ(ILK_GDSR); +	I915_WRITE_FW(ILK_GDSR, 0); +	POSTING_READ_FW(ILK_GDSR);  	return ret;  } @@ -527,32 +553,21 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)  int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)  { -	reset_func reset = intel_get_gpu_reset(i915); +	const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1; +	reset_func reset; +	int ret = -ETIMEDOUT;  	int retry; -	int ret; -	/* -	 * We want to perform per-engine reset from atomic context (e.g. -	 * softirq), which imposes the constraint that we cannot sleep. -	 * However, experience suggests that spending a bit of time waiting -	 * for a reset helps in various cases, so for a full-device reset -	 * we apply the opposite rule and wait if we want to. As we should -	 * always follow up a failed per-engine reset with a full device reset, -	 * being a little faster, stricter and more error prone for the -	 * atomic case seems an acceptable compromise. -	 * -	 * Unfortunately this leads to a bimodal routine, when the goal was -	 * to have a single reset function that worked for resetting any -	 * number of engines simultaneously. -	 */ -	might_sleep_if(engine_mask == ALL_ENGINES); +	reset = intel_get_gpu_reset(i915); +	if (!reset) +		return -ENODEV;  	/*  	 * If the power well sleeps during the reset, the reset  	 * request may be dropped and never completes (causing -EIO).  	 */  	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); -	for (retry = 0; retry < 3; retry++) { +	for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {  		/*  		 * We stop engines, otherwise we might get failed reset and a  		 * dead gpu (on elk). Also as modern gpu as kbl can suffer @@ -569,15 +584,10 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)  		 */  		i915_stop_engines(i915, engine_mask); -		ret = -ENODEV; -		if (reset) { -			GEM_TRACE("engine_mask=%x\n", engine_mask); -			ret = reset(i915, engine_mask, retry); -		} -		if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES) -			break; - -		cond_resched(); +		GEM_TRACE("engine_mask=%x\n", engine_mask); +		preempt_disable(); +		ret = reset(i915, engine_mask, retry); +		preempt_enable();  	}  	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); @@ -586,6 +596,9 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)  bool intel_has_gpu_reset(struct drm_i915_private *i915)  { +	if (USES_GUC(i915)) +		return false; +  	return intel_get_gpu_reset(i915);  } @@ -613,11 +626,8 @@ int intel_reset_guc(struct drm_i915_private *i915)   * Ensure irq handler finishes, and not run again.   * Also return the active request so that we only search for it once.   */ -static struct i915_request * -reset_prepare_engine(struct intel_engine_cs *engine) +static void reset_prepare_engine(struct intel_engine_cs *engine)  { -	struct i915_request *rq; -  	/*  	 * During the reset sequence, we must prevent the engine from  	 * entering RC6. As the context state is undefined until we restart @@ -626,162 +636,85 @@ reset_prepare_engine(struct intel_engine_cs *engine)  	 * GPU state upon resume, i.e. fail to restart after a reset.  	 */  	intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); - -	rq = engine->reset.prepare(engine); -	if (rq && rq->fence.error == -EIO) -		rq = ERR_PTR(-EIO); /* Previous reset failed! 
*/ - -	return rq; +	engine->reset.prepare(engine);  } -static int reset_prepare(struct drm_i915_private *i915) +static void reset_prepare(struct drm_i915_private *i915)  {  	struct intel_engine_cs *engine; -	struct i915_request *rq;  	enum intel_engine_id id; -	int err = 0; - -	for_each_engine(engine, i915, id) { -		rq = reset_prepare_engine(engine); -		if (IS_ERR(rq)) { -			err = PTR_ERR(rq); -			continue; -		} -		engine->hangcheck.active_request = rq; -	} +	for_each_engine(engine, i915, id) +		reset_prepare_engine(engine); -	i915_gem_revoke_fences(i915);  	intel_uc_sanitize(i915); - -	return err;  } -/* Returns the request if it was guilty of the hang */ -static struct i915_request * -reset_request(struct intel_engine_cs *engine, -	      struct i915_request *rq, -	      bool stalled) +static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)  { +	struct intel_engine_cs *engine; +	enum intel_engine_id id; +	int err; +  	/* -	 * The guilty request will get skipped on a hung engine. -	 * -	 * Users of client default contexts do not rely on logical -	 * state preserved between batches so it is safe to execute -	 * queued requests following the hang. Non default contexts -	 * rely on preserved state, so skipping a batch loses the -	 * evolution of the state and it needs to be considered corrupted. -	 * Executing more queued batches on top of corrupted state is -	 * risky. But we take the risk by trying to advance through -	 * the queued requests in order to make the client behaviour -	 * more predictable around resets, by not throwing away random -	 * amount of batches it has prepared for execution. Sophisticated -	 * clients can use gem_reset_stats_ioctl and dma fence status -	 * (exported via sync_file info ioctl on explicit fences) to observe -	 * when it loses the context state and should rebuild accordingly. -	 * -	 * The context ban, and ultimately the client ban, mechanism are safety -	 * valves if client submission ends up resulting in nothing more than -	 * subsequent hangs. +	 * Everything depends on having the GTT running, so we need to start +	 * there.  	 */ +	err = i915_ggtt_enable_hw(i915); +	if (err) +		return err; -	if (i915_request_completed(rq)) { -		GEM_TRACE("%s pardoned global=%d (fence %llx:%lld), current %d\n", -			  engine->name, rq->global_seqno, -			  rq->fence.context, rq->fence.seqno, -			  intel_engine_get_seqno(engine)); -		stalled = false; -	} - -	if (stalled) { -		context_mark_guilty(rq->gem_context); -		i915_request_skip(rq, -EIO); +	for_each_engine(engine, i915, id) +		intel_engine_reset(engine, stalled_mask & ENGINE_MASK(id)); -		/* If this context is now banned, skip all pending requests. */ -		if (i915_gem_context_is_banned(rq->gem_context)) -			engine_skip_context(rq); -	} else { -		/* -		 * Since this is not the hung engine, it may have advanced -		 * since the hang declaration. Double check by refinding -		 * the active request at the time of the reset. 
-		 */ -		rq = i915_gem_find_active_request(engine); -		if (rq) { -			unsigned long flags; - -			context_mark_innocent(rq->gem_context); -			dma_fence_set_error(&rq->fence, -EAGAIN); - -			/* Rewind the engine to replay the incomplete rq */ -			spin_lock_irqsave(&engine->timeline.lock, flags); -			rq = list_prev_entry(rq, link); -			if (&rq->link == &engine->timeline.requests) -				rq = NULL; -			spin_unlock_irqrestore(&engine->timeline.lock, flags); -		} -	} +	i915_gem_restore_fences(i915); -	return rq; +	return err;  } -static void reset_engine(struct intel_engine_cs *engine, -			 struct i915_request *rq, -			 bool stalled) +static void reset_finish_engine(struct intel_engine_cs *engine)  { -	if (rq) -		rq = reset_request(engine, rq, stalled); - -	/* Setup the CS to resume from the breadcrumb of the hung request */ -	engine->reset.reset(engine, rq); +	engine->reset.finish(engine); +	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);  } -static void gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask) +struct i915_gpu_restart { +	struct work_struct work; +	struct drm_i915_private *i915; +}; + +static void restart_work(struct work_struct *work)  { +	struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work); +	struct drm_i915_private *i915 = arg->i915;  	struct intel_engine_cs *engine;  	enum intel_engine_id id; +	intel_wakeref_t wakeref; -	lockdep_assert_held(&i915->drm.struct_mutex); - -	i915_retire_requests(i915); +	wakeref = intel_runtime_pm_get(i915); +	mutex_lock(&i915->drm.struct_mutex); +	WRITE_ONCE(i915->gpu_error.restart, NULL);  	for_each_engine(engine, i915, id) { -		struct intel_context *ce; - -		reset_engine(engine, -			     engine->hangcheck.active_request, -			     stalled_mask & ENGINE_MASK(id)); -		ce = fetch_and_zero(&engine->last_retired_context); -		if (ce) -			intel_context_unpin(ce); +		struct i915_request *rq;  		/*  		 * Ostensibily, we always want a context loaded for powersaving,  		 * so if the engine is idle after the reset, send a request  		 * to load our scratch kernel_context. -		 * -		 * More mysteriously, if we leave the engine idle after a reset, -		 * the next userspace batch may hang, with what appears to be -		 * an incoherent read by the CS (presumably stale TLB). An -		 * empty request appears sufficient to paper over the glitch.  		 
*/ -		if (intel_engine_is_idle(engine)) { -			struct i915_request *rq; +		if (!intel_engine_is_idle(engine)) +			continue; -			rq = i915_request_alloc(engine, i915->kernel_context); -			if (!IS_ERR(rq)) -				i915_request_add(rq); -		} +		rq = i915_request_alloc(engine, i915->kernel_context); +		if (!IS_ERR(rq)) +			i915_request_add(rq);  	} -	i915_gem_restore_fences(i915); -} - -static void reset_finish_engine(struct intel_engine_cs *engine) -{ -	engine->reset.finish(engine); +	mutex_unlock(&i915->drm.struct_mutex); +	intel_runtime_pm_put(i915, wakeref); -	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); +	kfree(arg);  }  static void reset_finish(struct drm_i915_private *i915) @@ -789,27 +722,49 @@ static void reset_finish(struct drm_i915_private *i915)  	struct intel_engine_cs *engine;  	enum intel_engine_id id; -	lockdep_assert_held(&i915->drm.struct_mutex); - -	for_each_engine(engine, i915, id) { -		engine->hangcheck.active_request = NULL; +	for_each_engine(engine, i915, id)  		reset_finish_engine(engine); +} + +static void reset_restart(struct drm_i915_private *i915) +{ +	struct i915_gpu_restart *arg; + +	/* +	 * Following the reset, ensure that we always reload context for +	 * powersaving, and to correct engine->last_retired_context. Since +	 * this requires us to submit a request, queue a worker to do that +	 * task for us to evade any locking here. +	 */ +	if (READ_ONCE(i915->gpu_error.restart)) +		return; + +	arg = kmalloc(sizeof(*arg), GFP_KERNEL); +	if (arg) { +		arg->i915 = i915; +		INIT_WORK(&arg->work, restart_work); + +		WRITE_ONCE(i915->gpu_error.restart, arg); +		queue_work(i915->wq, &arg->work);  	}  }  static void nop_submit_request(struct i915_request *request)  { +	struct intel_engine_cs *engine = request->engine;  	unsigned long flags;  	GEM_TRACE("%s fence %llx:%lld -> -EIO\n", -		  request->engine->name, -		  request->fence.context, request->fence.seqno); +		  engine->name, request->fence.context, request->fence.seqno);  	dma_fence_set_error(&request->fence, -EIO); -	spin_lock_irqsave(&request->engine->timeline.lock, flags); +	spin_lock_irqsave(&engine->timeline.lock, flags);  	__i915_request_submit(request); -	intel_engine_write_global_seqno(request->engine, request->global_seqno); -	spin_unlock_irqrestore(&request->engine->timeline.lock, flags); +	i915_request_mark_complete(request); +	intel_engine_write_global_seqno(engine, request->global_seqno); +	spin_unlock_irqrestore(&engine->timeline.lock, flags); + +	intel_engine_queue_breadcrumbs(engine);  }  void i915_gem_set_wedged(struct drm_i915_private *i915) @@ -864,7 +819,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)  	for_each_engine(engine, i915, id) {  		reset_finish_engine(engine); -		intel_engine_wakeup(engine); +		intel_engine_signal_breadcrumbs(engine);  	}  	smp_mb__before_atomic(); @@ -882,8 +837,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)  	struct i915_timeline *tl;  	bool ret = false; -	lockdep_assert_held(&i915->drm.struct_mutex); -  	if (!test_bit(I915_WEDGED, &error->flags))  		return true; @@ -904,11 +857,12 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)  	 *  	 * No more can be submitted until we reset the wedged bit.  	 
*/ -	list_for_each_entry(tl, &i915->gt.timelines, link) { +	mutex_lock(&i915->gt.timelines.mutex); +	list_for_each_entry(tl, &i915->gt.timelines.active_list, link) {  		struct i915_request *rq; +		long timeout; -		rq = i915_gem_active_peek(&tl->last_request, -					  &i915->drm.struct_mutex); +		rq = i915_gem_active_get_unlocked(&tl->last_request);  		if (!rq)  			continue; @@ -923,12 +877,15 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)  		 * and when the seqno passes the fence, the signaler  		 * then signals the fence waking us up).  		 */ -		if (dma_fence_default_wait(&rq->fence, true, -					   MAX_SCHEDULE_TIMEOUT) < 0) +		timeout = dma_fence_default_wait(&rq->fence, true, +						 MAX_SCHEDULE_TIMEOUT); +		i915_request_put(rq); +		if (timeout < 0) { +			mutex_unlock(&i915->gt.timelines.mutex);  			goto unlock; +		}  	} -	i915_retire_requests(i915); -	GEM_BUG_ON(i915->gt.active_requests); +	mutex_unlock(&i915->gt.timelines.mutex);  	intel_engines_sanitize(i915, false); @@ -942,7 +899,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)  	 * context and do not require stop_machine().  	 */  	intel_engines_reset_default_submission(i915); -	i915_gem_contexts_lost(i915);  	GEM_TRACE("end\n"); @@ -955,6 +911,52 @@ unlock:  	return ret;  } +struct __i915_reset { +	struct drm_i915_private *i915; +	unsigned int stalled_mask; +}; + +static int __i915_reset__BKL(void *data) +{ +	struct __i915_reset *arg = data; +	int err; + +	err = intel_gpu_reset(arg->i915, ALL_ENGINES); +	if (err) +		return err; + +	return gt_reset(arg->i915, arg->stalled_mask); +} + +#if RESET_UNDER_STOP_MACHINE +/* + * XXX An alternative to using stop_machine would be to park only the + * processes that have a GGTT mmap. By remote parking the threads (SIGSTOP) + * we should be able to prevent their memmory accesses via the lost fence + * registers over the course of the reset without the potential recursive + * of mutexes between the pagefault handler and reset. + * + * See igt/gem_mmap_gtt/hang + */ +#define __do_reset(fn, arg) stop_machine(fn, arg, NULL) +#else +#define __do_reset(fn, arg) fn(arg) +#endif + +static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask) +{ +	struct __i915_reset arg = { i915, stalled_mask }; +	int err, i; + +	err = __do_reset(__i915_reset__BKL, &arg); +	for (i = 0; err && i < RESET_MAX_RETRIES; i++) { +		msleep(100); +		err = __do_reset(__i915_reset__BKL, &arg); +	} + +	return err; +} +  /**   * i915_reset - reset chip after a hang   * @i915: #drm_i915_private to reset @@ -980,31 +982,22 @@ void i915_reset(struct drm_i915_private *i915,  {  	struct i915_gpu_error *error = &i915->gpu_error;  	int ret; -	int i;  	GEM_TRACE("flags=%lx\n", error->flags);  	might_sleep(); -	lockdep_assert_held(&i915->drm.struct_mutex);  	assert_rpm_wakelock_held(i915);  	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); -	if (!test_bit(I915_RESET_HANDOFF, &error->flags)) -		return; -  	/* Clear any previous failed attempts at recovery. Time to try again. 
*/  	if (!i915_gem_unset_wedged(i915)) -		goto wakeup; +		return;  	if (reason)  		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);  	error->reset_count++; -	ret = reset_prepare(i915); -	if (ret) { -		dev_err(i915->drm.dev, "GPU recovery failed\n"); -		goto taint; -	} +	reset_prepare(i915);  	if (!intel_has_gpu_reset(i915)) {  		if (i915_modparams.reset) @@ -1014,32 +1007,11 @@ void i915_reset(struct drm_i915_private *i915,  		goto error;  	} -	for (i = 0; i < 3; i++) { -		ret = intel_gpu_reset(i915, ALL_ENGINES); -		if (ret == 0) -			break; - -		msleep(100); -	} -	if (ret) { +	if (do_reset(i915, stalled_mask)) {  		dev_err(i915->drm.dev, "Failed to reset chip\n");  		goto taint;  	} -	/* Ok, now get things going again... */ - -	/* -	 * Everything depends on having the GTT running, so we need to start -	 * there. -	 */ -	ret = i915_ggtt_enable_hw(i915); -	if (ret) { -		DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n", -			  ret); -		goto error; -	} - -	gt_reset(i915, stalled_mask);  	intel_overlay_reset(i915);  	/* @@ -1061,9 +1033,8 @@ void i915_reset(struct drm_i915_private *i915,  finish:  	reset_finish(i915); -wakeup: -	clear_bit(I915_RESET_HANDOFF, &error->flags); -	wake_up_bit(&error->flags, I915_RESET_HANDOFF); +	if (!i915_terminally_wedged(error)) +		reset_restart(i915);  	return;  taint: @@ -1082,7 +1053,6 @@ taint:  	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);  error:  	i915_gem_set_wedged(i915); -	i915_retire_requests(i915);  	goto finish;  } @@ -1108,18 +1078,12 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *i915,  int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)  {  	struct i915_gpu_error *error = &engine->i915->gpu_error; -	struct i915_request *active_request;  	int ret;  	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);  	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); -	active_request = reset_prepare_engine(engine); -	if (IS_ERR_OR_NULL(active_request)) { -		/* Either the previous reset failed, or we pardon the reset. */ -		ret = PTR_ERR(active_request); -		goto out; -	} +	reset_prepare_engine(engine);  	if (msg)  		dev_notice(engine->i915->drm.dev, @@ -1143,7 +1107,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)  	 * active request and can drop it, adjust head to skip the offending  	 * request to resume executing remaining requests in the queue.  	 */ -	reset_engine(engine, active_request, true); +	intel_engine_reset(engine, true);  	/*  	 * The engine and its registers (and workarounds in case of render) @@ -1180,30 +1144,7 @@ static void i915_reset_device(struct drm_i915_private *i915,  	i915_wedge_on_timeout(&w, i915, 5 * HZ) {  		intel_prepare_reset(i915); -		error->reason = reason; -		error->stalled_mask = engine_mask; - -		/* Signal that locked waiters should reset the GPU */ -		smp_mb__before_atomic(); -		set_bit(I915_RESET_HANDOFF, &error->flags); -		wake_up_all(&error->wait_queue); - -		/* -		 * Wait for anyone holding the lock to wakeup, without -		 * blocking indefinitely on struct_mutex. 
-		 */ -		do { -			if (mutex_trylock(&i915->drm.struct_mutex)) { -				i915_reset(i915, engine_mask, reason); -				mutex_unlock(&i915->drm.struct_mutex); -			} -		} while (wait_on_bit_timeout(&error->flags, -					     I915_RESET_HANDOFF, -					     TASK_UNINTERRUPTIBLE, -					     1)); - -		error->stalled_mask = 0; -		error->reason = NULL; +		i915_reset(i915, engine_mask, reason);  		intel_finish_reset(i915);  	} @@ -1359,6 +1300,25 @@ out:  	intel_runtime_pm_put(i915, wakeref);  } +bool i915_reset_flush(struct drm_i915_private *i915) +{ +	int err; + +	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); + +	flush_workqueue(i915->wq); +	GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart)); + +	mutex_lock(&i915->drm.struct_mutex); +	err = i915_gem_wait_for_idle(i915, +				     I915_WAIT_LOCKED | +				     I915_WAIT_FOR_IDLE_BOOST, +				     MAX_SCHEDULE_TIMEOUT); +	mutex_unlock(&i915->drm.struct_mutex); + +	return !err; +} +  static void i915_wedge_me(struct work_struct *work)  {  	struct i915_wedge_me *w = container_of(work, typeof(*w), work.work); diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h index b6a519bde67d..f2d347f319df 100644 --- a/drivers/gpu/drm/i915/i915_reset.h +++ b/drivers/gpu/drm/i915/i915_reset.h @@ -29,6 +29,9 @@ void i915_reset(struct drm_i915_private *i915,  int i915_reset_engine(struct intel_engine_cs *engine,  		      const char *reason); +void i915_reset_request(struct i915_request *rq, bool guilty); +bool i915_reset_flush(struct drm_i915_private *i915); +  bool intel_has_gpu_reset(struct drm_i915_private *i915);  bool intel_has_reset_engine(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 340faea6c08a..d01683167c77 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -127,8 +127,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)  	return rb_entry(rb, struct i915_priolist, node);  } -static void assert_priolists(struct intel_engine_execlists * const execlists, -			     long queue_priority) +static void assert_priolists(struct intel_engine_execlists * const execlists)  {  	struct rb_node *rb;  	long last_prio, i; @@ -139,7 +138,7 @@ static void assert_priolists(struct intel_engine_execlists * const execlists,  	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=  		   rb_first(&execlists->queue.rb_root)); -	last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1; +	last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1;  	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {  		const struct i915_priolist *p = to_priolist(rb); @@ -166,7 +165,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)  	int idx, i;  	lockdep_assert_held(&engine->timeline.lock); -	assert_priolists(execlists, INT_MAX); +	assert_priolists(execlists);  	/* buckets sorted from highest [in slot 0] to lowest priority */  	idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1; @@ -239,6 +238,18 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)  	return engine;  } +static bool inflight(const struct i915_request *rq, +		     const struct intel_engine_cs *engine) +{ +	const struct i915_request *active; + +	if (!i915_request_is_active(rq)) +		return false; + +	active = port_request(engine->execlists.port); +	return active->hw_context == rq->hw_context; +} +  static void __i915_schedule(struct i915_request *rq,  			    const struct i915_sched_attr 
*attr)  { @@ -328,6 +339,7 @@ static void __i915_schedule(struct i915_request *rq,  		INIT_LIST_HEAD(&dep->dfs_link);  		engine = sched_lock_engine(node, engine); +		lockdep_assert_held(&engine->timeline.lock);  		/* Recheck after acquiring the engine->timeline.lock */  		if (prio <= node->attr.priority || node_signaled(node)) @@ -353,20 +365,19 @@ static void __i915_schedule(struct i915_request *rq,  				continue;  		} -		if (prio <= engine->execlists.queue_priority) +		if (prio <= engine->execlists.queue_priority_hint)  			continue; +		engine->execlists.queue_priority_hint = prio; +  		/*  		 * If we are already the currently executing context, don't  		 * bother evaluating if we should preempt ourselves.  		 */ -		if (node_to_request(node)->global_seqno && -		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno, -				      node_to_request(node)->global_seqno)) +		if (inflight(node_to_request(node), engine))  			continue;  		/* Defer (tasklet) submission until after all of our updates. */ -		engine->execlists.queue_priority = prio;  		tasklet_hi_schedule(&engine->execlists.tasklet);  	} diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index a73472dd12fd..207e21b478f2 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -31,6 +31,7 @@ struct i915_selftest {  	unsigned long timeout_jiffies;  	unsigned int timeout_ms;  	unsigned int random_seed; +	char *filter;  	int mock;  	int live;  }; diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 4667cc08c416..5ea3af393ffe 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -9,25 +9,155 @@  #include "i915_timeline.h"  #include "i915_syncmap.h" -void i915_timeline_init(struct drm_i915_private *i915, -			struct i915_timeline *timeline, -			const char *name) +struct i915_timeline_hwsp { +	struct i915_vma *vma; +	struct list_head free_link; +	u64 free_bitmap; +}; + +static inline struct i915_timeline_hwsp * +i915_timeline_hwsp(const struct i915_timeline *tl) +{ +	return tl->hwsp_ggtt->private; +} + +static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915) +{ +	struct drm_i915_gem_object *obj; +	struct i915_vma *vma; + +	obj = i915_gem_object_create_internal(i915, PAGE_SIZE); +	if (IS_ERR(obj)) +		return ERR_CAST(obj); + +	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + +	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); +	if (IS_ERR(vma)) +		i915_gem_object_put(obj); + +	return vma; +} + +static struct i915_vma * +hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)  { -	lockdep_assert_held(&i915->drm.struct_mutex); +	struct drm_i915_private *i915 = timeline->i915; +	struct i915_gt_timelines *gt = &i915->gt.timelines; +	struct i915_timeline_hwsp *hwsp; + +	BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE); + +	spin_lock(>->hwsp_lock); + +	/* hwsp_free_list only contains HWSP that have available cachelines */ +	hwsp = list_first_entry_or_null(>->hwsp_free_list, +					typeof(*hwsp), free_link); +	if (!hwsp) { +		struct i915_vma *vma; + +		spin_unlock(>->hwsp_lock); + +		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL); +		if (!hwsp) +			return ERR_PTR(-ENOMEM); + +		vma = __hwsp_alloc(i915); +		if (IS_ERR(vma)) { +			kfree(hwsp); +			return vma; +		} + +		vma->private = hwsp; +		hwsp->vma = vma; +		hwsp->free_bitmap = ~0ull; + +		spin_lock(>->hwsp_lock); +		list_add(&hwsp->free_link, >->hwsp_free_list); +	} + +	
GEM_BUG_ON(!hwsp->free_bitmap); +	*cacheline = __ffs64(hwsp->free_bitmap); +	hwsp->free_bitmap &= ~BIT_ULL(*cacheline); +	if (!hwsp->free_bitmap) +		list_del(&hwsp->free_link); + +	spin_unlock(>->hwsp_lock); + +	GEM_BUG_ON(hwsp->vma->private != hwsp); +	return hwsp->vma; +} + +static void hwsp_free(struct i915_timeline *timeline) +{ +	struct i915_gt_timelines *gt = &timeline->i915->gt.timelines; +	struct i915_timeline_hwsp *hwsp; + +	hwsp = i915_timeline_hwsp(timeline); +	if (!hwsp) /* leave global HWSP alone! */ +		return; + +	spin_lock(>->hwsp_lock); + +	/* As a cacheline becomes available, publish the HWSP on the freelist */ +	if (!hwsp->free_bitmap) +		list_add_tail(&hwsp->free_link, >->hwsp_free_list); + +	hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES); + +	/* And if no one is left using it, give the page back to the system */ +	if (hwsp->free_bitmap == ~0ull) { +		i915_vma_put(hwsp->vma); +		list_del(&hwsp->free_link); +		kfree(hwsp); +	} + +	spin_unlock(>->hwsp_lock); +} + +int i915_timeline_init(struct drm_i915_private *i915, +		       struct i915_timeline *timeline, +		       const char *name, +		       struct i915_vma *hwsp) +{ +	void *vaddr;  	/*  	 * Ideally we want a set of engines on a single leaf as we expect  	 * to mostly be tracking synchronisation between engines. It is not  	 * a huge issue if this is not the case, but we may want to mitigate  	 * any page crossing penalties if they become an issue. +	 * +	 * Called during early_init before we know how many engines there are.  	 */  	BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); +	timeline->i915 = i915;  	timeline->name = name; +	timeline->pin_count = 0; +	timeline->has_initial_breadcrumb = !hwsp; -	list_add(&timeline->link, &i915->gt.timelines); +	timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; +	if (!hwsp) { +		unsigned int cacheline; + +		hwsp = hwsp_alloc(timeline, &cacheline); +		if (IS_ERR(hwsp)) +			return PTR_ERR(hwsp); + +		timeline->hwsp_offset = cacheline * CACHELINE_BYTES; +	} +	timeline->hwsp_ggtt = i915_vma_get(hwsp); + +	vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); +	if (IS_ERR(vaddr)) { +		hwsp_free(timeline); +		i915_vma_put(hwsp); +		return PTR_ERR(vaddr); +	} -	/* Called during early_init before we know how many engines there are */ +	timeline->hwsp_seqno = +		memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);  	timeline->fence_context = dma_fence_context_alloc(1); @@ -37,6 +167,40 @@ void i915_timeline_init(struct drm_i915_private *i915,  	INIT_LIST_HEAD(&timeline->requests);  	i915_syncmap_init(&timeline->sync); + +	return 0; +} + +void i915_timelines_init(struct drm_i915_private *i915) +{ +	struct i915_gt_timelines *gt = &i915->gt.timelines; + +	mutex_init(>->mutex); +	INIT_LIST_HEAD(>->active_list); + +	spin_lock_init(>->hwsp_lock); +	INIT_LIST_HEAD(>->hwsp_free_list); + +	/* via i915_gem_wait_for_idle() */ +	i915_gem_shrinker_taints_mutex(i915, >->mutex); +} + +static void timeline_add_to_active(struct i915_timeline *tl) +{ +	struct i915_gt_timelines *gt = &tl->i915->gt.timelines; + +	mutex_lock(>->mutex); +	list_add(&tl->link, >->active_list); +	mutex_unlock(>->mutex); +} + +static void timeline_remove_from_active(struct i915_timeline *tl) +{ +	struct i915_gt_timelines *gt = &tl->i915->gt.timelines; + +	mutex_lock(>->mutex); +	list_del(&tl->link); +	mutex_unlock(>->mutex);  }  /** @@ -51,11 +215,11 @@ void i915_timeline_init(struct drm_i915_private *i915,   */  void i915_timelines_park(struct drm_i915_private *i915)  { +	struct i915_gt_timelines *gt = 
&i915->gt.timelines;  	struct i915_timeline *timeline; -	lockdep_assert_held(&i915->drm.struct_mutex); - -	list_for_each_entry(timeline, &i915->gt.timelines, link) { +	mutex_lock(>->mutex); +	list_for_each_entry(timeline, >->active_list, link) {  		/*  		 * All known fences are completed so we can scrap  		 * the current sync point tracking and start afresh, @@ -64,32 +228,87 @@ void i915_timelines_park(struct drm_i915_private *i915)  		 */  		i915_syncmap_free(&timeline->sync);  	} +	mutex_unlock(>->mutex);  }  void i915_timeline_fini(struct i915_timeline *timeline)  { +	GEM_BUG_ON(timeline->pin_count);  	GEM_BUG_ON(!list_empty(&timeline->requests));  	i915_syncmap_free(&timeline->sync); +	hwsp_free(timeline); -	list_del(&timeline->link); +	i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); +	i915_vma_put(timeline->hwsp_ggtt);  }  struct i915_timeline * -i915_timeline_create(struct drm_i915_private *i915, const char *name) +i915_timeline_create(struct drm_i915_private *i915, +		     const char *name, +		     struct i915_vma *global_hwsp)  {  	struct i915_timeline *timeline; +	int err;  	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);  	if (!timeline)  		return ERR_PTR(-ENOMEM); -	i915_timeline_init(i915, timeline, name); +	err = i915_timeline_init(i915, timeline, name, global_hwsp); +	if (err) { +		kfree(timeline); +		return ERR_PTR(err); +	} +  	kref_init(&timeline->kref);  	return timeline;  } +int i915_timeline_pin(struct i915_timeline *tl) +{ +	int err; + +	if (tl->pin_count++) +		return 0; +	GEM_BUG_ON(!tl->pin_count); + +	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH); +	if (err) +		goto unpin; + +	tl->hwsp_offset = +		i915_ggtt_offset(tl->hwsp_ggtt) + +		offset_in_page(tl->hwsp_offset); + +	timeline_add_to_active(tl); + +	return 0; + +unpin: +	tl->pin_count = 0; +	return err; +} + +void i915_timeline_unpin(struct i915_timeline *tl) +{ +	GEM_BUG_ON(!tl->pin_count); +	if (--tl->pin_count) +		return; + +	timeline_remove_from_active(tl); + +	/* +	 * Since this timeline is idle, all bariers upon which we were waiting +	 * must also be complete and so we can discard the last used barriers +	 * without loss of information. +	 */ +	i915_syncmap_free(&tl->sync); + +	__i915_vma_unpin(tl->hwsp_ggtt); +} +  void __i915_timeline_free(struct kref *kref)  {  	struct i915_timeline *timeline = @@ -99,6 +318,16 @@ void __i915_timeline_free(struct kref *kref)  	kfree(timeline);  } +void i915_timelines_fini(struct drm_i915_private *i915) +{ +	struct i915_gt_timelines *gt = &i915->gt.timelines; + +	GEM_BUG_ON(!list_empty(>->active_list)); +	GEM_BUG_ON(!list_empty(>->hwsp_free_list)); + +	mutex_destroy(>->mutex); +} +  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)  #include "selftests/mock_timeline.c"  #include "selftests/i915_timeline.c" diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 38c1e15e927a..8caeb66d1cd5 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -32,6 +32,9 @@  #include "i915_syncmap.h"  #include "i915_utils.h" +struct i915_vma; +struct i915_timeline_hwsp; +  struct i915_timeline {  	u64 fence_context;  	u32 seqno; @@ -40,6 +43,13 @@ struct i915_timeline {  #define TIMELINE_CLIENT 0 /* default subclass */  #define TIMELINE_ENGINE 1 +	unsigned int pin_count; +	const u32 *hwsp_seqno; +	struct i915_vma *hwsp_ggtt; +	u32 hwsp_offset; + +	bool has_initial_breadcrumb; +  	/**  	 * List of breadcrumbs associated with GPU requests currently  	 * outstanding. 
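
The hwsp_alloc()/hwsp_free() pair added to i915_timeline.c above carves each status page into 64 cacheline-sized slots, tracks them with a per-page u64 free bitmap, and keeps a list of pages that still have slots available; a page with no users left is handed back to the system. The same bookkeeping can be shown as a minimal user-space sketch (single-threaded, so the hwsp_lock is omitted; the slab/slot names and the __builtin_ctzll() stand-in for __ffs64() are illustrative, not the driver code):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define SLOT_SIZE  64u                 /* one "cacheline" per slot          */
#define PAGE_SZ    4096u               /* 4096 / 64 = 64 slots per page     */

struct slab {                          /* stand-in for i915_timeline_hwsp   */
	struct slab *next;             /* free-list link                    */
	uint64_t free_bitmap;          /* bit set => slot still available   */
	unsigned char page[PAGE_SZ];
};

static struct slab *free_list;         /* slabs with at least one free slot */

static void unlink_slab(struct slab *s)
{
	struct slab **pp = &free_list;

	while (*pp && *pp != s)
		pp = &(*pp)->next;
	if (*pp)
		*pp = s->next;
}

static struct slab *slot_alloc(unsigned int *offset)
{
	struct slab *s = free_list;

	if (!s) {                      /* no partially used slab: grow      */
		s = calloc(1, sizeof(*s));
		if (!s)
			return NULL;
		s->free_bitmap = ~0ull;
		s->next = free_list;
		free_list = s;
	}

	unsigned int slot = (unsigned int)__builtin_ctzll(s->free_bitmap);

	s->free_bitmap &= ~(1ull << slot);
	if (!s->free_bitmap)           /* slab exhausted: hide it           */
		unlink_slab(s);

	*offset = slot * SLOT_SIZE;
	return s;
}

static void slot_free(struct slab *s, unsigned int offset)
{
	if (!s->free_bitmap) {         /* slab has space again: republish   */
		s->next = free_list;
		free_list = s;
	}
	s->free_bitmap |= 1ull << (offset / SLOT_SIZE);

	if (s->free_bitmap == ~0ull) { /* completely idle: drop the page    */
		unlink_slab(s);
		free(s);
	}
}

int main(void)
{
	unsigned int off;
	struct slab *s = slot_alloc(&off);

	printf("got slot at page offset %u\n", off);
	slot_free(s, off);
	return 0;
}

Keeping exhausted pages off the free list is what lets the fast path find a usable page in O(1); only fully idle pages are ever freed.
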
@@ -66,13 +76,15 @@ struct i915_timeline {  	struct list_head link;  	const char *name; +	struct drm_i915_private *i915;  	struct kref kref;  }; -void i915_timeline_init(struct drm_i915_private *i915, -			struct i915_timeline *tl, -			const char *name); +int i915_timeline_init(struct drm_i915_private *i915, +		       struct i915_timeline *tl, +		       const char *name, +		       struct i915_vma *hwsp);  void i915_timeline_fini(struct i915_timeline *tl);  static inline void @@ -95,7 +107,9 @@ i915_timeline_set_subclass(struct i915_timeline *timeline,  }  struct i915_timeline * -i915_timeline_create(struct drm_i915_private *i915, const char *name); +i915_timeline_create(struct drm_i915_private *i915, +		     const char *name, +		     struct i915_vma *global_hwsp);  static inline struct i915_timeline *  i915_timeline_get(struct i915_timeline *timeline) @@ -134,6 +148,11 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,  	return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno);  } +int i915_timeline_pin(struct i915_timeline *tl); +void i915_timeline_unpin(struct i915_timeline *tl); + +void i915_timelines_init(struct drm_i915_private *i915);  void i915_timelines_park(struct drm_i915_private *i915); +void i915_timelines_fini(struct drm_i915_private *i915);  #endif diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 43da14f08dc0..eab313c3163c 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -752,31 +752,6 @@ trace_i915_request_out(struct i915_request *rq)  #endif  #endif -TRACE_EVENT(intel_engine_notify, -	    TP_PROTO(struct intel_engine_cs *engine, bool waiters), -	    TP_ARGS(engine, waiters), - -	    TP_STRUCT__entry( -			     __field(u32, dev) -			     __field(u16, class) -			     __field(u16, instance) -			     __field(u32, seqno) -			     __field(bool, waiters) -			     ), - -	    TP_fast_assign( -			   __entry->dev = engine->i915->drm.primary->index; -			   __entry->class = engine->uabi_class; -			   __entry->instance = engine->instance; -			   __entry->seqno = intel_engine_get_seqno(engine); -			   __entry->waiters = waiters; -			   ), - -	    TP_printk("dev=%u, engine=%u:%u, seqno=%u, waiters=%u", -		      __entry->dev, __entry->class, __entry->instance, -		      __entry->seqno, __entry->waiters) -); -  DEFINE_EVENT(i915_request, i915_request_retire,  	    TP_PROTO(struct i915_request *rq),  	    TP_ARGS(rq) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 5b4d78cdb4ca..d83b8ad5f859 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -79,9 +79,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)  	if (--vma->active_count)  		return; -	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); -	list_move_tail(&vma->vm_link, &vma->vm->inactive_list); -  	GEM_BUG_ON(!i915_gem_object_is_active(obj));  	if (--obj->active_count)  		return; @@ -190,33 +187,56 @@ vma_create(struct drm_i915_gem_object *obj,  								i915_gem_object_get_stride(obj));  		GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); -		/* -		 * We put the GGTT vma at the start of the vma-list, followed -		 * by the ppGGTT vma. 
This allows us to break early when -		 * iterating over only the GGTT vma for an object, see -		 * for_each_ggtt_vma() -		 */  		vma->flags |= I915_VMA_GGTT; -		list_add(&vma->obj_link, &obj->vma_list); -	} else { -		list_add_tail(&vma->obj_link, &obj->vma_list);  	} +	spin_lock(&obj->vma.lock); +  	rb = NULL; -	p = &obj->vma_tree.rb_node; +	p = &obj->vma.tree.rb_node;  	while (*p) {  		struct i915_vma *pos; +		long cmp;  		rb = *p;  		pos = rb_entry(rb, struct i915_vma, obj_node); -		if (i915_vma_compare(pos, vm, view) < 0) + +		/* +		 * If the view already exists in the tree, another thread +		 * already created a matching vma, so return the older instance +		 * and dispose of ours. +		 */ +		cmp = i915_vma_compare(pos, vm, view); +		if (cmp == 0) { +			spin_unlock(&obj->vma.lock); +			kmem_cache_free(vm->i915->vmas, vma); +			return pos; +		} + +		if (cmp < 0)  			p = &rb->rb_right;  		else  			p = &rb->rb_left;  	}  	rb_link_node(&vma->obj_node, rb, p); -	rb_insert_color(&vma->obj_node, &obj->vma_tree); +	rb_insert_color(&vma->obj_node, &obj->vma.tree); + +	if (i915_vma_is_ggtt(vma)) +		/* +		 * We put the GGTT vma at the start of the vma-list, followed +		 * by the ppGGTT vma. This allows us to break early when +		 * iterating over only the GGTT vma for an object, see +		 * for_each_ggtt_vma() +		 */ +		list_add(&vma->obj_link, &obj->vma.list); +	else +		list_add_tail(&vma->obj_link, &obj->vma.list); + +	spin_unlock(&obj->vma.lock); + +	mutex_lock(&vm->mutex);  	list_add(&vma->vm_link, &vm->unbound_list); +	mutex_unlock(&vm->mutex);  	return vma; @@ -232,7 +252,7 @@ vma_lookup(struct drm_i915_gem_object *obj,  {  	struct rb_node *rb; -	rb = obj->vma_tree.rb_node; +	rb = obj->vma.tree.rb_node;  	while (rb) {  		struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node);  		long cmp; @@ -272,16 +292,18 @@ i915_vma_instance(struct drm_i915_gem_object *obj,  {  	struct i915_vma *vma; -	lockdep_assert_held(&obj->base.dev->struct_mutex);  	GEM_BUG_ON(view && !i915_is_ggtt(vm));  	GEM_BUG_ON(vm->closed); +	spin_lock(&obj->vma.lock);  	vma = vma_lookup(obj, vm, view); -	if (!vma) +	spin_unlock(&obj->vma.lock); + +	/* vma_create() will resolve the race if another creates the vma */ +	if (unlikely(!vma))  		vma = vma_create(obj, vm, view);  	GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); -	GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma);  	return vma;  } @@ -659,7 +681,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)  	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));  	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level)); -	list_move_tail(&vma->vm_link, &vma->vm->inactive_list); +	mutex_lock(&vma->vm->mutex); +	list_move_tail(&vma->vm_link, &vma->vm->bound_list); +	mutex_unlock(&vma->vm->mutex);  	if (vma->obj) {  		struct drm_i915_gem_object *obj = vma->obj; @@ -692,8 +716,10 @@ i915_vma_remove(struct i915_vma *vma)  	vma->ops->clear_pages(vma); +	mutex_lock(&vma->vm->mutex);  	drm_mm_remove_node(&vma->node);  	list_move_tail(&vma->vm_link, &vma->vm->unbound_list); +	mutex_unlock(&vma->vm->mutex);  	/*  	 * Since the unbound list is global, only move to that list if @@ -804,10 +830,18 @@ static void __i915_vma_destroy(struct i915_vma *vma)  	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); -	list_del(&vma->obj_link); +	mutex_lock(&vma->vm->mutex);  	list_del(&vma->vm_link); -	if (vma->obj) -		rb_erase(&vma->obj_node, &vma->obj->vma_tree); +	mutex_unlock(&vma->vm->mutex); + +	if (vma->obj) { +		struct drm_i915_gem_object *obj = 
vma->obj; + +		spin_lock(&obj->vma.lock); +		list_del(&vma->obj_link); +		rb_erase(&vma->obj_node, &vma->obj->vma.tree); +		spin_unlock(&obj->vma.lock); +	}  	rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {  		GEM_BUG_ON(i915_gem_active_isset(&iter->base)); @@ -1003,10 +1037,8 @@ int i915_vma_move_to_active(struct i915_vma *vma,  	 * add the active reference first and queue for it to be dropped  	 * *last*.  	 */ -	if (!i915_gem_active_isset(active) && !vma->active_count++) { -		list_move_tail(&vma->vm_link, &vma->vm->active_list); +	if (!i915_gem_active_isset(active) && !vma->active_count++)  		obj->active_count++; -	}  	i915_gem_active_set(active, rq);  	GEM_BUG_ON(!i915_vma_is_active(vma));  	GEM_BUG_ON(!obj->active_count); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 4f7c1c7599f4..5793abe509a2 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -71,29 +71,42 @@ struct i915_vma {  	unsigned int open_count;  	unsigned long flags;  	/** -	 * How many users have pinned this object in GTT space. The following -	 * users can each hold at most one reference: pwrite/pread, execbuffer -	 * (objects are not allowed multiple times for the same batchbuffer), -	 * and the framebuffer code. When switching/pageflipping, the -	 * framebuffer code has at most two buffers pinned per crtc. +	 * How many users have pinned this object in GTT space.  	 * -	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 -	 * bits with absolutely no headroom. So use 4 bits. +	 * This is a tightly bound, fairly small number of users, so we +	 * stuff inside the flags field so that we can both check for overflow +	 * and detect a no-op i915_vma_pin() in a single check, while also +	 * pinning the vma. +	 * +	 * The worst case display setup would have the same vma pinned for +	 * use on each plane on each crtc, while also building the next atomic +	 * state and holding a pin for the length of the cleanup queue. In the +	 * future, the flip queue may be increased from 1. +	 * Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84 +	 * +	 * For GEM, the number of concurrent users for pwrite/pread is +	 * unbounded. For execbuffer, it is currently one but will in future +	 * be extended to allow multiple clients to pin vma concurrently. +	 * +	 * We also use suballocated pages, with each suballocation claiming +	 * its own pin on the shared vma. At present, this is limited to +	 * exclusive cachelines of a single page, so a maximum of 64 possible +	 * users.  	 
*/ -#define I915_VMA_PIN_MASK 0xf -#define I915_VMA_PIN_OVERFLOW	BIT(5) +#define I915_VMA_PIN_MASK 0xff +#define I915_VMA_PIN_OVERFLOW	BIT(8)  	/** Flags and address space this VMA is bound to */ -#define I915_VMA_GLOBAL_BIND	BIT(6) -#define I915_VMA_LOCAL_BIND	BIT(7) +#define I915_VMA_GLOBAL_BIND	BIT(9) +#define I915_VMA_LOCAL_BIND	BIT(10)  #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) -#define I915_VMA_GGTT		BIT(8) -#define I915_VMA_CAN_FENCE	BIT(9) -#define I915_VMA_CLOSED		BIT(10) -#define I915_VMA_USERFAULT_BIT	11 +#define I915_VMA_GGTT		BIT(11) +#define I915_VMA_CAN_FENCE	BIT(12) +#define I915_VMA_CLOSED		BIT(13) +#define I915_VMA_USERFAULT_BIT	14  #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT) -#define I915_VMA_GGTT_WRITE	BIT(12) +#define I915_VMA_GGTT_WRITE	BIT(15)  	unsigned int active_count;  	struct rb_root active; @@ -425,7 +438,7 @@ void i915_vma_parked(struct drm_i915_private *i915);   * or the list is empty ofc.   */  #define for_each_ggtt_vma(V, OBJ) \ -	list_for_each_entry(V, &(OBJ)->vma_list, obj_link)		\ +	list_for_each_entry(V, &(OBJ)->vma.list, obj_link)		\  		for_each_until(!i915_vma_is_ggtt(V))  #endif diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 9a2fdc77ebcb..a1a263026574 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -110,41 +110,39 @@ intel_plane_destroy_state(struct drm_plane *plane,  }  int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state, -					struct intel_crtc_state *crtc_state, +					struct intel_crtc_state *new_crtc_state,  					const struct intel_plane_state *old_plane_state, -					struct intel_plane_state *intel_state) +					struct intel_plane_state *new_plane_state)  { -	struct drm_plane *plane = intel_state->base.plane; -	struct drm_plane_state *state = &intel_state->base; -	struct intel_plane *intel_plane = to_intel_plane(plane); +	struct intel_plane *plane = to_intel_plane(new_plane_state->base.plane);  	int ret; -	crtc_state->active_planes &= ~BIT(intel_plane->id); -	crtc_state->nv12_planes &= ~BIT(intel_plane->id); -	intel_state->base.visible = false; +	new_crtc_state->active_planes &= ~BIT(plane->id); +	new_crtc_state->nv12_planes &= ~BIT(plane->id); +	new_plane_state->base.visible = false; -	/* If this is a cursor plane, no further checks are needed. 
*/ -	if (!intel_state->base.crtc && !old_plane_state->base.crtc) +	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)  		return 0; -	ret = intel_plane->check_plane(crtc_state, intel_state); +	ret = plane->check_plane(new_crtc_state, new_plane_state);  	if (ret)  		return ret;  	/* FIXME pre-g4x don't work like this */ -	if (state->visible) -		crtc_state->active_planes |= BIT(intel_plane->id); +	if (new_plane_state->base.visible) +		new_crtc_state->active_planes |= BIT(plane->id); -	if (state->visible && state->fb->format->format == DRM_FORMAT_NV12) -		crtc_state->nv12_planes |= BIT(intel_plane->id); +	if (new_plane_state->base.visible && +	    new_plane_state->base.fb->format->format == DRM_FORMAT_NV12) +		new_crtc_state->nv12_planes |= BIT(plane->id); -	if (state->visible || old_plane_state->base.visible) -		crtc_state->update_planes |= BIT(intel_plane->id); +	if (new_plane_state->base.visible || old_plane_state->base.visible) +		new_crtc_state->update_planes |= BIT(plane->id);  	return intel_plane_atomic_calc_changes(old_crtc_state, -					       &crtc_state->base, +					       &new_crtc_state->base,  					       old_plane_state, -					       state); +					       &new_plane_state->base);  }  static int intel_plane_atomic_check(struct drm_plane *plane, diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 561a4f9f044c..b508d8a735e0 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1663,6 +1663,13 @@ init_vbt_missing_defaults(struct drm_i915_private *dev_priv)  		struct ddi_vbt_port_info *info =  			&dev_priv->vbt.ddi_port_info[port]; +		/* +		 * VBT has the TypeC mode (native,TBT/USB) and we don't want +		 * to detect it. +		 */ +		if (intel_port_is_tc(dev_priv, port)) +			continue; +  		info->supports_dvi = (port != PORT_A && port != PORT_E);  		info->supports_hdmi = info->supports_dvi;  		info->supports_dp = (port != PORT_E); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index b58915b8708b..cacaa1d04d17 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -29,168 +29,146 @@  #define task_asleep(tsk) ((tsk)->state & TASK_NORMAL && !(tsk)->on_rq) -static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) +static void irq_enable(struct intel_engine_cs *engine)  { -	struct intel_wait *wait; -	unsigned int result = 0; - -	lockdep_assert_held(&b->irq_lock); - -	wait = b->irq_wait; -	if (wait) { -		/* -		 * N.B. Since task_asleep() and ttwu are not atomic, the -		 * waiter may actually go to sleep after the check, causing -		 * us to suppress a valid wakeup. We prefer to reduce the -		 * number of false positive missed_breadcrumb() warnings -		 * at the expense of a few false negatives, as it it easy -		 * to trigger a false positive under heavy load. Enough -		 * signal should remain from genuine missed_breadcrumb() -		 * for us to detect in CI. 
-		 */ -		bool was_asleep = task_asleep(wait->tsk); - -		result = ENGINE_WAKEUP_WAITER; -		if (wake_up_process(wait->tsk) && was_asleep) -			result |= ENGINE_WAKEUP_ASLEEP; -	} +	if (!engine->irq_enable) +		return; -	return result; +	/* Caller disables interrupts */ +	spin_lock(&engine->i915->irq_lock); +	engine->irq_enable(engine); +	spin_unlock(&engine->i915->irq_lock);  } -unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) +static void irq_disable(struct intel_engine_cs *engine)  { -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	unsigned long flags; -	unsigned int result; - -	spin_lock_irqsave(&b->irq_lock, flags); -	result = __intel_breadcrumbs_wakeup(b); -	spin_unlock_irqrestore(&b->irq_lock, flags); - -	return result; -} +	if (!engine->irq_disable) +		return; -static unsigned long wait_timeout(void) -{ -	return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); +	/* Caller disables interrupts */ +	spin_lock(&engine->i915->irq_lock); +	engine->irq_disable(engine); +	spin_unlock(&engine->i915->irq_lock);  } -static noinline void missed_breadcrumb(struct intel_engine_cs *engine) +static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)  { -	if (GEM_SHOW_DEBUG()) { -		struct drm_printer p = drm_debug_printer(__func__); +	lockdep_assert_held(&b->irq_lock); -		intel_engine_dump(engine, &p, -				  "%s missed breadcrumb at %pS\n", -				  engine->name, __builtin_return_address(0)); -	} +	GEM_BUG_ON(!b->irq_enabled); +	if (!--b->irq_enabled) +		irq_disable(container_of(b, +					 struct intel_engine_cs, +					 breadcrumbs)); -	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); +	b->irq_armed = false;  } -static void intel_breadcrumbs_hangcheck(struct timer_list *t) +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)  { -	struct intel_engine_cs *engine = -		from_timer(engine, t, breadcrumbs.hangcheck);  	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	unsigned int irq_count;  	if (!b->irq_armed)  		return; -	irq_count = READ_ONCE(b->irq_count); -	if (b->hangcheck_interrupts != irq_count) { -		b->hangcheck_interrupts = irq_count; -		mod_timer(&b->hangcheck, wait_timeout()); -		return; -	} +	spin_lock_irq(&b->irq_lock); +	if (b->irq_armed) +		__intel_breadcrumbs_disarm_irq(b); +	spin_unlock_irq(&b->irq_lock); +} -	/* We keep the hangcheck timer alive until we disarm the irq, even -	 * if there are no waiters at present. -	 * -	 * If the waiter was currently running, assume it hasn't had a chance -	 * to process the pending interrupt (e.g, low priority task on a loaded -	 * system) and wait until it sleeps before declaring a missed interrupt. -	 * -	 * If the waiter was asleep (and not even pending a wakeup), then we -	 * must have missed an interrupt as the GPU has stopped advancing -	 * but we still have a waiter. Assuming all batches complete within -	 * DRM_I915_HANGCHECK_JIFFIES [1.5s]! 
-	 */ -	if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) { -		missed_breadcrumb(engine); -		mod_timer(&b->fake_irq, jiffies + 1); -	} else { -		mod_timer(&b->hangcheck, wait_timeout()); -	} +static inline bool __request_completed(const struct i915_request *rq) +{ +	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);  } -static void intel_breadcrumbs_fake_irq(struct timer_list *t) +bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)  { -	struct intel_engine_cs *engine = -		from_timer(engine, t, breadcrumbs.fake_irq);  	struct intel_breadcrumbs *b = &engine->breadcrumbs; +	struct intel_context *ce, *cn; +	struct list_head *pos, *next; +	LIST_HEAD(signal); -	/* -	 * The timer persists in case we cannot enable interrupts, -	 * or if we have previously seen seqno/interrupt incoherency -	 * ("missed interrupt" syndrome, better known as a "missed breadcrumb"). -	 * Here the worker will wake up every jiffie in order to kick the -	 * oldest waiter to do the coherent seqno check. -	 */ +	spin_lock(&b->irq_lock); -	spin_lock_irq(&b->irq_lock); -	if (b->irq_armed && !__intel_breadcrumbs_wakeup(b)) -		__intel_engine_disarm_breadcrumbs(engine); -	spin_unlock_irq(&b->irq_lock); -	if (!b->irq_armed) -		return; +	if (b->irq_armed && list_empty(&b->signalers)) +		__intel_breadcrumbs_disarm_irq(b); -	/* If the user has disabled the fake-irq, restore the hangchecking */ -	if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) { -		mod_timer(&b->hangcheck, wait_timeout()); -		return; +	list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { +		GEM_BUG_ON(list_empty(&ce->signals)); + +		list_for_each_safe(pos, next, &ce->signals) { +			struct i915_request *rq = +				list_entry(pos, typeof(*rq), signal_link); + +			if (!__request_completed(rq)) +				break; + +			GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, +					     &rq->fence.flags)); +			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + +			/* +			 * We may race with direct invocation of +			 * dma_fence_signal(), e.g. i915_request_retire(), +			 * in which case we can skip processing it ourselves. +			 */ +			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, +				     &rq->fence.flags)) +				continue; + +			/* +			 * Queue for execution after dropping the signaling +			 * spinlock as the callback chain may end up adding +			 * more signalers to the same context or engine. +			 */ +			i915_request_get(rq); +			list_add_tail(&rq->signal_link, &signal); +		} + +		/* +		 * We process the list deletion in bulk, only using a list_add +		 * (not list_move) above but keeping the status of +		 * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit. 
+		 */ +		if (!list_is_first(pos, &ce->signals)) { +			/* Advance the list to the first incomplete request */ +			__list_del_many(&ce->signals, pos); +			if (&ce->signals == pos) /* now empty */ +				list_del_init(&ce->signal_link); +		}  	} -	mod_timer(&b->fake_irq, jiffies + 1); -} +	spin_unlock(&b->irq_lock); -static void irq_enable(struct intel_engine_cs *engine) -{ -	if (!engine->irq_enable) -		return; +	list_for_each_safe(pos, next, &signal) { +		struct i915_request *rq = +			list_entry(pos, typeof(*rq), signal_link); -	/* Caller disables interrupts */ -	spin_lock(&engine->i915->irq_lock); -	engine->irq_enable(engine); -	spin_unlock(&engine->i915->irq_lock); +		dma_fence_signal(&rq->fence); +		i915_request_put(rq); +	} + +	return !list_empty(&signal);  } -static void irq_disable(struct intel_engine_cs *engine) +bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)  { -	if (!engine->irq_disable) -		return; +	bool result; -	/* Caller disables interrupts */ -	spin_lock(&engine->i915->irq_lock); -	engine->irq_disable(engine); -	spin_unlock(&engine->i915->irq_lock); +	local_irq_disable(); +	result = intel_engine_breadcrumbs_irq(engine); +	local_irq_enable(); + +	return result;  } -void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) +static void signal_irq_work(struct irq_work *work)  { -	struct intel_breadcrumbs *b = &engine->breadcrumbs; - -	lockdep_assert_held(&b->irq_lock); -	GEM_BUG_ON(b->irq_wait); -	GEM_BUG_ON(!b->irq_armed); - -	GEM_BUG_ON(!b->irq_enabled); -	if (!--b->irq_enabled) -		irq_disable(engine); +	struct intel_engine_cs *engine = +		container_of(work, typeof(*engine), breadcrumbs.irq_work); -	b->irq_armed = false; +	intel_engine_breadcrumbs_irq(engine);  }  void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine) @@ -215,77 +193,14 @@ void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine)  	spin_unlock_irq(&b->irq_lock);  } -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	struct intel_wait *wait, *n; - -	if (!b->irq_armed) -		return; - -	/* -	 * We only disarm the irq when we are idle (all requests completed), -	 * so if the bottom-half remains asleep, it missed the request -	 * completion. -	 */ -	if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) -		missed_breadcrumb(engine); - -	spin_lock_irq(&b->rb_lock); - -	spin_lock(&b->irq_lock); -	b->irq_wait = NULL; -	if (b->irq_armed) -		__intel_engine_disarm_breadcrumbs(engine); -	spin_unlock(&b->irq_lock); - -	rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) { -		GEM_BUG_ON(!intel_engine_signaled(engine, wait->seqno)); -		RB_CLEAR_NODE(&wait->node); -		wake_up_process(wait->tsk); -	} -	b->waiters = RB_ROOT; - -	spin_unlock_irq(&b->rb_lock); -} - -static bool use_fake_irq(const struct intel_breadcrumbs *b) -{ -	const struct intel_engine_cs *engine = -		container_of(b, struct intel_engine_cs, breadcrumbs); - -	if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) -		return false; - -	/* -	 * Only start with the heavy weight fake irq timer if we have not -	 * seen any interrupts since enabling it the first time. If the -	 * interrupts are still arriving, it means we made a mistake in our -	 * engine->seqno_barrier(), a timing error that should be transient -	 * and unlikely to reoccur. 
-	 */ -	return READ_ONCE(b->irq_count) == b->hangcheck_interrupts; -} - -static void enable_fake_irq(struct intel_breadcrumbs *b) -{ -	/* Ensure we never sleep indefinitely */ -	if (!b->irq_enabled || use_fake_irq(b)) -		mod_timer(&b->fake_irq, jiffies + 1); -	else -		mod_timer(&b->hangcheck, wait_timeout()); -} - -static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) +static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)  {  	struct intel_engine_cs *engine =  		container_of(b, struct intel_engine_cs, breadcrumbs); -	struct drm_i915_private *i915 = engine->i915; -	bool enabled;  	lockdep_assert_held(&b->irq_lock);  	if (b->irq_armed) -		return false; +		return;  	/*  	 * The breadcrumb irq will be disarmed on the interrupt after the @@ -303,548 +218,130 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)  	 * the driver is idle) we disarm the breadcrumbs.  	 */ -	/* No interrupts? Kick the waiter every jiffie! */ -	enabled = false; -	if (!b->irq_enabled++ && -	    !test_bit(engine->id, &i915->gpu_error.test_irq_rings)) { +	if (!b->irq_enabled++)  		irq_enable(engine); -		enabled = true; -	} - -	enable_fake_irq(b); -	return enabled; -} - -static inline struct intel_wait *to_wait(struct rb_node *node) -{ -	return rb_entry(node, struct intel_wait, node);  } -static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, -					      struct intel_wait *wait) -{ -	lockdep_assert_held(&b->rb_lock); -	GEM_BUG_ON(b->irq_wait == wait); - -	/* -	 * This request is completed, so remove it from the tree, mark it as -	 * complete, and *then* wake up the associated task. N.B. when the -	 * task wakes up, it will find the empty rb_node, discern that it -	 * has already been removed from the tree and skip the serialisation -	 * of the b->rb_lock and b->irq_lock. This means that the destruction -	 * of the intel_wait is not serialised with the interrupt handler -	 * by the waiter - it must instead be serialised by the caller. -	 */ -	rb_erase(&wait->node, &b->waiters); -	RB_CLEAR_NODE(&wait->node); - -	if (wait->tsk->state != TASK_RUNNING) -		wake_up_process(wait->tsk); /* implicit smp_wmb() */ -} - -static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine, -					    struct rb_node *next) +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)  {  	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	spin_lock(&b->irq_lock); -	GEM_BUG_ON(!b->irq_armed); -	GEM_BUG_ON(!b->irq_wait); -	b->irq_wait = to_wait(next); -	spin_unlock(&b->irq_lock); - -	/* We always wake up the next waiter that takes over as the bottom-half -	 * as we may delegate not only the irq-seqno barrier to the next waiter -	 * but also the task of waking up concurrent waiters. -	 */ -	if (next) -		wake_up_process(to_wait(next)->tsk); -} - -static bool __intel_engine_add_wait(struct intel_engine_cs *engine, -				    struct intel_wait *wait) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	struct rb_node **p, *parent, *completed; -	bool first, armed; -	u32 seqno; - -	GEM_BUG_ON(!wait->seqno); - -	/* Insert the request into the retirement ordered list -	 * of waiters by walking the rbtree. If we are the oldest -	 * seqno in the tree (the first to be retired), then -	 * set ourselves as the bottom-half. 
-	 * -	 * As we descend the tree, prune completed branches since we hold the -	 * spinlock we know that the first_waiter must be delayed and can -	 * reduce some of the sequential wake up latency if we take action -	 * ourselves and wake up the completed tasks in parallel. Also, by -	 * removing stale elements in the tree, we may be able to reduce the -	 * ping-pong between the old bottom-half and ourselves as first-waiter. -	 */ -	armed = false; -	first = true; -	parent = NULL; -	completed = NULL; -	seqno = intel_engine_get_seqno(engine); - -	 /* If the request completed before we managed to grab the spinlock, -	  * return now before adding ourselves to the rbtree. We let the -	  * current bottom-half handle any pending wakeups and instead -	  * try and get out of the way quickly. -	  */ -	if (i915_seqno_passed(seqno, wait->seqno)) { -		RB_CLEAR_NODE(&wait->node); -		return first; -	} - -	p = &b->waiters.rb_node; -	while (*p) { -		parent = *p; -		if (wait->seqno == to_wait(parent)->seqno) { -			/* We have multiple waiters on the same seqno, select -			 * the highest priority task (that with the smallest -			 * task->prio) to serve as the bottom-half for this -			 * group. -			 */ -			if (wait->tsk->prio > to_wait(parent)->tsk->prio) { -				p = &parent->rb_right; -				first = false; -			} else { -				p = &parent->rb_left; -			} -		} else if (i915_seqno_passed(wait->seqno, -					     to_wait(parent)->seqno)) { -			p = &parent->rb_right; -			if (i915_seqno_passed(seqno, to_wait(parent)->seqno)) -				completed = parent; -			else -				first = false; -		} else { -			p = &parent->rb_left; -		} -	} -	rb_link_node(&wait->node, parent, p); -	rb_insert_color(&wait->node, &b->waiters); - -	if (first) { -		spin_lock(&b->irq_lock); -		b->irq_wait = wait; -		/* After assigning ourselves as the new bottom-half, we must -		 * perform a cursory check to prevent a missed interrupt. -		 * Either we miss the interrupt whilst programming the hardware, -		 * or if there was a previous waiter (for a later seqno) they -		 * may be woken instead of us (due to the inherent race -		 * in the unlocked read of b->irq_seqno_bh in the irq handler) -		 * and so we miss the wake up. -		 */ -		armed = __intel_breadcrumbs_enable_irq(b); -		spin_unlock(&b->irq_lock); -	} - -	if (completed) { -		/* Advance the bottom-half (b->irq_wait) before we wake up -		 * the waiters who may scribble over their intel_wait -		 * just as the interrupt handler is dereferencing it via -		 * b->irq_wait. -		 */ -		if (!first) { -			struct rb_node *next = rb_next(completed); -			GEM_BUG_ON(next == &wait->node); -			__intel_breadcrumbs_next(engine, next); -		} - -		do { -			struct intel_wait *crumb = to_wait(completed); -			completed = rb_prev(completed); -			__intel_breadcrumbs_finish(b, crumb); -		} while (completed); -	} - -	GEM_BUG_ON(!b->irq_wait); -	GEM_BUG_ON(!b->irq_armed); -	GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node); +	spin_lock_init(&b->irq_lock); +	INIT_LIST_HEAD(&b->signalers); -	return armed; +	init_irq_work(&b->irq_work, signal_irq_work);  } -bool intel_engine_add_wait(struct intel_engine_cs *engine, -			   struct intel_wait *wait) +void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)  {  	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	bool armed; - -	spin_lock_irq(&b->rb_lock); -	armed = __intel_engine_add_wait(engine, wait); -	spin_unlock_irq(&b->rb_lock); -	if (armed) -		return armed; - -	/* Make the caller recheck if its request has already started. 
*/ -	return intel_engine_has_started(engine, wait->seqno); -} +	unsigned long flags; -static inline bool chain_wakeup(struct rb_node *rb, int priority) -{ -	return rb && to_wait(rb)->tsk->prio <= priority; -} +	spin_lock_irqsave(&b->irq_lock, flags); -static inline int wakeup_priority(struct intel_breadcrumbs *b, -				  struct task_struct *tsk) -{ -	if (tsk == b->signaler) -		return INT_MIN; +	if (b->irq_enabled) +		irq_enable(engine);  	else -		return tsk->prio; -} - -static void __intel_engine_remove_wait(struct intel_engine_cs *engine, -				       struct intel_wait *wait) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; - -	lockdep_assert_held(&b->rb_lock); - -	if (RB_EMPTY_NODE(&wait->node)) -		goto out; - -	if (b->irq_wait == wait) { -		const int priority = wakeup_priority(b, wait->tsk); -		struct rb_node *next; - -		/* We are the current bottom-half. Find the next candidate, -		 * the first waiter in the queue on the remaining oldest -		 * request. As multiple seqnos may complete in the time it -		 * takes us to wake up and find the next waiter, we have to -		 * wake up that waiter for it to perform its own coherent -		 * completion check. -		 */ -		next = rb_next(&wait->node); -		if (chain_wakeup(next, priority)) { -			/* If the next waiter is already complete, -			 * wake it up and continue onto the next waiter. So -			 * if have a small herd, they will wake up in parallel -			 * rather than sequentially, which should reduce -			 * the overall latency in waking all the completed -			 * clients. -			 * -			 * However, waking up a chain adds extra latency to -			 * the first_waiter. This is undesirable if that -			 * waiter is a high priority task. -			 */ -			u32 seqno = intel_engine_get_seqno(engine); - -			while (i915_seqno_passed(seqno, to_wait(next)->seqno)) { -				struct rb_node *n = rb_next(next); - -				__intel_breadcrumbs_finish(b, to_wait(next)); -				next = n; -				if (!chain_wakeup(next, priority)) -					break; -			} -		} - -		__intel_breadcrumbs_next(engine, next); -	} else { -		GEM_BUG_ON(rb_first(&b->waiters) == &wait->node); -	} - -	GEM_BUG_ON(RB_EMPTY_NODE(&wait->node)); -	rb_erase(&wait->node, &b->waiters); -	RB_CLEAR_NODE(&wait->node); +		irq_disable(engine); -out: -	GEM_BUG_ON(b->irq_wait == wait); -	GEM_BUG_ON(rb_first(&b->waiters) != -		   (b->irq_wait ? &b->irq_wait->node : NULL)); +	spin_unlock_irqrestore(&b->irq_lock, flags);  } -void intel_engine_remove_wait(struct intel_engine_cs *engine, -			      struct intel_wait *wait) +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)  { -	struct intel_breadcrumbs *b = &engine->breadcrumbs; - -	/* Quick check to see if this waiter was already decoupled from -	 * the tree by the bottom-half to avoid contention on the spinlock -	 * by the herd. 
-	 */ -	if (RB_EMPTY_NODE(&wait->node)) { -		GEM_BUG_ON(READ_ONCE(b->irq_wait) == wait); -		return; -	} - -	spin_lock_irq(&b->rb_lock); -	__intel_engine_remove_wait(engine, wait); -	spin_unlock_irq(&b->rb_lock);  } -static void signaler_set_rtpriority(void) +bool i915_request_enable_breadcrumb(struct i915_request *rq)  { -	 struct sched_param param = { .sched_priority = 1 }; - -	 sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); -} +	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; -static int intel_breadcrumbs_signaler(void *arg) -{ -	struct intel_engine_cs *engine = arg; -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	struct i915_request *rq, *n; +	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); -	/* Install ourselves with high priority to reduce signalling latency */ -	signaler_set_rtpriority(); +	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) +		return true; -	do { -		bool do_schedule = true; -		LIST_HEAD(list); -		u32 seqno; +	spin_lock(&b->irq_lock); +	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) && +	    !__request_completed(rq)) { +		struct intel_context *ce = rq->hw_context; +		struct list_head *pos; -		set_current_state(TASK_INTERRUPTIBLE); -		if (list_empty(&b->signals)) -			goto sleep; +		__intel_breadcrumbs_arm_irq(b);  		/* -		 * We are either woken up by the interrupt bottom-half, -		 * or by a client adding a new signaller. In both cases, -		 * the GPU seqno may have advanced beyond our oldest signal. -		 * If it has, propagate the signal, remove the waiter and -		 * check again with the next oldest signal. Otherwise we -		 * need to wait for a new interrupt from the GPU or for -		 * a new client. +		 * We keep the seqno in retirement order, so we can break +		 * inside intel_engine_breadcrumbs_irq as soon as we've passed +		 * the last completed request (or seen a request that hasn't +		 * event started). We could iterate the timeline->requests list, +		 * but keeping a separate signalers_list has the advantage of +		 * hopefully being much smaller than the full list and so +		 * provides faster iteration and detection when there are no +		 * more interrupts required for this context. +		 * +		 * We typically expect to add new signalers in order, so we +		 * start looking for our insertion point from the tail of +		 * the list.  		 
*/ -		seqno = intel_engine_get_seqno(engine); - -		spin_lock_irq(&b->rb_lock); -		list_for_each_entry_safe(rq, n, &b->signals, signaling.link) { -			u32 this = rq->signaling.wait.seqno; - -			GEM_BUG_ON(!rq->signaling.wait.seqno); +		list_for_each_prev(pos, &ce->signals) { +			struct i915_request *it = +				list_entry(pos, typeof(*it), signal_link); -			if (!i915_seqno_passed(seqno, this)) +			if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))  				break; - -			if (likely(this == i915_request_global_seqno(rq))) { -				__intel_engine_remove_wait(engine, -							   &rq->signaling.wait); - -				rq->signaling.wait.seqno = 0; -				__list_del_entry(&rq->signaling.link); - -				if (!i915_request_signaled(rq)) { -					list_add_tail(&rq->signaling.link, -						      &list); -					i915_request_get(rq); -				} -			}  		} -		spin_unlock_irq(&b->rb_lock); - -		if (!list_empty(&list)) { -			local_bh_disable(); -			list_for_each_entry_safe(rq, n, &list, signaling.link) { -				dma_fence_signal(&rq->fence); -				GEM_BUG_ON(!i915_request_completed(rq)); -				i915_request_put(rq); -			} -			local_bh_enable(); /* kick start the tasklets */ - -			/* -			 * If the engine is saturated we may be continually -			 * processing completed requests. This angers the -			 * NMI watchdog if we never let anything else -			 * have access to the CPU. Let's pretend to be nice -			 * and relinquish the CPU if we burn through the -			 * entire RT timeslice! -			 */ -			do_schedule = need_resched(); -		} - -		if (unlikely(do_schedule)) { -sleep: -			if (kthread_should_park()) -				kthread_parkme(); +		list_add(&rq->signal_link, pos); +		if (pos == &ce->signals) /* catch transitions from empty list */ +			list_move_tail(&ce->signal_link, &b->signalers); -			if (unlikely(kthread_should_stop())) -				break; - -			schedule(); -		} -	} while (1); -	__set_current_state(TASK_RUNNING); - -	return 0; -} - -static void insert_signal(struct intel_breadcrumbs *b, -			  struct i915_request *request, -			  const u32 seqno) -{ -	struct i915_request *iter; - -	lockdep_assert_held(&b->rb_lock); - -	/* -	 * A reasonable assumption is that we are called to add signals -	 * in sequence, as the requests are submitted for execution and -	 * assigned a global_seqno. This will be the case for the majority -	 * of internally generated signals (inter-engine signaling). -	 * -	 * Out of order waiters triggering random signaling enabling will -	 * be more problematic, but hopefully rare enough and the list -	 * small enough that the O(N) insertion sort is not an issue. -	 */ - -	list_for_each_entry_reverse(iter, &b->signals, signaling.link) -		if (i915_seqno_passed(seqno, iter->signaling.wait.seqno)) -			break; - -	list_add(&request->signaling.link, &iter->signaling.link); -} - -bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup) -{ -	struct intel_engine_cs *engine = request->engine; -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	struct intel_wait *wait = &request->signaling.wait; -	u32 seqno; - -	/* -	 * Note that we may be called from an interrupt handler on another -	 * device (e.g. nouveau signaling a fence completion causing us -	 * to submit a request, and so enable signaling). As such, -	 * we need to make sure that all other users of b->rb_lock protect -	 * against interrupts, i.e. use spin_lock_irqsave. 
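
The retirement-ordered, tail-first insertion used by i915_request_enable_breadcrumb() above can be illustrated outside the driver. Below is a minimal sketch in plain C with a hand-rolled circular list and a seqno_passed() helper standing in for i915_seqno_passed(); it shows the technique only and is not the driver's code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct signal_node {
	uint32_t seqno;
	struct signal_node *prev, *next;
};

/* "a" is at or after "b", tolerating u32 wraparound (cf. i915_seqno_passed()) */
static bool seqno_passed(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) >= 0;
}

/*
 * Walk the retirement-ordered list from the tail (new signalers usually
 * arrive in order) and link @rq after the newest entry it has passed;
 * if it has passed none, it ends up at the front of the list.
 */
static void insert_signal(struct signal_node *head, struct signal_node *rq)
{
	struct signal_node *pos;

	for (pos = head->prev; pos != head; pos = pos->prev)
		if (seqno_passed(rq->seqno, pos->seqno))
			break;

	rq->prev = pos;
	rq->next = pos->next;
	pos->next->prev = rq;
	pos->next = rq;
}

int main(void)
{
	struct signal_node head = { 0, &head, &head };
	struct signal_node a = { .seqno = 1 }, b = { .seqno = 2 }, c = { .seqno = 3 };

	insert_signal(&head, &a);
	insert_signal(&head, &c);
	insert_signal(&head, &b); /* out of order, still lands between a and c */

	for (struct signal_node *pos = head.next; pos != &head; pos = pos->next)
		printf("%u\n", (unsigned int)pos->seqno); /* prints 1 2 3 */

	return 0;
}

On the interrupt side, this ordering is what lets the walk over ce->signals stop at the first request that has not yet completed, as the comment above notes.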
-	 */ - -	/* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ -	GEM_BUG_ON(!irqs_disabled()); -	lockdep_assert_held(&request->lock); - -	seqno = i915_request_global_seqno(request); -	if (!seqno) /* will be enabled later upon execution */ -		return true; - -	GEM_BUG_ON(wait->seqno); -	wait->tsk = b->signaler; -	wait->request = request; -	wait->seqno = seqno; - -	/* -	 * Add ourselves into the list of waiters, but registering our -	 * bottom-half as the signaller thread. As per usual, only the oldest -	 * waiter (not just signaller) is tasked as the bottom-half waking -	 * up all completed waiters after the user interrupt. -	 * -	 * If we are the oldest waiter, enable the irq (after which we -	 * must double check that the seqno did not complete). -	 */ -	spin_lock(&b->rb_lock); -	insert_signal(b, request, seqno); -	wakeup &= __intel_engine_add_wait(engine, wait); -	spin_unlock(&b->rb_lock); - -	if (wakeup) { -		wake_up_process(b->signaler); -		return !intel_wait_complete(wait); +		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);  	} +	spin_unlock(&b->irq_lock); -	return true; +	return !__request_completed(rq);  } -void intel_engine_cancel_signaling(struct i915_request *request) +void i915_request_cancel_breadcrumb(struct i915_request *rq)  { -	struct intel_engine_cs *engine = request->engine; -	struct intel_breadcrumbs *b = &engine->breadcrumbs; - -	GEM_BUG_ON(!irqs_disabled()); -	lockdep_assert_held(&request->lock); +	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; -	if (!READ_ONCE(request->signaling.wait.seqno)) +	if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))  		return; -	spin_lock(&b->rb_lock); -	__intel_engine_remove_wait(engine, &request->signaling.wait); -	if (fetch_and_zero(&request->signaling.wait.seqno)) -		__list_del_entry(&request->signaling.link); -	spin_unlock(&b->rb_lock); -} - -int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	struct task_struct *tsk; - -	spin_lock_init(&b->rb_lock); -	spin_lock_init(&b->irq_lock); - -	timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0); -	timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0); - -	INIT_LIST_HEAD(&b->signals); - -	/* Spawn a thread to provide a common bottom-half for all signals. -	 * As this is an asynchronous interface we cannot steal the current -	 * task for handling the bottom-half to the user interrupt, therefore -	 * we create a thread to do the coherent seqno dance after the -	 * interrupt and then signal the waitqueue (via the dma-buf/fence). 
-	 */ -	tsk = kthread_run(intel_breadcrumbs_signaler, engine, -			  "i915/signal:%d", engine->id); -	if (IS_ERR(tsk)) -		return PTR_ERR(tsk); - -	b->signaler = tsk; - -	return 0; -} - -static void cancel_fake_irq(struct intel_engine_cs *engine) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; - -	del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */ -	del_timer_sync(&b->hangcheck); -	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); -} - -void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	unsigned long flags; - -	spin_lock_irqsave(&b->irq_lock, flags); +	spin_lock(&b->irq_lock); +	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { +		struct intel_context *ce = rq->hw_context; -	/* -	 * Leave the fake_irq timer enabled (if it is running), but clear the -	 * bit so that it turns itself off on its next wake up and goes back -	 * to the long hangcheck interval if still required. -	 */ -	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); +		list_del(&rq->signal_link); +		if (list_empty(&ce->signals)) +			list_del_init(&ce->signal_link); -	if (b->irq_enabled) -		irq_enable(engine); -	else -		irq_disable(engine); - -	spin_unlock_irqrestore(&b->irq_lock, flags); +		clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); +	} +	spin_unlock(&b->irq_lock);  } -void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, +				    struct drm_printer *p)  {  	struct intel_breadcrumbs *b = &engine->breadcrumbs; +	struct intel_context *ce; +	struct i915_request *rq; -	/* The engines should be idle and all requests accounted for! */ -	WARN_ON(READ_ONCE(b->irq_wait)); -	WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); -	WARN_ON(!list_empty(&b->signals)); +	if (list_empty(&b->signalers)) +		return; -	if (!IS_ERR_OR_NULL(b->signaler)) -		kthread_stop(b->signaler); +	drm_printf(p, "Signals:\n"); -	cancel_fake_irq(engine); +	spin_lock_irq(&b->irq_lock); +	list_for_each_entry(ce, &b->signalers, signal_link) { +		list_for_each_entry(rq, &ce->signals, signal_link) { +			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n", +				   rq->fence.context, rq->fence.seqno, +				   i915_request_completed(rq) ? "!" : +				   i915_request_started(rq) ? 
"*" : +				   "", +				   jiffies_to_msecs(jiffies - rq->emitted_jiffies)); +		} +	} +	spin_unlock_irq(&b->irq_lock);  } - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/intel_breadcrumbs.c" -#endif diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index bc7589656a8f..4b0044cdcf1a 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -605,48 +605,48 @@ void intel_color_load_luts(struct intel_crtc_state *crtc_state)  	dev_priv->display.load_luts(crtc_state);  } +static int check_lut_size(const struct drm_property_blob *lut, int expected) +{ +	int len; + +	if (!lut) +		return 0; + +	len = drm_color_lut_size(lut); +	if (len != expected) { +		DRM_DEBUG_KMS("Invalid LUT size; got %d, expected %d\n", +			      len, expected); +		return -EINVAL; +	} + +	return 0; +} +  int intel_color_check(struct intel_crtc_state *crtc_state)  {  	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); -	size_t gamma_length, degamma_length; -	uint32_t tests = DRM_COLOR_LUT_NON_DECREASING; +	int gamma_length, degamma_length; +	u32 gamma_tests, degamma_tests;  	degamma_length = INTEL_INFO(dev_priv)->color.degamma_lut_size;  	gamma_length = INTEL_INFO(dev_priv)->color.gamma_lut_size; +	degamma_tests = INTEL_INFO(dev_priv)->color.degamma_lut_tests; +	gamma_tests = INTEL_INFO(dev_priv)->color.gamma_lut_tests; -	/* -	 * All of our platforms mandate that the degamma curve be -	 * non-decreasing.  Additionally, GLK and gen11 only accept a single -	 * value for red, green, and blue in the degamma table.  Make sure -	 * userspace didn't try to pass us something we can't handle. -	 * -	 * We don't have any extra hardware constraints on the gamma table, -	 * so no need to explicitly check it. -	 */ -	if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) -		tests |= DRM_COLOR_LUT_EQUAL_CHANNELS; +	/* Always allow legacy gamma LUT with no further checking. */ +	if (crtc_state_is_legacy_gamma(crtc_state)) +		return 0; -	if (drm_color_lut_check(crtc_state->base.degamma_lut, tests) != 0) +	if (check_lut_size(crtc_state->base.degamma_lut, degamma_length) || +	    check_lut_size(crtc_state->base.gamma_lut, gamma_length))  		return -EINVAL; -	/* -	 * We allow both degamma & gamma luts at the right size or -	 * NULL. -	 */ -	if ((!crtc_state->base.degamma_lut || -	     drm_color_lut_size(crtc_state->base.degamma_lut) == degamma_length) && -	    (!crtc_state->base.gamma_lut || -	     drm_color_lut_size(crtc_state->base.gamma_lut) == gamma_length)) -		return 0; +	if (drm_color_lut_check(crtc_state->base.degamma_lut, degamma_tests) || +	    drm_color_lut_check(crtc_state->base.gamma_lut, gamma_tests)) +		return -EINVAL; -	/* -	 * We also allow no degamma lut/ctm and a gamma lut at the legacy -	 * size (256 entries). 
-	 */ -	if (crtc_state_is_legacy_gamma(crtc_state)) -		return 0; -	return -EINVAL; +	return 0;  }  void intel_color_init(struct intel_crtc *crtc) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index acd94354afc8..ca705546a0ab 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -995,7 +995,7 @@ static u32 hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll)  	}  } -static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, +static u32 icl_pll_to_ddi_clk_sel(struct intel_encoder *encoder,  				  const struct intel_crtc_state *crtc_state)  {  	const struct intel_shared_dpll *pll = crtc_state->shared_dpll; @@ -1004,10 +1004,11 @@ static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder,  	switch (id) {  	default: +		/* +		 * DPLL_ID_ICL_DPLL0 and DPLL_ID_ICL_DPLL1 should not be used +		 * here, so do warn if this get passed in +		 */  		MISSING_CASE(id); -		/* fall through */ -	case DPLL_ID_ICL_DPLL0: -	case DPLL_ID_ICL_DPLL1:  		return DDI_CLK_SEL_NONE;  	case DPLL_ID_ICL_TBTPLL:  		switch (clock) { @@ -1021,7 +1022,7 @@ static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder,  			return DDI_CLK_SEL_TBT_810;  		default:  			MISSING_CASE(clock); -			break; +			return DDI_CLK_SEL_NONE;  		}  	case DPLL_ID_ICL_MGPLL1:  	case DPLL_ID_ICL_MGPLL2: @@ -1391,16 +1392,17 @@ static int icl_calc_tbt_pll_link(struct drm_i915_private *dev_priv,  static int icl_calc_mg_pll_link(struct drm_i915_private *dev_priv,  				enum port port)  { +	enum tc_port tc_port = intel_port_to_tc(dev_priv, port);  	u32 mg_pll_div0, mg_clktop_hsclkctl;  	u32 m1, m2_int, m2_frac, div1, div2, refclk;  	u64 tmp;  	refclk = dev_priv->cdclk.hw.ref; -	mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); -	mg_clktop_hsclkctl = I915_READ(MG_CLKTOP2_HSCLKCTL(port)); +	mg_pll_div0 = I915_READ(MG_PLL_DIV0(tc_port)); +	mg_clktop_hsclkctl = I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port)); -	m1 = I915_READ(MG_PLL_DIV1(port)) & MG_PLL_DIV1_FBPREDIV_MASK; +	m1 = I915_READ(MG_PLL_DIV1(tc_port)) & MG_PLL_DIV1_FBPREDIV_MASK;  	m2_int = mg_pll_div0 & MG_PLL_DIV0_FBDIV_INT_MASK;  	m2_frac = (mg_pll_div0 & MG_PLL_DIV0_FRACNEN_H) ?  		  (mg_pll_div0 & MG_PLL_DIV0_FBDIV_FRAC_MASK) >> @@ -2868,7 +2870,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,  	if (IS_ICELAKE(dev_priv)) {  		if (!intel_port_is_combophy(dev_priv, port))  			I915_WRITE(DDI_CLK_SEL(port), -				   icl_pll_to_ddi_pll_sel(encoder, crtc_state)); +				   icl_pll_to_ddi_clk_sel(encoder, crtc_state));  	} else if (IS_CANNONLAKE(dev_priv)) {  		/* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. 
*/  		val = I915_READ(DPCLKA_CFGCR0); diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 957c6527f76b..7bf09cef591a 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -189,6 +189,8 @@ struct intel_device_info {  	struct color_luts {  		u16 degamma_lut_size;  		u16 gamma_lut_size; +		u32 degamma_lut_tests; +		u32 gamma_lut_tests;  	} color;  }; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f6f0d78436e3..df7a7a310f2f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1758,6 +1758,35 @@ enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc)  		return crtc->pipe;  } +static u32 intel_crtc_max_vblank_count(const struct intel_crtc_state *crtc_state) +{ +	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + +	/* +	 * On i965gm the hardware frame counter reads +	 * zero when the TV encoder is enabled :( +	 */ +	if (IS_I965GM(dev_priv) && +	    (crtc_state->output_types & BIT(INTEL_OUTPUT_TVOUT))) +		return 0; + +	if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) +		return 0xffffffff; /* full 32 bit counter */ +	else if (INTEL_GEN(dev_priv) >= 3) +		return 0xffffff; /* only 24 bits of frame count */ +	else +		return 0; /* Gen2 doesn't have a hardware frame counter */ +} + +static void intel_crtc_vblank_on(const struct intel_crtc_state *crtc_state) +{ +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + +	drm_crtc_set_max_vblank_count(&crtc->base, +				      intel_crtc_max_vblank_count(crtc_state)); +	drm_crtc_vblank_on(&crtc->base); +} +  static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state)  {  	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); @@ -1810,7 +1839,7 @@ static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state)  	 * when it's derived from the timestamps. So let's wait for the  	 * pipe to start properly before we call drm_crtc_vblank_on()  	 */ -	if (dev_priv->drm.max_vblank_count == 0) +	if (intel_crtc_max_vblank_count(new_crtc_state) == 0)  		intel_wait_for_pipe_scanline_moving(crtc);  } @@ -3901,6 +3930,16 @@ static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_sta  		else if (old_crtc_state->pch_pfit.enabled)  			ironlake_pfit_disable(old_crtc_state);  	} + +	/* +	 * We don't (yet) allow userspace to control the pipe background color, +	 * so force it to black, but apply pipe gamma and CSC so that its +	 * handling will match how we program our planes. 
+	 */ +	if (INTEL_GEN(dev_priv) >= 9) +		I915_WRITE(SKL_BOTTOM_COLOR(crtc->pipe), +			   SKL_BOTTOM_COLOR_GAMMA_ENABLE | +			   SKL_BOTTOM_COLOR_CSC_ENABLE);  }  static void intel_fdi_normal_train(struct intel_crtc *crtc) @@ -5678,7 +5717,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config,  		ironlake_pch_enable(old_intel_state, pipe_config);  	assert_vblank_disabled(crtc); -	drm_crtc_vblank_on(crtc); +	intel_crtc_vblank_on(pipe_config);  	intel_encoders_enable(crtc, pipe_config, old_state); @@ -5832,7 +5871,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config,  		intel_ddi_set_vc_payload_alloc(pipe_config, true);  	assert_vblank_disabled(crtc); -	drm_crtc_vblank_on(crtc); +	intel_crtc_vblank_on(pipe_config);  	intel_encoders_enable(crtc, pipe_config, old_state); @@ -6171,7 +6210,7 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config,  	intel_enable_pipe(pipe_config);  	assert_vblank_disabled(crtc); -	drm_crtc_vblank_on(crtc); +	intel_crtc_vblank_on(pipe_config);  	intel_encoders_enable(crtc, pipe_config, old_state);  } @@ -6230,7 +6269,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config,  	intel_enable_pipe(pipe_config);  	assert_vblank_disabled(crtc); -	drm_crtc_vblank_on(crtc); +	intel_crtc_vblank_on(pipe_config);  	intel_encoders_enable(crtc, pipe_config, old_state);  } @@ -9416,7 +9455,7 @@ static void icelake_get_ddi_pll(struct drm_i915_private *dev_priv,  		if (WARN_ON(!intel_dpll_is_combophy(id)))  			return;  	} else if (intel_port_is_tc(dev_priv, port)) { -		id = icl_port_to_mg_pll_id(port); +		id = icl_tc_port_to_pll_id(intel_port_to_tc(dev_priv, port));  	} else {  		WARN(1, "Invalid port %x\n", port);  		return; @@ -11690,6 +11729,23 @@ pipe_config_err(bool adjust, const char *name, const char *format, ...)  	va_end(args);  } +static bool fastboot_enabled(struct drm_i915_private *dev_priv) +{ +	if (i915_modparams.fastboot != -1) +		return i915_modparams.fastboot; + +	/* Enable fastboot by default on Skylake and newer */ +	if (INTEL_GEN(dev_priv) >= 9) +		return true; + +	/* Enable fastboot by default on VLV and CHV */ +	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) +		return true; + +	/* Disabled by default on all others */ +	return false; +} +  static bool  intel_pipe_config_compare(struct drm_i915_private *dev_priv,  			  struct intel_crtc_state *current_config, @@ -11701,7 +11757,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,  		(current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) &&  		!(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); -	if (fixup_inherited && !i915_modparams.fastboot) { +	if (fixup_inherited && !fastboot_enabled(dev_priv)) {  		DRM_DEBUG_KMS("initial modeset and fastboot not set\n");  		ret = false;  	} @@ -12778,8 +12834,9 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,  u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc)  {  	struct drm_device *dev = crtc->base.dev; +	struct drm_vblank_crtc *vblank = &dev->vblank[drm_crtc_index(&crtc->base)]; -	if (!dev->max_vblank_count) +	if (!vblank->max_vblank_count)  		return (u32)drm_crtc_accurate_vblank_count(&crtc->base);  	return dev->driver->get_vblank_counter(dev, crtc->pipe); @@ -14327,8 +14384,10 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv)  		/*  		 * On some ICL SKUs port F is not present. No strap bits for  		 * this, so rely on VBT. +		 * Work around broken VBTs on SKUs known to have no port F.  		 
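
A note on the fastboot_enabled() helper added above: i915.fastboot effectively becomes tri-state. An explicit i915.fastboot=0 or i915.fastboot=1 on the kernel command line still forces the behaviour off or on everywhere, while -1 (presumably the new module-parameter default, changed outside this hunk) defers to the per-platform choice shown here: enabled on Skylake and newer and on Valleyview/Cherryview, disabled elsewhere.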
*/ -		if (intel_bios_is_port_present(dev_priv, PORT_F)) +		if (IS_ICL_WITH_PORT_F(dev_priv) && +		    intel_bios_is_port_present(dev_priv, PORT_F))  			intel_ddi_init(dev_priv, PORT_F);  		icl_dsi_init(dev_priv); @@ -14680,14 +14739,6 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,  	drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); -	if (fb->format->format == DRM_FORMAT_NV12 && -	    (fb->width < SKL_MIN_YUV_420_SRC_W || -	     fb->height < SKL_MIN_YUV_420_SRC_H || -	     (fb->width % 4) != 0 || (fb->height % 4) != 0)) { -		DRM_DEBUG_KMS("src dimensions not correct for NV12\n"); -		goto err; -	} -  	for (i = 0; i < fb->format->num_planes; i++) {  		u32 stride_alignment; @@ -15457,6 +15508,15 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc,  			    plane->base.type != DRM_PLANE_TYPE_PRIMARY)  				intel_plane_disable_noatomic(crtc, plane);  		} + +		/* +		 * Disable any background color set by the BIOS, but enable the +		 * gamma and CSC to match how we program our planes. +		 */ +		if (INTEL_GEN(dev_priv) >= 9) +			I915_WRITE(SKL_BOTTOM_COLOR(crtc->pipe), +				   SKL_BOTTOM_COLOR_GAMMA_ENABLE | +				   SKL_BOTTOM_COLOR_CSC_ENABLE);  	}  	/* Adjust the state of the output pipe according to whether we @@ -15493,16 +15553,45 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc,  	}  } +static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) +{ +	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + +	/* +	 * Some SNB BIOSen (eg. ASUS K53SV) are known to misprogram +	 * the hardware when a high res displays plugged in. DPLL P +	 * divider is zero, and the pipe timings are bonkers. We'll +	 * try to disable everything in that case. +	 * +	 * FIXME would be nice to be able to sanitize this state +	 * without several WARNs, but for now let's take the easy +	 * road. +	 */ +	return IS_GEN(dev_priv, 6) && +		crtc_state->base.active && +		crtc_state->shared_dpll && +		crtc_state->port_clock == 0; +} +  static void intel_sanitize_encoder(struct intel_encoder *encoder)  {  	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);  	struct intel_connector *connector; +	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); +	struct intel_crtc_state *crtc_state = crtc ? +		to_intel_crtc_state(crtc->base.state) : NULL;  	/* We need to check both for a crtc link (meaning that the  	 * encoder is active and trying to read from a pipe) and the  	 * pipe itself being active. */ -	bool has_active_crtc = encoder->base.crtc && -		to_intel_crtc(encoder->base.crtc)->active; +	bool has_active_crtc = crtc_state && +		crtc_state->base.active; + +	if (crtc_state && has_bogus_dpll_config(crtc_state)) { +		DRM_DEBUG_KMS("BIOS has misprogrammed the hardware. Disabling pipe %c\n", +			      pipe_name(crtc->pipe)); +		has_active_crtc = false; +	}  	connector = intel_encoder_find_connector(encoder);  	if (connector && !has_active_crtc) { @@ -15513,16 +15602,25 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder)  		/* Connector is active, but has no active pipe. This is  		 * fallout from our resume register restoring. Disable  		 * the encoder manually again. 
*/ -		if (encoder->base.crtc) { -			struct drm_crtc_state *crtc_state = encoder->base.crtc->state; +		if (crtc_state) { +			struct drm_encoder *best_encoder;  			DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n",  				      encoder->base.base.id,  				      encoder->base.name); + +			/* avoid oopsing in case the hooks consult best_encoder */ +			best_encoder = connector->base.state->best_encoder; +			connector->base.state->best_encoder = &encoder->base; +  			if (encoder->disable) -				encoder->disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); +				encoder->disable(encoder, crtc_state, +						 connector->base.state);  			if (encoder->post_disable) -				encoder->post_disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); +				encoder->post_disable(encoder, crtc_state, +						      connector->base.state); + +			connector->base.state->best_encoder = best_encoder;  		}  		encoder->base.crtc = NULL; @@ -15894,10 +15992,12 @@ intel_modeset_setup_hw_state(struct drm_device *dev,  	 * waits, so we need vblank interrupts restored beforehand.  	 */  	for_each_intel_crtc(&dev_priv->drm, crtc) { +		crtc_state = to_intel_crtc_state(crtc->base.state); +  		drm_crtc_vblank_reset(&crtc->base); -		if (crtc->base.state->active) -			drm_crtc_vblank_on(&crtc->base); +		if (crtc_state->base.active) +			intel_crtc_vblank_on(crtc_state);  	}  	intel_sanitize_plane_mapping(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 606f54dde086..0a42d11c4c33 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -247,7 +247,7 @@ intel_find_shared_dpll(struct intel_crtc *crtc,  		       enum intel_dpll_id range_max)  {  	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); -	struct intel_shared_dpll *pll; +	struct intel_shared_dpll *pll, *unused_pll = NULL;  	struct intel_shared_dpll_state *shared_dpll;  	enum intel_dpll_id i; @@ -257,8 +257,11 @@ intel_find_shared_dpll(struct intel_crtc *crtc,  		pll = &dev_priv->shared_dplls[i];  		/* Only want to check enabled timings first */ -		if (shared_dpll[i].crtc_mask == 0) +		if (shared_dpll[i].crtc_mask == 0) { +			if (!unused_pll) +				unused_pll = pll;  			continue; +		}  		if (memcmp(&crtc_state->dpll_hw_state,  			   &shared_dpll[i].hw_state, @@ -273,14 +276,11 @@ intel_find_shared_dpll(struct intel_crtc *crtc,  	}  	/* Ok no matching timings, maybe there's a free one? 
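
A quick illustration of why remembering unused_pll during the first pass is equivalent to the second loop removed just below: with three DPLLs in the range where only the first is in use and none of the free ones matches the requested timings, both versions end up allocating the second DPLL, since it is the first free entry encountered in range order.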
*/ -	for (i = range_min; i <= range_max; i++) { -		pll = &dev_priv->shared_dplls[i]; -		if (shared_dpll[i].crtc_mask == 0) { -			DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n", -				      crtc->base.base.id, crtc->base.name, -				      pll->info->name); -			return pll; -		} +	if (unused_pll) { +		DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n", +			      crtc->base.base.id, crtc->base.name, +			      unused_pll->info->name); +		return unused_pll;  	}  	return NULL; @@ -2639,14 +2639,14 @@ int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv,  	return link_clock;  } -static enum port icl_mg_pll_id_to_port(enum intel_dpll_id id) +static enum tc_port icl_pll_id_to_tc_port(enum intel_dpll_id id)  { -	return id - DPLL_ID_ICL_MGPLL1 + PORT_C; +	return id - DPLL_ID_ICL_MGPLL1;  } -enum intel_dpll_id icl_port_to_mg_pll_id(enum port port) +enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port)  { -	return port - PORT_C + DPLL_ID_ICL_MGPLL1; +	return tc_port + DPLL_ID_ICL_MGPLL1;  }  bool intel_dpll_is_combophy(enum intel_dpll_id id) @@ -2925,7 +2925,10 @@ icl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state,  			ret = icl_calc_dpll_state(crtc_state, encoder, clock,  						  &pll_state);  		} else { -			min = icl_port_to_mg_pll_id(port); +			enum tc_port tc_port; + +			tc_port = intel_port_to_tc(dev_priv, port); +			min = icl_tc_port_to_pll_id(tc_port);  			max = min;  			ret = icl_calc_mg_pll_state(crtc_state, encoder, clock,  						    &pll_state); @@ -2959,12 +2962,8 @@ static i915_reg_t icl_pll_id_to_enable_reg(enum intel_dpll_id id)  		return CNL_DPLL_ENABLE(id);  	else if (id == DPLL_ID_ICL_TBTPLL)  		return TBT_PLL_ENABLE; -	else -		/* -		 * TODO: Make MG_PLL macros use -		 * tc port id instead of port id -		 */ -		return MG_PLL_ENABLE(icl_mg_pll_id_to_port(id)); + +	return MG_PLL_ENABLE(icl_pll_id_to_tc_port(id));  }  static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, @@ -2974,7 +2973,6 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv,  	const enum intel_dpll_id id = pll->info->id;  	intel_wakeref_t wakeref;  	bool ret = false; -	enum port port;  	u32 val;  	wakeref = intel_display_power_get_if_enabled(dev_priv, @@ -2991,32 +2989,33 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv,  		hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id));  		hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id));  	} else { -		port = icl_mg_pll_id_to_port(id); -		hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(port)); +		enum tc_port tc_port = icl_pll_id_to_tc_port(id); + +		hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(tc_port));  		hw_state->mg_refclkin_ctl &= MG_REFCLKIN_CTL_OD_2_MUX_MASK;  		hw_state->mg_clktop2_coreclkctl1 = -			I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); +			I915_READ(MG_CLKTOP2_CORECLKCTL1(tc_port));  		hw_state->mg_clktop2_coreclkctl1 &=  			MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK;  		hw_state->mg_clktop2_hsclkctl = -			I915_READ(MG_CLKTOP2_HSCLKCTL(port)); +			I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port));  		hw_state->mg_clktop2_hsclkctl &=  			MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK |  			MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK |  			MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK |  			MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK; -		hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); -		hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(port)); -		hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(port)); -		hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(port)); -		hw_state->mg_pll_ssc = 
I915_READ(MG_PLL_SSC(port)); +		hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(tc_port)); +		hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(tc_port)); +		hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(tc_port)); +		hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(tc_port)); +		hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(tc_port)); -		hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(port)); +		hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(tc_port));  		hw_state->mg_pll_tdc_coldst_bias = -			I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); +			I915_READ(MG_PLL_TDC_COLDST_BIAS(tc_port));  		if (dev_priv->cdclk.hw.ref == 38400) {  			hw_state->mg_pll_tdc_coldst_bias_mask = MG_PLL_TDC_COLDST_COLDSTART; @@ -3051,7 +3050,7 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv,  			     struct intel_shared_dpll *pll)  {  	struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; -	enum port port = icl_mg_pll_id_to_port(pll->info->id); +	enum tc_port tc_port = icl_pll_id_to_tc_port(pll->info->id);  	u32 val;  	/* @@ -3060,41 +3059,41 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv,  	 * during the calc/readout phase if the mask depends on some other HW  	 * state like refclk, see icl_calc_mg_pll_state().  	 */ -	val = I915_READ(MG_REFCLKIN_CTL(port)); +	val = I915_READ(MG_REFCLKIN_CTL(tc_port));  	val &= ~MG_REFCLKIN_CTL_OD_2_MUX_MASK;  	val |= hw_state->mg_refclkin_ctl; -	I915_WRITE(MG_REFCLKIN_CTL(port), val); +	I915_WRITE(MG_REFCLKIN_CTL(tc_port), val); -	val = I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); +	val = I915_READ(MG_CLKTOP2_CORECLKCTL1(tc_port));  	val &= ~MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK;  	val |= hw_state->mg_clktop2_coreclkctl1; -	I915_WRITE(MG_CLKTOP2_CORECLKCTL1(port), val); +	I915_WRITE(MG_CLKTOP2_CORECLKCTL1(tc_port), val); -	val = I915_READ(MG_CLKTOP2_HSCLKCTL(port)); +	val = I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port));  	val &= ~(MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK |  		 MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK |  		 MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK |  		 MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK);  	val |= hw_state->mg_clktop2_hsclkctl; -	I915_WRITE(MG_CLKTOP2_HSCLKCTL(port), val); +	I915_WRITE(MG_CLKTOP2_HSCLKCTL(tc_port), val); -	I915_WRITE(MG_PLL_DIV0(port), hw_state->mg_pll_div0); -	I915_WRITE(MG_PLL_DIV1(port), hw_state->mg_pll_div1); -	I915_WRITE(MG_PLL_LF(port), hw_state->mg_pll_lf); -	I915_WRITE(MG_PLL_FRAC_LOCK(port), hw_state->mg_pll_frac_lock); -	I915_WRITE(MG_PLL_SSC(port), hw_state->mg_pll_ssc); +	I915_WRITE(MG_PLL_DIV0(tc_port), hw_state->mg_pll_div0); +	I915_WRITE(MG_PLL_DIV1(tc_port), hw_state->mg_pll_div1); +	I915_WRITE(MG_PLL_LF(tc_port), hw_state->mg_pll_lf); +	I915_WRITE(MG_PLL_FRAC_LOCK(tc_port), hw_state->mg_pll_frac_lock); +	I915_WRITE(MG_PLL_SSC(tc_port), hw_state->mg_pll_ssc); -	val = I915_READ(MG_PLL_BIAS(port)); +	val = I915_READ(MG_PLL_BIAS(tc_port));  	val &= ~hw_state->mg_pll_bias_mask;  	val |= hw_state->mg_pll_bias; -	I915_WRITE(MG_PLL_BIAS(port), val); +	I915_WRITE(MG_PLL_BIAS(tc_port), val); -	val = I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); +	val = I915_READ(MG_PLL_TDC_COLDST_BIAS(tc_port));  	val &= ~hw_state->mg_pll_tdc_coldst_bias_mask;  	val |= hw_state->mg_pll_tdc_coldst_bias; -	I915_WRITE(MG_PLL_TDC_COLDST_BIAS(port), val); +	I915_WRITE(MG_PLL_TDC_COLDST_BIAS(tc_port), val); -	POSTING_READ(MG_PLL_TDC_COLDST_BIAS(port)); +	POSTING_READ(MG_PLL_TDC_COLDST_BIAS(tc_port));  }  static void icl_pll_enable(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h 
b/drivers/gpu/drm/i915/intel_dpll_mgr.h index e96e79413b54..40e8391a92f2 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -344,7 +344,7 @@ void intel_dpll_dump_hw_state(struct drm_i915_private *dev_priv,  int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv,  			       u32 pll_id);  int cnl_hdmi_pll_ref_clock(struct drm_i915_private *dev_priv); -enum intel_dpll_id icl_port_to_mg_pll_id(enum port port); +enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port);  bool intel_dpll_is_combophy(enum intel_dpll_id id);  #endif /* _INTEL_DPLL_MGR_H_ */ diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 85b913ea6e80..90ba5436370e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -630,9 +630,11 @@ struct intel_crtc_scaler_state {  };  /* drm_mode->private_flags */ -#define I915_MODE_FLAG_INHERITED 1 +#define I915_MODE_FLAG_INHERITED (1<<0)  /* Flag to get scanline using frame time stamps */  #define I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP (1<<1) +/* Flag to use the scanline counter instead of the pixel counter */ +#define I915_MODE_FLAG_USE_SCANLINE_COUNTER (1<<2)  struct intel_pipe_wm {  	struct intel_wm_level wm[5]; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 2f3c71f6d313..71c01eb13af1 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -458,12 +458,6 @@ cleanup:  void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)  {  	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - -	/* After manually advancing the seqno, fake the interrupt in case -	 * there are any waiters for that seqno. -	 */ -	intel_engine_wakeup(engine); -  	GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);  } @@ -480,53 +474,67 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)  	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));  	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); -	execlists->queue_priority = INT_MIN; +	execlists->queue_priority_hint = INT_MIN;  	execlists->queue = RB_ROOT_CACHED;  } -/** - * intel_engines_setup_common - setup engine state not requiring hw access - * @engine: Engine to setup. - * - * Initializes @engine@ structure members shared between legacy and execlists - * submission modes which do not require hardware access. - * - * Typically done early in the submission mode specific engine setup stage. 
- */ -void intel_engine_setup_common(struct intel_engine_cs *engine) -{ -	i915_timeline_init(engine->i915, &engine->timeline, engine->name); -	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); - -	intel_engine_init_execlist(engine); -	intel_engine_init_hangcheck(engine); -	intel_engine_init_batch_pool(engine); -	intel_engine_init_cmd_parser(engine); -} -  static void cleanup_status_page(struct intel_engine_cs *engine)  { +	struct i915_vma *vma; +  	/* Prevent writes into HWSP after returning the page to the system */  	intel_engine_set_hwsp_writemask(engine, ~0u); -	if (HWS_NEEDS_PHYSICAL(engine->i915)) { -		void *addr = fetch_and_zero(&engine->status_page.page_addr); +	vma = fetch_and_zero(&engine->status_page.vma); +	if (!vma) +		return; -		__free_page(virt_to_page(addr)); -	} +	if (!HWS_NEEDS_PHYSICAL(engine->i915)) +		i915_vma_unpin(vma); + +	i915_gem_object_unpin_map(vma->obj); +	__i915_gem_object_release_unless_active(vma->obj); +} + +static int pin_ggtt_status_page(struct intel_engine_cs *engine, +				struct i915_vma *vma) +{ +	unsigned int flags; + +	flags = PIN_GLOBAL; +	if (!HAS_LLC(engine->i915)) +		/* +		 * On g33, we cannot place HWS above 256MiB, so +		 * restrict its pinning to the low mappable arena. +		 * Though this restriction is not documented for +		 * gen4, gen5, or byt, they also behave similarly +		 * and hang if the HWS is placed at the top of the +		 * GTT. To generalise, it appears that all !llc +		 * platforms have issues with us placing the HWS +		 * above the mappable region (even though we never +		 * actually map it). +		 */ +		flags |= PIN_MAPPABLE; +	else +		flags |= PIN_HIGH; -	i915_vma_unpin_and_release(&engine->status_page.vma, -				   I915_VMA_RELEASE_MAP); +	return i915_vma_pin(vma, 0, 0, flags);  }  static int init_status_page(struct intel_engine_cs *engine)  {  	struct drm_i915_gem_object *obj;  	struct i915_vma *vma; -	unsigned int flags;  	void *vaddr;  	int ret; +	/* +	 * Though the HWS register does support 36bit addresses, historically +	 * we have had hangs and corruption reported due to wild writes if +	 * the HWS is placed above 4G. We only allow objects to be allocated +	 * in GFP_DMA32 for i965, and no earlier physical address users had +	 * access to more than 4G. +	 */  	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);  	if (IS_ERR(obj)) {  		DRM_ERROR("Failed to allocate status page\n"); @@ -543,59 +551,67 @@ static int init_status_page(struct intel_engine_cs *engine)  		goto err;  	} -	flags = PIN_GLOBAL; -	if (!HAS_LLC(engine->i915)) -		/* On g33, we cannot place HWS above 256MiB, so -		 * restrict its pinning to the low mappable arena. -		 * Though this restriction is not documented for -		 * gen4, gen5, or byt, they also behave similarly -		 * and hang if the HWS is placed at the top of the -		 * GTT. To generalise, it appears that all !llc -		 * platforms have issues with us placing the HWS -		 * above the mappable region (even though we never -		 * actually map it). 
-		 */ -		flags |= PIN_MAPPABLE; -	else -		flags |= PIN_HIGH; -	ret = i915_vma_pin(vma, 0, 0, flags); -	if (ret) -		goto err; -  	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);  	if (IS_ERR(vaddr)) {  		ret = PTR_ERR(vaddr); -		goto err_unpin; +		goto err;  	} +	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);  	engine->status_page.vma = vma; -	engine->status_page.ggtt_offset = i915_ggtt_offset(vma); -	engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); + +	if (!HWS_NEEDS_PHYSICAL(engine->i915)) { +		ret = pin_ggtt_status_page(engine, vma); +		if (ret) +			goto err_unpin; +	} +  	return 0;  err_unpin: -	i915_vma_unpin(vma); +	i915_gem_object_unpin_map(obj);  err:  	i915_gem_object_put(obj);  	return ret;  } -static int init_phys_status_page(struct intel_engine_cs *engine) +/** + * intel_engines_setup_common - setup engine state not requiring hw access + * @engine: Engine to setup. + * + * Initializes @engine@ structure members shared between legacy and execlists + * submission modes which do not require hardware access. + * + * Typically done early in the submission mode specific engine setup stage. + */ +int intel_engine_setup_common(struct intel_engine_cs *engine)  { -	struct page *page; +	int err; -	/* -	 * Though the HWS register does support 36bit addresses, historically -	 * we have had hangs and corruption reported due to wild writes if -	 * the HWS is placed above 4G. -	 */ -	page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO); -	if (!page) -		return -ENOMEM; +	err = init_status_page(engine); +	if (err) +		return err; + +	err = i915_timeline_init(engine->i915, +				 &engine->timeline, +				 engine->name, +				 engine->status_page.vma); +	if (err) +		goto err_hwsp; + +	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); -	engine->status_page.page_addr = page_address(page); +	intel_engine_init_breadcrumbs(engine); +	intel_engine_init_execlist(engine); +	intel_engine_init_hangcheck(engine); +	intel_engine_init_batch_pool(engine); +	intel_engine_init_cmd_parser(engine);  	return 0; + +err_hwsp: +	cleanup_status_page(engine); +	return err;  }  static void __intel_context_unpin(struct i915_gem_context *ctx, @@ -604,6 +620,56 @@ static void __intel_context_unpin(struct i915_gem_context *ctx,  	intel_context_unpin(to_intel_context(ctx, engine));  } +struct measure_breadcrumb { +	struct i915_request rq; +	struct i915_timeline timeline; +	struct intel_ring ring; +	u32 cs[1024]; +}; + +static int measure_breadcrumb_dw(struct intel_engine_cs *engine) +{ +	struct measure_breadcrumb *frame; +	int dw = -ENOMEM; + +	GEM_BUG_ON(!engine->i915->gt.scratch); + +	frame = kzalloc(sizeof(*frame), GFP_KERNEL); +	if (!frame) +		return -ENOMEM; + +	if (i915_timeline_init(engine->i915, +			       &frame->timeline, "measure", +			       engine->status_page.vma)) +		goto out_frame; + +	INIT_LIST_HEAD(&frame->ring.request_list); +	frame->ring.timeline = &frame->timeline; +	frame->ring.vaddr = frame->cs; +	frame->ring.size = sizeof(frame->cs); +	frame->ring.effective_size = frame->ring.size; +	intel_ring_update_space(&frame->ring); + +	frame->rq.i915 = engine->i915; +	frame->rq.engine = engine; +	frame->rq.ring = &frame->ring; +	frame->rq.timeline = &frame->timeline; + +	dw = i915_timeline_pin(&frame->timeline); +	if (dw < 0) +		goto out_timeline; + +	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; + +	i915_timeline_unpin(&frame->timeline); + +out_timeline: +	i915_timeline_fini(&frame->timeline); +out_frame: +	kfree(frame); +	return dw; +} +  /**   * 
intel_engines_init_common - initialize cengine state which might require hw access   * @engine: Engine to initialize. @@ -646,21 +712,14 @@ int intel_engine_init_common(struct intel_engine_cs *engine)  		}  	} -	ret = intel_engine_init_breadcrumbs(engine); -	if (ret) +	ret = measure_breadcrumb_dw(engine); +	if (ret < 0)  		goto err_unpin_preempt; -	if (HWS_NEEDS_PHYSICAL(i915)) -		ret = init_phys_status_page(engine); -	else -		ret = init_status_page(engine); -	if (ret) -		goto err_breadcrumbs; +	engine->emit_fini_breadcrumb_dw = ret;  	return 0; -err_breadcrumbs: -	intel_engine_fini_breadcrumbs(engine);  err_unpin_preempt:  	if (i915->preempt_context)  		__intel_context_unpin(i915->preempt_context, engine); @@ -1071,10 +1130,8 @@ void intel_engines_sanitize(struct drm_i915_private *i915, bool force)  	if (!reset_engines(i915) && !force)  		return; -	for_each_engine(engine, i915, id) { -		if (engine->reset.reset) -			engine->reset.reset(engine, NULL); -	} +	for_each_engine(engine, i915, id) +		intel_engine_reset(engine, false);  }  /** @@ -1110,7 +1167,7 @@ void intel_engines_park(struct drm_i915_private *i915)  		}  		/* Must be reset upon idling, or we may miss the busy wakeup. */ -		GEM_BUG_ON(engine->execlists.queue_priority != INT_MIN); +		GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);  		if (engine->park)  			engine->park(engine); @@ -1226,10 +1283,14 @@ static void print_request(struct drm_printer *m,  	x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); -	drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n", +	drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n",  		   prefix,  		   rq->global_seqno, -		   i915_request_completed(rq) ? "!" : "", +		   i915_request_completed(rq) ? "!" : +		   i915_request_started(rq) ? "*" : +		   "", +		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, +			    &rq->fence.flags) ?  "+" : "",  		   rq->fence.context, rq->fence.seqno,  		   buf,  		   jiffies_to_msecs(jiffies - rq->emitted_jiffies), @@ -1320,7 +1381,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,  	}  	if (HAS_EXECLISTS(dev_priv)) { -		const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; +		const u32 *hws = +			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];  		unsigned int idx;  		u8 read, write; @@ -1363,9 +1425,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,  				char hdr[80];  				snprintf(hdr, sizeof(hdr), -					 "\t\tELSP[%d] count=%d, ring->start=%08x, rq: ", +					 "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x}, rq: ",  					 idx, count, -					 i915_ggtt_offset(rq->ring->vma)); +					 i915_ggtt_offset(rq->ring->vma), +					 rq->timeline->hwsp_offset);  				print_request(m, rq, hdr);  			} else {  				drm_printf(m, "\t\tELSP[%d] idle\n", idx); @@ -1420,12 +1483,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,  		       struct drm_printer *m,  		       const char *header, ...)  
{ -	struct intel_breadcrumbs * const b = &engine->breadcrumbs;  	struct i915_gpu_error * const error = &engine->i915->gpu_error;  	struct i915_request *rq;  	intel_wakeref_t wakeref; -	unsigned long flags; -	struct rb_node *rb;  	if (header) {  		va_list ap; @@ -1475,6 +1535,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,  			   rq->ring->emit);  		drm_printf(m, "\t\tring->space:  0x%08x\n",  			   rq->ring->space); +		drm_printf(m, "\t\tring->hwsp:   0x%08x\n", +			   rq->timeline->hwsp_offset);  		print_request_ring(m, rq);  	} @@ -1491,21 +1553,12 @@ void intel_engine_dump(struct intel_engine_cs *engine,  	intel_execlists_show_requests(engine, m, print_request, 8); -	spin_lock_irqsave(&b->rb_lock, flags); -	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { -		struct intel_wait *w = rb_entry(rb, typeof(*w), node); - -		drm_printf(m, "\t%s [%d:%c] waiting for %x\n", -			   w->tsk->comm, w->tsk->pid, -			   task_state_to_char(w->tsk), -			   w->seqno); -	} -	spin_unlock_irqrestore(&b->rb_lock, flags); -  	drm_printf(m, "HWSP:\n"); -	hexdump(m, engine->status_page.page_addr, PAGE_SIZE); +	hexdump(m, engine->status_page.addr, PAGE_SIZE);  	drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); + +	intel_engine_print_breadcrumbs(engine, m);  }  static u8 user_class_map[] = { diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h index 105e2a9e874a..b96a31bc1080 100644 --- a/drivers/gpu/drm/i915/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/intel_gpu_commands.h @@ -112,7 +112,6 @@  #define   MI_MEM_VIRTUAL	(1 << 22) /* 945,g33,965 */  #define   MI_USE_GGTT		(1 << 22) /* g4x+ */  #define MI_STORE_DWORD_INDEX	MI_INSTR(0x21, 1) -#define   MI_STORE_DWORD_INDEX_SHIFT 2  /*   * Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM:   * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index ab1c49b106f2..8bc8aa54aa35 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -81,6 +81,12 @@   *   */ +static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) +{ +	return (i915_ggtt_offset(engine->status_page.vma) + +		I915_GEM_HWS_PREEMPT_ADDR); +} +  static inline struct i915_priolist *to_priolist(struct rb_node *rb)  {  	return rb_entry(rb, struct i915_priolist, node); @@ -623,6 +629,8 @@ static void inject_preempt_context(struct work_struct *work)  				       EXECLISTS_ACTIVE_PREEMPT);  		tasklet_schedule(&engine->execlists.tasklet);  	} + +	(void)I915_SELFTEST_ONLY(engine->execlists.preempt_hang.count++);  }  /* @@ -666,7 +674,7 @@ static void complete_preempt_context(struct intel_engine_cs *engine)  	execlists_unwind_incomplete_requests(execlists);  	wait_for_guc_preempt_report(engine); -	intel_write_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX, 0); +	intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, 0);  }  /** @@ -731,7 +739,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)  		if (intel_engine_has_preemption(engine)) {  			struct guc_preempt_work *preempt_work =  				&engine->i915->guc.preempt_work[engine->id]; -			int prio = execlists->queue_priority; +			int prio = execlists->queue_priority_hint;  			if (__execlists_need_preempt(prio, port_prio(port))) {  				execlists_set_active(execlists, @@ -777,7 +785,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)  			
kmem_cache_free(engine->i915->priorities, p);  	}  done: -	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; +	execlists->queue_priority_hint = +		rb ? to_priolist(rb)->priority : INT_MIN;  	if (submit)  		port_assign(port, last);  	if (last) @@ -824,7 +833,7 @@ static void guc_submission_tasklet(unsigned long data)  	}  	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) && -	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) == +	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) ==  	    GUC_PREEMPT_FINISHED)  		complete_preempt_context(engine); @@ -834,8 +843,7 @@ static void guc_submission_tasklet(unsigned long data)  	spin_unlock_irqrestore(&engine->timeline.lock, flags);  } -static struct i915_request * -guc_reset_prepare(struct intel_engine_cs *engine) +static void guc_reset_prepare(struct intel_engine_cs *engine)  {  	struct intel_engine_execlists * const execlists = &engine->execlists; @@ -861,8 +869,6 @@ guc_reset_prepare(struct intel_engine_cs *engine)  	 */  	if (engine->i915->guc.preempt_wq)  		flush_workqueue(engine->i915->guc.preempt_wq); - -	return i915_gem_find_active_request(engine);  }  /* diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 741441daae32..a219c796e56d 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -25,6 +25,17 @@  #include "i915_drv.h"  #include "i915_reset.h" +struct hangcheck { +	u64 acthd; +	u32 seqno; +	enum intel_engine_hangcheck_action action; +	unsigned long action_timestamp; +	int deadlock; +	struct intel_instdone instdone; +	bool wedged:1; +	bool stalled:1; +}; +  static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)  {  	u32 tmp = current_instdone | *old_instdone; @@ -119,25 +130,22 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)  }  static void hangcheck_load_sample(struct intel_engine_cs *engine, -				  struct intel_engine_hangcheck *hc) +				  struct hangcheck *hc)  {  	hc->acthd = intel_engine_get_active_head(engine);  	hc->seqno = intel_engine_get_seqno(engine);  }  static void hangcheck_store_sample(struct intel_engine_cs *engine, -				   const struct intel_engine_hangcheck *hc) +				   const struct hangcheck *hc)  {  	engine->hangcheck.acthd = hc->acthd;  	engine->hangcheck.seqno = hc->seqno; -	engine->hangcheck.action = hc->action; -	engine->hangcheck.stalled = hc->stalled; -	engine->hangcheck.wedged = hc->wedged;  }  static enum intel_engine_hangcheck_action  hangcheck_get_action(struct intel_engine_cs *engine, -		     const struct intel_engine_hangcheck *hc) +		     const struct hangcheck *hc)  {  	if (engine->hangcheck.seqno != hc->seqno)  		return ENGINE_ACTIVE_SEQNO; @@ -149,7 +157,7 @@ hangcheck_get_action(struct intel_engine_cs *engine,  }  static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, -					struct intel_engine_hangcheck *hc) +					struct hangcheck *hc)  {  	unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT; @@ -265,19 +273,21 @@ static void i915_hangcheck_elapsed(struct work_struct *work)  	intel_uncore_arm_unclaimed_mmio_detection(dev_priv);  	for_each_engine(engine, dev_priv, id) { -		struct intel_engine_hangcheck hc; +		struct hangcheck hc; + +		intel_engine_signal_breadcrumbs(engine);  		hangcheck_load_sample(engine, &hc);  		hangcheck_accumulate_sample(engine, &hc);  		hangcheck_store_sample(engine, &hc); -		if (engine->hangcheck.stalled) { +		if (hc.stalled) {  			hung |= intel_engine_flag(engine);  			if (hc.action != 
ENGINE_DEAD)  				stuck |= intel_engine_flag(engine);  		} -		if (engine->hangcheck.wedged) +		if (hc.wedged)  			wedged |= intel_engine_flag(engine);  	} diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8aa8a4862543..a9eb0211ce77 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -136,6 +136,7 @@  #include <drm/i915_drm.h>  #include "i915_drv.h"  #include "i915_gem_render_state.h" +#include "i915_reset.h"  #include "i915_vgpu.h"  #include "intel_lrc_reg.h"  #include "intel_mocs.h" @@ -171,6 +172,12 @@ static void execlists_init_reg_state(u32 *reg_state,  				     struct intel_engine_cs *engine,  				     struct intel_ring *ring); +static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) +{ +	return (i915_ggtt_offset(engine->status_page.vma) + +		I915_GEM_HWS_INDEX_ADDR); +} +  static inline struct i915_priolist *to_priolist(struct rb_node *rb)  {  	return rb_entry(rb, struct i915_priolist, node); @@ -181,13 +188,90 @@ static inline int rq_prio(const struct i915_request *rq)  	return rq->sched.attr.priority;  } +static int queue_prio(const struct intel_engine_execlists *execlists) +{ +	struct i915_priolist *p; +	struct rb_node *rb; + +	rb = rb_first_cached(&execlists->queue); +	if (!rb) +		return INT_MIN; + +	/* +	 * As the priolist[] are inverted, with the highest priority in [0], +	 * we have to flip the index value to become priority. +	 */ +	p = to_priolist(rb); +	return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); +} +  static inline bool need_preempt(const struct intel_engine_cs *engine, -				const struct i915_request *last, -				int prio) +				const struct i915_request *rq)  { -	return (intel_engine_has_preemption(engine) && -		__execlists_need_preempt(prio, rq_prio(last)) && -		!i915_request_completed(last)); +	const int last_prio = rq_prio(rq); + +	if (!intel_engine_has_preemption(engine)) +		return false; + +	if (i915_request_completed(rq)) +		return false; + +	/* +	 * Check if the current priority hint merits a preemption attempt. +	 * +	 * We record the highest value priority we saw during rescheduling +	 * prior to this dequeue, therefore we know that if it is strictly +	 * less than the current tail of ESLP[0], we do not need to force +	 * a preempt-to-idle cycle. +	 * +	 * However, the priority hint is a mere hint that we may need to +	 * preempt. If that hint is stale or we may be trying to preempt +	 * ourselves, ignore the request. +	 */ +	if (!__execlists_need_preempt(engine->execlists.queue_priority_hint, +				      last_prio)) +		return false; + +	/* +	 * Check against the first request in ELSP[1], it will, thanks to the +	 * power of PI, be the highest priority of that context. +	 */ +	if (!list_is_last(&rq->link, &engine->timeline.requests) && +	    rq_prio(list_next_entry(rq, link)) > last_prio) +		return true; + +	/* +	 * If the inflight context did not trigger the preemption, then maybe +	 * it was the set of queued requests? Pick the highest priority in +	 * the queue (the first active priolist) and see if it deserves to be +	 * running instead of ELSP[0]. +	 * +	 * The highest priority request in the queue can not be either +	 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same +	 * context, it's priority would not exceed ELSP[0] aka last_prio. 
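The index-to-priority flip in queue_prio() above can be checked with a small stand-alone program. The sketch below is not driver code; it assumes four internal sub-levels per user priority (a shift of 2) purely for illustration, and shows how the first set bit of p->used maps back onto the effective priority, priolist[0] being the highest sub-level.

#include <stdio.h>
#include <strings.h>	/* ffs() */

/* Assumed for illustration only; the real shift lives in the driver. */
#define USER_PRIORITY_SHIFT 2

/*
 * priolist[] is indexed with the highest internal sub-level at [0], so the
 * effective priority of the best pending request is recovered by flipping
 * the first used index back into the low bits.
 */
static int queue_prio(int base_priority, unsigned int used)
{
	return ((base_priority + 1) << USER_PRIORITY_SHIFT) - ffs(used);
}

int main(void)
{
	/* Only the highest sub-level (index 0) is occupied. */
	printf("%d\n", queue_prio(1, 0x1));	/* (1 + 1) * 4 - 1 = 7 */
	/* Only the lowest sub-level (index 3) is occupied. */
	printf("%d\n", queue_prio(1, 0x8));	/* 8 - 4 = 4 */
	return 0;
}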
+	 */ +	return queue_prio(&engine->execlists) > last_prio; +} + +__maybe_unused static inline bool +assert_priority_queue(const struct intel_engine_execlists *execlists, +		      const struct i915_request *prev, +		      const struct i915_request *next) +{ +	if (!prev) +		return true; + +	/* +	 * Without preemption, the prev may refer to the still active element +	 * which we refuse to let go. +	 * +	 * Even with preemption, there are times when we think it is better not +	 * to preempt and leave an ostensibly lower priority request in flight. +	 */ +	if (port_request(execlists->port) == prev) +		return true; + +	return rq_prio(prev) >= rq_prio(next);  }  /* @@ -264,7 +348,8 @@ static void unwind_wa_tail(struct i915_request *rq)  	assert_ring_tail_valid(rq->ring, rq->tail);  } -static void __unwind_incomplete_requests(struct intel_engine_cs *engine) +static struct i915_request * +__unwind_incomplete_requests(struct intel_engine_cs *engine)  {  	struct i915_request *rq, *rn, *active = NULL;  	struct list_head *uninitialized_var(pl); @@ -306,6 +391,8 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)  		list_move_tail(&active->sched.link,  			       i915_sched_lookup_priolist(engine, prio));  	} + +	return active;  }  void @@ -436,11 +523,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)  			desc = execlists_update_context(rq);  			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); -			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", +			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",  				  engine->name, n,  				  port[n].context_id, count,  				  rq->global_seqno,  				  rq->fence.context, rq->fence.seqno, +				  hwsp_seqno(rq),  				  intel_engine_get_seqno(engine),  				  rq_prio(rq));  		} else { @@ -512,6 +600,8 @@ static void inject_preempt_context(struct intel_engine_cs *engine)  	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);  	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); + +	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);  }  static void complete_preempt_context(struct intel_engine_execlists *execlists) @@ -580,7 +670,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)  		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))  			return; -		if (need_preempt(engine, last, execlists->queue_priority)) { +		if (need_preempt(engine, last)) {  			inject_preempt_context(engine);  			return;  		} @@ -613,7 +703,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)  		 * WaIdleLiteRestore:bdw,skl  		 * Apply the wa NOOPs to prevent  		 * ring:HEAD == rq:TAIL as we resubmit the -		 * request. See gen8_emit_breadcrumb() for +		 * request. See gen8_emit_fini_breadcrumb() for  		 * where we prepare the padding after the  		 * end of the request.  		 */ @@ -626,8 +716,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)  		int i;  		priolist_for_each_request_consume(rq, rn, p, i) { -			GEM_BUG_ON(last && -				   need_preempt(engine, last, rq_prio(rq))); +			GEM_BUG_ON(!assert_priority_queue(execlists, last, rq));  			/*  			 * Can we combine this request with the current port? @@ -688,20 +777,20 @@ done:  	/*  	 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.  	 
* -	 * We choose queue_priority such that if we add a request of greater +	 * We choose the priority hint such that if we add a request of greater  	 * priority than this, we kick the submission tasklet to decide on  	 * the right order of submitting the requests to hardware. We must  	 * also be prepared to reorder requests as they are in-flight on the -	 * HW. We derive the queue_priority then as the first "hole" in +	 * HW. We derive the priority hint then as the first "hole" in  	 * the HW submission ports and if there are no available slots,  	 * the priority of the lowest executing request, i.e. last.  	 *  	 * When we do receive a higher priority request ready to run from the -	 * user, see queue_request(), the queue_priority is bumped to that +	 * user, see queue_request(), the priority hint is bumped to that  	 * request triggering preemption on the next dequeue (or subsequent  	 * interrupt for secondary ports).  	 */ -	execlists->queue_priority = +	execlists->queue_priority_hint =  		port != execlists->port ? rq_prio(last) : INT_MIN;  	if (submit) { @@ -732,11 +821,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)  	while (num_ports-- && port_isset(port)) {  		struct i915_request *rq = port_request(port); -		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n", +		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n",  			  rq->engine->name,  			  (unsigned int)(port - execlists->port),  			  rq->global_seqno,  			  rq->fence.context, rq->fence.seqno, +			  hwsp_seqno(rq),  			  intel_engine_get_seqno(rq->engine));  		GEM_BUG_ON(!execlists->active); @@ -820,10 +910,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)  	list_for_each_entry(rq, &engine->timeline.requests, link) {  		GEM_BUG_ON(!rq->global_seqno); -		if (i915_request_signaled(rq)) -			continue; +		if (!i915_request_signaled(rq)) +			dma_fence_set_error(&rq->fence, -EIO); -		dma_fence_set_error(&rq->fence, -EIO); +		i915_request_mark_complete(rq);  	}  	/* Flush the queued requests to the timeline list (for retiring). */ @@ -833,9 +923,9 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)  		priolist_for_each_request_consume(rq, rn, p, i) {  			list_del_init(&rq->sched.link); - -			dma_fence_set_error(&rq->fence, -EIO);  			__i915_request_submit(rq); +			dma_fence_set_error(&rq->fence, -EIO); +			i915_request_mark_complete(rq);  		}  		rb_erase_cached(&p->node, &execlists->queue); @@ -849,7 +939,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)  	/* Remaining _unready_ requests will be nop'ed when submitted */ -	execlists->queue_priority = INT_MIN; +	execlists->queue_priority_hint = INT_MIN;  	execlists->queue = RB_ROOT_CACHED;  	GEM_BUG_ON(port_isset(execlists->port)); @@ -872,6 +962,8 @@ static void process_csb(struct intel_engine_cs *engine)  	const u32 * const buf = execlists->csb_status;  	u8 head, tail; +	lockdep_assert_held(&engine->timeline.lock); +  	/*  	 * Note that csb_write, csb_status may be either in HWSP or mmio.  	 * When reading from the csb_write mmio register, we have to be @@ -960,12 +1052,13 @@ static void process_csb(struct intel_engine_cs *engine)  						EXECLISTS_ACTIVE_USER));  		rq = port_unpack(port, &count); -		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", +		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",  			  engine->name,  			  port->context_id, count,  			  rq ? 
rq->global_seqno : 0,  			  rq ? rq->fence.context : 0,  			  rq ? rq->fence.seqno : 0, +			  rq ? hwsp_seqno(rq) : 0,  			  intel_engine_get_seqno(engine),  			  rq ? rq_prio(rq) : 0); @@ -1079,8 +1172,8 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)  static void submit_queue(struct intel_engine_cs *engine, int prio)  { -	if (prio > engine->execlists.queue_priority) { -		engine->execlists.queue_priority = prio; +	if (prio > engine->execlists.queue_priority_hint) { +		engine->execlists.queue_priority_hint = prio;  		__submit_queue_imm(engine);  	}  } @@ -1173,6 +1266,24 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)  	return i915_vma_pin(vma, 0, 0, flags);  } +static u32 make_rpcs(struct drm_i915_private *dev_priv); + +static void +__execlists_update_reg_state(struct intel_engine_cs *engine, +			     struct intel_context *ce) +{ +	u32 *regs = ce->lrc_reg_state; +	struct intel_ring *ring = ce->ring; + +	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma); +	regs[CTX_RING_HEAD + 1] = ring->head; +	regs[CTX_RING_TAIL + 1] = ring->tail; + +	/* RPCS */ +	if (engine->class == RENDER_CLASS) +		regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915); +} +  static struct intel_context *  __execlists_context_pin(struct intel_engine_cs *engine,  			struct i915_gem_context *ctx, @@ -1211,10 +1322,8 @@ __execlists_context_pin(struct intel_engine_cs *engine,  	GEM_BUG_ON(!intel_ring_offset_valid(ce->ring, ce->ring->head));  	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; -	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = -		i915_ggtt_offset(ce->ring->vma); -	ce->lrc_reg_state[CTX_RING_HEAD + 1] = ce->ring->head; -	ce->lrc_reg_state[CTX_RING_TAIL + 1] = ce->ring->tail; + +	__execlists_update_reg_state(engine, ce);  	ce->state->obj->pin_global++;  	i915_gem_context_get(ctx); @@ -1254,6 +1363,34 @@ execlists_context_pin(struct intel_engine_cs *engine,  	return __execlists_context_pin(engine, ctx, ce);  } +static int gen8_emit_init_breadcrumb(struct i915_request *rq) +{ +	u32 *cs; + +	GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb); + +	cs = intel_ring_begin(rq, 6); +	if (IS_ERR(cs)) +		return PTR_ERR(cs); + +	/* +	 * Check if we have been preempted before we even get started. +	 * +	 * After this point i915_request_started() reports true, even if +	 * we get preempted and so are no longer running. 
+	 */ +	*cs++ = MI_ARB_CHECK; +	*cs++ = MI_NOOP; + +	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; +	*cs++ = rq->timeline->hwsp_offset; +	*cs++ = 0; +	*cs++ = rq->fence.seqno - 1; + +	intel_ring_advance(rq, cs); +	return 0; +} +  static int emit_pdps(struct i915_request *rq)  {  	const struct intel_engine_cs * const engine = rq->engine; @@ -1679,7 +1816,7 @@ static void enable_execlists(struct intel_engine_cs *engine)  		   _MASKED_BIT_DISABLE(STOP_RING));  	I915_WRITE(RING_HWS_PGA(engine->mmio_base), -		   engine->status_page.ggtt_offset); +		   i915_ggtt_offset(engine->status_page.vma));  	POSTING_READ(RING_HWS_PGA(engine->mmio_base));  } @@ -1716,11 +1853,9 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)  	return 0;  } -static struct i915_request * -execlists_reset_prepare(struct intel_engine_cs *engine) +static void execlists_reset_prepare(struct intel_engine_cs *engine)  {  	struct intel_engine_execlists * const execlists = &engine->execlists; -	struct i915_request *request, *active;  	unsigned long flags;  	GEM_TRACE("%s: depth<-%d\n", engine->name, @@ -1736,59 +1871,21 @@ execlists_reset_prepare(struct intel_engine_cs *engine)  	 * prevents the race.  	 */  	__tasklet_disable_sync_once(&execlists->tasklet); +	GEM_BUG_ON(!reset_in_progress(execlists)); +	/* And flush any current direct submission. */  	spin_lock_irqsave(&engine->timeline.lock, flags); - -	/* -	 * We want to flush the pending context switches, having disabled -	 * the tasklet above, we can assume exclusive access to the execlists. -	 * For this allows us to catch up with an inflight preemption event, -	 * and avoid blaming an innocent request if the stall was due to the -	 * preemption itself. -	 */ -	process_csb(engine); - -	/* -	 * The last active request can then be no later than the last request -	 * now in ELSP[0]. So search backwards from there, so that if the GPU -	 * has advanced beyond the last CSB update, it will be pardoned. -	 */ -	active = NULL; -	request = port_request(execlists->port); -	if (request) { -		/* -		 * Prevent the breadcrumb from advancing before we decide -		 * which request is currently active. -		 */ -		intel_engine_stop_cs(engine); - -		list_for_each_entry_from_reverse(request, -						 &engine->timeline.requests, -						 link) { -			if (__i915_request_completed(request, -						     request->global_seqno)) -				break; - -			active = request; -		} -	} - +	process_csb(engine); /* drain preemption events */  	spin_unlock_irqrestore(&engine->timeline.lock, flags); - -	return active;  } -static void execlists_reset(struct intel_engine_cs *engine, -			    struct i915_request *request) +static void execlists_reset(struct intel_engine_cs *engine, bool stalled)  {  	struct intel_engine_execlists * const execlists = &engine->execlists; +	struct i915_request *rq;  	unsigned long flags;  	u32 *regs; -	GEM_TRACE("%s request global=%d, current=%d\n", -		  engine->name, request ? request->global_seqno : 0, -		  intel_engine_get_seqno(engine)); -  	spin_lock_irqsave(&engine->timeline.lock, flags);  	/* @@ -1803,12 +1900,18 @@ static void execlists_reset(struct intel_engine_cs *engine,  	execlists_cancel_port_requests(execlists);  	/* Push back any incomplete requests for replay after the reset. 
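The gen8_emit_init_breadcrumb() added above stores rq->fence.seqno - 1 into the request's timeline HWSP slot before the payload runs, which is what lets "has this request started?" be answered even if the request is preempted immediately afterwards. The sketch below is only a conceptual model of that check, with made-up names, using the usual wrap-safe signed comparison on 32-bit seqnos.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Conceptual model: a request counts as started once the value in its
 * timeline HWSP has reached fence.seqno - 1, i.e. once the CS has executed
 * the init breadcrumb emitted ahead of the payload.  The signed subtraction
 * keeps the comparison correct across u32 wrap-around.
 */
static bool request_started(uint32_t hwsp_value, uint32_t fence_seqno)
{
	return (int32_t)(hwsp_value - (fence_seqno - 1)) >= 0;
}

int main(void)
{
	printf("%d\n", request_started(41, 42));	/* breadcrumb ran: 1 */
	printf("%d\n", request_started(40, 42));	/* not reached yet: 0 */
	return 0;
}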
*/ -	__unwind_incomplete_requests(engine); +	rq = __unwind_incomplete_requests(engine);  	/* Following the reset, we need to reload the CSB read/write pointers */  	reset_csb_pointers(&engine->execlists); -	spin_unlock_irqrestore(&engine->timeline.lock, flags); +	GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", +		  engine->name, +		  rq ? rq->global_seqno : 0, +		  intel_engine_get_seqno(engine), +		  yesno(stalled)); +	if (!rq) +		goto out_unlock;  	/*  	 * If the request was innocent, we leave the request in the ELSP @@ -1821,8 +1924,9 @@ static void execlists_reset(struct intel_engine_cs *engine,  	 * and have to at least restore the RING register in the context  	 * image back to the expected values to skip over the guilty request.  	 */ -	if (!request || request->fence.error != -EIO) -		return; +	i915_reset_request(rq, stalled); +	if (!stalled) +		goto out_unlock;  	/*  	 * We want a simple context + ring to execute the breadcrumb update. @@ -1832,25 +1936,22 @@ static void execlists_reset(struct intel_engine_cs *engine,  	 * future request will be after userspace has had the opportunity  	 * to recreate its own state.  	 */ -	regs = request->hw_context->lrc_reg_state; +	regs = rq->hw_context->lrc_reg_state;  	if (engine->pinned_default_state) {  		memcpy(regs, /* skip restoring the vanilla PPHWSP */  		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,  		       engine->context_size - PAGE_SIZE);  	} -	execlists_init_reg_state(regs, -				 request->gem_context, engine, request->ring);  	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ -	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma); - -	request->ring->head = intel_ring_wrap(request->ring, request->postfix); -	regs[CTX_RING_HEAD + 1] = request->ring->head; +	rq->ring->head = intel_ring_wrap(rq->ring, rq->postfix); +	intel_ring_update_space(rq->ring); -	intel_ring_update_space(request->ring); +	execlists_init_reg_state(regs, rq->gem_context, engine, rq->ring); +	__execlists_update_reg_state(engine, rq->hw_context); -	/* Reset WaIdleLiteRestore:bdw,skl as well */ -	unwind_wa_tail(request); +out_unlock: +	spin_unlock_irqrestore(&engine->timeline.lock, flags);  }  static void execlists_reset_finish(struct intel_engine_cs *engine) @@ -1863,6 +1964,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)  	 * to sleep before we restart and reload a context.  	 *  	 */ +	GEM_BUG_ON(!reset_in_progress(execlists));  	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))  		execlists->tasklet.func(execlists->tasklet.data); @@ -2035,53 +2137,62 @@ static int gen8_emit_flush_render(struct i915_request *request,   * used as a workaround for not being allowed to do lite   * restore with HEAD==TAIL (WaIdleLiteRestore).   */ -static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)  {  	/* Ensure there's always at least one preemption point per-request. */  	*cs++ = MI_ARB_CHECK;  	*cs++ = MI_NOOP;  	request->wa_tail = intel_ring_offset(request, cs); + +	return cs;  } -static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)  {  	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. 
*/  	BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); -	cs = gen8_emit_ggtt_write(cs, request->global_seqno, +	cs = gen8_emit_ggtt_write(cs, +				  request->fence.seqno, +				  request->timeline->hwsp_offset); + +	cs = gen8_emit_ggtt_write(cs, +				  request->global_seqno,  				  intel_hws_seqno_address(request->engine)); +  	*cs++ = MI_USER_INTERRUPT;  	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; +  	request->tail = intel_ring_offset(request, cs);  	assert_ring_tail_valid(request->ring, request->tail); -	gen8_emit_wa_tail(request, cs); +	return gen8_emit_wa_tail(request, cs);  } -static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; -static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)  { -	/* We're using qword write, seqno should be aligned to 8 bytes. */ -	BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); -  	cs = gen8_emit_ggtt_write_rcs(cs, -				      request->global_seqno, -				      intel_hws_seqno_address(request->engine), +				      request->fence.seqno, +				      request->timeline->hwsp_offset,  				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |  				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |  				      PIPE_CONTROL_DC_FLUSH_ENABLE |  				      PIPE_CONTROL_FLUSH_ENABLE |  				      PIPE_CONTROL_CS_STALL); +	cs = gen8_emit_ggtt_write_rcs(cs, +				      request->global_seqno, +				      intel_hws_seqno_address(request->engine), +				      PIPE_CONTROL_CS_STALL); +  	*cs++ = MI_USER_INTERRUPT;  	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;  	request->tail = intel_ring_offset(request, cs);  	assert_ring_tail_valid(request->ring, request->tail); -	gen8_emit_wa_tail(request, cs); +	return gen8_emit_wa_tail(request, cs);  } -static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS;  static int gen8_init_rcs_context(struct i915_request *rq)  { @@ -2173,8 +2284,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)  	engine->request_alloc = execlists_request_alloc;  	engine->emit_flush = gen8_emit_flush; -	engine->emit_breadcrumb = gen8_emit_breadcrumb; -	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz; +	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; +	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;  	engine->set_default_submission = intel_execlists_set_default_submission; @@ -2213,10 +2324,14 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)  	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;  } -static void +static int  logical_ring_setup(struct intel_engine_cs *engine)  { -	intel_engine_setup_common(engine); +	int err; + +	err = intel_engine_setup_common(engine); +	if (err) +		return err;  	/* Intentionally left blank. 
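One consequence of having the breadcrumb emitters return the advanced dword pointer, as the hunks above do, is that the hard-coded *_sz constants can be dropped: the size of an emitted breadcrumb falls out of pointer arithmetic. The fragment below only illustrates that pattern with invented command values; it is not the driver's code.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;

/* Illustrative emitter: returns the pointer advanced past what it wrote. */
static u32 *emit_fini_breadcrumb(u32 *cs)
{
	*cs++ = 0x12000000;	/* stand-in for a flush/store command */
	*cs++ = 0xdeadbeef;	/* stand-in for the seqno payload */
	*cs++ = 0x01000000;	/* stand-in for a user interrupt */
	*cs++ = 0;		/* padding/noop */
	return cs;
}

int main(void)
{
	u32 scratch[64];
	u32 *end = emit_fini_breadcrumb(scratch);

	/* The emitted size falls out of the pointer difference. */
	printf("breadcrumb is %zu dwords\n", (size_t)(end - scratch));
	return 0;
}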
*/  	engine->buffer = NULL; @@ -2226,6 +2341,8 @@ logical_ring_setup(struct intel_engine_cs *engine)  	logical_ring_default_vfuncs(engine);  	logical_ring_default_irqs(engine); + +	return 0;  }  static int logical_ring_init(struct intel_engine_cs *engine) @@ -2260,10 +2377,10 @@ static int logical_ring_init(struct intel_engine_cs *engine)  	}  	execlists->csb_status = -		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; +		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];  	execlists->csb_write = -		&engine->status_page.page_addr[intel_hws_csb_write_index(i915)]; +		&engine->status_page.addr[intel_hws_csb_write_index(i915)];  	reset_csb_pointers(execlists); @@ -2274,13 +2391,14 @@ int logical_render_ring_init(struct intel_engine_cs *engine)  {  	int ret; -	logical_ring_setup(engine); +	ret = logical_ring_setup(engine); +	if (ret) +		return ret;  	/* Override some for render ring. */  	engine->init_context = gen8_init_rcs_context;  	engine->emit_flush = gen8_emit_flush_render; -	engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs; -	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz; +	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;  	ret = logical_ring_init(engine);  	if (ret) @@ -2304,7 +2422,11 @@ int logical_render_ring_init(struct intel_engine_cs *engine)  int logical_xcs_ring_init(struct intel_engine_cs *engine)  { -	logical_ring_setup(engine); +	int err; + +	err = logical_ring_setup(engine); +	if (err) +		return err;  	return logical_ring_init(engine);  } @@ -2534,8 +2656,7 @@ static void execlists_init_reg_state(u32 *regs,  	if (rcs) {  		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); -		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, -			make_rpcs(dev_priv)); +		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);  		i915_oa_init_reg_state(engine, ctx, regs);  	} @@ -2638,7 +2759,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,  		goto error_deref_obj;  	} -	timeline = i915_timeline_create(ctx->i915, ctx->name); +	timeline = i915_timeline_create(ctx->i915, ctx->name, NULL);  	if (IS_ERR(timeline)) {  		ret = PTR_ERR(timeline);  		goto error_deref_obj; @@ -2696,12 +2817,8 @@ void intel_lr_context_resume(struct drm_i915_private *i915)  			intel_ring_reset(ce->ring, 0); -			if (ce->pin_count) { /* otherwise done in context_pin */ -				u32 *regs = ce->lrc_reg_state; - -				regs[CTX_RING_HEAD + 1] = ce->ring->head; -				regs[CTX_RING_TAIL + 1] = ce->ring->tail; -			} +			if (ce->pin_count) /* otherwise done in context_pin */ +				__execlists_update_reg_state(engine, ce);  		}  	}  } @@ -2740,7 +2857,9 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,  	last = NULL;  	count = 0; -	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); +	if (execlists->queue_priority_hint != INT_MIN) +		drm_printf(m, "\t\tQueue priority hint: %d\n", +			   execlists->queue_priority_hint);  	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {  		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);  		int i; diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index e976c5ce5479..331e7a678fb7 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -28,48 +28,60 @@  struct drm_i915_mocs_entry {  	u32 control_value;  	u16 l3cc_value; +	u16 used;  };  struct drm_i915_mocs_table { -	u32 size; +	unsigned int size; +	unsigned int n_entries;  	const struct drm_i915_mocs_entry *table;  };  /* Defines for the tables 
(XXX_MOCS_0 - XXX_MOCS_63) */ -#define LE_CACHEABILITY(value)	((value) << 0) -#define LE_TGT_CACHE(value)	((value) << 2) +#define _LE_CACHEABILITY(value)	((value) << 0) +#define _LE_TGT_CACHE(value)	((value) << 2)  #define LE_LRUM(value)		((value) << 4)  #define LE_AOM(value)		((value) << 6)  #define LE_RSC(value)		((value) << 7)  #define LE_SCC(value)		((value) << 8)  #define LE_PFM(value)		((value) << 11)  #define LE_SCF(value)		((value) << 14) +#define LE_COS(value)		((value) << 15) +#define LE_SSE(value)		((value) << 17)  /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */  #define L3_ESC(value)		((value) << 0)  #define L3_SCC(value)		((value) << 1) -#define L3_CACHEABILITY(value)	((value) << 4) +#define _L3_CACHEABILITY(value)	((value) << 4)  /* Helper defines */  #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */ +#define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */  /* (e)LLC caching options */ -#define LE_PAGETABLE		0 -#define LE_UC			1 -#define LE_WT			2 -#define LE_WB			3 - -/* L3 caching options */ -#define L3_DIRECT		0 -#define L3_UC			1 -#define L3_RESERVED		2 -#define L3_WB			3 +#define LE_0_PAGETABLE		_LE_CACHEABILITY(0) +#define LE_1_UC			_LE_CACHEABILITY(1) +#define LE_2_WT			_LE_CACHEABILITY(2) +#define LE_3_WB			_LE_CACHEABILITY(3)  /* Target cache */ -#define LE_TC_PAGETABLE		0 -#define LE_TC_LLC		1 -#define LE_TC_LLC_ELLC		2 -#define LE_TC_LLC_ELLC_ALT	3 +#define LE_TC_0_PAGETABLE	_LE_TGT_CACHE(0) +#define LE_TC_1_LLC		_LE_TGT_CACHE(1) +#define LE_TC_2_LLC_ELLC	_LE_TGT_CACHE(2) +#define LE_TC_3_LLC_ELLC_ALT	_LE_TGT_CACHE(3) + +/* L3 caching options */ +#define L3_0_DIRECT		_L3_CACHEABILITY(0) +#define L3_1_UC			_L3_CACHEABILITY(1) +#define L3_2_RESERVED		_L3_CACHEABILITY(2) +#define L3_3_WB			_L3_CACHEABILITY(3) + +#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ +	[__idx] = { \ +		.control_value = __control_value, \ +		.l3cc_value = __l3cc_value, \ +		.used = 1, \ +	}  /*   * MOCS tables @@ -80,85 +92,147 @@ struct drm_i915_mocs_table {   * LNCFCMOCS0 - LNCFCMOCS32 registers.   *   * These tables are intended to be kept reasonably consistent across - * platforms. However some of the fields are not applicable to all of - * them. + * HW platforms, and for ICL+, be identical across OSes. To achieve + * that, for Icelake and above, list of entries is published as part + * of bspec.   *   * Entries not part of the following tables are undefined as far as   * userspace is concerned and shouldn't be relied upon.  For the time - * being they will be implicitly initialized to the strictest caching - * configuration (uncached) to guarantee forwards compatibility with - * userspace programs written against more recent kernels providing - * additional MOCS entries. + * being they will be initialized to PTE.   * - * NOTE: These tables MUST start with being uncached and the length - *       MUST be less than 63 as the last two registers are reserved - *       by the hardware.  These tables are part of the kernel ABI and - *       may only be updated incrementally by adding entries at the - *       end. + * The last two entries are reserved by the hardware. For ICL+ they + * should be initialized according to bspec and never used, for older + * platforms they should never be written to. + * + * NOTE: These tables are part of bspec and defined as part of hardware + *       interface for ICL+. For older platforms, they are part of kernel + *       ABI. 
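With the raw field shifts collected into named macros above, the packed register values are easy to reconstruct by hand. The stand-alone snippet below copies the relevant definitions from this hunk and prints the control and L3CC words for a gen9-style writeback entry; the resulting 0x0000003b / 0x0030 match the raw values the old hand-written tables carried in their comments.

#include <stdio.h>

/* Field encodings copied from the table definitions above. */
#define _LE_CACHEABILITY(value)	((value) << 0)
#define _LE_TGT_CACHE(value)	((value) << 2)
#define LE_LRUM(value)		((value) << 4)

#define _L3_CACHEABILITY(value)	((value) << 4)

#define LE_3_WB			_LE_CACHEABILITY(3)
#define LE_TC_2_LLC_ELLC	_LE_TGT_CACHE(2)
#define L3_3_WB			_L3_CACHEABILITY(3)

int main(void)
{
	/* gen9 cached entry: writeback, LLC/eLLC target, LRU age 3. */
	unsigned int control = LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3);
	unsigned int l3cc = L3_3_WB;

	printf("control_value = 0x%08x\n", control);	/* 0x0000003b */
	printf("l3cc_value    = 0x%04x\n", l3cc);	/* 0x0030 */
	return 0;
}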
It is expected that, for specific hardware platform, existing + *       entries will remain constant and the table will only be updated by + *       adding new entries, filling unused positions.   */ +#define GEN9_MOCS_ENTRIES \ +	MOCS_ENTRY(I915_MOCS_UNCACHED, \ +		   LE_1_UC | LE_TC_2_LLC_ELLC, \ +		   L3_1_UC), \ +	MOCS_ENTRY(I915_MOCS_PTE, \ +		   LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \ +		   L3_3_WB) +  static const struct drm_i915_mocs_entry skylake_mocs_table[] = { -	[I915_MOCS_UNCACHED] = { -	  /* 0x00000009 */ -	  .control_value = LE_CACHEABILITY(LE_UC) | -			   LE_TGT_CACHE(LE_TC_LLC_ELLC) | -			   LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | -			   LE_PFM(0) | LE_SCF(0), - -	  /* 0x0010 */ -	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), -	}, -	[I915_MOCS_PTE] = { -	  /* 0x00000038 */ -	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) | -			   LE_TGT_CACHE(LE_TC_LLC_ELLC) | -			   LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | -			   LE_PFM(0) | LE_SCF(0), -	  /* 0x0030 */ -	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), -	}, -	[I915_MOCS_CACHED] = { -	  /* 0x0000003b */ -	  .control_value = LE_CACHEABILITY(LE_WB) | -			   LE_TGT_CACHE(LE_TC_LLC_ELLC) | -			   LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | -			   LE_PFM(0) | LE_SCF(0), -	  /* 0x0030 */ -	  .l3cc_value =   L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), -	}, +	GEN9_MOCS_ENTRIES, +	MOCS_ENTRY(I915_MOCS_CACHED, +		   LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), +		   L3_3_WB)  };  /* NOTE: the LE_TGT_CACHE is not used on Broxton */  static const struct drm_i915_mocs_entry broxton_mocs_table[] = { -	[I915_MOCS_UNCACHED] = { -	  /* 0x00000009 */ -	  .control_value = LE_CACHEABILITY(LE_UC) | -			   LE_TGT_CACHE(LE_TC_LLC_ELLC) | -			   LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | -			   LE_PFM(0) | LE_SCF(0), - -	  /* 0x0010 */ -	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), -	}, -	[I915_MOCS_PTE] = { -	  /* 0x00000038 */ -	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) | -			   LE_TGT_CACHE(LE_TC_LLC_ELLC) | -			   LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | -			   LE_PFM(0) | LE_SCF(0), - -	  /* 0x0030 */ -	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), -	}, -	[I915_MOCS_CACHED] = { -	  /* 0x00000039 */ -	  .control_value = LE_CACHEABILITY(LE_UC) | -			   LE_TGT_CACHE(LE_TC_LLC_ELLC) | -			   LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | -			   LE_PFM(0) | LE_SCF(0), - -	  /* 0x0030 */ -	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), -	}, +	GEN9_MOCS_ENTRIES, +	MOCS_ENTRY(I915_MOCS_CACHED, +		   LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3), +		   L3_3_WB) +}; + +#define GEN11_MOCS_ENTRIES \ +	/* Base - Uncached (Deprecated) */ \ +	MOCS_ENTRY(I915_MOCS_UNCACHED, \ +		   LE_1_UC | LE_TC_1_LLC, \ +		   L3_1_UC), \ +	/* Base - L3 + LeCC:PAT (Deprecated) */ \ +	MOCS_ENTRY(I915_MOCS_PTE, \ +		   LE_0_PAGETABLE | LE_TC_1_LLC, \ +		   L3_3_WB), \ +	/* Base - L3 + LLC */ \ +	MOCS_ENTRY(2, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ +		   L3_3_WB), \ +	/* Base - Uncached */ \ +	MOCS_ENTRY(3, \ +		   LE_1_UC | LE_TC_1_LLC, \ +		   L3_1_UC), \ +	/* Base - L3 */ \ +	MOCS_ENTRY(4, \ +		   LE_1_UC | LE_TC_1_LLC, \ +		   L3_3_WB), \ +	/* Base - LLC */ \ +	MOCS_ENTRY(5, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ +		   L3_1_UC), \ +	/* Age 0 - LLC */ \ +	MOCS_ENTRY(6, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ +		   L3_1_UC), \ +	/* Age 0 - L3 + LLC */ \ +	MOCS_ENTRY(7, \ +		   LE_3_WB | LE_TC_1_LLC | 
LE_LRUM(1), \ +		   L3_3_WB), \ +	/* Age: Don't Chg. - LLC */ \ +	MOCS_ENTRY(8, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ +		   L3_1_UC), \ +	/* Age: Don't Chg. - L3 + LLC */ \ +	MOCS_ENTRY(9, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ +		   L3_3_WB), \ +	/* No AOM - LLC */ \ +	MOCS_ENTRY(10, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ +		   L3_1_UC), \ +	/* No AOM - L3 + LLC */ \ +	MOCS_ENTRY(11, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ +		   L3_3_WB), \ +	/* No AOM; Age 0 - LLC */ \ +	MOCS_ENTRY(12, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ +		   L3_1_UC), \ +	/* No AOM; Age 0 - L3 + LLC */ \ +	MOCS_ENTRY(13, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ +		   L3_3_WB), \ +	/* No AOM; Age:DC - LLC */ \ +	MOCS_ENTRY(14, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ +		   L3_1_UC), \ +	/* No AOM; Age:DC - L3 + LLC */ \ +	MOCS_ENTRY(15, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ +		   L3_3_WB), \ +	/* Self-Snoop - L3 + LLC */ \ +	MOCS_ENTRY(18, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ +		   L3_3_WB), \ +	/* Skip Caching - L3 + LLC(12.5%) */ \ +	MOCS_ENTRY(19, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \ +		   L3_3_WB), \ +	/* Skip Caching - L3 + LLC(25%) */ \ +	MOCS_ENTRY(20, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \ +		   L3_3_WB), \ +	/* Skip Caching - L3 + LLC(50%) */ \ +	MOCS_ENTRY(21, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \ +		   L3_3_WB), \ +	/* Skip Caching - L3 + LLC(75%) */ \ +	MOCS_ENTRY(22, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \ +		   L3_3_WB), \ +	/* Skip Caching - L3 + LLC(87.5%) */ \ +	MOCS_ENTRY(23, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \ +		   L3_3_WB), \ +	/* HW Reserved - SW program but never use */ \ +	MOCS_ENTRY(62, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ +		   L3_1_UC), \ +	/* HW Reserved - SW program but never use */ \ +	MOCS_ENTRY(63, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ +		   L3_1_UC) + +static const struct drm_i915_mocs_entry icelake_mocs_table[] = { +	GEN11_MOCS_ENTRIES  };  /** @@ -178,13 +252,19 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,  {  	bool result = false; -	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) || -	    IS_ICELAKE(dev_priv)) { +	if (IS_ICELAKE(dev_priv)) { +		table->size  = ARRAY_SIZE(icelake_mocs_table); +		table->table = icelake_mocs_table; +		table->n_entries = GEN11_NUM_MOCS_ENTRIES; +		result = true; +	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {  		table->size  = ARRAY_SIZE(skylake_mocs_table); +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;  		table->table = skylake_mocs_table;  		result = true;  	} else if (IS_GEN9_LP(dev_priv)) {  		table->size  = ARRAY_SIZE(broxton_mocs_table); +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;  		table->table = broxton_mocs_table;  		result = true;  	} else { @@ -226,6 +306,19 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index)  	}  } +/* + * Get control_value from MOCS entry taking into account when it's not used: + * I915_MOCS_PTE's value is returned in this case. 
+ */ +static u32 get_entry_control(const struct drm_i915_mocs_table *table, +			     unsigned int index) +{ +	if (table->table[index].used) +		return table->table[index].control_value; + +	return table->table[I915_MOCS_PTE].control_value; +} +  /**   * intel_mocs_init_engine() - emit the mocs control table   * @engine:	The engine for whom to emit the registers. @@ -238,27 +331,23 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)  	struct drm_i915_private *dev_priv = engine->i915;  	struct drm_i915_mocs_table table;  	unsigned int index; +	u32 unused_value;  	if (!get_mocs_settings(dev_priv, &table))  		return; -	GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES); - -	for (index = 0; index < table.size; index++) -		I915_WRITE(mocs_register(engine->id, index), -			   table.table[index].control_value); - -	/* -	 * Ok, now set the unused entries to uncached. These entries -	 * are officially undefined and no contract for the contents -	 * and settings is given for these entries. -	 * -	 * Entry 0 in the table is uncached - so we are just writing -	 * that value to all the used entries. -	 */ -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) -		I915_WRITE(mocs_register(engine->id, index), -			   table.table[0].control_value); +	/* Set unused values to PTE */ +	unused_value = table.table[I915_MOCS_PTE].control_value; + +	for (index = 0; index < table.size; index++) { +		u32 value = get_entry_control(&table, index); + +		I915_WRITE(mocs_register(engine->id, index), value); +	} + +	/* All remaining entries are also unused */ +	for (; index < table.n_entries; index++) +		I915_WRITE(mocs_register(engine->id, index), unused_value);  }  /** @@ -276,33 +365,32 @@ static int emit_mocs_control_table(struct i915_request *rq,  {  	enum intel_engine_id engine = rq->engine->id;  	unsigned int index; +	u32 unused_value;  	u32 *cs; -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) +	if (GEM_WARN_ON(table->size > table->n_entries))  		return -ENODEV; -	cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); +	/* Set unused values to PTE */ +	unused_value = table->table[I915_MOCS_PTE].control_value; + +	cs = intel_ring_begin(rq, 2 + 2 * table->n_entries);  	if (IS_ERR(cs))  		return PTR_ERR(cs); -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES); +	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);  	for (index = 0; index < table->size; index++) { +		u32 value = get_entry_control(table, index); +  		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); -		*cs++ = table->table[index].control_value; +		*cs++ = value;  	} -	/* -	 * Ok, now set the unused entries to uncached. These entries -	 * are officially undefined and no contract for the contents -	 * and settings is given for these entries. -	 * -	 * Entry 0 in the table is uncached - so we are just writing -	 * that value to all the used entries. -	 */ -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { +	/* All remaining entries are also unused */ +	for (; index < table->n_entries; index++) {  		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); -		*cs++ = table->table[0].control_value; +		*cs++ = unused_value;  	}  	*cs++ = MI_NOOP; @@ -311,12 +399,24 @@ static int emit_mocs_control_table(struct i915_request *rq,  	return 0;  } +/* + * Get l3cc_value from MOCS entry taking into account when it's not used: + * I915_MOCS_PTE's value is returned in this case. 
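The new used flag and n_entries bound change how the registers get programmed: intel_mocs_init_engine() above writes the explicitly defined entries, and every hole as well as every trailing index up to n_entries falls back to the PTE entry's value. Below is a small stand-alone model of that fill pattern, with an array standing in for the MMIO writes and arbitrary stand-in values.

#include <stdio.h>

#define N_ENTRIES 8	/* stands in for GEN9/GEN11_NUM_MOCS_ENTRIES */
#define MOCS_PTE 1	/* plays the role of I915_MOCS_PTE */

struct mocs_entry {
	unsigned int control_value;
	int used;
};

int main(void)
{
	/* Arbitrary stand-in values; index 2 is deliberately left undefined. */
	const struct mocs_entry table[] = {
		[0] = { 0x11, 1 },
		[MOCS_PTE] = { 0x22, 1 },
		[3] = { 0x44, 1 },
	};
	const unsigned int size = sizeof(table) / sizeof(table[0]);
	unsigned int regs[N_ENTRIES];
	unsigned int i;

	/* Defined entries keep their value, holes fall back to the PTE entry. */
	for (i = 0; i < size; i++)
		regs[i] = table[i].used ? table[i].control_value
					: table[MOCS_PTE].control_value;

	/* Everything beyond the table, up to n_entries, is also set to PTE. */
	for (; i < N_ENTRIES; i++)
		regs[i] = table[MOCS_PTE].control_value;

	for (i = 0; i < N_ENTRIES; i++)
		printf("MOCS[%u] = 0x%02x\n", i, regs[i]);
	return 0;
}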
+ */ +static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, +			  unsigned int index) +{ +	if (table->table[index].used) +		return table->table[index].l3cc_value; + +	return table->table[I915_MOCS_PTE].l3cc_value; +} +  static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,  			       u16 low,  			       u16 high)  { -	return table->table[low].l3cc_value | -	       table->table[high].l3cc_value << 16; +	return low | high << 16;  }  /** @@ -333,38 +433,43 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,  static int emit_mocs_l3cc_table(struct i915_request *rq,  				const struct drm_i915_mocs_table *table)  { +	u16 unused_value;  	unsigned int i;  	u32 *cs; -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) +	if (GEM_WARN_ON(table->size > table->n_entries))  		return -ENODEV; -	cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES); +	/* Set unused values to PTE */ +	unused_value = table->table[I915_MOCS_PTE].l3cc_value; + +	cs = intel_ring_begin(rq, 2 + table->n_entries);  	if (IS_ERR(cs))  		return PTR_ERR(cs); -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); +	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2); + +	for (i = 0; i < table->size / 2; i++) { +		u16 low = get_entry_l3cc(table, 2 * i); +		u16 high = get_entry_l3cc(table, 2 * i + 1); -	for (i = 0; i < table->size/2; i++) {  		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); -		*cs++ = l3cc_combine(table, 2 * i, 2 * i + 1); +		*cs++ = l3cc_combine(table, low, high);  	} +	/* Odd table size - 1 left over */  	if (table->size & 0x01) { -		/* Odd table size - 1 left over */ +		u16 low = get_entry_l3cc(table, 2 * i); +  		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); -		*cs++ = l3cc_combine(table, 2 * i, 0); +		*cs++ = l3cc_combine(table, low, unused_value);  		i++;  	} -	/* -	 * Now set the rest of the table to uncached - use entry 0 as -	 * this will be uncached. Leave the last pair uninitialised as -	 * they are reserved by the hardware. -	 */ -	for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { +	/* All remaining entries are also unused */ +	for (; i < table->n_entries / 2; i++) {  		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); -		*cs++ = l3cc_combine(table, 0, 0); +		*cs++ = l3cc_combine(table, unused_value, unused_value);  	}  	*cs++ = MI_NOOP; @@ -391,26 +496,35 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)  {  	struct drm_i915_mocs_table table;  	unsigned int i; +	u16 unused_value;  	if (!get_mocs_settings(dev_priv, &table))  		return; -	for (i = 0; i < table.size/2; i++) -		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 2*i, 2*i+1)); +	/* Set unused values to PTE */ +	unused_value = table.table[I915_MOCS_PTE].l3cc_value; + +	for (i = 0; i < table.size / 2; i++) { +		u16 low = get_entry_l3cc(&table, 2 * i); +		u16 high = get_entry_l3cc(&table, 2 * i + 1); + +		I915_WRITE(GEN9_LNCFCMOCS(i), +			   l3cc_combine(&table, low, high)); +	}  	/* Odd table size - 1 left over */  	if (table.size & 0x01) { -		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 2*i, 0)); +		u16 low = get_entry_l3cc(&table, 2 * i); + +		I915_WRITE(GEN9_LNCFCMOCS(i), +			   l3cc_combine(&table, low, unused_value));  		i++;  	} -	/* -	 * Now set the rest of the table to uncached - use entry 0 as -	 * this will be uncached. Leave the last pair as initialised as -	 * they are reserved by the hardware. 
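Each GEN9_LNCFCMOCS register carries two 16-bit L3CC entries, so l3cc_combine() above simply packs an even/odd pair, and an odd-sized table gets its trailing entry paired with the unused (PTE) value. A small stand-alone illustration, using the 0x0010/0x0030 encodings seen in the tables earlier in this file:

#include <stdint.h>
#include <stdio.h>

/* Two 16-bit L3CC values share one 32-bit LNCFCMOCS register. */
static uint32_t l3cc_combine(uint16_t low, uint16_t high)
{
	return low | (uint32_t)high << 16;
}

int main(void)
{
	uint16_t uc = 0x0010;	/* uncached L3CC encoding */
	uint16_t wb = 0x0030;	/* writeback L3CC encoding */

	/* Entries 0 and 1 land in the same register, low half first. */
	printf("0x%08x\n", l3cc_combine(uc, wb));	/* 0x00300010 */

	/* An odd trailing entry is paired with the unused (PTE) value. */
	printf("0x%08x\n", l3cc_combine(wb, wb));	/* 0x00300030 */
	return 0;
}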
-	 */ -	for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++) -		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0)); +	/* All remaining entries are also unused */ +	for (; i < table.n_entries / 2; i++) +		I915_WRITE(GEN9_LNCFCMOCS(i), +			   l3cc_combine(&table, unused_value, unused_value));  }  /** diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index c300e5787b3c..a9238fd07e30 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -480,8 +480,6 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv)  	if (!overlay)  		return; -	intel_overlay_release_old_vid(overlay); -  	overlay->old_xscale = 0;  	overlay->old_yscale = 0;  	overlay->crtc = NULL; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fdc28a3d2936..ed9786241307 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3631,14 +3631,9 @@ static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)   * FIXME: We still don't have the proper code detect if we need to apply the WA,   * so assume we'll always need it in order to avoid underruns.   */ -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) +static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv)  { -	struct drm_i915_private *dev_priv = to_i915(state->base.dev); - -	if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) -		return true; - -	return false; +	return IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv);  }  static bool @@ -3670,25 +3665,25 @@ intel_enable_sagv(struct drm_i915_private *dev_priv)  	if (dev_priv->sagv_status == I915_SAGV_ENABLED)  		return 0; -	DRM_DEBUG_KMS("Enabling the SAGV\n"); +	DRM_DEBUG_KMS("Enabling SAGV\n");  	mutex_lock(&dev_priv->pcu_lock);  	ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,  				      GEN9_SAGV_ENABLE); -	/* We don't need to wait for the SAGV when enabling */ +	/* We don't need to wait for SAGV when enabling */  	mutex_unlock(&dev_priv->pcu_lock);  	/*  	 * Some skl systems, pre-release machines in particular, -	 * don't actually have an SAGV. +	 * don't actually have SAGV.  	 */  	if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {  		DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");  		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;  		return 0;  	} else if (ret < 0) { -		DRM_ERROR("Failed to enable the SAGV\n"); +		DRM_ERROR("Failed to enable SAGV\n");  		return ret;  	} @@ -3707,7 +3702,7 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)  	if (dev_priv->sagv_status == I915_SAGV_DISABLED)  		return 0; -	DRM_DEBUG_KMS("Disabling the SAGV\n"); +	DRM_DEBUG_KMS("Disabling SAGV\n");  	mutex_lock(&dev_priv->pcu_lock);  	/* bspec says to keep retrying for at least 1 ms */ @@ -3719,14 +3714,14 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)  	/*  	 * Some skl systems, pre-release machines in particular, -	 * don't actually have an SAGV. +	 * don't actually have SAGV.  	 
*/  	if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {  		DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");  		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;  		return 0;  	} else if (ret < 0) { -		DRM_ERROR("Failed to disable the SAGV (%d)\n", ret); +		DRM_ERROR("Failed to disable SAGV (%d)\n", ret);  		return ret;  	} @@ -3757,7 +3752,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state)  		sagv_block_time_us = 10;  	/* -	 * SKL+ workaround: bspec recommends we disable the SAGV when we have +	 * SKL+ workaround: bspec recommends we disable SAGV when we have  	 * more then one pipe enabled  	 *  	 * If there are no active CRTCs, no additional checks need be performed @@ -3790,7 +3785,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state)  		latency = dev_priv->wm.skl_latency[level]; -		if (skl_needs_memory_bw_wa(intel_state) && +		if (skl_needs_memory_bw_wa(dev_priv) &&  		    plane->base.state->fb->modifier ==  		    I915_FORMAT_MOD_X_TILED)  			latency += 15; @@ -3798,7 +3793,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state)  		/*  		 * If any of the planes on this pipe don't enable wm levels that  		 * incur memory latencies higher than sagv_block_time_us we -		 * can't enable the SAGV. +		 * can't enable SAGV.  		 */  		if (latency < sagv_block_time_us)  			return false; @@ -3827,8 +3822,13 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,  	/*  	 * 12GB/s is maximum BW supported by single DBuf slice. +	 * +	 * FIXME dbuf slice code is broken: +	 * - must wait for planes to stop using the slice before powering it off +	 * - plane straddling both slices is illegal in multi-pipe scenarios +	 * - should validate we stay within the hw bandwidth limits  	 */ -	if (num_active > 1 || total_data_bw >= GBps(12)) { +	if (0 && (num_active > 1 || total_data_bw >= GBps(12))) {  		ddb->enabled_slices = 2;  	} else {  		ddb->enabled_slices = 1; @@ -4371,8 +4371,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,  				continue;  			wm = &cstate->wm.skl.optimal.planes[plane_id]; -			blocks += wm->wm[level].plane_res_b; -			blocks += wm->uv_wm[level].plane_res_b; +			blocks += wm->wm[level].min_ddb_alloc; +			blocks += wm->uv_wm[level].min_ddb_alloc;  		}  		if (blocks < alloc_size) { @@ -4413,7 +4413,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,  		extra = min_t(u16, alloc_size,  			      DIV64_U64_ROUND_UP(alloc_size * rate,  						 total_data_rate)); -		total[plane_id] = wm->wm[level].plane_res_b + extra; +		total[plane_id] = wm->wm[level].min_ddb_alloc + extra;  		alloc_size -= extra;  		total_data_rate -= rate; @@ -4424,7 +4424,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,  		extra = min_t(u16, alloc_size,  			      DIV64_U64_ROUND_UP(alloc_size * rate,  						 total_data_rate)); -		uv_total[plane_id] = wm->uv_wm[level].plane_res_b + extra; +		uv_total[plane_id] = wm->uv_wm[level].min_ddb_alloc + extra;  		alloc_size -= extra;  		total_data_rate -= rate;  	} @@ -4477,7 +4477,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,  	 */  	for_each_plane_id_on_crtc(intel_crtc, plane_id) {  		wm = &cstate->wm.skl.optimal.planes[plane_id]; -		if (wm->trans_wm.plane_res_b > total[plane_id]) +		if (wm->trans_wm.plane_res_b >= total[plane_id])  			memset(&wm->trans_wm, 0, sizeof(wm->trans_wm));  	} @@ -4579,9 +4579,6 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,  	const struct drm_plane_state *pstate = &intel_pstate->base;  	const struct drm_framebuffer *fb = pstate->fb;  	u32 
interm_pbpl; -	struct intel_atomic_state *state = -		to_intel_atomic_state(cstate->base.state); -	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);  	/* only NV12 format has two planes */  	if (color_plane == 1 && fb->format->format != DRM_FORMAT_NV12) { @@ -4617,7 +4614,7 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,  							     intel_pstate);  	if (INTEL_GEN(dev_priv) >= 11 && -	    fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8) +	    fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 1)  		wp->dbuf_block_size = 256;  	else  		wp->dbuf_block_size = 512; @@ -4642,7 +4639,7 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,  		wp->y_min_scanlines = 4;  	} -	if (apply_memory_bw_wa) +	if (skl_needs_memory_bw_wa(dev_priv))  		wp->y_min_scanlines *= 2;  	wp->plane_bytes_per_line = wp->width * wp->cpp; @@ -4674,6 +4671,15 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,  	return 0;  } +static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level) +{ +	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) +		return true; + +	/* The number of lines are ignored for the level 0 watermark. */ +	return level > 0; +} +  static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,  				 const struct intel_plane_state *intel_pstate,  				 int level, @@ -4686,10 +4692,10 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,  	u32 latency = dev_priv->wm.skl_latency[level];  	uint_fixed_16_16_t method1, method2;  	uint_fixed_16_16_t selected_result; -	u32 res_blocks, res_lines; -	struct intel_atomic_state *state = -		to_intel_atomic_state(cstate->base.state); -	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); +	u32 res_blocks, res_lines, min_ddb_alloc = 0; + +	if (latency == 0) +		return;  	/* Display WA #1141: kbl,cfl */  	if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) || @@ -4697,7 +4703,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,  	    dev_priv->ipc_enabled)  		latency += 4; -	if (apply_memory_bw_wa && wp->x_tiled) +	if (skl_needs_memory_bw_wa(dev_priv) && wp->x_tiled)  		latency += 15;  	method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate, @@ -4756,8 +4762,28 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,  		}  	} -	/* The number of lines are ignored for the level 0 watermark. 
*/ -	if (level > 0 && res_lines > 31) +	if (INTEL_GEN(dev_priv) >= 11) { +		if (wp->y_tiled) { +			int extra_lines; + +			if (res_lines % wp->y_min_scanlines == 0) +				extra_lines = wp->y_min_scanlines; +			else +				extra_lines = wp->y_min_scanlines * 2 - +					res_lines % wp->y_min_scanlines; + +			min_ddb_alloc = mul_round_up_u32_fixed16(res_lines + extra_lines, +								 wp->plane_blocks_per_line); +		} else { +			min_ddb_alloc = res_blocks + +				DIV_ROUND_UP(res_blocks, 10); +		} +	} + +	if (!skl_wm_has_lines(dev_priv, level)) +		res_lines = 0; + +	if (res_lines > 31)  		return;  	/* @@ -4768,6 +4794,8 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,  	 */  	result->plane_res_b = res_blocks;  	result->plane_res_l = res_lines; +	/* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here */ +	result->min_ddb_alloc = max(min_ddb_alloc, res_blocks) + 1;  	result->plane_en = true;  } @@ -4801,15 +4829,10 @@ skl_compute_linetime_wm(const struct intel_crtc_state *cstate)  	u32 linetime_wm;  	linetime_us = intel_get_linetime_us(cstate); - -	if (is_fixed16_zero(linetime_us)) -		return 0; -  	linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us)); -	/* Display WA #1135: bxt:ALL GLK:ALL */ -	if ((IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) && -	    dev_priv->ipc_enabled) +	/* Display WA #1135: BXT:ALL GLK:ALL */ +	if (IS_GEN9_LP(dev_priv) && dev_priv->ipc_enabled)  		linetime_wm /= 2;  	return linetime_wm; @@ -5118,6 +5141,23 @@ static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv,  	return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm);  } +static bool skl_pipe_wm_equals(struct intel_crtc *crtc, +			       const struct skl_pipe_wm *wm1, +			       const struct skl_pipe_wm *wm2) +{ +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); +	enum plane_id plane_id; + +	for_each_plane_id_on_crtc(crtc, plane_id) { +		if (!skl_plane_wm_equals(dev_priv, +					 &wm1->planes[plane_id], +					 &wm2->planes[plane_id])) +			return false; +	} + +	return wm1->linetime == wm2->linetime; +} +  static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,  					   const struct skl_ddb_entry *b)  { @@ -5144,16 +5184,14 @@ static int skl_update_pipe_wm(struct intel_crtc_state *cstate,  			      struct skl_pipe_wm *pipe_wm, /* out */  			      bool *changed /* out */)  { +	struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);  	int ret;  	ret = skl_build_pipe_wm(cstate, pipe_wm);  	if (ret)  		return ret; -	if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm))) -		*changed = false; -	else -		*changed = true; +	*changed = !skl_pipe_wm_equals(crtc, old_pipe_wm, pipe_wm);  	return 0;  } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e39e483d8d16..b889b27f8aeb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -33,6 +33,7 @@  #include "i915_drv.h"  #include "i915_gem_render_state.h" +#include "i915_reset.h"  #include "i915_trace.h"  #include "intel_drv.h"  #include "intel_workarounds.h" @@ -42,6 +43,12 @@   */  #define LEGACY_REQUEST_SIZE 200 +static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) +{ +	return (i915_ggtt_offset(engine->status_page.vma) + +		I915_GEM_HWS_INDEX_ADDR); +} +  static unsigned int __intel_ring_space(unsigned int head,  				       unsigned int tail,  				       unsigned int size) @@ -299,7 +306,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)  	return 0; 
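The gen11 min_ddb_alloc computation introduced in the skl_compute_plane_wm() hunk above is easier to follow with concrete numbers. The stand-alone sketch below mirrors that arithmetic with plain integers and a double standing in for the driver's fixed-point plane_blocks_per_line; the input values are invented for illustration.

#include <math.h>
#include <stdio.h>

/*
 * Simplified model of the gen11 minimum DDB allocation for a Y-tiled plane:
 * pad the watermark lines past the next y_min_scanlines boundary, then
 * convert lines to blocks, rounding up.
 */
static unsigned int min_ddb_alloc_ytiled(unsigned int res_lines,
					 unsigned int y_min_scanlines,
					 double plane_blocks_per_line)
{
	unsigned int extra_lines;

	if (res_lines % y_min_scanlines == 0)
		extra_lines = y_min_scanlines;
	else
		extra_lines = 2 * y_min_scanlines -
			      res_lines % y_min_scanlines;

	return (unsigned int)ceil((res_lines + extra_lines) *
				  plane_blocks_per_line);
}

int main(void)
{
	/* 10 lines, 4-line tiles, 2.5 blocks per line -> 16 * 2.5 = 40 blocks */
	printf("%u\n", min_ddb_alloc_ytiled(10, 4, 2.5));

	/* Linear/X-tiled case: blocks plus a rounded-up 10% margin. */
	unsigned int res_blocks = 37;
	printf("%u\n", res_blocks + (res_blocks + 9) / 10);	/* 37 + 4 = 41 */
	return 0;
}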
 } -static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  {  	/* First we do the gen6_emit_post_sync_nonzero_flush w/a */  	*cs++ = GFX_OP_PIPE_CONTROL(4); @@ -319,6 +326,11 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  		 PIPE_CONTROL_DC_FLUSH_ENABLE |  		 PIPE_CONTROL_QW_WRITE |  		 PIPE_CONTROL_CS_STALL); +	*cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; +	*cs++ = rq->fence.seqno; + +	*cs++ = GFX_OP_PIPE_CONTROL(4); +	*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;  	*cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;  	*cs++ = rq->global_seqno; @@ -327,8 +339,9 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  	rq->tail = intel_ring_offset(rq, cs);  	assert_ring_tail_valid(rq->ring, rq->tail); + +	return cs;  } -static const int gen6_rcs_emit_breadcrumb_sz = 14;  static int  gen7_render_ring_cs_stall_wa(struct i915_request *rq) @@ -409,7 +422,7 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)  	return 0;  } -static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  {  	*cs++ = GFX_OP_PIPE_CONTROL(4);  	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | @@ -419,6 +432,13 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  		 PIPE_CONTROL_QW_WRITE |  		 PIPE_CONTROL_GLOBAL_GTT_IVB |  		 PIPE_CONTROL_CS_STALL); +	*cs++ = rq->timeline->hwsp_offset; +	*cs++ = rq->fence.seqno; + +	*cs++ = GFX_OP_PIPE_CONTROL(4); +	*cs++ = (PIPE_CONTROL_QW_WRITE | +		 PIPE_CONTROL_GLOBAL_GTT_IVB | +		 PIPE_CONTROL_CS_STALL);  	*cs++ = intel_hws_seqno_address(rq->engine);  	*cs++ = rq->global_seqno; @@ -427,34 +447,52 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  	rq->tail = intel_ring_offset(rq, cs);  	assert_ring_tail_valid(rq->ring, rq->tail); + +	return cs;  } -static const int gen7_rcs_emit_breadcrumb_sz = 6; -static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  { -	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; -	*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; +	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); +	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + +	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; +	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; +	*cs++ = rq->fence.seqno; + +	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; +	*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;  	*cs++ = rq->global_seqno; +  	*cs++ = MI_USER_INTERRUPT; +	*cs++ = MI_NOOP;  	rq->tail = intel_ring_offset(rq, cs);  	assert_ring_tail_valid(rq->ring, rq->tail); + +	return cs;  } -static const int gen6_xcs_emit_breadcrumb_sz = 4;  #define GEN7_XCS_WA 32 -static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  {  	int i; -	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; -	*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; +	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); +	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + +	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; +	*cs++ = 
I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; +	*cs++ = rq->fence.seqno; + +	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; +	*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;  	*cs++ = rq->global_seqno;  	for (i = 0; i < GEN7_XCS_WA; i++) {  		*cs++ = MI_STORE_DWORD_INDEX; -		*cs++ = I915_GEM_HWS_INDEX_ADDR; -		*cs++ = rq->global_seqno; +		*cs++ = I915_GEM_HWS_SEQNO_ADDR; +		*cs++ = rq->fence.seqno;  	}  	*cs++ = MI_FLUSH_DW; @@ -462,12 +500,12 @@ static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  	*cs++ = 0;  	*cs++ = MI_USER_INTERRUPT; -	*cs++ = MI_NOOP;  	rq->tail = intel_ring_offset(rq, cs);  	assert_ring_tail_valid(rq->ring, rq->tail); + +	return cs;  } -static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;  #undef GEN7_XCS_WA  static void set_hwstam(struct intel_engine_cs *engine, u32 mask) @@ -498,12 +536,17 @@ static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)  	I915_WRITE(HWS_PGA, addr);  } -static void ring_setup_phys_status_page(struct intel_engine_cs *engine) +static struct page *status_page(struct intel_engine_cs *engine)  { -	struct page *page = virt_to_page(engine->status_page.page_addr); -	phys_addr_t phys = PFN_PHYS(page_to_pfn(page)); +	struct drm_i915_gem_object *obj = engine->status_page.vma->obj; -	set_hws_pga(engine, phys); +	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); +	return sg_page(obj->mm.pages->sgl); +} + +static void ring_setup_phys_status_page(struct intel_engine_cs *engine) +{ +	set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));  	set_hwstam(engine, ~0u);  } @@ -570,7 +613,7 @@ static void flush_cs_tlb(struct intel_engine_cs *engine)  static void ring_setup_status_page(struct intel_engine_cs *engine)  { -	set_hwsp(engine, engine->status_page.ggtt_offset); +	set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));  	set_hwstam(engine, ~0u);  	flush_cs_tlb(engine); @@ -700,59 +743,87 @@ static int init_ring_common(struct intel_engine_cs *engine)  	}  	/* Papering over lost _interrupts_ immediately following the restart */ -	intel_engine_wakeup(engine); +	intel_engine_queue_breadcrumbs(engine);  out:  	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);  	return ret;  } -static struct i915_request *reset_prepare(struct intel_engine_cs *engine) +static void reset_prepare(struct intel_engine_cs *engine)  {  	intel_engine_stop_cs(engine); -	return i915_gem_find_active_request(engine);  } -static void skip_request(struct i915_request *rq) +static void reset_ring(struct intel_engine_cs *engine, bool stalled)  { -	void *vaddr = rq->ring->vaddr; +	struct i915_timeline *tl = &engine->timeline; +	struct i915_request *pos, *rq; +	unsigned long flags;  	u32 head; -	head = rq->infix; -	if (rq->postfix < head) { -		memset32(vaddr + head, MI_NOOP, -			 (rq->ring->size - head) / sizeof(u32)); -		head = 0; +	rq = NULL; +	spin_lock_irqsave(&tl->lock, flags); +	list_for_each_entry(pos, &tl->requests, link) { +		if (!i915_request_completed(pos)) { +			rq = pos; +			break; +		}  	} -	memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32)); -} - -static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq) -{ -	GEM_TRACE("%s request global=%d, current=%d\n", -		  engine->name, rq ? rq->global_seqno : 0, -		  intel_engine_get_seqno(engine)); +	GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", +		  engine->name, +		  rq ? 
rq->global_seqno : 0, +		  intel_engine_get_seqno(engine), +		  yesno(stalled));  	/* -	 * Try to restore the logical GPU state to match the continuation -	 * of the request queue. If we skip the context/PD restore, then -	 * the next request may try to execute assuming that its context -	 * is valid and loaded on the GPU and so may try to access invalid -	 * memory, prompting repeated GPU hangs. +	 * The guilty request will get skipped on a hung engine.  	 * -	 * If the request was guilty, we still restore the logical state -	 * in case the next request requires it (e.g. the aliasing ppgtt), -	 * but skip over the hung batch. +	 * Users of client default contexts do not rely on logical +	 * state preserved between batches so it is safe to execute +	 * queued requests following the hang. Non default contexts +	 * rely on preserved state, so skipping a batch loses the +	 * evolution of the state and it needs to be considered corrupted. +	 * Executing more queued batches on top of corrupted state is +	 * risky. But we take the risk by trying to advance through +	 * the queued requests in order to make the client behaviour +	 * more predictable around resets, by not throwing away random +	 * amount of batches it has prepared for execution. Sophisticated +	 * clients can use gem_reset_stats_ioctl and dma fence status +	 * (exported via sync_file info ioctl on explicit fences) to observe +	 * when it loses the context state and should rebuild accordingly.  	 * -	 * If the request was innocent, we try to replay the request with -	 * the restored context. +	 * The context ban, and ultimately the client ban, mechanism are safety +	 * valves if client submission ends up resulting in nothing more than +	 * subsequent hangs.  	 */ +  	if (rq) { -		/* If the rq hung, jump to its breadcrumb and skip the batch */ -		rq->ring->head = intel_ring_wrap(rq->ring, rq->head); -		if (rq->fence.error == -EIO) -			skip_request(rq); +		/* +		 * Try to restore the logical GPU state to match the +		 * continuation of the request queue. If we skip the +		 * context/PD restore, then the next request may try to execute +		 * assuming that its context is valid and loaded on the GPU and +		 * so may try to access invalid memory, prompting repeated GPU +		 * hangs. +		 * +		 * If the request was guilty, we still restore the logical +		 * state in case the next request requires it (e.g. the +		 * aliasing ppgtt), but skip over the hung batch. +		 * +		 * If the request was innocent, we try to replay the request +		 * with the restored context. 
+		 */ +		i915_reset_request(rq, stalled); + +		GEM_BUG_ON(rq->ring != engine->buffer); +		head = rq->head; +	} else { +		head = engine->buffer->tail;  	} +	engine->buffer->head = intel_ring_wrap(engine->buffer, head); + +	spin_unlock_irqrestore(&tl->lock, flags);  }  static void reset_finish(struct intel_engine_cs *engine) @@ -836,10 +907,10 @@ static void cancel_requests(struct intel_engine_cs *engine)  	list_for_each_entry(request, &engine->timeline.requests, link) {  		GEM_BUG_ON(!request->global_seqno); -		if (i915_request_signaled(request)) -			continue; +		if (!i915_request_signaled(request)) +			dma_fence_set_error(&request->fence, -EIO); -		dma_fence_set_error(&request->fence, -EIO); +		i915_request_mark_complete(request);  	}  	intel_write_status_page(engine, @@ -861,29 +932,43 @@ static void i9xx_submit_request(struct i915_request *request)  			intel_ring_set_tail(request->ring, request->tail));  } -static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)  { +	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); +	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); +  	*cs++ = MI_FLUSH;  	*cs++ = MI_STORE_DWORD_INDEX; +	*cs++ = I915_GEM_HWS_SEQNO_ADDR; +	*cs++ = rq->fence.seqno; + +	*cs++ = MI_STORE_DWORD_INDEX;  	*cs++ = I915_GEM_HWS_INDEX_ADDR;  	*cs++ = rq->global_seqno;  	*cs++ = MI_USER_INTERRUPT; -	*cs++ = MI_NOOP;  	rq->tail = intel_ring_offset(rq, cs);  	assert_ring_tail_valid(rq->ring, rq->tail); + +	return cs;  } -static const int i9xx_emit_breadcrumb_sz = 6;  #define GEN5_WA_STORES 8 /* must be at least 1! */ -static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)  {  	int i; +	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); +	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); +  	*cs++ = MI_FLUSH; +	*cs++ = MI_STORE_DWORD_INDEX; +	*cs++ = I915_GEM_HWS_SEQNO_ADDR; +	*cs++ = rq->fence.seqno; +  	BUILD_BUG_ON(GEN5_WA_STORES < 1);  	for (i = 0; i < GEN5_WA_STORES; i++) {  		*cs++ = MI_STORE_DWORD_INDEX; @@ -892,11 +977,13 @@ static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)  	}  	*cs++ = MI_USER_INTERRUPT; +	*cs++ = MI_NOOP;  	rq->tail = intel_ring_offset(rq, cs);  	assert_ring_tail_valid(rq->ring, rq->tail); + +	return cs;  } -static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2;  #undef GEN5_WA_STORES  static void @@ -1123,6 +1210,10 @@ int intel_ring_pin(struct intel_ring *ring)  	GEM_BUG_ON(ring->vaddr); +	ret = i915_timeline_pin(ring->timeline); +	if (ret) +		return ret; +  	flags = PIN_GLOBAL;  	/* Ring wraparound at offset 0 sometimes hangs. No idea why. 
*/ @@ -1139,28 +1230,32 @@ int intel_ring_pin(struct intel_ring *ring)  		else  			ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);  		if (unlikely(ret)) -			return ret; +			goto unpin_timeline;  	}  	ret = i915_vma_pin(vma, 0, 0, flags);  	if (unlikely(ret)) -		return ret; +		goto unpin_timeline;  	if (i915_vma_is_map_and_fenceable(vma))  		addr = (void __force *)i915_vma_pin_iomap(vma);  	else  		addr = i915_gem_object_pin_map(vma->obj, map); -	if (IS_ERR(addr)) -		goto err; +	if (IS_ERR(addr)) { +		ret = PTR_ERR(addr); +		goto unpin_ring; +	}  	vma->obj->pin_global++;  	ring->vaddr = addr;  	return 0; -err: +unpin_ring:  	i915_vma_unpin(vma); -	return PTR_ERR(addr); +unpin_timeline: +	i915_timeline_unpin(ring->timeline); +	return ret;  }  void intel_ring_reset(struct intel_ring *ring, u32 tail) @@ -1189,6 +1284,8 @@ void intel_ring_unpin(struct intel_ring *ring)  	ring->vma->obj->pin_global--;  	i915_vma_unpin(ring->vma); + +	i915_timeline_unpin(ring->timeline);  }  static struct i915_vma * @@ -1499,13 +1596,18 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)  	struct intel_ring *ring;  	int err; -	intel_engine_setup_common(engine); +	err = intel_engine_setup_common(engine); +	if (err) +		return err; -	timeline = i915_timeline_create(engine->i915, engine->name); +	timeline = i915_timeline_create(engine->i915, +					engine->name, +					engine->status_page.vma);  	if (IS_ERR(timeline)) {  		err = PTR_ERR(timeline);  		goto err;  	} +	GEM_BUG_ON(timeline->has_initial_breadcrumb);  	ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);  	i915_timeline_put(timeline); @@ -1525,6 +1627,8 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)  	if (err)  		goto err_unpin; +	GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma); +  	return 0;  err_unpin: @@ -1857,6 +1961,7 @@ static int ring_request_alloc(struct i915_request *request)  	int ret;  	GEM_BUG_ON(!request->hw_context->pin_count); +	GEM_BUG_ON(request->timeline->has_initial_breadcrumb);  	/*  	 * Flush enough space to reduce the likelihood of waiting after @@ -2193,12 +2298,14 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,  	engine->context_pin = intel_ring_context_pin;  	engine->request_alloc = ring_request_alloc; -	engine->emit_breadcrumb = i9xx_emit_breadcrumb; -	engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; -	if (IS_GEN(dev_priv, 5)) { -		engine->emit_breadcrumb = gen5_emit_breadcrumb; -		engine->emit_breadcrumb_sz = gen5_emit_breadcrumb_sz; -	} +	/* +	 * Using a global execution timeline; the previous final breadcrumb is +	 * equivalent to our next initial bread so we can elide +	 * engine->emit_init_breadcrumb(). 
+	 */ +	engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb; +	if (IS_GEN(dev_priv, 5)) +		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;  	engine->set_default_submission = i9xx_set_default_submission; @@ -2227,13 +2334,11 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)  	if (INTEL_GEN(dev_priv) >= 7) {  		engine->init_context = intel_rcs_ctx_init;  		engine->emit_flush = gen7_render_ring_flush; -		engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb; -		engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz; +		engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;  	} else if (IS_GEN(dev_priv, 6)) {  		engine->init_context = intel_rcs_ctx_init;  		engine->emit_flush = gen6_render_ring_flush; -		engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb; -		engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz; +		engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;  	} else if (IS_GEN(dev_priv, 5)) {  		engine->emit_flush = gen4_render_ring_flush;  	} else { @@ -2269,13 +2374,10 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)  		engine->emit_flush = gen6_bsd_ring_flush;  		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; -		if (IS_GEN(dev_priv, 6)) { -			engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; -			engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; -		} else { -			engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; -			engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; -		} +		if (IS_GEN(dev_priv, 6)) +			engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; +		else +			engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;  	} else {  		engine->emit_flush = bsd_ring_flush;  		if (IS_GEN(dev_priv, 5)) @@ -2298,13 +2400,10 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)  	engine->emit_flush = gen6_ring_flush;  	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; -	if (IS_GEN(dev_priv, 6)) { -		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; -		engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; -	} else { -		engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; -		engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; -	} +	if (IS_GEN(dev_priv, 6)) +		engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; +	else +		engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;  	return intel_init_ring_buffer(engine);  } @@ -2322,8 +2421,7 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)  	engine->irq_enable = hsw_vebox_irq_enable;  	engine->irq_disable = hsw_vebox_irq_disable; -	engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; -	engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; +	engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;  	return intel_init_ring_buffer(engine);  } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c3ef0f9bf321..34d0a148e664 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,7 @@  #include <drm/drm_util.h>  #include <linux/hashtable.h> +#include <linux/irq_work.h>  #include <linux/seqlock.h>  #include "i915_gem_batch_pool.h" @@ -32,8 +33,7 @@ struct i915_sched_attr;  struct intel_hw_status_page {  	struct i915_vma *vma; -	u32 *page_addr; -	u32 ggtt_offset; +	u32 *addr;  };  #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) @@ -120,13 +120,8 @@ struct intel_instdone {  struct intel_engine_hangcheck {  	u64 acthd;  	u32 seqno; -	enum intel_engine_hangcheck_action action;  	unsigned long action_timestamp; 
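/*
 * Editorial sketch, not part of the patch: the emit_breadcrumb/_sz pair is
 * replaced by emit_fini_breadcrumb(), which returns the advanced dword
 * pointer, plus an emit_fini_breadcrumb_dw count derived from it. A minimal
 * userspace illustration of that pattern follows; the opcodes and names are
 * invented stand-ins.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;

/* hypothetical emitter: writes its commands and returns the advanced cursor */
static u32 *emit_fini(u32 *cs, u32 seqno)
{
	*cs++ = 0x04000000;	/* stand-in for a FLUSH opcode */
	*cs++ = 0x10800000;	/* stand-in for a STORE_DWORD opcode */
	*cs++ = 0x40;		/* destination offset */
	*cs++ = seqno;		/* value to store */
	*cs++ = 0x01000000;	/* stand-in for a USER_INTERRUPT opcode */
	*cs++ = 0;		/* NOOP padding to an even dword count */
	return cs;
}

int main(void)
{
	u32 ring[32];
	u32 *start = ring;
	u32 *end = emit_fini(start, 1234);

	/*
	 * The caller measures the emitted length from the returned pointer
	 * instead of trusting a separately maintained *_sz constant that can
	 * drift out of sync with the emitter.
	 */
	printf("emitted %td dwords\n", end - start);
	return 0;
}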
-	int deadlock;  	struct intel_instdone instdone; -	struct i915_request *active_request; -	bool stalled:1; -	bool wedged:1;  };  struct intel_ring { @@ -209,6 +204,7 @@ struct i915_priolist {  struct st_preempt_hang {  	struct completion completion; +	unsigned int count;  	bool inject_hang;  }; @@ -299,14 +295,18 @@ struct intel_engine_execlists {  	unsigned int port_mask;  	/** -	 * @queue_priority: Highest pending priority. +	 * @queue_priority_hint: Highest pending priority.  	 *  	 * When we add requests into the queue, or adjust the priority of  	 * executing requests, we compute the maximum priority of those  	 * pending requests. We can then use this value to determine if  	 * we need to preempt the executing requests to service the queue. +	 * However, since the we may have recorded the priority of an inflight +	 * request we wanted to preempt but since completed, at the time of +	 * dequeuing the priority hint may no longer may match the highest +	 * available request priority.  	 */ -	int queue_priority; +	int queue_priority_hint;  	/**  	 * @queue: queue of requests, in priority lists @@ -382,22 +382,14 @@ struct intel_engine_cs {  	 * the overhead of waking that client is much preferred.  	 */  	struct intel_breadcrumbs { -		spinlock_t irq_lock; /* protects irq_*; irqsafe */ -		struct intel_wait *irq_wait; /* oldest waiter by retirement */ +		spinlock_t irq_lock; +		struct list_head signalers; -		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ -		struct rb_root waiters; /* sorted by retirement, priority */ -		struct list_head signals; /* sorted by retirement */ -		struct task_struct *signaler; /* used for fence signalling */ +		struct irq_work irq_work; /* for use from inside irq_lock */ -		struct timer_list fake_irq; /* used after a missed interrupt */ -		struct timer_list hangcheck; /* detect missed interrupts */ - -		unsigned int hangcheck_interrupts;  		unsigned int irq_enabled; -		unsigned int irq_count; -		bool irq_armed : 1; +		bool irq_armed;  	} breadcrumbs;  	struct { @@ -444,9 +436,8 @@ struct intel_engine_cs {  	int		(*init_hw)(struct intel_engine_cs *engine);  	struct { -		struct i915_request *(*prepare)(struct intel_engine_cs *engine); -		void (*reset)(struct intel_engine_cs *engine, -			      struct i915_request *rq); +		void (*prepare)(struct intel_engine_cs *engine); +		void (*reset)(struct intel_engine_cs *engine, bool stalled);  		void (*finish)(struct intel_engine_cs *engine);  	} reset; @@ -470,8 +461,10 @@ struct intel_engine_cs {  					 unsigned int dispatch_flags);  #define I915_DISPATCH_SECURE BIT(0)  #define I915_DISPATCH_PINNED BIT(1) -	void		(*emit_breadcrumb)(struct i915_request *rq, u32 *cs); -	int		emit_breadcrumb_sz; +	int		 (*emit_init_breadcrumb)(struct i915_request *rq); +	u32		*(*emit_fini_breadcrumb)(struct i915_request *rq, +						 u32 *cs); +	unsigned int	emit_fini_breadcrumb_dw;  	/* Pass the request to the hardware queue (e.g. directly into  	 * the legacy ringbuffer or to the end of an execlist). @@ -677,7 +670,7 @@ static inline u32  intel_read_status_page(const struct intel_engine_cs *engine, int reg)  {  	/* Ensure that the compiler doesn't optimize away the load. 
*/ -	return READ_ONCE(engine->status_page.page_addr[reg]); +	return READ_ONCE(engine->status_page.addr[reg]);  }  static inline void @@ -690,12 +683,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)  	 */  	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {  		mb(); -		clflush(&engine->status_page.page_addr[reg]); -		engine->status_page.page_addr[reg] = value; -		clflush(&engine->status_page.page_addr[reg]); +		clflush(&engine->status_page.addr[reg]); +		engine->status_page.addr[reg] = value; +		clflush(&engine->status_page.addr[reg]);  		mb();  	} else { -		WRITE_ONCE(engine->status_page.page_addr[reg], value); +		WRITE_ONCE(engine->status_page.addr[reg], value);  	}  } @@ -716,11 +709,13 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)   * The area from dword 0x30 to 0x3ff is available for driver usage.   */  #define I915_GEM_HWS_INDEX		0x30 -#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -#define I915_GEM_HWS_PREEMPT_INDEX	0x32 -#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -#define I915_GEM_HWS_SCRATCH_INDEX	0x40 -#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +#define I915_GEM_HWS_INDEX_ADDR		(I915_GEM_HWS_INDEX * sizeof(u32)) +#define I915_GEM_HWS_PREEMPT		0x32 +#define I915_GEM_HWS_PREEMPT_ADDR	(I915_GEM_HWS_PREEMPT * sizeof(u32)) +#define I915_GEM_HWS_SEQNO		0x40 +#define I915_GEM_HWS_SEQNO_ADDR		(I915_GEM_HWS_SEQNO * sizeof(u32)) +#define I915_GEM_HWS_SCRATCH		0x80 +#define I915_GEM_HWS_SCRATCH_ADDR	(I915_GEM_HWS_SCRATCH * sizeof(u32))  #define I915_HWS_CSB_BUF0_INDEX		0x10  #define I915_HWS_CSB_WRITE_INDEX	0x1f @@ -825,7 +820,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)  void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno); -void intel_engine_setup_common(struct intel_engine_cs *engine); +int intel_engine_setup_common(struct intel_engine_cs *engine);  int intel_engine_init_common(struct intel_engine_cs *engine);  void intel_engine_cleanup_common(struct intel_engine_cs *engine); @@ -883,93 +878,29 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,  void intel_engine_get_instdone(struct intel_engine_cs *engine,  			       struct intel_instdone *instdone); -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) -{ -	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR; -} - -static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) -{ -	return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR; -} - -/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ -int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); - -static inline void intel_wait_init(struct intel_wait *wait) -{ -	wait->tsk = current; -	wait->request = NULL; -} - -static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) -{ -	wait->tsk = current; -	wait->seqno = seqno; -} - -static inline bool intel_wait_has_seqno(const struct intel_wait *wait) -{ -	return wait->seqno; -} - -static inline bool -intel_wait_update_seqno(struct intel_wait *wait, u32 seqno) -{ -	wait->seqno = seqno; -	return intel_wait_has_seqno(wait); -} - -static inline bool -intel_wait_update_request(struct intel_wait *wait, -			  const struct i915_request *rq) -{ -	return intel_wait_update_seqno(wait, i915_request_global_seqno(rq)); -} - -static inline bool -intel_wait_check_seqno(const 
struct intel_wait *wait, u32 seqno) -{ -	return wait->seqno == seqno; -} - -static inline bool -intel_wait_check_request(const struct intel_wait *wait, -			 const struct i915_request *rq) -{ -	return intel_wait_check_seqno(wait, i915_request_global_seqno(rq)); -} +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -static inline bool intel_wait_complete(const struct intel_wait *wait) -{ -	return RB_EMPTY_NODE(&wait->node); -} +void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); +void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); -bool intel_engine_add_wait(struct intel_engine_cs *engine, -			   struct intel_wait *wait); -void intel_engine_remove_wait(struct intel_engine_cs *engine, -			      struct intel_wait *wait); -bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup); -void intel_engine_cancel_signaling(struct i915_request *request); +bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); -static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) +static inline void +intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)  { -	return READ_ONCE(engine->breadcrumbs.irq_wait); +	irq_work_queue(&engine->breadcrumbs.irq_work);  } -unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); -#define ENGINE_WAKEUP_WAITER BIT(0) -#define ENGINE_WAKEUP_ASLEEP BIT(1) - -void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); -void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); - -void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); +bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);  void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);  void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, +				    struct drm_printer *p); +  static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)  {  	memset(batch, 0, 6 * sizeof(u32)); @@ -1018,6 +949,13 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)  	return cs;  } +static inline void intel_engine_reset(struct intel_engine_cs *engine, +				      bool stalled) +{ +	if (engine->reset.reset) +		engine->reset.reset(engine, stalled); +} +  void intel_engines_sanitize(struct drm_i915_private *i915, bool force);  bool intel_engine_is_idle(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index b02d3d9809e3..cd42e81f8a90 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -493,7 +493,7 @@ skl_program_plane(struct intel_plane *plane,  	keymax = (key->max_value & 0xffffff) | PLANE_KEYMAX_ALPHA(alpha); -	keymsk = key->channel_mask & 0x3ffffff; +	keymsk = key->channel_mask & 0x7ffffff;  	if (alpha < 0xff)  		keymsk |= PLANE_KEYMSK_ALPHA_ENABLE; diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index bd5536f0ec92..3924c4944e1f 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -306,7 +306,7 @@ struct tv_mode {  	u32 clock;  	u16 refresh; /* in millihertz (for precision) */ -	u32 oversample; +	u8 oversample;  	u8 hsync_end;  	u16 hblank_start, hblank_end, htotal;  	
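/*
 * Editorial sketch, not part of the patch: the breadcrumbs structure above now
 * carries a struct irq_work, and intel_engine_queue_breadcrumbs() simply calls
 * irq_work_queue() so the signaling walk is deferred out of the interrupt
 * handler's hot path. A schematic kernel-side sketch of that pattern follows;
 * only init_irq_work(), irq_work_queue() and container_of() are real kernel
 * APIs, the my_* names are invented and this does not build in userspace.
 */
#include <linux/irq_work.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct my_breadcrumbs {
	spinlock_t irq_lock;
	struct list_head signalers;
	struct irq_work irq_work;
};

/* runs shortly after being queued, outside the original interrupt handler */
static void my_signal_breadcrumbs(struct irq_work *work)
{
	struct my_breadcrumbs *b =
		container_of(work, struct my_breadcrumbs, irq_work);
	unsigned long flags;

	spin_lock_irqsave(&b->irq_lock, flags);
	/* walk b->signalers and signal any completed fences here */
	spin_unlock_irqrestore(&b->irq_lock, flags);
}

static void my_breadcrumbs_init(struct my_breadcrumbs *b)
{
	spin_lock_init(&b->irq_lock);
	INIT_LIST_HEAD(&b->signalers);
	init_irq_work(&b->irq_work, my_signal_breadcrumbs);
}

/* called from the user-interrupt handler: just kick the deferred work */
static void my_queue_breadcrumbs(struct my_breadcrumbs *b)
{
	irq_work_queue(&b->irq_work);
}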
bool progressive : 1, trilevel_sync : 1, component_only : 1; @@ -339,7 +339,6 @@ struct tv_mode {  	const struct video_levels *composite_levels, *svideo_levels;  	const struct color_conversion *composite_color, *svideo_color;  	const u32 *filter_table; -	u16 max_srcw;  }; @@ -378,8 +377,8 @@ static const struct tv_mode tv_modes[] = {  		.name		= "NTSC-M",  		.clock		= 108000,  		.refresh	= 59940, -		.oversample	= TV_OVERSAMPLE_8X, -		.component_only = 0, +		.oversample	= 8, +		.component_only = false,  		/* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */  		.hsync_end	= 64,		    .hblank_end		= 124, @@ -421,8 +420,8 @@ static const struct tv_mode tv_modes[] = {  		.name		= "NTSC-443",  		.clock		= 108000,  		.refresh	= 59940, -		.oversample	= TV_OVERSAMPLE_8X, -		.component_only = 0, +		.oversample	= 8, +		.component_only = false,  		/* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 4.43MHz */  		.hsync_end	= 64,		    .hblank_end		= 124,  		.hblank_start	= 836,		    .htotal		= 857, @@ -463,8 +462,8 @@ static const struct tv_mode tv_modes[] = {  		.name		= "NTSC-J",  		.clock		= 108000,  		.refresh	= 59940, -		.oversample	= TV_OVERSAMPLE_8X, -		.component_only = 0, +		.oversample	= 8, +		.component_only = false,  		/* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */  		.hsync_end	= 64,		    .hblank_end		= 124, @@ -506,8 +505,8 @@ static const struct tv_mode tv_modes[] = {  		.name		= "PAL-M",  		.clock		= 108000,  		.refresh	= 59940, -		.oversample	= TV_OVERSAMPLE_8X, -		.component_only = 0, +		.oversample	= 8, +		.component_only = false,  		/* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */  		.hsync_end	= 64,		  .hblank_end		= 124, @@ -550,8 +549,8 @@ static const struct tv_mode tv_modes[] = {  		.name	    = "PAL-N",  		.clock		= 108000,  		.refresh	= 50000, -		.oversample	= TV_OVERSAMPLE_8X, -		.component_only = 0, +		.oversample	= 8, +		.component_only = false,  		.hsync_end	= 64,		    .hblank_end		= 128,  		.hblank_start = 844,	    .htotal		= 863, @@ -595,8 +594,8 @@ static const struct tv_mode tv_modes[] = {  		.name	    = "PAL",  		.clock		= 108000,  		.refresh	= 50000, -		.oversample	= TV_OVERSAMPLE_8X, -		.component_only = 0, +		.oversample	= 8, +		.component_only = false,  		.hsync_end	= 64,		    .hblank_end		= 142,  		.hblank_start	= 844,	    .htotal		= 863, @@ -635,10 +634,10 @@ static const struct tv_mode tv_modes[] = {  	},  	{  		.name       = "480p", -		.clock		= 107520, +		.clock		= 108000,  		.refresh	= 59940, -		.oversample     = TV_OVERSAMPLE_4X, -		.component_only = 1, +		.oversample     = 4, +		.component_only = true,  		.hsync_end      = 64,               .hblank_end         = 122,  		.hblank_start   = 842,              .htotal             = 857, @@ -659,10 +658,10 @@ static const struct tv_mode tv_modes[] = {  	},  	{  		.name       = "576p", -		.clock		= 107520, +		.clock		= 108000,  		.refresh	= 50000, -		.oversample     = TV_OVERSAMPLE_4X, -		.component_only = 1, +		.oversample     = 4, +		.component_only = true,  		.hsync_end      = 64,               .hblank_end         = 139,  		.hblank_start   = 859,              .htotal             = 863, @@ -683,10 +682,10 @@ static const struct tv_mode tv_modes[] = {  	},  	{  		.name       = "720p@60Hz", -		.clock		= 148800, +		.clock		= 148500,  		.refresh	= 60000, -		.oversample     = TV_OVERSAMPLE_2X, -		.component_only = 1, +		.oversample     = 2, +		.component_only = true,  		.hsync_end      = 80,               .hblank_end         = 300,  		.hblank_start   = 1580,             
.htotal             = 1649, @@ -707,10 +706,10 @@ static const struct tv_mode tv_modes[] = {  	},  	{  		.name       = "720p@50Hz", -		.clock		= 148800, +		.clock		= 148500,  		.refresh	= 50000, -		.oversample     = TV_OVERSAMPLE_2X, -		.component_only = 1, +		.oversample     = 2, +		.component_only = true,  		.hsync_end      = 80,               .hblank_end         = 300,  		.hblank_start   = 1580,             .htotal             = 1979, @@ -728,14 +727,13 @@ static const struct tv_mode tv_modes[] = {  		.burst_ena      = false,  		.filter_table = filter_table, -		.max_srcw = 800  	},  	{  		.name       = "1080i@50Hz", -		.clock		= 148800, +		.clock		= 148500,  		.refresh	= 50000, -		.oversample     = TV_OVERSAMPLE_2X, -		.component_only = 1, +		.oversample     = 2, +		.component_only = true,  		.hsync_end      = 88,               .hblank_end         = 235,  		.hblank_start   = 2155,             .htotal             = 2639, @@ -758,10 +756,10 @@ static const struct tv_mode tv_modes[] = {  	},  	{  		.name       = "1080i@60Hz", -		.clock		= 148800, +		.clock		= 148500,  		.refresh	= 60000, -		.oversample     = TV_OVERSAMPLE_2X, -		.component_only = 1, +		.oversample     = 2, +		.component_only = true,  		.hsync_end      = 88,               .hblank_end         = 235,  		.hblank_start   = 2155,             .htotal             = 2199, @@ -782,8 +780,115 @@ static const struct tv_mode tv_modes[] = {  		.filter_table = filter_table,  	}, + +	{ +		.name       = "1080p@30Hz", +		.clock		= 148500, +		.refresh	= 30000, +		.oversample     = 2, +		.component_only = true, + +		.hsync_end      = 88,               .hblank_end         = 235, +		.hblank_start   = 2155,             .htotal             = 2199, + +		.progressive	= true,		    .trilevel_sync = true, + +		.vsync_start_f1 = 8,               .vsync_start_f2     = 8, +		.vsync_len      = 10, + +		.veq_ena	= false,	.veq_start_f1	= 0, +		.veq_start_f2	= 0,		    .veq_len		= 0, + +		.vi_end_f1      = 44,               .vi_end_f2          = 44, +		.nbr_end        = 1079, + +		.burst_ena      = false, + +		.filter_table = filter_table, +	}, + +	{ +		.name       = "1080p@50Hz", +		.clock		= 148500, +		.refresh	= 50000, +		.oversample     = 1, +		.component_only = true, + +		.hsync_end      = 88,               .hblank_end         = 235, +		.hblank_start   = 2155,             .htotal             = 2639, + +		.progressive	= true,		    .trilevel_sync = true, + +		.vsync_start_f1 = 8,               .vsync_start_f2     = 8, +		.vsync_len      = 10, + +		.veq_ena	= false,	.veq_start_f1	= 0, +		.veq_start_f2	= 0,		    .veq_len		= 0, + +		.vi_end_f1      = 44,               .vi_end_f2          = 44, +		.nbr_end        = 1079, + +		.burst_ena      = false, + +		.filter_table = filter_table, +	}, + +	{ +		.name       = "1080p@60Hz", +		.clock		= 148500, +		.refresh	= 60000, +		.oversample     = 1, +		.component_only = true, + +		.hsync_end      = 88,               .hblank_end         = 235, +		.hblank_start   = 2155,             .htotal             = 2199, + +		.progressive	= true,		    .trilevel_sync = true, + +		.vsync_start_f1 = 8,               .vsync_start_f2     = 8, +		.vsync_len      = 10, + +		.veq_ena	= false,		    .veq_start_f1	= 0, +		.veq_start_f2	= 0,		    .veq_len		= 0, + +		.vi_end_f1      = 44,               .vi_end_f2          = 44, +		.nbr_end        = 1079, + +		.burst_ena      = false, + +		.filter_table = filter_table, +	}, +}; + +struct intel_tv_connector_state { +	struct drm_connector_state base; + +	/* +	 * May need to override the user 
margins for +	 * gen3 >1024 wide source vertical centering. +	 */ +	struct { +		u16 top, bottom; +	} margins; + +	bool bypass_vfilter;  }; +#define to_intel_tv_connector_state(x) container_of(x, struct intel_tv_connector_state, base) + +static struct drm_connector_state * +intel_tv_connector_duplicate_state(struct drm_connector *connector) +{ +	struct intel_tv_connector_state *state; + +	state = kmemdup(connector->state, sizeof(*state), GFP_KERNEL); +	if (!state) +		return NULL; + +	__drm_atomic_helper_connector_duplicate_state(connector, &state->base); +	return &state->base; +} +  static struct intel_tv *enc_to_tv(struct intel_encoder *encoder)  {  	return container_of(encoder, struct intel_tv, base); @@ -859,14 +964,215 @@ intel_tv_mode_valid(struct drm_connector *connector,  	return MODE_CLOCK_RANGE;  } +static int +intel_tv_mode_vdisplay(const struct tv_mode *tv_mode) +{ +	if (tv_mode->progressive) +		return tv_mode->nbr_end + 1; +	else +		return 2 * (tv_mode->nbr_end + 1); +} + +static void +intel_tv_mode_to_mode(struct drm_display_mode *mode, +		      const struct tv_mode *tv_mode) +{ +	mode->clock = tv_mode->clock / +		(tv_mode->oversample >> !tv_mode->progressive); + +	/* +	 * tv_mode horizontal timings: +	 * +	 * hsync_end +	 *    | hblank_end +	 *    |    | hblank_start +	 *    |    |       | htotal +	 *    |     _______    | +	 *     ____/       \___ +	 * \__/                \ +	 */ +	mode->hdisplay = +		tv_mode->hblank_start - tv_mode->hblank_end; +	mode->hsync_start = mode->hdisplay + +		tv_mode->htotal - tv_mode->hblank_start; +	mode->hsync_end = mode->hsync_start + +		tv_mode->hsync_end; +	mode->htotal = tv_mode->htotal + 1; + +	/* +	 * tv_mode vertical timings: +	 * +	 * vsync_start +	 *    | vsync_end +	 *    |  | vi_end nbr_end +	 *    |  |    |       | +	 *    |  |     _______ +	 * \__    ____/       \ +	 *    \__/ +	 */ +	mode->vdisplay = intel_tv_mode_vdisplay(tv_mode); +	if (tv_mode->progressive) { +		mode->vsync_start = mode->vdisplay + +			tv_mode->vsync_start_f1 + 1; +		mode->vsync_end = mode->vsync_start + +			tv_mode->vsync_len; +		mode->vtotal = mode->vdisplay + +			tv_mode->vi_end_f1 + 1; +	} else { +		mode->vsync_start = mode->vdisplay + +			tv_mode->vsync_start_f1 + 1 + +			tv_mode->vsync_start_f2 + 1; +		mode->vsync_end = mode->vsync_start + +			2 * tv_mode->vsync_len; +		mode->vtotal = mode->vdisplay + +			tv_mode->vi_end_f1 + 1 + +			tv_mode->vi_end_f2 + 1; +	} + +	/* TV has it's own notion of sync and other mode flags, so clear them. */ +	mode->flags = 0; + +	mode->vrefresh = 0; +	mode->vrefresh = drm_mode_vrefresh(mode); + +	snprintf(mode->name, sizeof(mode->name), +		 "%dx%d%c (%s)", +		 mode->hdisplay, mode->vdisplay, +		 tv_mode->progressive ? 
'p' : 'i', +		 tv_mode->name); +} + +static void intel_tv_scale_mode_horiz(struct drm_display_mode *mode, +				      int hdisplay, int left_margin, +				      int right_margin) +{ +	int hsync_start = mode->hsync_start - mode->hdisplay + right_margin; +	int hsync_end = mode->hsync_end - mode->hdisplay + right_margin; +	int new_htotal = mode->htotal * hdisplay / +		(mode->hdisplay - left_margin - right_margin); + +	mode->clock = mode->clock * new_htotal / mode->htotal; + +	mode->hdisplay = hdisplay; +	mode->hsync_start = hdisplay + hsync_start * new_htotal / mode->htotal; +	mode->hsync_end = hdisplay + hsync_end * new_htotal / mode->htotal; +	mode->htotal = new_htotal; +} + +static void intel_tv_scale_mode_vert(struct drm_display_mode *mode, +				     int vdisplay, int top_margin, +				     int bottom_margin) +{ +	int vsync_start = mode->vsync_start - mode->vdisplay + bottom_margin; +	int vsync_end = mode->vsync_end - mode->vdisplay + bottom_margin; +	int new_vtotal = mode->vtotal * vdisplay / +		(mode->vdisplay - top_margin - bottom_margin); + +	mode->clock = mode->clock * new_vtotal / mode->vtotal; + +	mode->vdisplay = vdisplay; +	mode->vsync_start = vdisplay + vsync_start * new_vtotal / mode->vtotal; +	mode->vsync_end = vdisplay + vsync_end * new_vtotal / mode->vtotal; +	mode->vtotal = new_vtotal; +}  static void  intel_tv_get_config(struct intel_encoder *encoder,  		    struct intel_crtc_state *pipe_config)  { +	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); +	struct drm_display_mode *adjusted_mode = +		&pipe_config->base.adjusted_mode; +	struct drm_display_mode mode = {}; +	u32 tv_ctl, hctl1, hctl3, vctl1, vctl2, tmp; +	struct tv_mode tv_mode = {}; +	int hdisplay = adjusted_mode->crtc_hdisplay; +	int vdisplay = adjusted_mode->crtc_vdisplay; +	int xsize, ysize, xpos, ypos; +  	pipe_config->output_types |= BIT(INTEL_OUTPUT_TVOUT); -	pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; +	tv_ctl = I915_READ(TV_CTL); +	hctl1 = I915_READ(TV_H_CTL_1); +	hctl3 = I915_READ(TV_H_CTL_3); +	vctl1 = I915_READ(TV_V_CTL_1); +	vctl2 = I915_READ(TV_V_CTL_2); + +	tv_mode.htotal = (hctl1 & TV_HTOTAL_MASK) >> TV_HTOTAL_SHIFT; +	tv_mode.hsync_end = (hctl1 & TV_HSYNC_END_MASK) >> TV_HSYNC_END_SHIFT; + +	tv_mode.hblank_start = (hctl3 & TV_HBLANK_START_MASK) >> TV_HBLANK_START_SHIFT; +	tv_mode.hblank_end = (hctl3 & TV_HSYNC_END_MASK) >> TV_HBLANK_END_SHIFT; + +	tv_mode.nbr_end = (vctl1 & TV_NBR_END_MASK) >> TV_NBR_END_SHIFT; +	tv_mode.vi_end_f1 = (vctl1 & TV_VI_END_F1_MASK) >> TV_VI_END_F1_SHIFT; +	tv_mode.vi_end_f2 = (vctl1 & TV_VI_END_F2_MASK) >> TV_VI_END_F2_SHIFT; + +	tv_mode.vsync_len = (vctl2 & TV_VSYNC_LEN_MASK) >> TV_VSYNC_LEN_SHIFT; +	tv_mode.vsync_start_f1 = (vctl2 & TV_VSYNC_START_F1_MASK) >> TV_VSYNC_START_F1_SHIFT; +	tv_mode.vsync_start_f2 = (vctl2 & TV_VSYNC_START_F2_MASK) >> TV_VSYNC_START_F2_SHIFT; + +	tv_mode.clock = pipe_config->port_clock; + +	tv_mode.progressive = tv_ctl & TV_PROGRESSIVE; + +	switch (tv_ctl & TV_OVERSAMPLE_MASK) { +	case TV_OVERSAMPLE_8X: +		tv_mode.oversample = 8; +		break; +	case TV_OVERSAMPLE_4X: +		tv_mode.oversample = 4; +		break; +	case TV_OVERSAMPLE_2X: +		tv_mode.oversample = 2; +		break; +	default: +		tv_mode.oversample = 1; +		break; +	} + +	tmp = I915_READ(TV_WIN_POS); +	xpos = tmp >> 16; +	ypos = tmp & 0xffff; + +	tmp = I915_READ(TV_WIN_SIZE); +	xsize = tmp >> 16; +	ysize = tmp & 0xffff; + +	intel_tv_mode_to_mode(&mode, &tv_mode); + +	DRM_DEBUG_KMS("TV mode:\n"); +	drm_mode_debug_printmodeline(&mode); + +	
intel_tv_scale_mode_horiz(&mode, hdisplay, +				  xpos, mode.hdisplay - xsize - xpos); +	intel_tv_scale_mode_vert(&mode, vdisplay, +				 ypos, mode.vdisplay - ysize - ypos); + +	adjusted_mode->crtc_clock = mode.clock; +	if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) +		adjusted_mode->crtc_clock /= 2; + +	/* pixel counter doesn't work on i965gm TV output */ +	if (IS_I965GM(dev_priv)) +		adjusted_mode->private_flags |= +			I915_MODE_FLAG_USE_SCANLINE_COUNTER; +} + +static bool intel_tv_source_too_wide(struct drm_i915_private *dev_priv, +				     int hdisplay) +{ +	return IS_GEN(dev_priv, 3) && hdisplay > 1024; +} + +static bool intel_tv_vert_scaling(const struct drm_display_mode *tv_mode, +				  const struct drm_connector_state *conn_state, +				  int vdisplay) +{ +	return tv_mode->crtc_vdisplay - +		conn_state->tv.margins.top - +		conn_state->tv.margins.bottom != +		vdisplay;  }  static int @@ -874,9 +1180,14 @@ intel_tv_compute_config(struct intel_encoder *encoder,  			struct intel_crtc_state *pipe_config,  			struct drm_connector_state *conn_state)  { +	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); +	struct intel_tv_connector_state *tv_conn_state = +		to_intel_tv_connector_state(conn_state);  	const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state);  	struct drm_display_mode *adjusted_mode =  		&pipe_config->base.adjusted_mode; +	int hdisplay = adjusted_mode->crtc_hdisplay; +	int vdisplay = adjusted_mode->crtc_vdisplay;  	if (!tv_mode)  		return -EINVAL; @@ -885,17 +1196,136 @@ intel_tv_compute_config(struct intel_encoder *encoder,  		return -EINVAL;  	pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; -	adjusted_mode->crtc_clock = tv_mode->clock; +  	DRM_DEBUG_KMS("forcing bpc to 8 for TV\n");  	pipe_config->pipe_bpp = 8*3; -	/* TV has it's own notion of sync and other mode flags, so clear them. */ -	adjusted_mode->flags = 0; +	pipe_config->port_clock = tv_mode->clock; + +	intel_tv_mode_to_mode(adjusted_mode, tv_mode); +	drm_mode_set_crtcinfo(adjusted_mode, 0); + +	if (intel_tv_source_too_wide(dev_priv, hdisplay) || +	    !intel_tv_vert_scaling(adjusted_mode, conn_state, vdisplay)) { +		int extra, top, bottom; + +		extra = adjusted_mode->crtc_vdisplay - vdisplay; + +		if (extra < 0) { +			DRM_DEBUG_KMS("No vertical scaling for >1024 pixel wide modes\n"); +			return -EINVAL; +		} + +		/* Need to turn off the vertical filter and center the image */ + +		/* Attempt to maintain the relative sizes of the margins */ +		top = conn_state->tv.margins.top; +		bottom = conn_state->tv.margins.bottom; + +		if (top + bottom) +			top = extra * top / (top + bottom); +		else +			top = extra / 2; +		bottom = extra - top; + +		tv_conn_state->margins.top = top; +		tv_conn_state->margins.bottom = bottom; + +		tv_conn_state->bypass_vfilter = true; + +		if (!tv_mode->progressive) { +			adjusted_mode->clock /= 2; +			adjusted_mode->crtc_clock /= 2; +			adjusted_mode->flags |= DRM_MODE_FLAG_INTERLACE; +		} +	} else { +		tv_conn_state->margins.top = conn_state->tv.margins.top; +		tv_conn_state->margins.bottom = conn_state->tv.margins.bottom; + +		tv_conn_state->bypass_vfilter = false; +	} + +	DRM_DEBUG_KMS("TV mode:\n"); +	drm_mode_debug_printmodeline(adjusted_mode);  	/* -	 * FIXME: We don't check whether the input mode is actually what we want -	 * or whether userspace is doing something stupid. 
+	 * The pipe scanline counter behaviour looks as follows when +	 * using the TV encoder: +	 * +	 * time -> +	 * +	 * dsl=vtotal-1       |             | +	 *                   ||            || +	 *               ___| |        ___| | +	 *              /     |       /     | +	 *             /      |      /      | +	 * dsl=0   ___/       |_____/       | +	 *        | | |  |  | | +	 *         ^ ^ ^   ^ ^ +	 *         | | |   | pipe vblank/first part of tv vblank +	 *         | | |   bottom margin +	 *         | | active +	 *         | top margin +	 *         remainder of tv vblank +	 * +	 * When the TV encoder is used the pipe wants to run faster +	 * than expected rate. During the active portion the TV +	 * encoder stalls the pipe every few lines to keep it in +	 * check. When the TV encoder reaches the bottom margin the +	 * pipe simply stops. Once we reach the TV vblank the pipe is +	 * no longer stalled and it runs at the max rate (apparently +	 * oversample clock on gen3, cdclk on gen4). Once the pipe +	 * reaches the pipe vtotal the pipe stops for the remainder +	 * of the TV vblank/top margin. The pipe starts up again when +	 * the TV encoder exits the top margin. +	 * +	 * To avoid huge hassles for vblank timestamping we scale +	 * the pipe timings as if the pipe always runs at the average +	 * rate it maintains during the active period. This also +	 * gives us a reasonable guesstimate as to the pixel rate. +	 * Due to the variation in the actual pipe speed the scanline +	 * counter will give us slightly erroneous results during the +	 * TV vblank/margins. But since vtotal was selected such that +	 * it matches the average rate of the pipe during the active +	 * portion the error shouldn't cause any serious grief to +	 * vblank timestamps. +	 * +	 * For posterity here is the empirically derived formula +	 * that gives us the maximum length of the pipe vblank +	 * we can use without causing display corruption. Following +	 * this would allow us to have a ticking scanline counter +	 * everywhere except during the bottom margin (there the +	 * pipe always stops). Ie. this would eliminate the second +	 * flat portion of the above graph. However this would also +	 * complicate vblank timestamping as the pipe vtotal would +	 * no longer match the average rate the pipe runs at during +	 * the active portion. Hence following this formula seems +	 * more trouble that it's worth. 
+	 * +	 * if (IS_GEN(dev_priv, 4)) { +	 *	num = cdclk * (tv_mode->oversample >> !tv_mode->progressive); +	 *	den = tv_mode->clock; +	 * } else { +	 *	num = tv_mode->oversample >> !tv_mode->progressive; +	 *	den = 1; +	 * } +	 * max_pipe_vblank_len ~= +	 *	(num * tv_htotal * (tv_vblank_len + top_margin)) / +	 *	(den * pipe_htotal);  	 */ +	intel_tv_scale_mode_horiz(adjusted_mode, hdisplay, +				  conn_state->tv.margins.left, +				  conn_state->tv.margins.right); +	intel_tv_scale_mode_vert(adjusted_mode, vdisplay, +				 tv_conn_state->margins.top, +				 tv_conn_state->margins.bottom); +	drm_mode_set_crtcinfo(adjusted_mode, 0); +	adjusted_mode->name[0] = '\0'; + +	/* pixel counter doesn't work on i965gm TV output */ +	if (IS_I965GM(dev_priv)) +		adjusted_mode->private_flags |= +			I915_MODE_FLAG_USE_SCANLINE_COUNTER;  	return 0;  } @@ -986,14 +1416,16 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,  	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);  	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc);  	struct intel_tv *intel_tv = enc_to_tv(encoder); +	const struct intel_tv_connector_state *tv_conn_state = +		to_intel_tv_connector_state(conn_state);  	const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); -	u32 tv_ctl; +	u32 tv_ctl, tv_filter_ctl;  	u32 scctl1, scctl2, scctl3;  	int i, j;  	const struct video_levels *video_levels;  	const struct color_conversion *color_conversion;  	bool burst_ena; -	int xpos = 0x0, ypos = 0x0; +	int xpos, ypos;  	unsigned int xsize, ysize;  	if (!tv_mode) @@ -1029,7 +1461,21 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,  	}  	tv_ctl |= TV_ENC_PIPE_SEL(intel_crtc->pipe); -	tv_ctl |= tv_mode->oversample; + +	switch (tv_mode->oversample) { +	case 8: +		tv_ctl |= TV_OVERSAMPLE_8X; +		break; +	case 4: +		tv_ctl |= TV_OVERSAMPLE_4X; +		break; +	case 2: +		tv_ctl |= TV_OVERSAMPLE_2X; +		break; +	default: +		tv_ctl |= TV_OVERSAMPLE_NONE; +		break; +	}  	if (tv_mode->progressive)  		tv_ctl |= TV_PROGRESSIVE; @@ -1081,19 +1527,20 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,  	assert_pipe_disabled(dev_priv, intel_crtc->pipe);  	/* Filter ctl must be set before TV_WIN_SIZE */ -	I915_WRITE(TV_FILTER_CTL_1, TV_AUTO_SCALE); +	tv_filter_ctl = TV_AUTO_SCALE; +	if (tv_conn_state->bypass_vfilter) +		tv_filter_ctl |= TV_V_FILTER_BYPASS; +	I915_WRITE(TV_FILTER_CTL_1, tv_filter_ctl); +  	xsize = tv_mode->hblank_start - tv_mode->hblank_end; -	if (tv_mode->progressive) -		ysize = tv_mode->nbr_end + 1; -	else -		ysize = 2*tv_mode->nbr_end + 1; +	ysize = intel_tv_mode_vdisplay(tv_mode); -	xpos += conn_state->tv.margins.left; -	ypos += conn_state->tv.margins.top; +	xpos = conn_state->tv.margins.left; +	ypos = tv_conn_state->margins.top;  	xsize -= (conn_state->tv.margins.left +  		  conn_state->tv.margins.right); -	ysize -= (conn_state->tv.margins.top + -		  conn_state->tv.margins.bottom); +	ysize -= (tv_conn_state->margins.top + +		  tv_conn_state->margins.bottom);  	I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos);  	I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize); @@ -1110,23 +1557,6 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,  	I915_WRITE(TV_CTL, tv_ctl);  } -static const struct drm_display_mode reported_modes[] = { -	{ -		.name = "NTSC 480i", -		.clock = 107520, -		.hdisplay = 1280, -		.hsync_start = 1368, -		.hsync_end = 1496, -		.htotal = 1712, - -		.vdisplay = 1024, -		.vsync_start = 1027, -		.vsync_end = 1034, -		.vtotal = 1104, -		.type = DRM_MODE_TYPE_DRIVER, -	}, 
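/*
 * Editorial sketch, not part of the patch: the tv_modes[] table now stores the
 * oversample factor as a plain number (8/4/2/1), intel_tv_pre_enable()
 * translates it into the TV_OVERSAMPLE_* register encoding, and
 * intel_tv_get_config() performs the reverse decode for state readout. A
 * standalone illustration of that encode/decode split; the bit positions and
 * macro values here are invented, not the real register layout.
 */
#include <stdint.h>
#include <stdio.h>

#define OVERSAMPLE_NONE (0u << 18)
#define OVERSAMPLE_2X   (1u << 18)
#define OVERSAMPLE_4X   (2u << 18)
#define OVERSAMPLE_8X   (3u << 18)

/* encode a plain factor from the mode table into the control-register field */
static uint32_t oversample_to_reg(int factor)
{
	switch (factor) {
	case 8: return OVERSAMPLE_8X;
	case 4: return OVERSAMPLE_4X;
	case 2: return OVERSAMPLE_2X;
	default: return OVERSAMPLE_NONE;
	}
}

/* decode the register field back into a factor, as readout code must */
static int reg_to_oversample(uint32_t ctl)
{
	switch (ctl & (3u << 18)) {
	case OVERSAMPLE_8X: return 8;
	case OVERSAMPLE_4X: return 4;
	case OVERSAMPLE_2X: return 2;
	default: return 1;
	}
}

int main(void)
{
	int factor = 8; /* e.g. what the NTSC-M entry holds after this patch */
	uint32_t reg = oversample_to_reg(factor);

	printf("encoded 0x%08x, decoded %d\n", (unsigned)reg,
	       reg_to_oversample(reg));
	return 0;
}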
-}; -  static int  intel_tv_detect_type(struct intel_tv *intel_tv,  		      struct drm_connector *connector) @@ -1233,16 +1663,18 @@ static void intel_tv_find_better_format(struct drm_connector *connector)  	const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);  	int i; -	if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == -		tv_mode->component_only) +	/* Component supports everything so we can keep the current mode */ +	if (intel_tv->type == DRM_MODE_CONNECTOR_Component)  		return; +	/* If the current mode is fine don't change it */ +	if (!tv_mode->component_only) +		return;  	for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { -		tv_mode = tv_modes + i; +		tv_mode = &tv_modes[i]; -		if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == -			tv_mode->component_only) +		if (!tv_mode->component_only)  			break;  	} @@ -1254,7 +1686,6 @@ intel_tv_detect(struct drm_connector *connector,  		struct drm_modeset_acquire_ctx *ctx,  		bool force)  { -	struct drm_display_mode mode;  	struct intel_tv *intel_tv = intel_attached_tv(connector);  	enum drm_connector_status status;  	int type; @@ -1263,13 +1694,11 @@ intel_tv_detect(struct drm_connector *connector,  		      connector->base.id, connector->name,  		      force); -	mode = reported_modes[0]; -  	if (force) {  		struct intel_load_detect_pipe tmp;  		int ret; -		ret = intel_get_load_detect_pipe(connector, &mode, &tmp, ctx); +		ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx);  		if (ret < 0)  			return ret; @@ -1293,84 +1722,85 @@ intel_tv_detect(struct drm_connector *connector,  }  static const struct input_res { -	const char *name; -	int w, h; +	u16 w, h;  } input_res_table[] = { -	{"640x480", 640, 480}, -	{"800x600", 800, 600}, -	{"1024x768", 1024, 768}, -	{"1280x1024", 1280, 1024}, -	{"848x480", 848, 480}, -	{"1280x720", 1280, 720}, -	{"1920x1080", 1920, 1080}, +	{ 640, 480 }, +	{ 800, 600 }, +	{ 1024, 768 }, +	{ 1280, 1024 }, +	{ 848, 480 }, +	{ 1280, 720 }, +	{ 1920, 1080 },  }; -/* - * Chose preferred mode  according to line number of TV format - */ +/* Choose preferred mode according to line number of TV format */ +static bool +intel_tv_is_preferred_mode(const struct drm_display_mode *mode, +			   const struct tv_mode *tv_mode) +{ +	int vdisplay = intel_tv_mode_vdisplay(tv_mode); + +	/* prefer 480 line modes for all SD TV modes */ +	if (vdisplay <= 576) +		vdisplay = 480; + +	return vdisplay == mode->vdisplay; +} +  static void -intel_tv_choose_preferred_modes(const struct tv_mode *tv_mode, -			       struct drm_display_mode *mode_ptr) +intel_tv_set_mode_type(struct drm_display_mode *mode, +		       const struct tv_mode *tv_mode)  { -	if (tv_mode->nbr_end < 480 && mode_ptr->vdisplay == 480) -		mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; -	else if (tv_mode->nbr_end > 480) { -		if (tv_mode->progressive == true && tv_mode->nbr_end < 720) { -			if (mode_ptr->vdisplay == 720) -				mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; -		} else if (mode_ptr->vdisplay == 1080) -				mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; -	} +	mode->type = DRM_MODE_TYPE_DRIVER; + +	if (intel_tv_is_preferred_mode(mode, tv_mode)) +		mode->type |= DRM_MODE_TYPE_PREFERRED;  }  static int  intel_tv_get_modes(struct drm_connector *connector)  { -	struct drm_display_mode *mode_ptr; +	struct drm_i915_private *dev_priv = to_i915(connector->dev);  	const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); -	int j, count = 0; -	u64 tmp; +	int i, count = 0; -	for (j = 0; j < ARRAY_SIZE(input_res_table); -	     j++) { -		const struct input_res 
*input = &input_res_table[j]; -		unsigned int hactive_s = input->w; -		unsigned int vactive_s = input->h; +	for (i = 0; i < ARRAY_SIZE(input_res_table); i++) { +		const struct input_res *input = &input_res_table[i]; +		struct drm_display_mode *mode; -		if (tv_mode->max_srcw && input->w > tv_mode->max_srcw) +		if (input->w > 1024 && +		    !tv_mode->progressive && +		    !tv_mode->component_only)  			continue; -		if (input->w > 1024 && (!tv_mode->progressive -					&& !tv_mode->component_only)) +		/* no vertical scaling with wide sources on gen3 */ +		if (IS_GEN(dev_priv, 3) && input->w > 1024 && +		    input->h > intel_tv_mode_vdisplay(tv_mode))  			continue; -		mode_ptr = drm_mode_create(connector->dev); -		if (!mode_ptr) +		mode = drm_mode_create(connector->dev); +		if (!mode)  			continue; -		strlcpy(mode_ptr->name, input->name, DRM_DISPLAY_MODE_LEN); - -		mode_ptr->hdisplay = hactive_s; -		mode_ptr->hsync_start = hactive_s + 1; -		mode_ptr->hsync_end = hactive_s + 64; -		if (mode_ptr->hsync_end <= mode_ptr->hsync_start) -			mode_ptr->hsync_end = mode_ptr->hsync_start + 1; -		mode_ptr->htotal = hactive_s + 96; - -		mode_ptr->vdisplay = vactive_s; -		mode_ptr->vsync_start = vactive_s + 1; -		mode_ptr->vsync_end = vactive_s + 32; -		if (mode_ptr->vsync_end <= mode_ptr->vsync_start) -			mode_ptr->vsync_end = mode_ptr->vsync_start  + 1; -		mode_ptr->vtotal = vactive_s + 33; - -		tmp = mul_u32_u32(tv_mode->refresh, mode_ptr->vtotal); -		tmp *= mode_ptr->htotal; -		tmp = div_u64(tmp, 1000000); -		mode_ptr->clock = (int) tmp; - -		mode_ptr->type = DRM_MODE_TYPE_DRIVER; -		intel_tv_choose_preferred_modes(tv_mode, mode_ptr); -		drm_mode_probed_add(connector, mode_ptr); + +		/* +		 * We take the TV mode and scale it to look +		 * like it had the expected h/vdisplay. This +		 * provides the most information to userspace +		 * about the actual timings of the mode. We +		 * do ignore the margins though. 
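/*
 * Editorial sketch, not part of the patch: intel_tv_get_modes() now builds its
 * probed modes by taking the real TV timings and stretching them to each entry
 * of input_res_table[], which is what intel_tv_scale_mode_horiz/vert() do. The
 * standalone function below mirrors the horizontal case with zero margins:
 * every horizontal parameter and the pixel clock scale by the same ratio, so
 * the line rate and refresh rate are preserved. The numbers in main() are
 * made up for illustration.
 */
#include <stdio.h>

struct simple_mode {
	int clock;	/* kHz */
	int hdisplay, hsync_start, hsync_end, htotal;
};

static void scale_mode_horiz(struct simple_mode *m, int hdisplay)
{
	int new_htotal = m->htotal * hdisplay / m->hdisplay;

	/* scale the clock first, while htotal still holds the old value */
	m->clock = m->clock * new_htotal / m->htotal;
	m->hsync_start = hdisplay +
		(m->hsync_start - m->hdisplay) * new_htotal / m->htotal;
	m->hsync_end = hdisplay +
		(m->hsync_end - m->hdisplay) * new_htotal / m->htotal;
	m->hdisplay = hdisplay;
	m->htotal = new_htotal;
}

int main(void)
{
	/* made-up 720-wide mode stretched to advertise a 1024-wide source */
	struct simple_mode m = {
		.clock = 13500, .hdisplay = 720,
		.hsync_start = 736, .hsync_end = 800, .htotal = 858,
	};

	scale_mode_horiz(&m, 1024);
	printf("%d/%d/%d/%d @ %d kHz\n",
	       m.hdisplay, m.hsync_start, m.hsync_end, m.htotal, m.clock);
	return 0;
}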
+		 */ +		intel_tv_mode_to_mode(mode, tv_mode); +		if (count == 0) { +			DRM_DEBUG_KMS("TV mode:\n"); +			drm_mode_debug_printmodeline(mode); +		} +		intel_tv_scale_mode_horiz(mode, input->w, 0, 0); +		intel_tv_scale_mode_vert(mode, input->h, 0, 0); +		intel_tv_set_mode_type(mode, tv_mode); + +		drm_mode_set_name(mode); + +		drm_mode_probed_add(connector, mode);  		count++;  	} @@ -1383,7 +1813,7 @@ static const struct drm_connector_funcs intel_tv_connector_funcs = {  	.destroy = intel_connector_destroy,  	.fill_modes = drm_helper_probe_single_connector_modes,  	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, +	.atomic_duplicate_state = intel_tv_connector_duplicate_state,  };  static int intel_tv_atomic_check(struct drm_connector *connector, @@ -1530,11 +1960,15 @@ intel_tv_init(struct drm_i915_private *dev_priv)  	connector->doublescan_allowed = false;  	/* Create TV properties then attach current values */ -	for (i = 0; i < ARRAY_SIZE(tv_modes); i++) +	for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { +		/* 1080p50/1080p60 not supported on gen3 */ +		if (IS_GEN(dev_priv, 3) && +		    tv_modes[i].oversample == 1) +			break; +  		tv_format_names[i] = tv_modes[i].name; -	drm_mode_create_tv_properties(dev, -				      ARRAY_SIZE(tv_modes), -				      tv_format_names); +	} +	drm_mode_create_tv_properties(dev, i, tv_format_names);  	drm_object_attach_property(&connector->base, dev->mode_config.tv_mode_property,  				   state->tv.mode); diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 3210ad4e08f7..15f4a6dee5aa 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -142,7 +142,8 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)  }  static void -__wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) +wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, +		   u32 val)  {  	struct i915_wa wa = {  		.reg = reg, @@ -153,16 +154,32 @@ __wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)  	_wa_add(wal, &wa);  } -#define WA_REG(addr, mask, val) __wa_add(wal, (addr), (mask), (val)) +static void +wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ +	wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val)); +} + +static void +wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ +	wa_write_masked_or(wal, reg, ~0, val); +} + +static void +wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ +	wa_write_masked_or(wal, reg, val, val); +}  #define WA_SET_BIT_MASKED(addr, mask) \ -	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) +	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))  #define WA_CLR_BIT_MASKED(addr, mask) \ -	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) +	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))  #define WA_SET_FIELD_MASKED(addr, mask, value) \ -	WA_REG(addr, (mask), _MASKED_FIELD(mask, value)) +	wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))  static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine)  { @@ -532,6 +549,12 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)  	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))  		WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,  				  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); + +	/* WaEnableFloatBlendOptimization:icl */ +	wa_write_masked_or(wal, +			   
GEN10_CACHE_MODE_SS, +			   0, /* write-only, so skip validation */ +			   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));  }  void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) @@ -603,43 +626,6 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)  }  static void -wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ -	struct i915_wa wa = { -		.reg = reg, -		.mask = val, -		.val = _MASKED_BIT_ENABLE(val) -	}; - -	_wa_add(wal, &wa); -} - -static void -wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, -		   u32 val) -{ -	struct i915_wa wa = { -		.reg = reg, -		.mask = mask, -		.val = val -	}; - -	_wa_add(wal, &wa); -} - -static void -wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ -	wa_write_masked_or(wal, reg, ~0, val); -} - -static void -wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ -	wa_write_masked_or(wal, reg, val, val); -} - -static void  gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)  {  	/* WaDisableKillLogic:bxt,skl,kbl */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index d0553bc69705..32dce7176f63 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -84,7 +84,7 @@ static int populate_ggtt(struct drm_i915_private *i915,  		return -EINVAL;  	} -	if (list_empty(&i915->ggtt.vm.inactive_list)) { +	if (list_empty(&i915->ggtt.vm.bound_list)) {  		pr_err("No objects on the GGTT inactive list!\n");  		return -EINVAL;  	} @@ -94,11 +94,14 @@ static int populate_ggtt(struct drm_i915_private *i915,  static void unpin_ggtt(struct drm_i915_private *i915)  { +	struct i915_ggtt *ggtt = &i915->ggtt;  	struct i915_vma *vma; -	list_for_each_entry(vma, &i915->ggtt.vm.inactive_list, vm_link) +	mutex_lock(&ggtt->vm.mutex); +	list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)  		if (vma->obj->mm.quirked)  			i915_vma_unpin(vma); +	mutex_unlock(&ggtt->vm.mutex);  }  static void cleanup_objects(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 06bde4a273cb..3850ef4a5ec8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1237,7 +1237,10 @@ static void track_vma_bind(struct i915_vma *vma)  	__i915_gem_object_pin_pages(obj);  	vma->pages = obj->mm.pages; -	list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + +	mutex_lock(&vma->vm->mutex); +	list_move_tail(&vma->vm_link, &vma->vm->bound_list); +	mutex_unlock(&vma->vm->mutex);  }  static int exercise_mock(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index a15713cae3b3..76b4f87fc853 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -13,6 +13,7 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */  selftest(uncore, intel_uncore_live_selftests)  selftest(workarounds, intel_workarounds_live_selftests)  selftest(requests, i915_request_live_selftests) +selftest(timelines, i915_timeline_live_selftests)  selftest(objects, i915_gem_object_live_selftests)  selftest(dmabuf, i915_gem_dmabuf_live_selftests)  selftest(coherency, i915_gem_coherency_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h 
b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 1b70208eeea7..88e5ab586337 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -15,8 +15,7 @@ selftest(scatterlist, scatterlist_mock_selftests)  selftest(syncmap, i915_syncmap_mock_selftests)  selftest(uncore, intel_uncore_mock_selftests)  selftest(engine, intel_engine_cs_mock_selftests) -selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) -selftest(timelines, i915_gem_timeline_mock_selftests) +selftest(timelines, i915_timeline_mock_selftests)  selftest(requests, i915_request_mock_selftests)  selftest(objects, i915_gem_object_mock_selftests)  selftest(dmabuf, i915_gem_dmabuf_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c index 1f415ce47018..716a3f19f030 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.c +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -41,18 +41,37 @@ u64 i915_prandom_u64_state(struct rnd_state *rnd)  	return x;  } -void i915_random_reorder(unsigned int *order, unsigned int count, -			 struct rnd_state *state) +void i915_prandom_shuffle(void *arr, size_t elsz, size_t count, +			  struct rnd_state *state)  { -	unsigned int i, j; +	char stack[128]; + +	if (WARN_ON(elsz > sizeof(stack) || count > U32_MAX)) +		return; + +	if (!elsz || !count) +		return; + +	/* Fisher-Yates shuffle courtesy of Knuth */ +	while (--count) { +		size_t swp; + +		swp = i915_prandom_u32_max_state(count + 1, state); +		if (swp == count) +			continue; -	for (i = 0; i < count; i++) { -		BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32)); -		j = i915_prandom_u32_max_state(count, state); -		swap(order[i], order[j]); +		memcpy(stack, arr + count * elsz, elsz); +		memcpy(arr + count * elsz, arr + swp * elsz, elsz); +		memcpy(arr + swp * elsz, stack, elsz);  	}  } +void i915_random_reorder(unsigned int *order, unsigned int count, +			 struct rnd_state *state) +{ +	i915_prandom_shuffle(order, sizeof(*order), count, state); +} +  unsigned int *i915_random_order(unsigned int count, struct rnd_state *state)  {  	unsigned int *order, i; diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h index 7dffedc501ca..8e1ff9c105b6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.h +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -54,4 +54,7 @@ void i915_random_reorder(unsigned int *order,  			 unsigned int count,  			 struct rnd_state *state); +void i915_prandom_shuffle(void *arr, size_t elsz, size_t count, +			  struct rnd_state *state); +  #endif /* !__I915_SELFTESTS_RANDOM_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 4d4b86b5fa11..6733dc5b6b4c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -25,9 +25,12 @@  #include <linux/prime_numbers.h>  #include "../i915_selftest.h" +#include "i915_random.h"  #include "igt_live_test.h" +#include "lib_sw_fence.h"  #include "mock_context.h" +#include "mock_drm.h"  #include "mock_gem_device.h"  static int igt_add_request(void *arg) @@ -247,6 +250,254 @@ err_context_0:  	return err;  } +struct smoketest { +	struct intel_engine_cs *engine; +	struct i915_gem_context **contexts; +	atomic_long_t num_waits, num_fences; +	int ncontexts, max_batch; +	struct i915_request *(*request_alloc)(struct i915_gem_context *, +					      struct intel_engine_cs *); +}; + +static 
struct i915_request * +__mock_request_alloc(struct i915_gem_context *ctx, +		     struct intel_engine_cs *engine) +{ +	return mock_request(engine, ctx, 0); +} + +static struct i915_request * +__live_request_alloc(struct i915_gem_context *ctx, +		     struct intel_engine_cs *engine) +{ +	return i915_request_alloc(engine, ctx); +} + +static int __igt_breadcrumbs_smoketest(void *arg) +{ +	struct smoketest *t = arg; +	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex; +	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1; +	const unsigned int total = 4 * t->ncontexts + 1; +	unsigned int num_waits = 0, num_fences = 0; +	struct i915_request **requests; +	I915_RND_STATE(prng); +	unsigned int *order; +	int err = 0; + +	/* +	 * A very simple test to catch the most egregious of list handling bugs. +	 * +	 * At its heart, we simply create oodles of requests running across +	 * multiple kthreads and enable signaling on them, for the sole purpose +	 * of stressing our breadcrumb handling. The only inspection we do is +	 * that the fences were marked as signaled. +	 */ + +	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL); +	if (!requests) +		return -ENOMEM; + +	order = i915_random_order(total, &prng); +	if (!order) { +		err = -ENOMEM; +		goto out_requests; +	} + +	while (!kthread_should_stop()) { +		struct i915_sw_fence *submit, *wait; +		unsigned int n, count; + +		submit = heap_fence_create(GFP_KERNEL); +		if (!submit) { +			err = -ENOMEM; +			break; +		} + +		wait = heap_fence_create(GFP_KERNEL); +		if (!wait) { +			i915_sw_fence_commit(submit); +			heap_fence_put(submit); +			err = ENOMEM; +			break; +		} + +		i915_random_reorder(order, total, &prng); +		count = 1 + i915_prandom_u32_max_state(max_batch, &prng); + +		for (n = 0; n < count; n++) { +			struct i915_gem_context *ctx = +				t->contexts[order[n] % t->ncontexts]; +			struct i915_request *rq; + +			mutex_lock(BKL); + +			rq = t->request_alloc(ctx, t->engine); +			if (IS_ERR(rq)) { +				mutex_unlock(BKL); +				err = PTR_ERR(rq); +				count = n; +				break; +			} + +			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, +							       submit, +							       GFP_KERNEL); + +			requests[n] = i915_request_get(rq); +			i915_request_add(rq); + +			mutex_unlock(BKL); + +			if (err >= 0) +				err = i915_sw_fence_await_dma_fence(wait, +								    &rq->fence, +								    0, +								    GFP_KERNEL); + +			if (err < 0) { +				i915_request_put(rq); +				count = n; +				break; +			} +		} + +		i915_sw_fence_commit(submit); +		i915_sw_fence_commit(wait); + +		if (!wait_event_timeout(wait->wait, +					i915_sw_fence_done(wait), +					HZ / 2)) { +			struct i915_request *rq = requests[count - 1]; + +			pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n", +			       count, +			       rq->fence.context, rq->fence.seqno, +			       t->engine->name); +			i915_gem_set_wedged(t->engine->i915); +			GEM_BUG_ON(!i915_request_completed(rq)); +			i915_sw_fence_wait(wait); +			err = -EIO; +		} + +		for (n = 0; n < count; n++) { +			struct i915_request *rq = requests[n]; + +			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, +				      &rq->fence.flags)) { +				pr_err("%llu:%llu was not signaled!\n", +				       rq->fence.context, rq->fence.seqno); +				err = -EINVAL; +			} + +			i915_request_put(rq); +		} + +		heap_fence_put(wait); +		heap_fence_put(submit); + +		if (err < 0) +			break; + +		num_fences += count; +		num_waits++; + +		cond_resched(); +	} + +	atomic_long_add(num_fences, &t->num_fences); +	
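For reference, a minimal standalone sketch of the Fisher-Yates shuffle that i915_prandom_shuffle() implements above and that this smoketest leans on via i915_random_reorder(). It is not part of the patch: it swaps the kernel's struct rnd_state for POSIX rand_r() (modulo bias and all) purely for illustration, and the helper and buffer names are made up.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void shuffle(void *arr, size_t elsz, size_t count, unsigned int *seed)
{
	char tmp[128];

	if (!elsz || !count || elsz > sizeof(tmp))
		return;

	/* walk backwards, swapping each slot with a random earlier (or same) one */
	while (--count) {
		size_t swp = (size_t)rand_r(seed) % (count + 1);

		if (swp == count)
			continue;

		memcpy(tmp, (char *)arr + count * elsz, elsz);
		memcpy((char *)arr + count * elsz, (char *)arr + swp * elsz, elsz);
		memcpy((char *)arr + swp * elsz, tmp, elsz);
	}
}

int main(void)
{
	unsigned int order[] = { 0, 1, 2, 3, 4, 5, 6, 7 };
	unsigned int seed = 42;
	size_t i;

	shuffle(order, sizeof(order[0]), sizeof(order) / sizeof(order[0]), &seed);
	for (i = 0; i < sizeof(order) / sizeof(order[0]); i++)
		printf("%u ", order[i]);
	printf("\n");
	return 0;
}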
atomic_long_add(num_waits, &t->num_waits); + +	kfree(order); +out_requests: +	kfree(requests); +	return err; +} + +static int mock_breadcrumbs_smoketest(void *arg) +{ +	struct drm_i915_private *i915 = arg; +	struct smoketest t = { +		.engine = i915->engine[RCS], +		.ncontexts = 1024, +		.max_batch = 1024, +		.request_alloc = __mock_request_alloc +	}; +	unsigned int ncpus = num_online_cpus(); +	struct task_struct **threads; +	unsigned int n; +	int ret = 0; + +	/* +	 * Smoketest our breadcrumb/signal handling for requests across multiple +	 * threads. A very simple test to only catch the most egregious of bugs. +	 * See __igt_breadcrumbs_smoketest(); +	 */ + +	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL); +	if (!threads) +		return -ENOMEM; + +	t.contexts = +		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL); +	if (!t.contexts) { +		ret = -ENOMEM; +		goto out_threads; +	} + +	mutex_lock(&t.engine->i915->drm.struct_mutex); +	for (n = 0; n < t.ncontexts; n++) { +		t.contexts[n] = mock_context(t.engine->i915, "mock"); +		if (!t.contexts[n]) { +			ret = -ENOMEM; +			goto out_contexts; +		} +	} +	mutex_unlock(&t.engine->i915->drm.struct_mutex); + +	for (n = 0; n < ncpus; n++) { +		threads[n] = kthread_run(__igt_breadcrumbs_smoketest, +					 &t, "igt/%d", n); +		if (IS_ERR(threads[n])) { +			ret = PTR_ERR(threads[n]); +			ncpus = n; +			break; +		} + +		get_task_struct(threads[n]); +	} + +	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); + +	for (n = 0; n < ncpus; n++) { +		int err; + +		err = kthread_stop(threads[n]); +		if (err < 0 && !ret) +			ret = err; + +		put_task_struct(threads[n]); +	} +	pr_info("Completed %lu waits for %lu fence across %d cpus\n", +		atomic_long_read(&t.num_waits), +		atomic_long_read(&t.num_fences), +		ncpus); + +	mutex_lock(&t.engine->i915->drm.struct_mutex); +out_contexts: +	for (n = 0; n < t.ncontexts; n++) { +		if (!t.contexts[n]) +			break; +		mock_context_close(t.contexts[n]); +	} +	mutex_unlock(&t.engine->i915->drm.struct_mutex); +	kfree(t.contexts); +out_threads: +	kfree(threads); + +	return ret; +} +  int i915_request_mock_selftests(void)  {  	static const struct i915_subtest tests[] = { @@ -254,6 +505,7 @@ int i915_request_mock_selftests(void)  		SUBTEST(igt_wait_request),  		SUBTEST(igt_fence_wait),  		SUBTEST(igt_request_rewind), +		SUBTEST(mock_breadcrumbs_smoketest),  	};  	struct drm_i915_private *i915;  	intel_wakeref_t wakeref; @@ -812,6 +1064,178 @@ out_unlock:  	return err;  } +static int +max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ +	struct i915_request *rq; +	int ret; + +	/* +	 * Before execlists, all contexts share the same ringbuffer. With +	 * execlists, each context/engine has a separate ringbuffer and +	 * for the purposes of this test, inexhaustible. +	 * +	 * For the global ringbuffer though, we have to be very careful +	 * that we do not wrap while preventing the execution of requests +	 * with a unsignaled fence. +	 */ +	if (HAS_EXECLISTS(ctx->i915)) +		return INT_MAX; + +	rq = i915_request_alloc(engine, ctx); +	if (IS_ERR(rq)) { +		ret = PTR_ERR(rq); +	} else { +		int sz; + +		ret = rq->ring->size - rq->reserved_space; +		i915_request_add(rq); + +		sz = rq->ring->emit - rq->head; +		if (sz < 0) +			sz += rq->ring->size; +		ret /= sz; +		ret /= 2; /* leave half spare, in case of emergency! 
*/ +	} + +	return ret; +} + +static int live_breadcrumbs_smoketest(void *arg) +{ +	struct drm_i915_private *i915 = arg; +	struct smoketest t[I915_NUM_ENGINES]; +	unsigned int ncpus = num_online_cpus(); +	unsigned long num_waits, num_fences; +	struct intel_engine_cs *engine; +	struct task_struct **threads; +	struct igt_live_test live; +	enum intel_engine_id id; +	intel_wakeref_t wakeref; +	struct drm_file *file; +	unsigned int n; +	int ret = 0; + +	/* +	 * Smoketest our breadcrumb/signal handling for requests across multiple +	 * threads. A very simple test to only catch the most egregious of bugs. +	 * See __igt_breadcrumbs_smoketest(); +	 * +	 * On real hardware this time. +	 */ + +	wakeref = intel_runtime_pm_get(i915); + +	file = mock_file(i915); +	if (IS_ERR(file)) { +		ret = PTR_ERR(file); +		goto out_rpm; +	} + +	threads = kcalloc(ncpus * I915_NUM_ENGINES, +			  sizeof(*threads), +			  GFP_KERNEL); +	if (!threads) { +		ret = -ENOMEM; +		goto out_file; +	} + +	memset(&t[0], 0, sizeof(t[0])); +	t[0].request_alloc = __live_request_alloc; +	t[0].ncontexts = 64; +	t[0].contexts = kmalloc_array(t[0].ncontexts, +				      sizeof(*t[0].contexts), +				      GFP_KERNEL); +	if (!t[0].contexts) { +		ret = -ENOMEM; +		goto out_threads; +	} + +	mutex_lock(&i915->drm.struct_mutex); +	for (n = 0; n < t[0].ncontexts; n++) { +		t[0].contexts[n] = live_context(i915, file); +		if (!t[0].contexts[n]) { +			ret = -ENOMEM; +			goto out_contexts; +		} +	} + +	ret = igt_live_test_begin(&live, i915, __func__, ""); +	if (ret) +		goto out_contexts; + +	for_each_engine(engine, i915, id) { +		t[id] = t[0]; +		t[id].engine = engine; +		t[id].max_batch = max_batches(t[0].contexts[0], engine); +		if (t[id].max_batch < 0) { +			ret = t[id].max_batch; +			mutex_unlock(&i915->drm.struct_mutex); +			goto out_flush; +		} +		/* One ring interleaved between requests from all cpus */ +		t[id].max_batch /= num_online_cpus() + 1; +		pr_debug("Limiting batches to %d requests on %s\n", +			 t[id].max_batch, engine->name); + +		for (n = 0; n < ncpus; n++) { +			struct task_struct *tsk; + +			tsk = kthread_run(__igt_breadcrumbs_smoketest, +					  &t[id], "igt/%d.%d", id, n); +			if (IS_ERR(tsk)) { +				ret = PTR_ERR(tsk); +				mutex_unlock(&i915->drm.struct_mutex); +				goto out_flush; +			} + +			get_task_struct(tsk); +			threads[id * ncpus + n] = tsk; +		} +	} +	mutex_unlock(&i915->drm.struct_mutex); + +	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); + +out_flush: +	num_waits = 0; +	num_fences = 0; +	for_each_engine(engine, i915, id) { +		for (n = 0; n < ncpus; n++) { +			struct task_struct *tsk = threads[id * ncpus + n]; +			int err; + +			if (!tsk) +				continue; + +			err = kthread_stop(tsk); +			if (err < 0 && !ret) +				ret = err; + +			put_task_struct(tsk); +		} + +		num_waits += atomic_long_read(&t[id].num_waits); +		num_fences += atomic_long_read(&t[id].num_fences); +	} +	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", +		num_waits, num_fences, RUNTIME_INFO(i915)->num_rings, ncpus); + +	mutex_lock(&i915->drm.struct_mutex); +	ret = igt_live_test_end(&live) ?: ret; +out_contexts: +	mutex_unlock(&i915->drm.struct_mutex); +	kfree(t[0].contexts); +out_threads: +	kfree(threads); +out_file: +	mock_file_free(i915, file); +out_rpm: +	intel_runtime_pm_put(i915, wakeref); + +	return ret; +} +  int i915_request_live_selftests(struct drm_i915_private *i915)  {  	static const struct i915_subtest tests[] = { @@ -819,6 +1243,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915)  		
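As a side note, the per-thread batch budget that max_batches() and live_breadcrumbs_smoketest() compute above reduces to simple integer arithmetic: usable ring bytes divided by the measured size of one request, halved as a safety margin, then split across one thread per CPU plus one. A toy calculation with made-up numbers (the real values come from rq->ring at runtime):

#include <stdio.h>

int main(void)
{
	unsigned int ring_size = 16 * 1024; /* hypothetical legacy ring size, bytes */
	unsigned int reserved = 192;        /* hypothetical space held back for the final breadcrumb */
	unsigned int request_sz = 256;      /* hypothetical bytes emitted per request */
	unsigned int ncpus = 8;

	unsigned int budget = (ring_size - reserved) / request_sz / 2; /* keep half spare */
	unsigned int per_thread = budget / (ncpus + 1); /* one ring interleaved between all cpus */

	printf("requests in flight per thread: %u\n", per_thread);
	return 0;
}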
SUBTEST(live_all_engines),  		SUBTEST(live_sequential_engines),  		SUBTEST(live_empty_request), +		SUBTEST(live_breadcrumbs_smoketest),  	};  	if (i915_terminally_wedged(&i915->gpu_error)) diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 86c54ea37f48..10ef0e636a24 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -197,6 +197,49 @@ int i915_live_selftests(struct pci_dev *pdev)  	return 0;  } +static bool apply_subtest_filter(const char *caller, const char *name) +{ +	char *filter, *sep, *tok; +	bool result = true; + +	filter = kstrdup(i915_selftest.filter, GFP_KERNEL); +	for (sep = filter; (tok = strsep(&sep, ","));) { +		bool allow = true; +		char *sl; + +		if (*tok == '!') { +			allow = false; +			tok++; +		} + +		if (*tok == '\0') +			continue; + +		sl = strchr(tok, '/'); +		if (sl) { +			*sl++ = '\0'; +			if (strcmp(tok, caller)) { +				if (allow) +					result = false; +				continue; +			} +			tok = sl; +		} + +		if (strcmp(tok, name)) { +			if (allow) +				result = false; +			continue; +		} + +		result = allow; +		break; +	} +	kfree(filter); + +	return result; +} +  int __i915_subtests(const char *caller,  		    const struct i915_subtest *st,  		    unsigned int count, @@ -209,6 +252,9 @@ int __i915_subtests(const char *caller,  		if (signal_pending(current))  			return -EINTR; +		if (!apply_subtest_filter(caller, st->name)) +			continue; +  		pr_debug(DRIVER_NAME ": Running %s/%s\n", caller, st->name);  		GEM_TRACE("Running %s/%s\n", caller, st->name); @@ -244,6 +290,7 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...)  module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400);  module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); +module_param_named(st_filter, i915_selftest.filter, charp, 0400);  module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400);  MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then exit module)"); diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index 19f1c6a5c8fb..12ea69b1a1e5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -4,12 +4,155 @@   * Copyright © 2017-2018 Intel Corporation   */ +#include <linux/prime_numbers.h> +  #include "../i915_selftest.h"  #include "i915_random.h" +#include "igt_flush_test.h"  #include "mock_gem_device.h"  #include "mock_timeline.h" +static struct page *hwsp_page(struct i915_timeline *tl) +{ +	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj; + +	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); +	return sg_page(obj->mm.pages->sgl); +} + +static unsigned long hwsp_cacheline(struct i915_timeline *tl) +{ +	unsigned long address = (unsigned long)page_address(hwsp_page(tl)); + +	return (address + tl->hwsp_offset) / CACHELINE_BYTES; +} + +#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) + +struct mock_hwsp_freelist { +	struct drm_i915_private *i915; +	struct radix_tree_root cachelines; +	struct i915_timeline **history; +	unsigned long count, max; +	struct rnd_state prng; +}; + +enum { +	SHUFFLE = BIT(0), +}; + +static void __mock_hwsp_record(struct mock_hwsp_freelist *state, +			       unsigned int idx, +			       struct i915_timeline *tl) +{ +	tl = xchg(&state->history[idx], tl); +	if (tl) { +		
radix_tree_delete(&state->cachelines, hwsp_cacheline(tl)); +		i915_timeline_put(tl); +	} +} + +static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, +				unsigned int count, +				unsigned int flags) +{ +	struct i915_timeline *tl; +	unsigned int idx; + +	while (count--) { +		unsigned long cacheline; +		int err; + +		tl = i915_timeline_create(state->i915, "mock", NULL); +		if (IS_ERR(tl)) +			return PTR_ERR(tl); + +		cacheline = hwsp_cacheline(tl); +		err = radix_tree_insert(&state->cachelines, cacheline, tl); +		if (err) { +			if (err == -EEXIST) { +				pr_err("HWSP cacheline %lu already used; duplicate allocation!\n", +				       cacheline); +			} +			i915_timeline_put(tl); +			return err; +		} + +		idx = state->count++ % state->max; +		__mock_hwsp_record(state, idx, tl); +	} + +	if (flags & SHUFFLE) +		i915_prandom_shuffle(state->history, +				     sizeof(*state->history), +				     min(state->count, state->max), +				     &state->prng); + +	count = i915_prandom_u32_max_state(min(state->count, state->max), +					   &state->prng); +	while (count--) { +		idx = --state->count % state->max; +		__mock_hwsp_record(state, idx, NULL); +	} + +	return 0; +} + +static int mock_hwsp_freelist(void *arg) +{ +	struct mock_hwsp_freelist state; +	const struct { +		const char *name; +		unsigned int flags; +	} phases[] = { +		{ "linear", 0 }, +		{ "shuffled", SHUFFLE }, +		{ }, +	}, *p; +	unsigned int na; +	int err = 0; + +	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); +	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); + +	state.i915 = mock_gem_device(); +	if (!state.i915) +		return -ENOMEM; + +	/* +	 * Create a bunch of timelines and check that their HWSP do not overlap. +	 * Free some, and try again. +	 */ + +	state.max = PAGE_SIZE / sizeof(*state.history); +	state.count = 0; +	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL); +	if (!state.history) { +		err = -ENOMEM; +		goto err_put; +	} + +	mutex_lock(&state.i915->drm.struct_mutex); +	for (p = phases; p->name; p++) { +		pr_debug("%s(%s)\n", __func__, p->name); +		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { +			err = __mock_hwsp_timeline(&state, na, p->flags); +			if (err) +				goto out; +		} +	} + +out: +	for (na = 0; na < state.max; na++) +		__mock_hwsp_record(&state, na, NULL); +	mutex_unlock(&state.i915->drm.struct_mutex); +	kfree(state.history); +err_put: +	drm_dev_put(&state.i915->drm); +	return err; +} +  struct __igt_sync {  	const char *name;  	u32 seqno; @@ -256,12 +399,331 @@ static int bench_sync(void *arg)  	return 0;  } -int i915_gem_timeline_mock_selftests(void) +int i915_timeline_mock_selftests(void)  {  	static const struct i915_subtest tests[] = { +		SUBTEST(mock_hwsp_freelist),  		SUBTEST(igt_sync),  		SUBTEST(bench_sync),  	};  	return i915_subtests(tests, NULL);  } + +static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) +{ +	u32 *cs; + +	cs = intel_ring_begin(rq, 4); +	if (IS_ERR(cs)) +		return PTR_ERR(cs); + +	if (INTEL_GEN(rq->i915) >= 8) { +		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; +		*cs++ = addr; +		*cs++ = 0; +		*cs++ = value; +	} else if (INTEL_GEN(rq->i915) >= 4) { +		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; +		*cs++ = 0; +		*cs++ = addr; +		*cs++ = value; +	} else { +		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; +		*cs++ = addr; +		*cs++ = value; +		*cs++ = MI_NOOP; +	} + +	intel_ring_advance(rq, cs); + +	return 0; +} + +static struct i915_request * +tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, 
u32 value) +{ +	struct i915_request *rq; +	int err; + +	lockdep_assert_held(&tl->i915->drm.struct_mutex); /* lazy rq refs */ + +	err = i915_timeline_pin(tl); +	if (err) { +		rq = ERR_PTR(err); +		goto out; +	} + +	rq = i915_request_alloc(engine, engine->i915->kernel_context); +	if (IS_ERR(rq)) +		goto out_unpin; + +	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); +	i915_request_add(rq); +	if (err) +		rq = ERR_PTR(err); + +out_unpin: +	i915_timeline_unpin(tl); +out: +	if (IS_ERR(rq)) +		pr_err("Failed to write to timeline!\n"); +	return rq; +} + +static struct i915_timeline * +checked_i915_timeline_create(struct drm_i915_private *i915) +{ +	struct i915_timeline *tl; + +	tl = i915_timeline_create(i915, "live", NULL); +	if (IS_ERR(tl)) +		return tl; + +	if (*tl->hwsp_seqno != tl->seqno) { +		pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", +		       *tl->hwsp_seqno, tl->seqno); +		i915_timeline_put(tl); +		return ERR_PTR(-EINVAL); +	} + +	return tl; +} + +static int live_hwsp_engine(void *arg) +{ +#define NUM_TIMELINES 4096 +	struct drm_i915_private *i915 = arg; +	struct i915_timeline **timelines; +	struct intel_engine_cs *engine; +	enum intel_engine_id id; +	intel_wakeref_t wakeref; +	unsigned long count, n; +	int err = 0; + +	/* +	 * Create a bunch of timelines and check we can write +	 * independently to each of their breadcrumb slots. +	 */ + +	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, +				   sizeof(*timelines), +				   GFP_KERNEL); +	if (!timelines) +		return -ENOMEM; + +	mutex_lock(&i915->drm.struct_mutex); +	wakeref = intel_runtime_pm_get(i915); + +	count = 0; +	for_each_engine(engine, i915, id) { +		if (!intel_engine_can_store_dword(engine)) +			continue; + +		for (n = 0; n < NUM_TIMELINES; n++) { +			struct i915_timeline *tl; +			struct i915_request *rq; + +			tl = checked_i915_timeline_create(i915); +			if (IS_ERR(tl)) { +				err = PTR_ERR(tl); +				goto out; +			} + +			rq = tl_write(tl, engine, count); +			if (IS_ERR(rq)) { +				i915_timeline_put(tl); +				err = PTR_ERR(rq); +				goto out; +			} + +			timelines[count++] = tl; +		} +	} + +out: +	if (igt_flush_test(i915, I915_WAIT_LOCKED)) +		err = -EIO; + +	for (n = 0; n < count; n++) { +		struct i915_timeline *tl = timelines[n]; + +		if (!err && *tl->hwsp_seqno != n) { +			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", +			       n, *tl->hwsp_seqno); +			err = -EINVAL; +		} +		i915_timeline_put(tl); +	} + +	intel_runtime_pm_put(i915, wakeref); +	mutex_unlock(&i915->drm.struct_mutex); + +	kvfree(timelines); + +	return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_alternate(void *arg) +{ +#define NUM_TIMELINES 4096 +	struct drm_i915_private *i915 = arg; +	struct i915_timeline **timelines; +	struct intel_engine_cs *engine; +	enum intel_engine_id id; +	intel_wakeref_t wakeref; +	unsigned long count, n; +	int err = 0; + +	/* +	 * Create a bunch of timelines and check we can write +	 * independently to each of their breadcrumb slots with adjacent +	 * engines. 
+	 */ + +	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, +				   sizeof(*timelines), +				   GFP_KERNEL); +	if (!timelines) +		return -ENOMEM; + +	mutex_lock(&i915->drm.struct_mutex); +	wakeref = intel_runtime_pm_get(i915); + +	count = 0; +	for (n = 0; n < NUM_TIMELINES; n++) { +		for_each_engine(engine, i915, id) { +			struct i915_timeline *tl; +			struct i915_request *rq; + +			if (!intel_engine_can_store_dword(engine)) +				continue; + +			tl = checked_i915_timeline_create(i915); +			if (IS_ERR(tl)) { +				err = PTR_ERR(tl); +				goto out; +			} + +			rq = tl_write(tl, engine, count); +			if (IS_ERR(rq)) { +				i915_timeline_put(tl); +				err = PTR_ERR(rq); +				goto out; +			} + +			timelines[count++] = tl; +		} +	} + +out: +	if (igt_flush_test(i915, I915_WAIT_LOCKED)) +		err = -EIO; + +	for (n = 0; n < count; n++) { +		struct i915_timeline *tl = timelines[n]; + +		if (!err && *tl->hwsp_seqno != n) { +			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", +			       n, *tl->hwsp_seqno); +			err = -EINVAL; +		} +		i915_timeline_put(tl); +	} + +	intel_runtime_pm_put(i915, wakeref); +	mutex_unlock(&i915->drm.struct_mutex); + +	kvfree(timelines); + +	return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_recycle(void *arg) +{ +	struct drm_i915_private *i915 = arg; +	struct intel_engine_cs *engine; +	enum intel_engine_id id; +	intel_wakeref_t wakeref; +	unsigned long count; +	int err = 0; + +	/* +	 * Check seqno writes into one timeline at a time. We expect to +	 * recycle the breadcrumb slot between iterations and neither +	 * want to confuse ourselves or the GPU. +	 */ + +	mutex_lock(&i915->drm.struct_mutex); +	wakeref = intel_runtime_pm_get(i915); + +	count = 0; +	for_each_engine(engine, i915, id) { +		IGT_TIMEOUT(end_time); + +		if (!intel_engine_can_store_dword(engine)) +			continue; + +		do { +			struct i915_timeline *tl; +			struct i915_request *rq; + +			tl = checked_i915_timeline_create(i915); +			if (IS_ERR(tl)) { +				err = PTR_ERR(tl); +				goto out; +			} + +			rq = tl_write(tl, engine, count); +			if (IS_ERR(rq)) { +				i915_timeline_put(tl); +				err = PTR_ERR(rq); +				goto out; +			} + +			if (i915_request_wait(rq, +					      I915_WAIT_LOCKED, +					      HZ / 5) < 0) { +				pr_err("Wait for timeline writes timed out!\n"); +				i915_timeline_put(tl); +				err = -EIO; +				goto out; +			} + +			if (*tl->hwsp_seqno != count) { +				pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", +				       count, *tl->hwsp_seqno); +				err = -EINVAL; +			} + +			i915_timeline_put(tl); +			count++; + +			if (err) +				goto out; + +			i915_timelines_park(i915); /* Encourage recycling! 
*/ +		} while (!__igt_timeout(end_time, NULL)); +	} + +out: +	if (igt_flush_test(i915, I915_WAIT_LOCKED)) +		err = -EIO; +	intel_runtime_pm_put(i915, wakeref); +	mutex_unlock(&i915->drm.struct_mutex); + +	return err; +} + +int i915_timeline_live_selftests(struct drm_i915_private *i915) +{ +	static const struct i915_subtest tests[] = { +		SUBTEST(live_hwsp_recycle), +		SUBTEST(live_hwsp_engine), +		SUBTEST(live_hwsp_alternate), +	}; + +	return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index f0a32edfb9b1..cf1de82741fa 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -672,7 +672,7 @@ static int igt_vma_partial(void *arg)  		}  		count = 0; -		list_for_each_entry(vma, &obj->vma_list, obj_link) +		list_for_each_entry(vma, &obj->vma.list, obj_link)  			count++;  		if (count != nvma) {  			pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n", @@ -701,7 +701,7 @@ static int igt_vma_partial(void *arg)  		i915_vma_unpin(vma);  		count = 0; -		list_for_each_entry(vma, &obj->vma_list, obj_link) +		list_for_each_entry(vma, &obj->vma.list, obj_link)  			count++;  		if (count != nvma) {  			pr_err("(%s) allocated an extra full vma!\n", p->name); diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 5deb485fb942..3e902761cd16 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -35,7 +35,6 @@ int igt_live_test_begin(struct igt_live_test *t,  		return err;  	} -	i915->gpu_error.missed_irq_rings = 0;  	t->reset_global = i915_reset_count(&i915->gpu_error);  	for_each_engine(engine, i915, id) @@ -75,11 +74,5 @@ int igt_live_test_end(struct igt_live_test *t)  		return -EIO;  	} -	if (i915->gpu_error.missed_irq_rings) { -		pr_err("%s(%s): Missed interrupts on engines %lx\n", -		       t->func, t->name, i915->gpu_error.missed_irq_rings); -		return -EIO; -	} -  	return 0;  } diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 0e70df0230b8..9ebd9225684e 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -185,11 +185,6 @@ void igt_spinner_fini(struct igt_spinner *spin)  bool igt_wait_for_spinner(struct igt_spinner *spin, struct i915_request *rq)  { -	if (!wait_event_timeout(rq->execute, -				READ_ONCE(rq->global_seqno), -				msecs_to_jiffies(10))) -		return false; -  	return !(wait_for_us(i915_seqno_passed(hws_seqno(spin, rq),  					       rq->fence.seqno),  			     10) && diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c deleted file mode 100644 index f03b407fdbe2..000000000000 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice 
(including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "../i915_selftest.h" -#include "i915_random.h" - -#include "mock_gem_device.h" -#include "mock_engine.h" - -static int check_rbtree(struct intel_engine_cs *engine, -			const unsigned long *bitmap, -			const struct intel_wait *waiters, -			const int count) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	struct rb_node *rb; -	int n; - -	if (&b->irq_wait->node != rb_first(&b->waiters)) { -		pr_err("First waiter does not match first element of wait-tree\n"); -		return -EINVAL; -	} - -	n = find_first_bit(bitmap, count); -	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { -		struct intel_wait *w = container_of(rb, typeof(*w), node); -		int idx = w - waiters; - -		if (!test_bit(idx, bitmap)) { -			pr_err("waiter[%d, seqno=%d] removed but still in wait-tree\n", -			       idx, w->seqno); -			return -EINVAL; -		} - -		if (n != idx) { -			pr_err("waiter[%d, seqno=%d] does not match expected next element in tree [%d]\n", -			       idx, w->seqno, n); -			return -EINVAL; -		} - -		n = find_next_bit(bitmap, count, n + 1); -	} - -	return 0; -} - -static int check_completion(struct intel_engine_cs *engine, -			    const unsigned long *bitmap, -			    const struct intel_wait *waiters, -			    const int count) -{ -	int n; - -	for (n = 0; n < count; n++) { -		if (intel_wait_complete(&waiters[n]) != !!test_bit(n, bitmap)) -			continue; - -		pr_err("waiter[%d, seqno=%d] is %s, but expected %s\n", -		       n, waiters[n].seqno, -		       intel_wait_complete(&waiters[n]) ? "complete" : "active", -		       test_bit(n, bitmap) ? "active" : "complete"); -		return -EINVAL; -	} - -	return 0; -} - -static int check_rbtree_empty(struct intel_engine_cs *engine) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; - -	if (b->irq_wait) { -		pr_err("Empty breadcrumbs still has a waiter\n"); -		return -EINVAL; -	} - -	if (!RB_EMPTY_ROOT(&b->waiters)) { -		pr_err("Empty breadcrumbs, but wait-tree not empty\n"); -		return -EINVAL; -	} - -	return 0; -} - -static int igt_random_insert_remove(void *arg) -{ -	const u32 seqno_bias = 0x1000; -	I915_RND_STATE(prng); -	struct intel_engine_cs *engine = arg; -	struct intel_wait *waiters; -	const int count = 4096; -	unsigned int *order; -	unsigned long *bitmap; -	int err = -ENOMEM; -	int n; - -	mock_engine_reset(engine); - -	waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); -	if (!waiters) -		goto out_engines; - -	bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), -			 GFP_KERNEL); -	if (!bitmap) -		goto out_waiters; - -	order = i915_random_order(count, &prng); -	if (!order) -		goto out_bitmap; - -	for (n = 0; n < count; n++) -		intel_wait_init_for_seqno(&waiters[n], seqno_bias + n); - -	err = check_rbtree(engine, bitmap, waiters, count); -	if (err) -		goto out_order; - -	/* Add and remove waiters into the rbtree in random order. At each -	 * step, we verify that the rbtree is correctly ordered. 
-	 */ -	for (n = 0; n < count; n++) { -		int i = order[n]; - -		intel_engine_add_wait(engine, &waiters[i]); -		__set_bit(i, bitmap); - -		err = check_rbtree(engine, bitmap, waiters, count); -		if (err) -			goto out_order; -	} - -	i915_random_reorder(order, count, &prng); -	for (n = 0; n < count; n++) { -		int i = order[n]; - -		intel_engine_remove_wait(engine, &waiters[i]); -		__clear_bit(i, bitmap); - -		err = check_rbtree(engine, bitmap, waiters, count); -		if (err) -			goto out_order; -	} - -	err = check_rbtree_empty(engine); -out_order: -	kfree(order); -out_bitmap: -	kfree(bitmap); -out_waiters: -	kvfree(waiters); -out_engines: -	mock_engine_flush(engine); -	return err; -} - -static int igt_insert_complete(void *arg) -{ -	const u32 seqno_bias = 0x1000; -	struct intel_engine_cs *engine = arg; -	struct intel_wait *waiters; -	const int count = 4096; -	unsigned long *bitmap; -	int err = -ENOMEM; -	int n, m; - -	mock_engine_reset(engine); - -	waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); -	if (!waiters) -		goto out_engines; - -	bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), -			 GFP_KERNEL); -	if (!bitmap) -		goto out_waiters; - -	for (n = 0; n < count; n++) { -		intel_wait_init_for_seqno(&waiters[n], n + seqno_bias); -		intel_engine_add_wait(engine, &waiters[n]); -		__set_bit(n, bitmap); -	} -	err = check_rbtree(engine, bitmap, waiters, count); -	if (err) -		goto out_bitmap; - -	/* On each step, we advance the seqno so that several waiters are then -	 * complete (we increase the seqno by increasingly larger values to -	 * retire more and more waiters at once). All retired waiters should -	 * be woken and removed from the rbtree, and so that we check. -	 */ -	for (n = 0; n < count; n = m) { -		int seqno = 2 * n; - -		GEM_BUG_ON(find_first_bit(bitmap, count) != n); - -		if (intel_wait_complete(&waiters[n])) { -			pr_err("waiter[%d, seqno=%d] completed too early\n", -			       n, waiters[n].seqno); -			err = -EINVAL; -			goto out_bitmap; -		} - -		/* complete the following waiters */ -		mock_seqno_advance(engine, seqno + seqno_bias); -		for (m = n; m <= seqno; m++) { -			if (m == count) -				break; - -			GEM_BUG_ON(!test_bit(m, bitmap)); -			__clear_bit(m, bitmap); -		} - -		intel_engine_remove_wait(engine, &waiters[n]); -		RB_CLEAR_NODE(&waiters[n].node); - -		err = check_rbtree(engine, bitmap, waiters, count); -		if (err) { -			pr_err("rbtree corrupt after seqno advance to %d\n", -			       seqno + seqno_bias); -			goto out_bitmap; -		} - -		err = check_completion(engine, bitmap, waiters, count); -		if (err) { -			pr_err("completions after seqno advance to %d failed\n", -			       seqno + seqno_bias); -			goto out_bitmap; -		} -	} - -	err = check_rbtree_empty(engine); -out_bitmap: -	kfree(bitmap); -out_waiters: -	kvfree(waiters); -out_engines: -	mock_engine_flush(engine); -	return err; -} - -struct igt_wakeup { -	struct task_struct *tsk; -	atomic_t *ready, *set, *done; -	struct intel_engine_cs *engine; -	unsigned long flags; -#define STOP 0 -#define IDLE 1 -	wait_queue_head_t *wq; -	u32 seqno; -}; - -static bool wait_for_ready(struct igt_wakeup *w) -{ -	DEFINE_WAIT(ready); - -	set_bit(IDLE, &w->flags); -	if (atomic_dec_and_test(w->done)) -		wake_up_var(w->done); - -	if (test_bit(STOP, &w->flags)) -		goto out; - -	for (;;) { -		prepare_to_wait(w->wq, &ready, TASK_INTERRUPTIBLE); -		if (atomic_read(w->ready) == 0) -			break; - -		schedule(); -	} -	finish_wait(w->wq, &ready); - -out: -	clear_bit(IDLE, &w->flags); -	if (atomic_dec_and_test(w->set)) -		
wake_up_var(w->set); - -	return !test_bit(STOP, &w->flags); -} - -static int igt_wakeup_thread(void *arg) -{ -	struct igt_wakeup *w = arg; -	struct intel_wait wait; - -	while (wait_for_ready(w)) { -		GEM_BUG_ON(kthread_should_stop()); - -		intel_wait_init_for_seqno(&wait, w->seqno); -		intel_engine_add_wait(w->engine, &wait); -		for (;;) { -			set_current_state(TASK_UNINTERRUPTIBLE); -			if (i915_seqno_passed(intel_engine_get_seqno(w->engine), -					      w->seqno)) -				break; - -			if (test_bit(STOP, &w->flags)) /* emergency escape */ -				break; - -			schedule(); -		} -		intel_engine_remove_wait(w->engine, &wait); -		__set_current_state(TASK_RUNNING); -	} - -	return 0; -} - -static void igt_wake_all_sync(atomic_t *ready, -			      atomic_t *set, -			      atomic_t *done, -			      wait_queue_head_t *wq, -			      int count) -{ -	atomic_set(set, count); -	atomic_set(ready, 0); -	wake_up_all(wq); - -	wait_var_event(set, !atomic_read(set)); -	atomic_set(ready, count); -	atomic_set(done, count); -} - -static int igt_wakeup(void *arg) -{ -	I915_RND_STATE(prng); -	struct intel_engine_cs *engine = arg; -	struct igt_wakeup *waiters; -	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); -	const int count = 4096; -	const u32 max_seqno = count / 4; -	atomic_t ready, set, done; -	int err = -ENOMEM; -	int n, step; - -	mock_engine_reset(engine); - -	waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); -	if (!waiters) -		goto out_engines; - -	/* Create a large number of threads, each waiting on a random seqno. -	 * Multiple waiters will be waiting for the same seqno. -	 */ -	atomic_set(&ready, count); -	for (n = 0; n < count; n++) { -		waiters[n].wq = &wq; -		waiters[n].ready = &ready; -		waiters[n].set = &set; -		waiters[n].done = &done; -		waiters[n].engine = engine; -		waiters[n].flags = BIT(IDLE); - -		waiters[n].tsk = kthread_run(igt_wakeup_thread, &waiters[n], -					     "i915/igt:%d", n); -		if (IS_ERR(waiters[n].tsk)) -			goto out_waiters; - -		get_task_struct(waiters[n].tsk); -	} - -	for (step = 1; step <= max_seqno; step <<= 1) { -		u32 seqno; - -		/* The waiter threads start paused as we assign them a random -		 * seqno and reset the engine. Once the engine is reset, -		 * we signal that the threads may begin their wait upon their -		 * seqno. -		 */ -		for (n = 0; n < count; n++) { -			GEM_BUG_ON(!test_bit(IDLE, &waiters[n].flags)); -			waiters[n].seqno = -				1 + prandom_u32_state(&prng) % max_seqno; -		} -		mock_seqno_advance(engine, 0); -		igt_wake_all_sync(&ready, &set, &done, &wq, count); - -		/* Simulate the GPU doing chunks of work, with one or more -		 * seqno appearing to finish at the same time. A random number -		 * of threads will be waiting upon the update and hopefully be -		 * woken. -		 */ -		for (seqno = 1; seqno <= max_seqno + step; seqno += step) { -			usleep_range(50, 500); -			mock_seqno_advance(engine, seqno); -		} -		GEM_BUG_ON(intel_engine_get_seqno(engine) < 1 + max_seqno); - -		/* With the seqno now beyond any of the waiting threads, they -		 * should all be woken, see that they are complete and signal -		 * that they are ready for the next test. We wait until all -		 * threads are complete and waiting for us (i.e. not a seqno). 
-		 */ -		if (!wait_var_event_timeout(&done, -					    !atomic_read(&done), 10 * HZ)) { -			pr_err("Timed out waiting for %d remaining waiters\n", -			       atomic_read(&done)); -			err = -ETIMEDOUT; -			break; -		} - -		err = check_rbtree_empty(engine); -		if (err) -			break; -	} - -out_waiters: -	for (n = 0; n < count; n++) { -		if (IS_ERR(waiters[n].tsk)) -			break; - -		set_bit(STOP, &waiters[n].flags); -	} -	mock_seqno_advance(engine, INT_MAX); /* wakeup any broken waiters */ -	igt_wake_all_sync(&ready, &set, &done, &wq, n); - -	for (n = 0; n < count; n++) { -		if (IS_ERR(waiters[n].tsk)) -			break; - -		kthread_stop(waiters[n].tsk); -		put_task_struct(waiters[n].tsk); -	} - -	kvfree(waiters); -out_engines: -	mock_engine_flush(engine); -	return err; -} - -int intel_breadcrumbs_mock_selftests(void) -{ -	static const struct i915_subtest tests[] = { -		SUBTEST(igt_random_insert_remove), -		SUBTEST(igt_insert_complete), -		SUBTEST(igt_wakeup), -	}; -	struct drm_i915_private *i915; -	int err; - -	i915 = mock_gem_device(); -	if (!i915) -		return -ENOMEM; - -	err = i915_subtests(tests, i915->engine[RCS]); -	drm_dev_put(&i915->drm); - -	return err; -} diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 12550b55c42f..7b6f3bea9ef8 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -363,9 +363,7 @@ static int igt_global_reset(void *arg)  	/* Check that we can issue a global GPU reset */  	igt_global_reset_lock(i915); -	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); -	mutex_lock(&i915->drm.struct_mutex);  	reset_count = i915_reset_count(&i915->gpu_error);  	i915_reset(i915, ALL_ENGINES, NULL); @@ -374,9 +372,7 @@ static int igt_global_reset(void *arg)  		pr_err("No GPU reset recorded!\n");  		err = -EINVAL;  	} -	mutex_unlock(&i915->drm.struct_mutex); -	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));  	igt_global_reset_unlock(i915);  	if (i915_terminally_wedged(&i915->gpu_error)) @@ -393,18 +389,16 @@ static int igt_wedged_reset(void *arg)  	/* Check that we can recover a wedged device with a GPU reset */  	igt_global_reset_lock(i915); -	mutex_lock(&i915->drm.struct_mutex);  	wakeref = intel_runtime_pm_get(i915);  	i915_gem_set_wedged(i915); -	GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error)); -	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); +	mutex_lock(&i915->drm.struct_mutex); +	GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error));  	i915_reset(i915, ALL_ENGINES, NULL); -	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); +	mutex_unlock(&i915->drm.struct_mutex);  	intel_runtime_pm_put(i915, wakeref); -	mutex_unlock(&i915->drm.struct_mutex);  	igt_global_reset_unlock(i915);  	return i915_terminally_wedged(&i915->gpu_error) ? 
-EIO : 0; @@ -455,8 +449,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)  		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);  		do { -			u32 seqno = intel_engine_get_seqno(engine); -  			if (active) {  				struct i915_request *rq; @@ -485,8 +477,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)  					break;  				} -				GEM_BUG_ON(!rq->global_seqno); -				seqno = rq->global_seqno - 1;  				i915_request_put(rq);  			} @@ -502,16 +492,15 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)  				break;  			} -			reset_engine_count += active;  			if (i915_reset_engine_count(&i915->gpu_error, engine) != -			    reset_engine_count) { -				pr_err("%s engine reset %srecorded!\n", -				       engine->name, active ? "not " : ""); +			    ++reset_engine_count) { +				pr_err("%s engine reset not recorded!\n", +				       engine->name);  				err = -EINVAL;  				break;  			} -			if (!wait_for_idle(engine)) { +			if (!i915_reset_flush(i915)) {  				struct drm_printer p =  					drm_info_printer(i915->drm.dev); @@ -734,7 +723,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915,  		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);  		do { -			u32 seqno = intel_engine_get_seqno(engine);  			struct i915_request *rq = NULL;  			if (flags & TEST_ACTIVE) { @@ -762,9 +750,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915,  					err = -EIO;  					break;  				} - -				GEM_BUG_ON(!rq->global_seqno); -				seqno = rq->global_seqno - 1;  			}  			err = i915_reset_engine(engine, NULL); @@ -801,10 +786,9 @@ static int __igt_reset_engines(struct drm_i915_private *i915,  		reported = i915_reset_engine_count(&i915->gpu_error, engine);  		reported -= threads[engine->id].resets; -		if (reported != (flags & TEST_ACTIVE ? count : 0)) { -			pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu, expected %lu reported\n", -			       engine->name, test_name, count, reported, -			       (flags & TEST_ACTIVE ? 
count : 0)); +		if (reported != count) { +			pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", +			       engine->name, test_name, count, reported);  			if (!err)  				err = -EINVAL;  		} @@ -903,20 +887,13 @@ static int igt_reset_engines(void *arg)  	return 0;  } -static u32 fake_hangcheck(struct i915_request *rq, u32 mask) +static u32 fake_hangcheck(struct drm_i915_private *i915, u32 mask)  { -	struct i915_gpu_error *error = &rq->i915->gpu_error; -	u32 reset_count = i915_reset_count(error); - -	error->stalled_mask = mask; - -	/* set_bit() must be after we have setup the backchannel (mask) */ -	smp_mb__before_atomic(); -	set_bit(I915_RESET_HANDOFF, &error->flags); +	u32 count = i915_reset_count(&i915->gpu_error); -	wake_up_all(&error->wait_queue); +	i915_reset(i915, mask, NULL); -	return reset_count; +	return count;  }  static int igt_reset_wait(void *arg) @@ -962,7 +939,7 @@ static int igt_reset_wait(void *arg)  		goto out_rq;  	} -	reset_count = fake_hangcheck(rq, ALL_ENGINES); +	reset_count = fake_hangcheck(i915, ALL_ENGINES);  	timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10);  	if (timeout < 0) { @@ -972,7 +949,6 @@ static int igt_reset_wait(void *arg)  		goto out_rq;  	} -	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));  	if (i915_reset_count(&i915->gpu_error) == reset_count) {  		pr_err("No GPU reset recorded!\n");  		err = -EINVAL; @@ -1151,7 +1127,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,  	wait_for_completion(&arg.completion); -	if (wait_for(waitqueue_active(&rq->execute), 10)) { +	if (wait_for(!list_empty(&rq->fence.cb_list), 10)) {  		struct drm_printer p = drm_info_printer(i915->drm.dev);  		pr_err("igt/evict_vma kthread did not wait\n"); @@ -1162,7 +1138,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,  	}  out_reset: -	fake_hangcheck(rq, intel_engine_flag(rq->engine)); +	fake_hangcheck(rq->i915, intel_engine_flag(rq->engine));  	if (tsk) {  		struct igt_wedge_me w; @@ -1341,12 +1317,7 @@ static int igt_reset_queue(void *arg)  				goto fini;  			} -			reset_count = fake_hangcheck(prev, ENGINE_MASK(id)); - -			i915_reset(i915, ENGINE_MASK(id), NULL); - -			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, -					    &i915->gpu_error.flags)); +			reset_count = fake_hangcheck(i915, ENGINE_MASK(id));  			if (prev->fence.error != -EIO) {  				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", @@ -1565,6 +1536,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine,  		pr_err("%s(%s): Failed to start request %llx, at %x\n",  		       __func__, engine->name,  		       rq->fence.seqno, hws_seqno(&h, rq)); +		i915_gem_set_wedged(i915);  		err = -EIO;  	} @@ -1588,7 +1560,6 @@ out:  static void force_reset(struct drm_i915_private *i915)  {  	i915_gem_set_wedged(i915); -	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);  	i915_reset(i915, 0, NULL);  } @@ -1618,6 +1589,26 @@ static int igt_atomic_reset(void *arg)  	if (i915_terminally_wedged(&i915->gpu_error))  		goto unlock; +	if (intel_has_gpu_reset(i915)) { +		const typeof(*phases) *p; + +		for (p = phases; p->name; p++) { +			GEM_TRACE("intel_gpu_reset under %s\n", p->name); + +			p->critical_section_begin(); +			err = intel_gpu_reset(i915, ALL_ENGINES); +			p->critical_section_end(); + +			if (err) { +				pr_err("intel_gpu_reset failed under %s\n", +				       p->name); +				goto out; +			} +		} + +		force_reset(i915); +	} +  	if (intel_has_reset_engine(i915)) {  		struct intel_engine_cs *engine;  		enum 
intel_engine_id id; @@ -1674,6 +1665,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)  	wakeref = intel_runtime_pm_get(i915);  	saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); +	drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */  	err = i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 2b2ecd76c2ac..fb35f53c9ce3 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -268,6 +268,143 @@ err_wedged:  	goto err_ctx_lo;  } +struct preempt_client { +	struct igt_spinner spin; +	struct i915_gem_context *ctx; +}; + +static int preempt_client_init(struct drm_i915_private *i915, +			       struct preempt_client *c) +{ +	c->ctx = kernel_context(i915); +	if (!c->ctx) +		return -ENOMEM; + +	if (igt_spinner_init(&c->spin, i915)) +		goto err_ctx; + +	return 0; + +err_ctx: +	kernel_context_close(c->ctx); +	return -ENOMEM; +} + +static void preempt_client_fini(struct preempt_client *c) +{ +	igt_spinner_fini(&c->spin); +	kernel_context_close(c->ctx); +} + +static int live_suppress_self_preempt(void *arg) +{ +	struct drm_i915_private *i915 = arg; +	struct intel_engine_cs *engine; +	struct i915_sched_attr attr = { +		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) +	}; +	struct preempt_client a, b; +	enum intel_engine_id id; +	intel_wakeref_t wakeref; +	int err = -ENOMEM; + +	/* +	 * Verify that if a preemption request does not cause a change in +	 * the current execution order, the preempt-to-idle injection is +	 * skipped and that we do not accidentally apply it after the CS +	 * completion event. +	 */ + +	if (!HAS_LOGICAL_RING_PREEMPTION(i915)) +		return 0; + +	if (USES_GUC_SUBMISSION(i915)) +		return 0; /* presume black blox */ + +	mutex_lock(&i915->drm.struct_mutex); +	wakeref = intel_runtime_pm_get(i915); + +	if (preempt_client_init(i915, &a)) +		goto err_unlock; +	if (preempt_client_init(i915, &b)) +		goto err_client_a; + +	for_each_engine(engine, i915, id) { +		struct i915_request *rq_a, *rq_b; +		int depth; + +		engine->execlists.preempt_hang.count = 0; + +		rq_a = igt_spinner_create_request(&a.spin, +						  a.ctx, engine, +						  MI_NOOP); +		if (IS_ERR(rq_a)) { +			err = PTR_ERR(rq_a); +			goto err_client_b; +		} + +		i915_request_add(rq_a); +		if (!igt_wait_for_spinner(&a.spin, rq_a)) { +			pr_err("First client failed to start\n"); +			goto err_wedged; +		} + +		for (depth = 0; depth < 8; depth++) { +			rq_b = igt_spinner_create_request(&b.spin, +							  b.ctx, engine, +							  MI_NOOP); +			if (IS_ERR(rq_b)) { +				err = PTR_ERR(rq_b); +				goto err_client_b; +			} +			i915_request_add(rq_b); + +			GEM_BUG_ON(i915_request_completed(rq_a)); +			engine->schedule(rq_a, &attr); +			igt_spinner_end(&a.spin); + +			if (!igt_wait_for_spinner(&b.spin, rq_b)) { +				pr_err("Second client failed to start\n"); +				goto err_wedged; +			} + +			swap(a, b); +			rq_a = rq_b; +		} +		igt_spinner_end(&a.spin); + +		if (engine->execlists.preempt_hang.count) { +			pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n", +			       engine->execlists.preempt_hang.count, +			       depth); +			err = -EINVAL; +			goto err_client_b; +		} + +		if (igt_flush_test(i915, I915_WAIT_LOCKED)) +			goto err_wedged; +	} + +	err = 0; +err_client_b: +	preempt_client_fini(&b); +err_client_a: +	preempt_client_fini(&a); +err_unlock: +	if (igt_flush_test(i915, I915_WAIT_LOCKED)) +		err = -EIO; +	
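The property live_suppress_self_preempt() exercises above can be summarised with a toy model: a preempt-to-idle cycle is only worth injecting when a queued request outranks the one already on the hardware, so boosting the priority of the running request itself must never register as a preemption event. This sketch is not the driver's scheduler; the struct and helpers are invented for illustration.

#include <stdbool.h>
#include <stdio.h>

struct toy_engine {
	int running_prio;   /* priority of the request on the hardware */
	int queue_max_prio; /* highest priority waiting in the queue */
	unsigned int preempt_count;
};

static bool toy_need_preempt(const struct toy_engine *e)
{
	/* only a higher-priority *queued* request justifies preempt-to-idle */
	return e->queue_max_prio > e->running_prio;
}

static void toy_schedule_bump(struct toy_engine *e, int new_prio)
{
	if (new_prio > e->running_prio)
		e->running_prio = new_prio;

	if (toy_need_preempt(e))
		e->preempt_count++; /* would inject a preempt-to-idle cycle */
}

int main(void)
{
	struct toy_engine e = { .running_prio = 0, .queue_max_prio = 0 };

	toy_schedule_bump(&e, 1023); /* bump the running request to max priority */

	printf("preemptions injected: %u (expected 0)\n", e.preempt_count);
	return e.preempt_count ? 1 : 0;
}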
intel_runtime_pm_put(i915, wakeref); +	mutex_unlock(&i915->drm.struct_mutex); +	return err; + +err_wedged: +	igt_spinner_end(&b.spin); +	igt_spinner_end(&a.spin); +	i915_gem_set_wedged(i915); +	err = -EIO; +	goto err_client_b; +} +  static int live_preempt_hang(void *arg)  {  	struct drm_i915_private *i915 = arg; @@ -647,6 +784,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)  		SUBTEST(live_sanitycheck),  		SUBTEST(live_preempt),  		SUBTEST(live_late_preempt), +		SUBTEST(live_suppress_self_preempt),  		SUBTEST(live_preempt_hang),  		SUBTEST(live_preempt_smoke),  	}; diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index a8cac56be835..b15c4f26c593 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -214,7 +214,6 @@ out_put:  static int do_device_reset(struct intel_engine_cs *engine)  { -	set_bit(I915_RESET_HANDOFF, &engine->i915->gpu_error.flags);  	i915_reset(engine->i915, ENGINE_MASK(engine->id), "live_workarounds");  	return 0;  } @@ -394,7 +393,6 @@ static int  live_gpu_reset_gt_engine_workarounds(void *arg)  {  	struct drm_i915_private *i915 = arg; -	struct i915_gpu_error *error = &i915->gpu_error;  	intel_wakeref_t wakeref;  	struct wa_lists lists;  	bool ok; @@ -413,7 +411,6 @@ live_gpu_reset_gt_engine_workarounds(void *arg)  	if (!ok)  		goto out; -	set_bit(I915_RESET_HANDOFF, &error->flags);  	i915_reset(i915, ALL_ENGINES, "live_workarounds");  	ok = verify_gt_engine_wa(i915, &lists, "after reset"); diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c index b26f07b55d86..2bfa72c1654b 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c @@ -76,3 +76,57 @@ void timed_fence_fini(struct timed_fence *tf)  	destroy_timer_on_stack(&tf->timer);  	i915_sw_fence_fini(&tf->fence);  } + +struct heap_fence { +	struct i915_sw_fence fence; +	union { +		struct kref ref; +		struct rcu_head rcu; +	}; +}; + +static int __i915_sw_fence_call +heap_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ +	struct heap_fence *h = container_of(fence, typeof(*h), fence); + +	switch (state) { +	case FENCE_COMPLETE: +		break; + +	case FENCE_FREE: +		heap_fence_put(&h->fence); +	} + +	return NOTIFY_DONE; +} + +struct i915_sw_fence *heap_fence_create(gfp_t gfp) +{ +	struct heap_fence *h; + +	h = kmalloc(sizeof(*h), gfp); +	if (!h) +		return NULL; + +	i915_sw_fence_init(&h->fence, heap_fence_notify); +	refcount_set(&h->ref.refcount, 2); + +	return &h->fence; +} + +static void heap_fence_release(struct kref *ref) +{ +	struct heap_fence *h = container_of(ref, typeof(*h), ref); + +	i915_sw_fence_fini(&h->fence); + +	kfree_rcu(h, rcu); +} + +void heap_fence_put(struct i915_sw_fence *fence) +{ +	struct heap_fence *h = container_of(fence, typeof(*h), fence); + +	kref_put(&h->ref, heap_fence_release); +} diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h index 474aafb92ae1..1f9927e10f3a 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h @@ -39,4 +39,7 @@ struct timed_fence {  void timed_fence_init(struct timed_fence *tf, unsigned long expires);  void timed_fence_fini(struct timed_fence *tf); +struct i915_sw_fence *heap_fence_create(gfp_t gfp); +void heap_fence_put(struct i915_sw_fence *fence); +  #endif /* 
_LIB_SW_FENCE_H_ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 442ec2aeec81..08f0cab02e0f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -30,6 +30,17 @@ struct mock_ring {  	struct i915_timeline timeline;  }; +static void mock_timeline_pin(struct i915_timeline *tl) +{ +	tl->pin_count++; +} + +static void mock_timeline_unpin(struct i915_timeline *tl) +{ +	GEM_BUG_ON(!tl->pin_count); +	tl->pin_count--; +} +  static struct intel_ring *mock_ring(struct intel_engine_cs *engine)  {  	const unsigned long sz = PAGE_SIZE / 2; @@ -39,7 +50,12 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)  	if (!ring)  		return NULL; -	i915_timeline_init(engine->i915, &ring->timeline, engine->name); +	if (i915_timeline_init(engine->i915, +			       &ring->timeline, engine->name, +			       NULL)) { +		kfree(ring); +		return NULL; +	}  	ring->base.size = sz;  	ring->base.effective_size = sz; @@ -70,15 +86,21 @@ static struct mock_request *first_request(struct mock_engine *engine)  static void advance(struct mock_request *request)  {  	list_del_init(&request->link); -	mock_seqno_advance(request->base.engine, request->base.global_seqno); +	intel_engine_write_global_seqno(request->base.engine, +					request->base.global_seqno); +	i915_request_mark_complete(&request->base); +	GEM_BUG_ON(!i915_request_completed(&request->base)); + +	intel_engine_queue_breadcrumbs(request->base.engine);  }  static void hw_delay_complete(struct timer_list *t)  {  	struct mock_engine *engine = from_timer(engine, t, hw_delay);  	struct mock_request *request; +	unsigned long flags; -	spin_lock(&engine->hw_lock); +	spin_lock_irqsave(&engine->hw_lock, flags);  	/* Timer fired, first request is complete */  	request = first_request(engine); @@ -98,11 +120,12 @@ static void hw_delay_complete(struct timer_list *t)  		advance(request);  	} -	spin_unlock(&engine->hw_lock); +	spin_unlock_irqrestore(&engine->hw_lock, flags);  }  static void mock_context_unpin(struct intel_context *ce)  { +	mock_timeline_unpin(ce->ring->timeline);  	i915_gem_context_put(ce->gem_context);  } @@ -124,6 +147,7 @@ mock_context_pin(struct intel_engine_cs *engine,  		 struct i915_gem_context *ctx)  {  	struct intel_context *ce = to_intel_context(ctx, engine); +	int err = -ENOMEM;  	if (ce->pin_count++)  		return ce; @@ -134,13 +158,15 @@ mock_context_pin(struct intel_engine_cs *engine,  			goto err;  	} +	mock_timeline_pin(ce->ring->timeline); +  	ce->ops = &mock_context_ops;  	i915_gem_context_get(ctx);  	return ce;  err:  	ce->pin_count = 0; -	return ERR_PTR(-ENOMEM); +	return ERR_PTR(err);  }  static int mock_request_alloc(struct i915_request *request) @@ -159,9 +185,9 @@ static int mock_emit_flush(struct i915_request *request,  	return 0;  } -static void mock_emit_breadcrumb(struct i915_request *request, -				 u32 *flags) +static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs)  { +	return cs;  }  static void mock_submit_request(struct i915_request *request) @@ -169,11 +195,12 @@ static void mock_submit_request(struct i915_request *request)  	struct mock_request *mock = container_of(request, typeof(*mock), base);  	struct mock_engine *engine =  		container_of(request->engine, typeof(*engine), base); +	unsigned long flags;  	i915_request_submit(request);  	GEM_BUG_ON(!request->global_seqno); -	spin_lock_irq(&engine->hw_lock); +	spin_lock_irqsave(&engine->hw_lock, flags);  	list_add_tail(&mock->link, 
&engine->hw_queue);  	if (mock->link.prev == &engine->hw_queue) {  		if (mock->delay) @@ -181,7 +208,7 @@ static void mock_submit_request(struct i915_request *request)  		else  			advance(mock);  	} -	spin_unlock_irq(&engine->hw_lock); +	spin_unlock_irqrestore(&engine->hw_lock, flags);  }  struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, @@ -200,15 +227,19 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,  	engine->base.i915 = i915;  	snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);  	engine->base.id = id; -	engine->base.status_page.page_addr = (void *)(engine + 1); +	engine->base.status_page.addr = (void *)(engine + 1);  	engine->base.context_pin = mock_context_pin;  	engine->base.request_alloc = mock_request_alloc;  	engine->base.emit_flush = mock_emit_flush; -	engine->base.emit_breadcrumb = mock_emit_breadcrumb; +	engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb;  	engine->base.submit_request = mock_submit_request; -	i915_timeline_init(i915, &engine->base.timeline, engine->base.name); +	if (i915_timeline_init(i915, +			       &engine->base.timeline, +			       engine->base.name, +			       NULL)) +		goto err_free;  	i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE);  	intel_engine_init_breadcrumbs(&engine->base); @@ -226,6 +257,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,  err_breadcrumbs:  	intel_engine_fini_breadcrumbs(&engine->base);  	i915_timeline_fini(&engine->base.timeline); +err_free:  	kfree(engine);  	return NULL;  } @@ -246,7 +278,7 @@ void mock_engine_flush(struct intel_engine_cs *engine)  void mock_engine_reset(struct intel_engine_cs *engine)  { -	intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0); +	intel_engine_write_global_seqno(engine, 0);  }  void mock_engine_free(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h index 133d0c21790d..b9cc3a245f16 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.h +++ b/drivers/gpu/drm/i915/selftests/mock_engine.h @@ -46,10 +46,4 @@ void mock_engine_flush(struct intel_engine_cs *engine);  void mock_engine_reset(struct intel_engine_cs *engine);  void mock_engine_free(struct intel_engine_cs *engine); -static inline void mock_seqno_advance(struct intel_engine_cs *engine, u32 seqno) -{ -	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); -	intel_engine_wakeup(engine); -} -  #endif /* !__MOCK_ENGINE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 5477ad4a7e7d..14ae46fda49f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -58,8 +58,8 @@ static void mock_device_release(struct drm_device *dev)  	i915_gem_contexts_lost(i915);  	mutex_unlock(&i915->drm.struct_mutex); -	cancel_delayed_work_sync(&i915->gt.retire_work); -	cancel_delayed_work_sync(&i915->gt.idle_work); +	drain_delayed_work(&i915->gt.retire_work); +	drain_delayed_work(&i915->gt.idle_work);  	i915_gem_drain_workqueue(i915);  	mutex_lock(&i915->drm.struct_mutex); @@ -68,13 +68,14 @@ static void mock_device_release(struct drm_device *dev)  	i915_gem_contexts_fini(i915);  	mutex_unlock(&i915->drm.struct_mutex); +	i915_timelines_fini(i915); +  	drain_workqueue(i915->wq);  	i915_gem_drain_freed_objects(i915);  	mutex_lock(&i915->drm.struct_mutex);  	mock_fini_ggtt(&i915->ggtt);  	mutex_unlock(&i915->drm.struct_mutex); -	
WARN_ON(!list_empty(&i915->gt.timelines));  	destroy_workqueue(i915->wq); @@ -226,7 +227,8 @@ struct drm_i915_private *mock_gem_device(void)  	if (!i915->priorities)  		goto err_dependencies; -	INIT_LIST_HEAD(&i915->gt.timelines); +	i915_timelines_init(i915); +  	INIT_LIST_HEAD(&i915->gt.active_rings);  	INIT_LIST_HEAD(&i915->gt.closed_vma); @@ -253,6 +255,7 @@ err_context:  	i915_gem_contexts_fini(i915);  err_unlock:  	mutex_unlock(&i915->drm.struct_mutex); +	i915_timelines_fini(i915);  	kmem_cache_destroy(i915->priorities);  err_dependencies:  	kmem_cache_destroy(i915->dependencies); diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c index dcf3b16f5a07..cf39ccd9fc05 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -10,6 +10,7 @@  void mock_timeline_init(struct i915_timeline *timeline, u64 context)  { +	timeline->i915 = NULL;  	timeline->fence_context = context;  	spin_lock_init(&timeline->lock); @@ -24,5 +25,5 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)  void mock_timeline_fini(struct i915_timeline *timeline)  { -	i915_timeline_fini(timeline); +	i915_syncmap_free(&timeline->sync);  } diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index 6affbda6d9cb..d1c662d92ab7 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -96,6 +96,5 @@ enum drm_color_lut_tests {  	DRM_COLOR_LUT_NON_DECREASING = BIT(1),  }; -int drm_color_lut_check(struct drm_property_blob *lut, -			uint32_t tests); +int drm_color_lut_check(const struct drm_property_blob *lut, u32 tests);  #endif diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index df72be7e8b88..d2fad7b0fcf6 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -394,6 +394,9 @@  	INTEL_VGA_DEVICE(0x3E9A, info)  /* SRV GT2 */  /* CFL H */ +#define INTEL_CFL_H_GT1_IDS(info) \ +	INTEL_VGA_DEVICE(0x3E9C, info) +  #define INTEL_CFL_H_GT2_IDS(info) \  	INTEL_VGA_DEVICE(0x3E9B, info), /* Halo GT2 */ \  	INTEL_VGA_DEVICE(0x3E94, info)  /* Halo GT2 */ @@ -426,6 +429,7 @@  #define INTEL_CFL_IDS(info)	   \  	INTEL_CFL_S_GT1_IDS(info), \  	INTEL_CFL_S_GT2_IDS(info), \ +	INTEL_CFL_H_GT1_IDS(info), \  	INTEL_CFL_H_GT2_IDS(info), \  	INTEL_CFL_U_GT2_IDS(info), \  	INTEL_CFL_U_GT3_IDS(info), \  | 

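A note on the heap_fence helpers added to selftests/lib_sw_fence.c above: heap_fence_create() initialises the embedded kref to 2 (refcount_set(&h->ref.refcount, 2)) because the object has two owners, the caller that created the fence and the FENCE_FREE notification handled by heap_fence_notify(); each side drops its reference through heap_fence_put(), and only the final put runs heap_fence_release(), which finalises the fence and frees it with kfree_rcu(). The snippet below is a minimal, userspace-only sketch of that "start at two references" ownership pattern, offered purely for illustration: it is not the i915_sw_fence API, and the names heap_obj, heap_obj_create and heap_obj_put are made up here.

/*
 * Userspace sketch (assumption: plain C, no kernel kref/RCU) of the
 * two-owner refcount pattern used by heap_fence_create() above.
 */
#include <stdio.h>
#include <stdlib.h>

struct heap_obj {
	int refcount;			/* stand-in for struct kref */
};

static struct heap_obj *heap_obj_create(void)
{
	struct heap_obj *h = malloc(sizeof(*h));

	if (!h)
		return NULL;

	/* One reference for the creator, one for the completion callback. */
	h->refcount = 2;
	return h;
}

static void heap_obj_put(struct heap_obj *h)
{
	/* kref_put() analogue: free only when the last owner lets go. */
	if (--h->refcount == 0)
		free(h);
}

int main(void)
{
	struct heap_obj *h = heap_obj_create();

	if (!h)
		return 1;

	heap_obj_put(h);	/* completion side drops its reference */
	heap_obj_put(h);	/* creator drops the final reference */
	printf("object released once both owners dropped their reference\n");
	return 0;
}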