88 files changed, 4768 insertions, 3429 deletions
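The KVMGT changes below expose a per-vGPU EDID through a VFIO device-specific region (VFIO_REGION_TYPE_GFX / VFIO_REGION_SUBTYPE_GFX_EDID): accesses below edid_offset hit the control registers handled by handle_edid_regs(), writes at edid_offset and beyond update the EDID blob, and flipping link_state drives intel_vgpu_emulate_hotplug(). The following userspace sketch is not part of the patch; it only illustrates the access sequence those handlers expect. The device fd and the region's file offset are assumed to have been discovered beforehand via VFIO_DEVICE_GET_REGION_INFO (not shown), edid/edid_len stand in for a caller-supplied 128-byte EDID block, and the struct and link-state constants come from <linux/vfio.h>.

/*
 * Illustrative userspace sketch, not part of the patch: push a new EDID
 * and toggle the virtual link so GVT-g raises a hotplug interrupt.
 * Assumes device_fd and region_offset were obtained elsewhere via
 * VFIO_DEVICE_GET_REGION_INFO; all register accesses are 4 bytes wide,
 * as required by the handle_edid_regs() hunk in kvmgt.c below.
 */
#include <linux/vfio.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>

static int vfio_gfx_set_edid(int device_fd, off_t region_offset,
			     const void *edid, uint32_t edid_len)
{
	uint32_t edid_offset, edid_max_size, state;

	/* Control registers sit at the start of the region. */
	if (pread(device_fd, &edid_max_size, 4, region_offset +
		  offsetof(struct vfio_region_gfx_edid, edid_max_size)) != 4)
		return -1;
	if (pread(device_fd, &edid_offset, 4, region_offset +
		  offsetof(struct vfio_region_gfx_edid, edid_offset)) != 4)
		return -1;
	if (edid_len > edid_max_size)
		return -1;

	/* Take the link down before touching the blob. */
	state = VFIO_DEVICE_GFX_LINK_STATE_DOWN;
	if (pwrite(device_fd, &state, 4, region_offset +
		   offsetof(struct vfio_region_gfx_edid, link_state)) != 4)
		return -1;

	/* Publish the new size, then write the blob at edid_offset. */
	if (pwrite(device_fd, &edid_len, 4, region_offset +
		   offsetof(struct vfio_region_gfx_edid, edid_size)) != 4)
		return -1;
	if (pwrite(device_fd, edid, edid_len,
		   region_offset + edid_offset) != (ssize_t)edid_len)
		return -1;

	/* Link up validates the blob and triggers the virtual hotplug. */
	state = VFIO_DEVICE_GFX_LINK_STATE_UP;
	return pwrite(device_fd, &state, 4, region_offset +
		      offsetof(struct vfio_region_gfx_edid, link_state)) == 4 ? 0 : -1;
}

This mirrors the order in which a VMM is expected to drive the region: link down, update size and blob, then link up.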
diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 968ca7c91ad8..d5d34d0c79c7 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -474,10 +474,9 @@ EXPORT_SYMBOL(drm_plane_create_color_properties);   *   * Returns 0 on success, -EINVAL on failure.   */ -int drm_color_lut_check(struct drm_property_blob *lut, -			uint32_t tests) +int drm_color_lut_check(const struct drm_property_blob *lut, u32 tests)  { -	struct drm_color_lut *entry; +	const struct drm_color_lut *entry;  	int i;  	if (!lut || !tests) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 8300efe60fe1..210d0e8777b6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -22,6 +22,7 @@ subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable)  subdir-ccflags-y += $(call cc-disable-warning, sign-compare)  subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized)  subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) +subdir-ccflags-y += $(call cc-disable-warning, uninitialized)  subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror  # Fine grained warnings disable diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 4f25b6b7728e..035479e273be 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -342,6 +342,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,  	port->dpcd->data_valid = true;  	port->dpcd->data[DPCD_SINK_COUNT] = 0x1;  	port->type = type; +	port->id = resolution;  	emulate_monitor_status_change(vgpu); @@ -445,6 +446,36 @@ void intel_gvt_emulate_vblank(struct intel_gvt *gvt)  }  /** + * intel_vgpu_emulate_hotplug - trigger hotplug event for vGPU + * @vgpu: a vGPU + * @conncted: link state + * + * This function is used to trigger hotplug interrupt for vGPU + * + */ +void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected) +{ +	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + +	/* TODO: add more platforms support */ +	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { +		if (connected) { +			vgpu_vreg_t(vgpu, SFUSE_STRAP) |= +				SFUSE_STRAP_DDID_DETECTED; +			vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; +		} else { +			vgpu_vreg_t(vgpu, SFUSE_STRAP) &= +				~SFUSE_STRAP_DDID_DETECTED; +			vgpu_vreg_t(vgpu, SDEISR) &= ~SDE_PORTD_HOTPLUG_CPT; +		} +		vgpu_vreg_t(vgpu, SDEIIR) |= SDE_PORTD_HOTPLUG_CPT; +		vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= +				PORTD_HOTPLUG_STATUS_MASK; +		intel_vgpu_trigger_virtual_event(vgpu, DP_D_HOTPLUG); +	} +} + +/**   * intel_vgpu_clean_display - clean vGPU virtual display emulation   * @vgpu: a vGPU   * diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index ea7c1c525b8c..a87f33e6a23c 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -146,18 +146,19 @@ enum intel_vgpu_port_type {  	GVT_PORT_MAX  }; +enum intel_vgpu_edid { +	GVT_EDID_1024_768, +	GVT_EDID_1920_1200, +	GVT_EDID_NUM, +}; +  struct intel_vgpu_port {  	/* per display EDID information */  	struct intel_vgpu_edid_data *edid;  	/* per display DPCD information */  	struct intel_vgpu_dpcd_data *dpcd;  	int type; -}; - -enum intel_vgpu_edid { -	GVT_EDID_1024_768, -	GVT_EDID_1920_1200, -	GVT_EDID_NUM, +	enum intel_vgpu_edid id;  };  static inline char *vgpu_edid_str(enum intel_vgpu_edid id) @@ -172,6 +173,30 @@ static inline char *vgpu_edid_str(enum 
intel_vgpu_edid id)  	}  } +static inline unsigned int vgpu_edid_xres(enum intel_vgpu_edid id) +{ +	switch (id) { +	case GVT_EDID_1024_768: +		return 1024; +	case GVT_EDID_1920_1200: +		return 1920; +	default: +		return 0; +	} +} + +static inline unsigned int vgpu_edid_yres(enum intel_vgpu_edid id) +{ +	switch (id) { +	case GVT_EDID_1024_768: +		return 768; +	case GVT_EDID_1920_1200: +		return 1200; +	default: +		return 0; +	} +} +  void intel_gvt_emulate_vblank(struct intel_gvt *gvt);  void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt); diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 4e8947f33bd0..43f4242062dd 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -185,6 +185,7 @@ static const struct intel_gvt_ops intel_gvt_ops = {  	.vgpu_query_plane = intel_vgpu_query_plane,  	.vgpu_get_dmabuf = intel_vgpu_get_dmabuf,  	.write_protect_handler = intel_vgpu_page_track_handler, +	.emulate_hotplug = intel_vgpu_emulate_hotplug,  };  static void init_device_info(struct intel_gvt *gvt) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index fb9cc980e120..8bce09de4b82 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -536,6 +536,8 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset,  int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,  		void *p_data, unsigned int bytes); +void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected); +  static inline u64 intel_vgpu_get_bar_gpa(struct intel_vgpu *vgpu, int bar)  {  	/* We are 64bit bar. */ @@ -577,6 +579,7 @@ struct intel_gvt_ops {  	int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int);  	int (*write_protect_handler)(struct intel_vgpu *, u64, void *,  				     unsigned int); +	void (*emulate_hotplug)(struct intel_vgpu *vgpu, bool connected);  }; diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index 50798868ab15..5e01cc8d9b16 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -67,6 +67,7 @@ struct intel_gvt_mpt {  	int (*set_trap_area)(unsigned long handle, u64 start, u64 end,  			     bool map);  	int (*set_opregion)(void *vgpu); +	int (*set_edid)(void *vgpu, int port_num);  	int (*get_vfio_device)(void *vgpu);  	void (*put_vfio_device)(void *vgpu);  	bool (*is_valid_gfn)(unsigned long handle, unsigned long gfn); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index f8d44e8f86a6..63eef86a2a85 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -57,6 +57,8 @@ static const struct intel_gvt_ops *intel_gvt_ops;  #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)  #define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) +#define EDID_BLOB_OFFSET (PAGE_SIZE/2) +  #define OPREGION_SIGNATURE "IntelGraphicsMem"  struct vfio_region; @@ -76,6 +78,11 @@ struct vfio_region {  	void				*data;  }; +struct vfio_edid_region { +	struct vfio_region_gfx_edid vfio_edid_regs; +	void *edid_blob; +}; +  struct kvmgt_pgfn {  	gfn_t gfn;  	struct hlist_node hnode; @@ -427,6 +434,111 @@ static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {  	.release = intel_vgpu_reg_release_opregion,  }; +static int handle_edid_regs(struct intel_vgpu *vgpu, +			struct vfio_edid_region *region, char *buf, +			size_t count, u16 offset, bool is_write) +{ +	struct 
vfio_region_gfx_edid *regs = &region->vfio_edid_regs; +	unsigned int data; + +	if (offset + count > sizeof(*regs)) +		return -EINVAL; + +	if (count != 4) +		return -EINVAL; + +	if (is_write) { +		data = *((unsigned int *)buf); +		switch (offset) { +		case offsetof(struct vfio_region_gfx_edid, link_state): +			if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) { +				if (!drm_edid_block_valid( +					(u8 *)region->edid_blob, +					0, +					true, +					NULL)) { +					gvt_vgpu_err("invalid EDID blob\n"); +					return -EINVAL; +				} +				intel_gvt_ops->emulate_hotplug(vgpu, true); +			} else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN) +				intel_gvt_ops->emulate_hotplug(vgpu, false); +			else { +				gvt_vgpu_err("invalid EDID link state %d\n", +					regs->link_state); +				return -EINVAL; +			} +			regs->link_state = data; +			break; +		case offsetof(struct vfio_region_gfx_edid, edid_size): +			if (data > regs->edid_max_size) { +				gvt_vgpu_err("EDID size is bigger than %d!\n", +					regs->edid_max_size); +				return -EINVAL; +			} +			regs->edid_size = data; +			break; +		default: +			/* read-only regs */ +			gvt_vgpu_err("write read-only EDID region at offset %d\n", +				offset); +			return -EPERM; +		} +	} else { +		memcpy(buf, (char *)regs + offset, count); +	} + +	return count; +} + +static int handle_edid_blob(struct vfio_edid_region *region, char *buf, +			size_t count, u16 offset, bool is_write) +{ +	if (offset + count > region->vfio_edid_regs.edid_size) +		return -EINVAL; + +	if (is_write) +		memcpy(region->edid_blob + offset, buf, count); +	else +		memcpy(buf, region->edid_blob + offset, count); + +	return count; +} + +static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf, +		size_t count, loff_t *ppos, bool iswrite) +{ +	int ret; +	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - +			VFIO_PCI_NUM_REGIONS; +	struct vfio_edid_region *region = +		(struct vfio_edid_region *)vgpu->vdev.region[i].data; +	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + +	if (pos < region->vfio_edid_regs.edid_offset) { +		ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite); +	} else { +		pos -= EDID_BLOB_OFFSET; +		ret = handle_edid_blob(region, buf, count, pos, iswrite); +	} + +	if (ret < 0) +		gvt_vgpu_err("failed to access EDID region\n"); + +	return ret; +} + +static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu, +					struct vfio_region *region) +{ +	kfree(region->data); +} + +static const struct intel_vgpu_regops intel_vgpu_regops_edid = { +	.rw = intel_vgpu_reg_rw_edid, +	.release = intel_vgpu_reg_release_edid, +}; +  static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,  		unsigned int type, unsigned int subtype,  		const struct intel_vgpu_regops *ops, @@ -493,6 +605,36 @@ static int kvmgt_set_opregion(void *p_vgpu)  	return ret;  } +static int kvmgt_set_edid(void *p_vgpu, int port_num) +{ +	struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; +	struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); +	struct vfio_edid_region *base; +	int ret; + +	base = kzalloc(sizeof(*base), GFP_KERNEL); +	if (!base) +		return -ENOMEM; + +	/* TODO: Add multi-port and EDID extension block support */ +	base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET; +	base->vfio_edid_regs.edid_max_size = EDID_SIZE; +	base->vfio_edid_regs.edid_size = EDID_SIZE; +	base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id); +	base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id); +	base->edid_blob = port->edid->edid_block; + +	ret = intel_vgpu_register_reg(vgpu, +			
VFIO_REGION_TYPE_GFX, +			VFIO_REGION_SUBTYPE_GFX_EDID, +			&intel_vgpu_regops_edid, EDID_SIZE, +			VFIO_REGION_INFO_FLAG_READ | +			VFIO_REGION_INFO_FLAG_WRITE | +			VFIO_REGION_INFO_FLAG_CAPS, base); + +	return ret; +} +  static void kvmgt_put_vfio_device(void *vgpu)  {  	if (WARN_ON(!((struct intel_vgpu *)vgpu)->vdev.vfio_device)) @@ -1874,6 +2016,7 @@ static struct intel_gvt_mpt kvmgt_mpt = {  	.dma_map_guest_page = kvmgt_dma_map_guest_page,  	.dma_unmap_guest_page = kvmgt_dma_unmap_guest_page,  	.set_opregion = kvmgt_set_opregion, +	.set_edid = kvmgt_set_edid,  	.get_vfio_device = kvmgt_get_vfio_device,  	.put_vfio_device = kvmgt_put_vfio_device,  	.is_valid_gfn = kvmgt_is_valid_gfn, diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 9b4225d44243..5d8b8f228d8f 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -314,6 +314,23 @@ static inline int intel_gvt_hypervisor_set_opregion(struct intel_vgpu *vgpu)  }  /** + * intel_gvt_hypervisor_set_edid - Set EDID region for guest + * @vgpu: a vGPU + * @port_num: display port number + * + * Returns: + * Zero on success, negative error code if failed. + */ +static inline int intel_gvt_hypervisor_set_edid(struct intel_vgpu *vgpu, +						int port_num) +{ +	if (!intel_gvt_host.mpt->set_edid) +		return 0; + +	return intel_gvt_host.mpt->set_edid(vgpu, port_num); +} + +/**   * intel_gvt_hypervisor_get_vfio_device - increase vfio device ref count   * @vgpu: a vGPU   * diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index e1c860f80eb0..720e2b10adaa 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -428,6 +428,12 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,  	if (ret)  		goto out_clean_sched_policy; +	/*TODO: add more platforms support */ +	if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) +		ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D); +	if (ret) +		goto out_clean_sched_policy; +  	return vgpu;  out_clean_sched_policy: diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e80903114ca8..fa2c226fc779 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -160,14 +160,14 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)  		   obj->mm.madv == I915_MADV_DONTNEED ? 
" purgeable" : "");  	if (obj->base.name)  		seq_printf(m, " (name: %d)", obj->base.name); -	list_for_each_entry(vma, &obj->vma_list, obj_link) { +	list_for_each_entry(vma, &obj->vma.list, obj_link) {  		if (i915_vma_is_pinned(vma))  			pin_count++;  	}  	seq_printf(m, " (pinned x %d)", pin_count);  	if (obj->pin_global)  		seq_printf(m, " (global)"); -	list_for_each_entry(vma, &obj->vma_list, obj_link) { +	list_for_each_entry(vma, &obj->vma.list, obj_link) {  		if (!drm_mm_node_allocated(&vma->node))  			continue; @@ -323,7 +323,7 @@ static int per_file_stats(int id, void *ptr, void *data)  	if (obj->base.name || obj->base.dma_buf)  		stats->shared += obj->base.size; -	list_for_each_entry(vma, &obj->vma_list, obj_link) { +	list_for_each_entry(vma, &obj->vma.list, obj_link) {  		if (!drm_mm_node_allocated(&vma->node))  			continue; @@ -1285,8 +1285,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)  		seq_puts(m, "Wedged\n");  	if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))  		seq_puts(m, "Reset in progress: struct_mutex backoff\n"); -	if (test_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags)) -		seq_puts(m, "Reset in progress: reset handoff to waiter\n");  	if (waitqueue_active(&dev_priv->gpu_error.wait_queue))  		seq_puts(m, "Waiter holding struct mutex\n");  	if (waitqueue_active(&dev_priv->gpu_error.reset_queue)) @@ -1318,37 +1316,16 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)  	seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake));  	for_each_engine(engine, dev_priv, id) { -		struct intel_breadcrumbs *b = &engine->breadcrumbs; -		struct rb_node *rb; -  		seq_printf(m, "%s:\n", engine->name); -		seq_printf(m, "\tseqno = %x [current %x, last %x]\n", +		seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n",  			   engine->hangcheck.seqno, seqno[id], -			   intel_engine_last_submit(engine)); -		seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s, wedged? 
%s\n", -			   yesno(intel_engine_has_waiter(engine)), -			   yesno(test_bit(engine->id, -					  &dev_priv->gpu_error.missed_irq_rings)), -			   yesno(engine->hangcheck.stalled), -			   yesno(engine->hangcheck.wedged)); - -		spin_lock_irq(&b->rb_lock); -		for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { -			struct intel_wait *w = rb_entry(rb, typeof(*w), node); - -			seq_printf(m, "\t%s [%d] waiting for %x\n", -				   w->tsk->comm, w->tsk->pid, w->seqno); -		} -		spin_unlock_irq(&b->rb_lock); +			   intel_engine_last_submit(engine), +			   jiffies_to_msecs(jiffies - +					    engine->hangcheck.action_timestamp));  		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",  			   (long long)engine->hangcheck.acthd,  			   (long long)acthd[id]); -		seq_printf(m, "\taction = %s(%d) %d ms ago\n", -			   hangcheck_action_to_str(engine->hangcheck.action), -			   engine->hangcheck.action, -			   jiffies_to_msecs(jiffies - -					    engine->hangcheck.action_timestamp));  		if (engine->id == RCS) {  			seq_puts(m, "\tinstdone read =\n"); @@ -2029,18 +2006,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data)  	return 0;  } -static int count_irq_waiters(struct drm_i915_private *i915) -{ -	struct intel_engine_cs *engine; -	enum intel_engine_id id; -	int count = 0; - -	for_each_engine(engine, i915, id) -		count += intel_engine_has_waiter(engine); - -	return count; -} -  static const char *rps_power_to_str(unsigned int power)  {  	static const char * const strings[] = { @@ -2080,7 +2045,6 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)  	seq_printf(m, "RPS enabled? %d\n", rps->enabled);  	seq_printf(m, "GPU busy? %s [%d requests]\n",  		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests); -	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));  	seq_printf(m, "Boosts outstanding? %d\n",  		   atomic_read(&rps->num_waiters));  	seq_printf(m, "Interactive? 
%d\n", READ_ONCE(rps->power.interactive)); @@ -3912,8 +3876,6 @@ static int  i915_wedged_set(void *data, u64 val)  {  	struct drm_i915_private *i915 = data; -	struct intel_engine_cs *engine; -	unsigned int tmp;  	/*  	 * There is no safeguard against this debugfs entry colliding @@ -3926,18 +3888,8 @@ i915_wedged_set(void *data, u64 val)  	if (i915_reset_backoff(&i915->gpu_error))  		return -EAGAIN; -	for_each_engine_masked(engine, i915, val, tmp) { -		engine->hangcheck.seqno = intel_engine_get_seqno(engine); -		engine->hangcheck.stalled = true; -	} -  	i915_handle_error(i915, val, I915_ERROR_CAPTURE,  			  "Manually set wedged engine mask = %llx", val); - -	wait_on_bit(&i915->gpu_error.flags, -		    I915_RESET_HANDOFF, -		    TASK_UNINTERRUPTIBLE); -  	return 0;  } @@ -3945,94 +3897,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,  			i915_wedged_get, i915_wedged_set,  			"%llu\n"); -static int -fault_irq_set(struct drm_i915_private *i915, -	      unsigned long *irq, -	      unsigned long val) -{ -	int err; - -	err = mutex_lock_interruptible(&i915->drm.struct_mutex); -	if (err) -		return err; - -	err = i915_gem_wait_for_idle(i915, -				     I915_WAIT_LOCKED | -				     I915_WAIT_INTERRUPTIBLE, -				     MAX_SCHEDULE_TIMEOUT); -	if (err) -		goto err_unlock; - -	*irq = val; -	mutex_unlock(&i915->drm.struct_mutex); - -	/* Flush idle worker to disarm irq */ -	drain_delayed_work(&i915->gt.idle_work); - -	return 0; - -err_unlock: -	mutex_unlock(&i915->drm.struct_mutex); -	return err; -} - -static int -i915_ring_missed_irq_get(void *data, u64 *val) -{ -	struct drm_i915_private *dev_priv = data; - -	*val = dev_priv->gpu_error.missed_irq_rings; -	return 0; -} - -static int -i915_ring_missed_irq_set(void *data, u64 val) -{ -	struct drm_i915_private *i915 = data; - -	return fault_irq_set(i915, &i915->gpu_error.missed_irq_rings, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_ring_missed_irq_fops, -			i915_ring_missed_irq_get, i915_ring_missed_irq_set, -			"0x%08llx\n"); - -static int -i915_ring_test_irq_get(void *data, u64 *val) -{ -	struct drm_i915_private *dev_priv = data; - -	*val = dev_priv->gpu_error.test_irq_rings; - -	return 0; -} - -static int -i915_ring_test_irq_set(void *data, u64 val) -{ -	struct drm_i915_private *i915 = data; - -	/* GuC keeps the user interrupt permanently enabled for submission */ -	if (USES_GUC_SUBMISSION(i915)) -		return -ENODEV; - -	/* -	 * From icl, we can no longer individually mask interrupt generation -	 * from each engine. 
-	 */ -	if (INTEL_GEN(i915) >= 11) -		return -ENODEV; - -	val &= INTEL_INFO(i915)->ring_mask; -	DRM_DEBUG_DRIVER("Masking interrupts on rings 0x%08llx\n", val); - -	return fault_irq_set(i915, &i915->gpu_error.test_irq_rings, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops, -			i915_ring_test_irq_get, i915_ring_test_irq_set, -			"0x%08llx\n"); -  #define DROP_UNBOUND	BIT(0)  #define DROP_BOUND	BIT(1)  #define DROP_RETIRE	BIT(2) @@ -4070,7 +3934,8 @@ i915_drop_caches_set(void *data, u64 val)  		  val, val & DROP_ALL);  	wakeref = intel_runtime_pm_get(i915); -	if (val & DROP_RESET_ACTIVE && !intel_engines_are_idle(i915)) +	if (val & DROP_RESET_ACTIVE && +	    wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT))  		i915_gem_set_wedged(i915);  	/* No need to check and wait for gpu resets, only libdrm auto-restarts @@ -4092,13 +3957,8 @@ i915_drop_caches_set(void *data, u64 val)  		mutex_unlock(&i915->drm.struct_mutex);  	} -	if (val & DROP_RESET_ACTIVE && -	    i915_terminally_wedged(&i915->gpu_error)) { +	if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(&i915->gpu_error))  		i915_handle_error(i915, ALL_ENGINES, 0, NULL); -		wait_on_bit(&i915->gpu_error.flags, -			    I915_RESET_HANDOFF, -			    TASK_UNINTERRUPTIBLE); -	}  	fs_reclaim_acquire(GFP_KERNEL);  	if (val & DROP_BOUND) @@ -4800,8 +4660,6 @@ static const struct i915_debugfs_files {  } i915_debugfs_files[] = {  	{"i915_wedged", &i915_wedged_fops},  	{"i915_cache_sharing", &i915_cache_sharing_fops}, -	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops}, -	{"i915_ring_test_irq", &i915_ring_test_irq_fops},  	{"i915_gem_drop_caches", &i915_drop_caches_fops},  #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)  	{"i915_error_state", &i915_error_state_fops}, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3c111ad09922..534e52e3a8da 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -91,8 +91,8 @@  #define DRIVER_NAME		"i915"  #define DRIVER_DESC		"Intel Graphics" -#define DRIVER_DATE		"20190124" -#define DRIVER_TIMESTAMP	1548370857 +#define DRIVER_DATE		"20190202" +#define DRIVER_TIMESTAMP	1549095268  /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and   * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -1114,6 +1114,7 @@ struct skl_ddb_values {  };  struct skl_wm_level { +	u16 min_ddb_alloc;  	u16 plane_res_b;  	u8 plane_res_l;  	bool plane_en; @@ -1975,7 +1976,14 @@ struct drm_i915_private {  		void (*resume)(struct drm_i915_private *);  		void (*cleanup_engine)(struct intel_engine_cs *engine); -		struct list_head timelines; +		struct i915_gt_timelines { +			struct mutex mutex; /* protects list, tainted by GPU */ +			struct list_head active_list; + +			/* Pack multiple timelines' seqnos into the same page */ +			spinlock_t hwsp_lock; +			struct list_head hwsp_free_list; +		} timelines;  		struct list_head active_rings;  		struct list_head closed_vma; @@ -2345,6 +2353,8 @@ static inline unsigned int i915_sg_segment_size(void)  				 INTEL_INFO(dev_priv)->gt == 3)  #define IS_CNL_WITH_PORT_F(dev_priv)   (IS_CANNONLAKE(dev_priv) && \  					(INTEL_DEVID(dev_priv) & 0x0004) == 0x0004) +#define IS_ICL_WITH_PORT_F(dev_priv)   (IS_ICELAKE(dev_priv) && \ +					INTEL_DEVID(dev_priv) != 0x8A51)  #define IS_ALPHA_SUPPORT(intel_info) ((intel_info)->is_alpha_support) @@ -3001,11 +3011,6 @@ static inline bool i915_reset_backoff(struct i915_gpu_error *error)  	return unlikely(test_bit(I915_RESET_BACKOFF, 
&error->flags));  } -static inline bool i915_reset_handoff(struct i915_gpu_error *error) -{ -	return unlikely(test_bit(I915_RESET_HANDOFF, &error->flags)); -} -  static inline bool i915_terminally_wedged(struct i915_gpu_error *error)  {  	return unlikely(test_bit(I915_WEDGED, &error->flags)); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 44c82a6b9934..e802af64d628 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -247,21 +247,19 @@ int  i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,  			    struct drm_file *file)  { -	struct drm_i915_private *dev_priv = to_i915(dev); -	struct i915_ggtt *ggtt = &dev_priv->ggtt; +	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;  	struct drm_i915_gem_get_aperture *args = data;  	struct i915_vma *vma;  	u64 pinned; +	mutex_lock(&ggtt->vm.mutex); +  	pinned = ggtt->vm.reserved; -	mutex_lock(&dev->struct_mutex); -	list_for_each_entry(vma, &ggtt->vm.active_list, vm_link) +	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)  		if (i915_vma_is_pinned(vma))  			pinned += vma->node.size; -	list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link) -		if (i915_vma_is_pinned(vma)) -			pinned += vma->node.size; -	mutex_unlock(&dev->struct_mutex); + +	mutex_unlock(&ggtt->vm.mutex);  	args->aper_size = ggtt->vm.total;  	args->aper_available_size = args->aper_size - pinned; @@ -441,15 +439,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)  	if (ret)  		return ret; -	while ((vma = list_first_entry_or_null(&obj->vma_list, -					       struct i915_vma, -					       obj_link))) { +	spin_lock(&obj->vma.lock); +	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, +						       struct i915_vma, +						       obj_link))) {  		list_move_tail(&vma->obj_link, &still_in_list); +		spin_unlock(&obj->vma.lock); +  		ret = i915_vma_unbind(vma); -		if (ret) -			break; + +		spin_lock(&obj->vma.lock);  	} -	list_splice(&still_in_list, &obj->vma_list); +	list_splice(&still_in_list, &obj->vma.list); +	spin_unlock(&obj->vma.lock);  	return ret;  } @@ -659,11 +661,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj,  		     struct intel_rps_client *rps_client)  {  	might_sleep(); -#if IS_ENABLED(CONFIG_LOCKDEP) -	GEM_BUG_ON(debug_locks && -		   !!lockdep_is_held(&obj->base.dev->struct_mutex) != -		   !!(flags & I915_WAIT_LOCKED)); -#endif  	GEM_BUG_ON(timeout < 0);  	timeout = i915_gem_object_wait_reservation(obj->resv, @@ -1539,23 +1536,21 @@ err:  static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)  { -	struct drm_i915_private *i915; +	struct drm_i915_private *i915 = to_i915(obj->base.dev);  	struct list_head *list;  	struct i915_vma *vma;  	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); +	mutex_lock(&i915->ggtt.vm.mutex);  	for_each_ggtt_vma(vma, obj) { -		if (i915_vma_is_active(vma)) -			continue; -  		if (!drm_mm_node_allocated(&vma->node))  			continue; -		list_move_tail(&vma->vm_link, &vma->vm->inactive_list); +		list_move_tail(&vma->vm_link, &vma->vm->bound_list);  	} +	mutex_unlock(&i915->ggtt.vm.mutex); -	i915 = to_i915(obj->base.dev);  	spin_lock(&i915->mm.obj_lock);  	list = obj->bind_count ? 
&i915->mm.bound_list : &i915->mm.unbound_list;  	list_move_tail(&obj->mm.link, list); @@ -2878,6 +2873,14 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,  	return 0;  } +static bool match_ring(struct i915_request *rq) +{ +	struct drm_i915_private *dev_priv = rq->i915; +	u32 ring = I915_READ(RING_START(rq->engine->mmio_base)); + +	return ring == i915_ggtt_offset(rq->ring->vma); +} +  struct i915_request *  i915_gem_find_active_request(struct intel_engine_cs *engine)  { @@ -2897,9 +2900,16 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)  	 */  	spin_lock_irqsave(&engine->timeline.lock, flags);  	list_for_each_entry(request, &engine->timeline.requests, link) { -		if (__i915_request_completed(request, request->global_seqno)) +		if (i915_request_completed(request))  			continue; +		if (!i915_request_started(request)) +			break; + +		/* More than one preemptible request may match! */ +		if (!match_ring(request)) +			break; +  		active = request;  		break;  	} @@ -3229,33 +3239,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)  	return ret;  } -static long wait_for_timeline(struct i915_timeline *tl, -			      unsigned int flags, long timeout) -{ -	struct i915_request *rq; - -	rq = i915_gem_active_get_unlocked(&tl->last_request); -	if (!rq) -		return timeout; - -	/* -	 * "Race-to-idle". -	 * -	 * Switching to the kernel context is often used a synchronous -	 * step prior to idling, e.g. in suspend for flushing all -	 * current operations to memory before sleeping. These we -	 * want to complete as quickly as possible to avoid prolonged -	 * stalls, so allow the gpu to boost to maximum clocks. -	 */ -	if (flags & I915_WAIT_FOR_IDLE_BOOST) -		gen6_rps_boost(rq, NULL); - -	timeout = i915_request_wait(rq, flags, timeout); -	i915_request_put(rq); - -	return timeout; -} -  static int wait_for_engines(struct drm_i915_private *i915)  {  	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { @@ -3269,6 +3252,52 @@ static int wait_for_engines(struct drm_i915_private *i915)  	return 0;  } +static long +wait_for_timelines(struct drm_i915_private *i915, +		   unsigned int flags, long timeout) +{ +	struct i915_gt_timelines *gt = &i915->gt.timelines; +	struct i915_timeline *tl; + +	if (!READ_ONCE(i915->gt.active_requests)) +		return timeout; + +	mutex_lock(&gt->mutex); +	list_for_each_entry(tl, &gt->active_list, link) { +		struct i915_request *rq; + +		rq = i915_gem_active_get_unlocked(&tl->last_request); +		if (!rq) +			continue; + +		mutex_unlock(&gt->mutex); + +		/* +		 * "Race-to-idle". +		 * +		 * Switching to the kernel context is often used a synchronous +		 * step prior to idling, e.g. in suspend for flushing all +		 * current operations to memory before sleeping. These we +		 * want to complete as quickly as possible to avoid prolonged +		 * stalls, so allow the gpu to boost to maximum clocks. 
+		 */ +		if (flags & I915_WAIT_FOR_IDLE_BOOST) +			gen6_rps_boost(rq, NULL); + +		timeout = i915_request_wait(rq, flags, timeout); +		i915_request_put(rq); +		if (timeout < 0) +			return timeout; + +		/* restart after reacquiring the lock */ +		mutex_lock(&gt->mutex); +		tl = list_entry(&gt->active_list, typeof(*tl), link); +	} +	mutex_unlock(&gt->mutex); + +	return timeout; +} +  int i915_gem_wait_for_idle(struct drm_i915_private *i915,  			   unsigned int flags, long timeout)  { @@ -3280,17 +3309,15 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,  	if (!READ_ONCE(i915->gt.awake))  		return 0; +	timeout = wait_for_timelines(i915, flags, timeout); +	if (timeout < 0) +		return timeout; +  	if (flags & I915_WAIT_LOCKED) { -		struct i915_timeline *tl;  		int err;  		lockdep_assert_held(&i915->drm.struct_mutex); -		list_for_each_entry(tl, &i915->gt.timelines, link) { -			timeout = wait_for_timeline(tl, flags, timeout); -			if (timeout < 0) -				return timeout; -		}  		if (GEM_SHOW_DEBUG() && !timeout) {  			/* Presume that timeout was non-zero to begin with! */  			dev_warn(&i915->drm.pdev->dev, @@ -3304,17 +3331,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,  		i915_retire_requests(i915);  		GEM_BUG_ON(i915->gt.active_requests); -	} else { -		struct intel_engine_cs *engine; -		enum intel_engine_id id; - -		for_each_engine(engine, i915, id) { -			struct i915_timeline *tl = &engine->timeline; - -			timeout = wait_for_timeline(tl, flags, timeout); -			if (timeout < 0) -				return timeout; -		}  	}  	return 0; @@ -3500,7 +3516,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,  	 * reading an invalid PTE on older architectures.  	 */  restart: -	list_for_each_entry(vma, &obj->vma_list, obj_link) { +	list_for_each_entry(vma, &obj->vma.list, obj_link) {  		if (!drm_mm_node_allocated(&vma->node))  			continue; @@ -3578,7 +3594,7 @@ restart:  			 */  		} -		list_for_each_entry(vma, &obj->vma_list, obj_link) { +		list_for_each_entry(vma, &obj->vma.list, obj_link) {  			if (!drm_mm_node_allocated(&vma->node))  				continue; @@ -3588,7 +3604,7 @@ restart:  		}  	} -	list_for_each_entry(vma, &obj->vma_list, obj_link) +	list_for_each_entry(vma, &obj->vma.list, obj_link)  		vma->node.color = cache_level;  	i915_gem_object_set_cache_coherency(obj, cache_level);  	obj->cache_dirty = true; /* Always invalidate stale cachelines */ @@ -4164,7 +4180,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,  {  	mutex_init(&obj->mm.lock); -	INIT_LIST_HEAD(&obj->vma_list); +	spin_lock_init(&obj->vma.lock); +	INIT_LIST_HEAD(&obj->vma.list); +  	INIT_LIST_HEAD(&obj->lut_list);  	INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4330,14 +4348,13 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,  		mutex_lock(&i915->drm.struct_mutex);  		GEM_BUG_ON(i915_gem_object_is_active(obj)); -		list_for_each_entry_safe(vma, vn, -					 &obj->vma_list, obj_link) { +		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {  			GEM_BUG_ON(i915_vma_is_active(vma));  			vma->flags &= ~I915_VMA_PIN_MASK;  			i915_vma_destroy(vma);  		} -		GEM_BUG_ON(!list_empty(&obj->vma_list)); -		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); +		GEM_BUG_ON(!list_empty(&obj->vma.list)); +		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));  		/* This serializes freeing with the shrinker. 
Since the free  		 * is delayed, first by RCU then by the workqueue, we want the @@ -4495,8 +4512,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915)  	GEM_TRACE("\n"); -	mutex_lock(&i915->drm.struct_mutex); -  	wakeref = intel_runtime_pm_get(i915);  	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); @@ -4522,6 +4537,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)  	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);  	intel_runtime_pm_put(i915, wakeref); +	mutex_lock(&i915->drm.struct_mutex);  	i915_gem_contexts_lost(i915);  	mutex_unlock(&i915->drm.struct_mutex);  } @@ -4536,6 +4552,8 @@ int i915_gem_suspend(struct drm_i915_private *i915)  	wakeref = intel_runtime_pm_get(i915);  	intel_suspend_gt_powersave(i915); +	flush_workqueue(i915->wq); +  	mutex_lock(&i915->drm.struct_mutex);  	/* @@ -4565,11 +4583,9 @@ int i915_gem_suspend(struct drm_i915_private *i915)  	i915_retire_requests(i915); /* ensure we flush after wedging */  	mutex_unlock(&i915->drm.struct_mutex); +	i915_reset_flush(i915); -	intel_uc_suspend(i915); - -	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); -	cancel_delayed_work_sync(&i915->gt.retire_work); +	drain_delayed_work(&i915->gt.retire_work);  	/*  	 * As the idle_work is rearming if it detects a race, play safe and @@ -4577,6 +4593,8 @@ int i915_gem_suspend(struct drm_i915_private *i915)  	 */  	drain_delayed_work(&i915->gt.idle_work); +	intel_uc_suspend(i915); +  	/*  	 * Assert that we successfully flushed all the work and  	 * reset the GPU back to its idle, low power state. @@ -5013,6 +5031,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)  		dev_priv->gt.cleanup_engine = intel_engine_cleanup;  	} +	i915_timelines_init(dev_priv); +  	ret = i915_gem_init_userptr(dev_priv);  	if (ret)  		return ret; @@ -5135,8 +5155,10 @@ err_unlock:  err_uc_misc:  	intel_uc_fini_misc(dev_priv); -	if (ret != -EIO) +	if (ret != -EIO) {  		i915_gem_cleanup_userptr(dev_priv); +		i915_timelines_fini(dev_priv); +	}  	if (ret == -EIO) {  		mutex_lock(&dev_priv->drm.struct_mutex); @@ -5187,6 +5209,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)  	intel_uc_fini_misc(dev_priv);  	i915_gem_cleanup_userptr(dev_priv); +	i915_timelines_fini(dev_priv);  	i915_gem_drain_freed_objects(dev_priv); @@ -5289,7 +5312,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)  	if (!dev_priv->priorities)  		goto err_dependencies; -	INIT_LIST_HEAD(&dev_priv->gt.timelines);  	INIT_LIST_HEAD(&dev_priv->gt.active_rings);  	INIT_LIST_HEAD(&dev_priv->gt.closed_vma); @@ -5333,7 +5355,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)  	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));  	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));  	WARN_ON(dev_priv->mm.object_count); -	WARN_ON(!list_empty(&dev_priv->gt.timelines));  	kmem_cache_destroy(dev_priv->priorities);  	kmem_cache_destroy(dev_priv->dependencies); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 93e84751370f..6faf1f6faab5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -327,6 +327,9 @@ intel_context_init(struct intel_context *ce,  		   struct intel_engine_cs *engine)  {  	ce->gem_context = ctx; + +	INIT_LIST_HEAD(&ce->signal_link); +	INIT_LIST_HEAD(&ce->signals);  }  static struct i915_gem_context * diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 3769438228f6..6ba40ff6b91f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h 
+++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -164,6 +164,8 @@ struct i915_gem_context {  	struct intel_context {  		struct i915_gem_context *gem_context;  		struct intel_engine_cs *active; +		struct list_head signal_link; +		struct list_head signals;  		struct i915_vma *state;  		struct intel_ring *ring;  		u32 *lrc_reg_state; diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index f6855401f247..68d74c50ac39 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -126,31 +126,25 @@ i915_gem_evict_something(struct i915_address_space *vm,  	struct drm_i915_private *dev_priv = vm->i915;  	struct drm_mm_scan scan;  	struct list_head eviction_list; -	struct list_head *phases[] = { -		&vm->inactive_list, -		&vm->active_list, -		NULL, -	}, **phase;  	struct i915_vma *vma, *next;  	struct drm_mm_node *node;  	enum drm_mm_insert_mode mode; +	struct i915_vma *active;  	int ret;  	lockdep_assert_held(&vm->i915->drm.struct_mutex);  	trace_i915_gem_evict(vm, min_size, alignment, flags);  	/* -	 * The goal is to evict objects and amalgamate space in LRU order. -	 * The oldest idle objects reside on the inactive list, which is in -	 * retirement order. The next objects to retire are those in flight, -	 * on the active list, again in retirement order. +	 * The goal is to evict objects and amalgamate space in rough LRU order. +	 * Since both active and inactive objects reside on the same list, +	 * in a mix of creation and last scanned order, as we process the list +	 * we sort it into inactive/active, which keeps the active portion +	 * in a rough MRU order.  	 *  	 * The retirement sequence is thus: -	 *   1. Inactive objects (already retired) -	 *   2. Active objects (will stall on unbinding) -	 * -	 * On each list, the oldest objects lie at the HEAD with the freshest -	 * object on the TAIL. +	 *   1. Inactive objects (already retired, random order) +	 *   2. Active objects (will stall on unbinding, oldest scanned first)  	 */  	mode = DRM_MM_INSERT_BEST;  	if (flags & PIN_HIGH) @@ -169,17 +163,46 @@ i915_gem_evict_something(struct i915_address_space *vm,  	 */  	if (!(flags & PIN_NONBLOCK))  		i915_retire_requests(dev_priv); -	else -		phases[1] = NULL;  search_again: +	active = NULL;  	INIT_LIST_HEAD(&eviction_list); -	phase = phases; -	do { -		list_for_each_entry(vma, *phase, vm_link) -			if (mark_free(&scan, vma, flags, &eviction_list)) -				goto found; -	} while (*++phase); +	list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) { +		/* +		 * We keep this list in a rough least-recently scanned order +		 * of active elements (inactive elements are cheap to reap). +		 * New entries are added to the end, and we move anything we +		 * scan to the end. The assumption is that the working set +		 * of applications is either steady state (and thanks to the +		 * userspace bo cache it almost always is) or volatile and +		 * frequently replaced after a frame, which are self-evicting! +		 * Given that assumption, the MRU order of the scan list is +		 * fairly static, and keeping it in least-recently scan order +		 * is suitable. +		 * +		 * To notice when we complete one full cycle, we record the +		 * first active element seen, before moving it to the tail. 
+		 */ +		if (i915_vma_is_active(vma)) { +			if (vma == active) { +				if (flags & PIN_NONBLOCK) +					break; + +				active = ERR_PTR(-EAGAIN); +			} + +			if (active != ERR_PTR(-EAGAIN)) { +				if (!active) +					active = vma; + +				list_move_tail(&vma->vm_link, &vm->bound_list); +				continue; +			} +		} + +		if (mark_free(&scan, vma, flags, &eviction_list)) +			goto found; +	}  	/* Nothing found, clean up and bail out! */  	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { @@ -388,11 +411,6 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,   */  int i915_gem_evict_vm(struct i915_address_space *vm)  { -	struct list_head *phases[] = { -		&vm->inactive_list, -		&vm->active_list, -		NULL -	}, **phase;  	struct list_head eviction_list;  	struct i915_vma *vma, *next;  	int ret; @@ -412,16 +430,15 @@ int i915_gem_evict_vm(struct i915_address_space *vm)  	}  	INIT_LIST_HEAD(&eviction_list); -	phase = phases; -	do { -		list_for_each_entry(vma, *phase, vm_link) { -			if (i915_vma_is_pinned(vma)) -				continue; +	mutex_lock(&vm->mutex); +	list_for_each_entry(vma, &vm->bound_list, vm_link) { +		if (i915_vma_is_pinned(vma)) +			continue; -			__i915_vma_pin(vma); -			list_add(&vma->evict_link, &eviction_list); -		} -	} while (*++phase); +		__i915_vma_pin(vma); +		list_add(&vma->evict_link, &eviction_list); +	} +	mutex_unlock(&vm->mutex);  	ret = 0;  	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f250109e1f66..8eedf7cac493 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1976,6 +1976,18 @@ static int eb_submit(struct i915_execbuffer *eb)  			return err;  	} +	/* +	 * After we completed waiting for other engines (using HW semaphores) +	 * then we can signal that this request/batch is ready to run. This +	 * allows us to determine if the batch is still waiting on the GPU +	 * or actually running by checking the breadcrumb. 
+	 */ +	if (eb->engine->emit_init_breadcrumb) { +		err = eb->engine->emit_init_breadcrumb(eb->request); +		if (err) +			return err; +	} +  	err = eb->engine->emit_bb_start(eb->request,  					eb->batch->node.start +  					eb->batch_start_offset, diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h index 99a31ded4dfd..09dcaf14121b 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h @@ -50,4 +50,3 @@ struct drm_i915_fence_reg {  };  #endif - diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9081e3bc5a59..49b00996a15e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -491,9 +491,8 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)  	stash_init(&vm->free_pages); -	INIT_LIST_HEAD(&vm->active_list); -	INIT_LIST_HEAD(&vm->inactive_list);  	INIT_LIST_HEAD(&vm->unbound_list); +	INIT_LIST_HEAD(&vm->bound_list);  }  static void i915_address_space_fini(struct i915_address_space *vm) @@ -1932,7 +1931,10 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)  	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */  	INIT_LIST_HEAD(&vma->obj_link); + +	mutex_lock(&vma->vm->mutex);  	list_add(&vma->vm_link, &vma->vm->unbound_list); +	mutex_unlock(&vma->vm->mutex);  	return vma;  } @@ -2111,8 +2113,7 @@ void i915_ppgtt_close(struct i915_address_space *vm)  static void ppgtt_destroy_vma(struct i915_address_space *vm)  {  	struct list_head *phases[] = { -		&vm->active_list, -		&vm->inactive_list, +		&vm->bound_list,  		&vm->unbound_list,  		NULL,  	}, **phase; @@ -2135,8 +2136,7 @@ void i915_ppgtt_release(struct kref *kref)  	ppgtt_destroy_vma(&ppgtt->vm); -	GEM_BUG_ON(!list_empty(&ppgtt->vm.active_list)); -	GEM_BUG_ON(!list_empty(&ppgtt->vm.inactive_list)); +	GEM_BUG_ON(!list_empty(&ppgtt->vm.bound_list));  	GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list));  	ppgtt->vm.cleanup(&ppgtt->vm); @@ -2801,8 +2801,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)  	mutex_lock(&dev_priv->drm.struct_mutex);  	i915_gem_fini_aliasing_ppgtt(dev_priv); -	GEM_BUG_ON(!list_empty(&ggtt->vm.active_list)); -	list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) +	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)  		WARN_ON(i915_vma_unbind(vma));  	if (drm_mm_node_allocated(&ggtt->error_capture)) @@ -3508,32 +3507,39 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)  	i915_check_and_clear_faults(dev_priv); +	mutex_lock(&ggtt->vm.mutex); +  	/* First fill our portion of the GTT with scratch pages */  	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); -  	ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */  	/* clflush objects bound into the GGTT and rebind them. */ -	GEM_BUG_ON(!list_empty(&ggtt->vm.active_list)); -	list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) { +	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {  		struct drm_i915_gem_object *obj = vma->obj;  		if (!(vma->flags & I915_VMA_GLOBAL_BIND))  			continue; +		mutex_unlock(&ggtt->vm.mutex); +  		if (!i915_vma_unbind(vma)) -			continue; +			goto lock;  		WARN_ON(i915_vma_bind(vma,  				      obj ? 
obj->cache_level : 0,  				      PIN_UPDATE));  		if (obj)  			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); + +lock: +		mutex_lock(&ggtt->vm.mutex);  	}  	ggtt->vm.closed = false;  	i915_ggtt_invalidate(dev_priv); +	mutex_unlock(&ggtt->vm.mutex); +  	if (INTEL_GEN(dev_priv) >= 8) {  		struct intel_ppat *ppat = &dev_priv->ppat; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 9229b03d629b..03ade71b8d9a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -39,6 +39,7 @@  #include <linux/pagevec.h>  #include "i915_request.h" +#include "i915_reset.h"  #include "i915_selftest.h"  #include "i915_timeline.h" @@ -298,32 +299,12 @@ struct i915_address_space {  	struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */  	/** -	 * List of objects currently involved in rendering. -	 * -	 * Includes buffers having the contents of their GPU caches -	 * flushed, not necessarily primitives. last_read_req -	 * represents when the rendering involved will be completed. -	 * -	 * A reference is held on the buffer while on this list. +	 * List of vma currently bound.  	 */ -	struct list_head active_list; +	struct list_head bound_list;  	/** -	 * LRU list of objects which are not in the ringbuffer and -	 * are ready to unbind, but are still in the GTT. -	 * -	 * last_read_req is NULL while an object is in this list. -	 * -	 * A reference is not held on the buffer while on this list, -	 * as merely being GTT-bound shouldn't prevent its being -	 * freed, and we'll pull it off the list in the free path. -	 */ -	struct list_head inactive_list; - -	/** -	 * List of vma that have been unbound. -	 * -	 * A reference is not held on the buffer while on this list. +	 * List of vma that are not unbound.  	 */  	struct list_head unbound_list; @@ -661,19 +642,19 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,  /* Flags used by pin/bind&friends. */  #define PIN_NONBLOCK		BIT_ULL(0) -#define PIN_MAPPABLE		BIT_ULL(1) -#define PIN_ZONE_4G		BIT_ULL(2) -#define PIN_NONFAULT		BIT_ULL(3) -#define PIN_NOEVICT		BIT_ULL(4) - -#define PIN_MBZ			BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */ -#define PIN_GLOBAL		BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */ -#define PIN_USER		BIT_ULL(7) /* I915_VMA_LOCAL_BIND */ -#define PIN_UPDATE		BIT_ULL(8) - -#define PIN_HIGH		BIT_ULL(9) -#define PIN_OFFSET_BIAS		BIT_ULL(10) -#define PIN_OFFSET_FIXED	BIT_ULL(11) +#define PIN_NONFAULT		BIT_ULL(1) +#define PIN_NOEVICT		BIT_ULL(2) +#define PIN_MAPPABLE		BIT_ULL(3) +#define PIN_ZONE_4G		BIT_ULL(4) +#define PIN_HIGH		BIT_ULL(5) +#define PIN_OFFSET_BIAS		BIT_ULL(6) +#define PIN_OFFSET_FIXED	BIT_ULL(7) + +#define PIN_MBZ			BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */ +#define PIN_GLOBAL		BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */ +#define PIN_USER		BIT_ULL(10) /* I915_VMA_LOCAL_BIND */ +#define PIN_UPDATE		BIT_ULL(11) +  #define PIN_OFFSET_MASK		(-I915_GTT_PAGE_SIZE)  #endif diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index cb1b0144d274..73fec917d097 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -87,24 +87,33 @@ struct drm_i915_gem_object {  	const struct drm_i915_gem_object_ops *ops; -	/** -	 * @vma_list: List of VMAs backed by this object -	 * -	 * The VMA on this list are ordered by type, all GGTT vma are placed -	 * at the head and all ppGTT vma are placed at the tail. 
The different -	 * types of GGTT vma are unordered between themselves, use the -	 * @vma_tree (which has a defined order between all VMA) to find an -	 * exact match. -	 */ -	struct list_head vma_list; -	/** -	 * @vma_tree: Ordered tree of VMAs backed by this object -	 * -	 * All VMA created for this object are placed in the @vma_tree for -	 * fast retrieval via a binary search in i915_vma_instance(). -	 * They are also added to @vma_list for easy iteration. -	 */ -	struct rb_root vma_tree; +	struct { +		/** +		 * @vma.lock: protect the list/tree of vmas +		 */ +		spinlock_t lock; + +		/** +		 * @vma.list: List of VMAs backed by this object +		 * +		 * The VMA on this list are ordered by type, all GGTT vma are +		 * placed at the head and all ppGTT vma are placed at the tail. +		 * The different types of GGTT vma are unordered between +		 * themselves, use the @vma.tree (which has a defined order +		 * between all VMA) to quickly find an exact match. +		 */ +		struct list_head list; + +		/** +		 * @vma.tree: Ordered tree of VMAs backed by this object +		 * +		 * All VMA created for this object are placed in the @vma.tree +		 * for fast retrieval via a binary search in +		 * i915_vma_instance(). They are also added to @vma.list for +		 * easy iteration. +		 */ +		struct rb_root tree; +	} vma;  	/**  	 * @lut_list: List of vma lookup entries in use for this object. diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 8ceecb026910..6da795c7e62e 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -461,12 +461,20 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr  					       I915_SHRINK_VMAPS);  	/* We also want to clear any cached iomaps as they wrap vmap */ +	mutex_lock(&i915->ggtt.vm.mutex);  	list_for_each_entry_safe(vma, next, -				 &i915->ggtt.vm.inactive_list, vm_link) { +				 &i915->ggtt.vm.bound_list, vm_link) {  		unsigned long count = vma->node.size >> PAGE_SHIFT; -		if (vma->iomap && i915_vma_unbind(vma) == 0) + +		if (!vma->iomap || i915_vma_is_active(vma)) +			continue; + +		mutex_unlock(&i915->ggtt.vm.mutex); +		if (i915_vma_unbind(vma) == 0)  			freed_pages += count; +		mutex_lock(&i915->ggtt.vm.mutex);  	} +	mutex_unlock(&i915->ggtt.vm.mutex);  out:  	shrinker_unlock(i915, unlock); diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 9df615eea2d8..74a9661479ca 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -701,7 +701,10 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv  	vma->pages = obj->mm.pages;  	vma->flags |= I915_VMA_GLOBAL_BIND;  	__i915_vma_set_map_and_fenceable(vma); -	list_move_tail(&vma->vm_link, &ggtt->vm.inactive_list); + +	mutex_lock(&ggtt->vm.mutex); +	list_move_tail(&vma->vm_link, &ggtt->vm.bound_list); +	mutex_unlock(&ggtt->vm.mutex);  	spin_lock(&dev_priv->mm.obj_lock);  	list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 1f8e80e31b49..6e2e5ed2bd0a 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -447,9 +447,14 @@ static void error_print_request(struct drm_i915_error_state_buf *m,  	if (!erq->seqno)  		return; -	err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", +	err_printf(m, "%s 
pid %d, ban score %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",  		   prefix, erq->pid, erq->ban_score, -		   erq->context, erq->seqno, erq->sched_attr.priority, +		   erq->context, erq->seqno, +		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, +			    &erq->flags) ? "!" : "", +		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, +			    &erq->flags) ? "+" : "", +		   erq->sched_attr.priority,  		   jiffies_to_msecs(erq->jiffies - epoch),  		   erq->start, erq->head, erq->tail);  } @@ -530,13 +535,9 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,  	}  	err_printf(m, "  seqno: 0x%08x\n", ee->seqno);  	err_printf(m, "  last_seqno: 0x%08x\n", ee->last_seqno); -	err_printf(m, "  waiting: %s\n", yesno(ee->waiting));  	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);  	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail); -	err_printf(m, "  hangcheck stall: %s\n", yesno(ee->hangcheck_stalled)); -	err_printf(m, "  hangcheck action: %s\n", -		   hangcheck_action_to_str(ee->hangcheck_action)); -	err_printf(m, "  hangcheck action timestamp: %dms (%lu%s)\n", +	err_printf(m, "  hangcheck timestamp: %dms (%lu%s)\n",  		   jiffies_to_msecs(ee->hangcheck_timestamp - epoch),  		   ee->hangcheck_timestamp,  		   ee->hangcheck_timestamp == epoch ? "; epoch" : ""); @@ -684,15 +685,15 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,  		   jiffies_to_msecs(error->capture - error->epoch));  	for (i = 0; i < ARRAY_SIZE(error->engine); i++) { -		if (error->engine[i].hangcheck_stalled && -		    error->engine[i].context.pid) { -			err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n", -				   engine_name(m->i915, i), -				   error->engine[i].context.comm, -				   error->engine[i].context.pid, -				   error->engine[i].context.ban_score, -				   bannable(&error->engine[i].context)); -		} +		if (!error->engine[i].context.pid) +			continue; + +		err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n", +			   engine_name(m->i915, i), +			   error->engine[i].context.comm, +			   error->engine[i].context.pid, +			   error->engine[i].context.ban_score, +			   bannable(&error->engine[i].context));  	}  	err_printf(m, "Reset count: %u\n", error->reset_count);  	err_printf(m, "Suspend count: %u\n", error->suspend_count); @@ -722,8 +723,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,  	err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);  	err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);  	err_printf(m, "CCID: 0x%08x\n", error->ccid); -	err_printf(m, "Missed interrupts: 0x%08lx\n", -		   m->i915->gpu_error.missed_irq_rings);  	for (i = 0; i < error->nfence; i++)  		err_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]); @@ -807,21 +806,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,  						    error->epoch);  		} -		if (IS_ERR(ee->waiters)) { -			err_printf(m, "%s --- ? 
waiters [unable to acquire spinlock]\n", -				   m->i915->engine[i]->name); -		} else if (ee->num_waiters) { -			err_printf(m, "%s --- %d waiters\n", -				   m->i915->engine[i]->name, -				   ee->num_waiters); -			for (j = 0; j < ee->num_waiters; j++) { -				err_printf(m, " seqno 0x%08x for %s [%d]\n", -					   ee->waiters[j].seqno, -					   ee->waiters[j].comm, -					   ee->waiters[j].pid); -			} -		} -  		print_error_obj(m, m->i915->engine[i],  				"ringbuffer", ee->ringbuffer); @@ -1003,8 +987,6 @@ void __i915_gpu_state_free(struct kref *error_ref)  		i915_error_object_free(ee->wa_ctx);  		kfree(ee->requests); -		if (!IS_ERR_OR_NULL(ee->waiters)) -			kfree(ee->waiters);  	}  	for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) @@ -1124,7 +1106,9 @@ static void capture_bo(struct drm_i915_error_buffer *err,  static u32 capture_error_bo(struct drm_i915_error_buffer *err,  			    int count, struct list_head *head, -			    bool pinned_only) +			    unsigned int flags) +#define ACTIVE_ONLY BIT(0) +#define PINNED_ONLY BIT(1)  {  	struct i915_vma *vma;  	int i = 0; @@ -1133,7 +1117,10 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err,  		if (!vma->obj)  			continue; -		if (pinned_only && !i915_vma_is_pinned(vma)) +		if (flags & ACTIVE_ONLY && !i915_vma_is_active(vma)) +			continue; + +		if (flags & PINNED_ONLY && !i915_vma_is_pinned(vma))  			continue;  		capture_bo(err++, vma); @@ -1144,7 +1131,8 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err,  	return i;  } -/* Generate a semi-unique error code. The code is not meant to have meaning, The +/* + * Generate a semi-unique error code. The code is not meant to have meaning, The   * code's only purpose is to try to prevent false duplicated bug reports by   * grossly estimating a GPU error state.   * @@ -1153,29 +1141,23 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err,   *   * It's only a small step better than a random number in its current form.   */ -static u32 i915_error_generate_code(struct drm_i915_private *dev_priv, -				    struct i915_gpu_state *error, -				    int *engine_id) +static u32 i915_error_generate_code(struct i915_gpu_state *error, +				    unsigned long engine_mask)  { -	u32 error_code = 0; -	int i; - -	/* IPEHR would be an ideal way to detect errors, as it's the gross +	/* +	 * IPEHR would be an ideal way to detect errors, as it's the gross  	 * measure of "the command that hung." However, has some very common  	 * synchronization commands which almost always appear in the case  	 * strictly a client bug. Use instdone to differentiate those some.  	 
*/ -	for (i = 0; i < I915_NUM_ENGINES; i++) { -		if (error->engine[i].hangcheck_stalled) { -			if (engine_id) -				*engine_id = i; +	if (engine_mask) { +		struct drm_i915_error_engine *ee = +			&error->engine[ffs(engine_mask)]; -			return error->engine[i].ipehr ^ -			       error->engine[i].instdone.instdone; -		} +		return ee->ipehr ^ ee->instdone.instdone;  	} -	return error_code; +	return 0;  }  static void gem_record_fences(struct i915_gpu_state *error) @@ -1208,59 +1190,6 @@ static void gen6_record_semaphore_state(struct intel_engine_cs *engine,  			I915_READ(RING_SYNC_2(engine->mmio_base));  } -static void error_record_engine_waiters(struct intel_engine_cs *engine, -					struct drm_i915_error_engine *ee) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	struct drm_i915_error_waiter *waiter; -	struct rb_node *rb; -	int count; - -	ee->num_waiters = 0; -	ee->waiters = NULL; - -	if (RB_EMPTY_ROOT(&b->waiters)) -		return; - -	if (!spin_trylock_irq(&b->rb_lock)) { -		ee->waiters = ERR_PTR(-EDEADLK); -		return; -	} - -	count = 0; -	for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb)) -		count++; -	spin_unlock_irq(&b->rb_lock); - -	waiter = NULL; -	if (count) -		waiter = kmalloc_array(count, -				       sizeof(struct drm_i915_error_waiter), -				       GFP_ATOMIC); -	if (!waiter) -		return; - -	if (!spin_trylock_irq(&b->rb_lock)) { -		kfree(waiter); -		ee->waiters = ERR_PTR(-EDEADLK); -		return; -	} - -	ee->waiters = waiter; -	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { -		struct intel_wait *w = rb_entry(rb, typeof(*w), node); - -		strcpy(waiter->comm, w->tsk->comm); -		waiter->pid = w->tsk->pid; -		waiter->seqno = w->seqno; -		waiter++; - -		if (++ee->num_waiters == count) -			break; -	} -	spin_unlock_irq(&b->rb_lock); -} -  static void error_record_engine_registers(struct i915_gpu_state *error,  					  struct intel_engine_cs *engine,  					  struct drm_i915_error_engine *ee) @@ -1296,7 +1225,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error,  	intel_engine_get_instdone(engine, &ee->instdone); -	ee->waiting = intel_engine_has_waiter(engine);  	ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base));  	ee->acthd = intel_engine_get_active_head(engine);  	ee->seqno = intel_engine_get_seqno(engine); @@ -1338,9 +1266,8 @@ static void error_record_engine_registers(struct i915_gpu_state *error,  	}  	ee->idle = intel_engine_is_idle(engine); -	ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; -	ee->hangcheck_action = engine->hangcheck.action; -	ee->hangcheck_stalled = engine->hangcheck.stalled; +	if (!ee->idle) +		ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;  	ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,  						  engine); @@ -1371,6 +1298,7 @@ static void record_request(struct i915_request *request,  {  	struct i915_gem_context *ctx = request->gem_context; +	erq->flags = request->fence.flags;  	erq->context = ctx->hw_id;  	erq->sched_attr = request->sched.attr;  	erq->ban_score = atomic_read(&ctx->ban_score); @@ -1546,7 +1474,6 @@ static void gem_record_rings(struct i915_gpu_state *error)  		ee->engine_id = i;  		error_record_engine_registers(error, engine, ee); -		error_record_engine_waiters(engine, ee);  		error_record_engine_execlists(engine, ee);  		request = i915_gem_find_active_request(engine); @@ -1610,14 +1537,17 @@ static void gem_capture_vm(struct i915_gpu_state *error,  	int count;  	count = 0; -	list_for_each_entry(vma, &vm->active_list, vm_link) -		count++; +	
list_for_each_entry(vma, &vm->bound_list, vm_link) +		if (i915_vma_is_active(vma)) +			count++;  	active_bo = NULL;  	if (count)  		active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);  	if (active_bo) -		count = capture_error_bo(active_bo, count, &vm->active_list, false); +		count = capture_error_bo(active_bo, +					 count, &vm->bound_list, +					 ACTIVE_ONLY);  	else  		count = 0; @@ -1655,28 +1585,20 @@ static void capture_pinned_buffers(struct i915_gpu_state *error)  	struct i915_address_space *vm = &error->i915->ggtt.vm;  	struct drm_i915_error_buffer *bo;  	struct i915_vma *vma; -	int count_inactive, count_active; - -	count_inactive = 0; -	list_for_each_entry(vma, &vm->inactive_list, vm_link) -		count_inactive++; +	int count; -	count_active = 0; -	list_for_each_entry(vma, &vm->active_list, vm_link) -		count_active++; +	count = 0; +	list_for_each_entry(vma, &vm->bound_list, vm_link) +		count++;  	bo = NULL; -	if (count_inactive + count_active) -		bo = kcalloc(count_inactive + count_active, -			     sizeof(*bo), GFP_ATOMIC); +	if (count) +		bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);  	if (!bo)  		return; -	count_inactive = capture_error_bo(bo, count_inactive, -					  &vm->active_list, true); -	count_active = capture_error_bo(bo + count_inactive, count_active, -					&vm->inactive_list, true); -	error->pinned_bo_count = count_inactive + count_active; +	error->pinned_bo_count = +		capture_error_bo(bo, count, &vm->bound_list, PINNED_ONLY);  	error->pinned_bo = bo;  } @@ -1783,31 +1705,35 @@ static void capture_reg_state(struct i915_gpu_state *error)  	error->pgtbl_er = I915_READ(PGTBL_ER);  } -static void i915_error_capture_msg(struct drm_i915_private *dev_priv, -				   struct i915_gpu_state *error, -				   u32 engine_mask, -				   const char *error_msg) +static const char * +error_msg(struct i915_gpu_state *error, unsigned long engines, const char *msg)  { -	u32 ecode; -	int engine_id = -1, len; +	int len; +	int i; -	ecode = i915_error_generate_code(dev_priv, error, &engine_id); +	for (i = 0; i < ARRAY_SIZE(error->engine); i++) +		if (!error->engine[i].context.pid) +			engines &= ~BIT(i);  	len = scnprintf(error->error_msg, sizeof(error->error_msg), -			"GPU HANG: ecode %d:%d:0x%08x", -			INTEL_GEN(dev_priv), engine_id, ecode); - -	if (engine_id != -1 && error->engine[engine_id].context.pid) +			"GPU HANG: ecode %d:%lx:0x%08x", +			INTEL_GEN(error->i915), engines, +			i915_error_generate_code(error, engines)); +	if (engines) { +		/* Just show the first executing process, more is confusing */ +		i = ffs(engines);  		len += scnprintf(error->error_msg + len,  				 sizeof(error->error_msg) - len,  				 ", in %s [%d]", -				 error->engine[engine_id].context.comm, -				 error->engine[engine_id].context.pid); +				 error->engine[i].context.comm, +				 error->engine[i].context.pid); +	} +	if (msg) +		len += scnprintf(error->error_msg + len, +				 sizeof(error->error_msg) - len, +				 ", %s", msg); -	scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, -		  ", reason: %s, action: %s", -		  error_msg, -		  engine_mask ? 
"reset" : "continue"); +	return error->error_msg;  }  static void capture_gen_state(struct i915_gpu_state *error) @@ -1847,7 +1773,7 @@ static unsigned long capture_find_epoch(const struct i915_gpu_state *error)  	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {  		const struct drm_i915_error_engine *ee = &error->engine[i]; -		if (ee->hangcheck_stalled && +		if (ee->hangcheck_timestamp &&  		    time_before(ee->hangcheck_timestamp, epoch))  			epoch = ee->hangcheck_timestamp;  	} @@ -1921,7 +1847,7 @@ i915_capture_gpu_state(struct drm_i915_private *i915)   * i915_capture_error_state - capture an error record for later analysis   * @i915: i915 device   * @engine_mask: the mask of engines triggering the hang - * @error_msg: a message to insert into the error capture header + * @msg: a message to insert into the error capture header   *   * Should be called when an error is detected (either a hang or an error   * interrupt) to capture error state from the time of the error.  Fills @@ -1929,8 +1855,8 @@ i915_capture_gpu_state(struct drm_i915_private *i915)   * to pick up.   */  void i915_capture_error_state(struct drm_i915_private *i915, -			      u32 engine_mask, -			      const char *error_msg) +			      unsigned long engine_mask, +			      const char *msg)  {  	static bool warned;  	struct i915_gpu_state *error; @@ -1946,8 +1872,7 @@ void i915_capture_error_state(struct drm_i915_private *i915,  	if (IS_ERR(error))  		return; -	i915_error_capture_msg(i915, error, engine_mask, error_msg); -	DRM_INFO("%s\n", error->error_msg); +	dev_info(i915->drm.dev, "%s\n", error_msg(error, engine_mask, msg));  	if (!error->simulated) {  		spin_lock_irqsave(&i915->gpu_error.lock, flags); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 604291f7762d..53b1f22dd365 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -82,11 +82,7 @@ struct i915_gpu_state {  		int engine_id;  		/* Software tracked state */  		bool idle; -		bool waiting; -		int num_waiters;  		unsigned long hangcheck_timestamp; -		bool hangcheck_stalled; -		enum intel_engine_hangcheck_action hangcheck_action;  		struct i915_address_space *vm;  		int num_requests;  		u32 reset_count; @@ -149,6 +145,7 @@ struct i915_gpu_state {  		struct drm_i915_error_object *default_state;  		struct drm_i915_error_request { +			unsigned long flags;  			long jiffies;  			pid_t pid;  			u32 context; @@ -161,12 +158,6 @@ struct i915_gpu_state {  		} *requests, execlist[EXECLIST_MAX_PORTS];  		unsigned int num_ports; -		struct drm_i915_error_waiter { -			char comm[TASK_COMM_LEN]; -			pid_t pid; -			u32 seqno; -		} *waiters; -  		struct {  			u32 gfx_mode;  			union { @@ -197,6 +188,8 @@ struct i915_gpu_state {  	struct scatterlist *sgl, *fit;  }; +struct i915_gpu_restart; +  struct i915_gpu_error {  	/* For hangcheck timer */  #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ @@ -211,8 +204,6 @@ struct i915_gpu_error {  	atomic_t pending_fb_pin; -	unsigned long missed_irq_rings; -  	/**  	 * State variable controlling the reset flow and count  	 * @@ -247,15 +238,6 @@ struct i915_gpu_error {  	 * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a  	 * secondary role in preventing two concurrent global reset attempts.  	 * -	 * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the -	 * struct_mutex. 
We try to acquire the struct_mutex in the reset worker, -	 * but it may be held by some long running waiter (that we cannot -	 * interrupt without causing trouble). Once we are ready to do the GPU -	 * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If -	 * they already hold the struct_mutex and want to participate they can -	 * inspect the bit and do the reset directly, otherwise the worker -	 * waits for the struct_mutex. -	 *  	 * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to  	 * acquire the struct_mutex to reset an engine, we need an explicit  	 * flag to prevent two concurrent reset attempts in the same engine. @@ -269,20 +251,13 @@ struct i915_gpu_error {  	 */  	unsigned long flags;  #define I915_RESET_BACKOFF	0 -#define I915_RESET_HANDOFF	1 -#define I915_RESET_MODESET	2 -#define I915_RESET_ENGINE	3 +#define I915_RESET_MODESET	1 +#define I915_RESET_ENGINE	2  #define I915_WEDGED		(BITS_PER_LONG - 1)  	/** Number of times an engine has been reset */  	u32 reset_engine_count[I915_NUM_ENGINES]; -	/** Set of stalled engines with guilty requests, in the current reset */ -	u32 stalled_mask; - -	/** Reason for the current *global* reset */ -	const char *reason; -  	struct mutex wedge_mutex; /* serialises wedging/unwedging */  	/** @@ -297,8 +272,7 @@ struct i915_gpu_error {  	 */  	wait_queue_head_t reset_queue; -	/* For missed irq/seqno simulation. */ -	unsigned long test_irq_rings; +	struct i915_gpu_restart *restart;  };  struct drm_i915_error_state_buf { @@ -320,7 +294,7 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);  struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);  void i915_capture_error_state(struct drm_i915_private *dev_priv, -			      u32 engine_mask, +			      unsigned long engine_mask,  			      const char *error_msg);  static inline struct i915_gpu_state * diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7e56611b3d60..441d2674b272 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -823,11 +823,26 @@ static void i915_enable_asle_pipestat(struct drm_i915_private *dev_priv)  static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe)  {  	struct drm_i915_private *dev_priv = to_i915(dev); +	struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; +	const struct drm_display_mode *mode = &vblank->hwmode;  	i915_reg_t high_frame, low_frame;  	u32 high1, high2, low, pixel, vbl_start, hsync_start, htotal; -	const struct drm_display_mode *mode = &dev->vblank[pipe].hwmode;  	unsigned long irqflags; +	/* +	 * On i965gm TV output the frame counter only works up to +	 * the point when we enable the TV encoder. After that the +	 * frame counter ceases to work and reads zero. We need a +	 * vblank wait before enabling the TV encoder and so we +	 * have to enable vblank interrupts while the frame counter +	 * is still in a working state. However the core vblank code +	 * does not like us returning non-zero frame counter values +	 * when we've told it that we don't have a working frame +	 * counter. Thus we must stop non-zero values leaking out. 
+	 */ +	if (!vblank->max_vblank_count) +		return 0; +  	htotal = mode->crtc_htotal;  	hsync_start = mode->crtc_hsync_start;  	vbl_start = mode->crtc_vblank_start; @@ -999,6 +1014,9 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,  	int position;  	int vbl_start, vbl_end, hsync_start, htotal, vtotal;  	unsigned long irqflags; +	bool use_scanline_counter = INTEL_GEN(dev_priv) >= 5 || +		IS_G4X(dev_priv) || IS_GEN(dev_priv, 2) || +		mode->private_flags & I915_MODE_FLAG_USE_SCANLINE_COUNTER;  	if (WARN_ON(!mode->crtc_clock)) {  		DRM_DEBUG_DRIVER("trying to get scanoutpos for disabled " @@ -1031,7 +1049,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,  	if (stime)  		*stime = ktime_get(); -	if (IS_GEN(dev_priv, 2) || IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { +	if (use_scanline_counter) {  		/* No obvious pixelcount register. Only query vertical  		 * scanout position from Display scan line register.  		 */ @@ -1091,7 +1109,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,  	else  		position += vtotal - vbl_end; -	if (IS_GEN(dev_priv, 2) || IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { +	if (use_scanline_counter) {  		*vpos = position;  		*hpos = 0;  	} else { @@ -1153,68 +1171,6 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)  	return;  } -static void notify_ring(struct intel_engine_cs *engine) -{ -	const u32 seqno = intel_engine_get_seqno(engine); -	struct i915_request *rq = NULL; -	struct task_struct *tsk = NULL; -	struct intel_wait *wait; - -	if (unlikely(!engine->breadcrumbs.irq_armed)) -		return; - -	rcu_read_lock(); - -	spin_lock(&engine->breadcrumbs.irq_lock); -	wait = engine->breadcrumbs.irq_wait; -	if (wait) { -		/* -		 * We use a callback from the dma-fence to submit -		 * requests after waiting on our own requests. To -		 * ensure minimum delay in queuing the next request to -		 * hardware, signal the fence now rather than wait for -		 * the signaler to be woken up. We still wake up the -		 * waiter in order to handle the irq-seqno coherency -		 * issues (we may receive the interrupt before the -		 * seqno is written, see __i915_request_irq_complete()) -		 * and to handle coalescing of multiple seqno updates -		 * and many waiters. 
-		 */ -		if (i915_seqno_passed(seqno, wait->seqno)) { -			struct i915_request *waiter = wait->request; - -			if (waiter && -			    !i915_request_signaled(waiter) && -			    intel_wait_check_request(wait, waiter)) -				rq = i915_request_get(waiter); - -			tsk = wait->tsk; -		} - -		engine->breadcrumbs.irq_count++; -	} else { -		if (engine->breadcrumbs.irq_armed) -			__intel_engine_disarm_breadcrumbs(engine); -	} -	spin_unlock(&engine->breadcrumbs.irq_lock); - -	if (rq) { -		spin_lock(&rq->lock); -		dma_fence_signal_locked(&rq->fence); -		GEM_BUG_ON(!i915_request_completed(rq)); -		spin_unlock(&rq->lock); - -		i915_request_put(rq); -	} - -	if (tsk && tsk->state & TASK_NORMAL) -		wake_up_process(tsk); - -	rcu_read_unlock(); - -	trace_intel_engine_notify(engine, wait); -} -  static void vlv_c0_read(struct drm_i915_private *dev_priv,  			struct intel_rps_ei *ei)  { @@ -1459,20 +1415,20 @@ static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv,  			       u32 gt_iir)  {  	if (gt_iir & GT_RENDER_USER_INTERRUPT) -		notify_ring(dev_priv->engine[RCS]); +		intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);  	if (gt_iir & ILK_BSD_USER_INTERRUPT) -		notify_ring(dev_priv->engine[VCS]); +		intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);  }  static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,  			       u32 gt_iir)  {  	if (gt_iir & GT_RENDER_USER_INTERRUPT) -		notify_ring(dev_priv->engine[RCS]); +		intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);  	if (gt_iir & GT_BSD_USER_INTERRUPT) -		notify_ring(dev_priv->engine[VCS]); +		intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);  	if (gt_iir & GT_BLT_USER_INTERRUPT) -		notify_ring(dev_priv->engine[BCS]); +		intel_engine_breadcrumbs_irq(dev_priv->engine[BCS]);  	if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |  		      GT_BSD_CS_ERROR_INTERRUPT | @@ -1492,7 +1448,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)  		tasklet = true;  	if (iir & GT_RENDER_USER_INTERRUPT) { -		notify_ring(engine); +		intel_engine_breadcrumbs_irq(engine);  		tasklet |= USES_GUC_SUBMISSION(engine->i915);  	} @@ -1838,7 +1794,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)  	if (HAS_VEBOX(dev_priv)) {  		if (pm_iir & PM_VEBOX_USER_INTERRUPT) -			notify_ring(dev_priv->engine[VECS]); +			intel_engine_breadcrumbs_irq(dev_priv->engine[VECS]);  		if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)  			DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); @@ -4262,7 +4218,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)  		I915_WRITE16(IIR, iir);  		if (iir & I915_USER_INTERRUPT) -			notify_ring(dev_priv->engine[RCS]); +			intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);  		if (iir & I915_MASTER_ERROR_INTERRUPT)  			i8xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4370,7 +4326,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)  		I915_WRITE(IIR, iir);  		if (iir & I915_USER_INTERRUPT) -			notify_ring(dev_priv->engine[RCS]); +			intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);  		if (iir & I915_MASTER_ERROR_INTERRUPT)  			i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4515,10 +4471,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)  		I915_WRITE(IIR, iir);  		if (iir & I915_USER_INTERRUPT) -			notify_ring(dev_priv->engine[RCS]); +			intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);  		if (iir & I915_BSD_USER_INTERRUPT) -			notify_ring(dev_priv->engine[VCS]); +			intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);  		if (iir & I915_MASTER_ERROR_INTERRUPT)  
			i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4581,16 +4537,10 @@ void intel_irq_init(struct drm_i915_private *dev_priv)  	if (INTEL_GEN(dev_priv) >= 8)  		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; -	if (IS_GEN(dev_priv, 2)) { -		/* Gen2 doesn't have a hardware frame counter */ -		dev->max_vblank_count = 0; -	} else if (IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { -		dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */ +	if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv))  		dev->driver->get_vblank_counter = g4x_get_vblank_counter; -	} else { +	else if (INTEL_GEN(dev_priv) >= 3)  		dev->driver->get_vblank_counter = i915_get_vblank_counter; -		dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */ -	}  	/*  	 * Opt out of the vblank disable timer on everything except gen2. diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 9f0539bdaa39..b5be0abbba35 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -97,8 +97,10 @@ i915_param_named_unsafe(disable_power_well, int, 0400,  i915_param_named_unsafe(enable_ips, int, 0600, "Enable IPS (default: true)"); -i915_param_named(fastboot, bool, 0600, -	"Try to skip unnecessary mode sets at boot time (default: false)"); +i915_param_named(fastboot, int, 0600, +	"Try to skip unnecessary mode sets at boot time " +	"(0=disabled, 1=enabled) " +	"Default: -1 (use per-chip default)");  i915_param_named_unsafe(prefault_disable, bool, 0600,  	"Disable page prefaulting for pread/pwrite/reloc (default:false). " diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 6efcf330bdab..3f14e9881a0d 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -63,10 +63,10 @@ struct drm_printer;  	param(int, edp_vswing, 0) \  	param(int, reset, 2) \  	param(unsigned int, inject_load_failure, 0) \ +	param(int, fastboot, -1) \  	/* leave bools at the end to not create holes */ \  	param(bool, alpha_support, IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT)) \  	param(bool, enable_hangcheck, true) \ -	param(bool, fastboot, false) \  	param(bool, prefault_disable, false) \  	param(bool, load_detect_test, false) \  	param(bool, force_reset_modeset_test, false) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 44c23ac60347..5d05572c9ff4 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -69,9 +69,15 @@  #define BDW_COLORS \  	.color = { .degamma_lut_size = 512, .gamma_lut_size = 512 }  #define CHV_COLORS \ -	.color = { .degamma_lut_size = 65, .gamma_lut_size = 257 } +	.color = { .degamma_lut_size = 65, .gamma_lut_size = 257, \ +		   .degamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING, \ +		   .gamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING, \ +	}  #define GLK_COLORS \ -	.color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } +	.color = { .degamma_lut_size = 0, .gamma_lut_size = 1024, \ +		   .degamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING | \ +					DRM_COLOR_LUT_EQUAL_CHANNELS, \ +	}  /* Keep in gen based order, and chronological order within a gen */ @@ -707,6 +713,7 @@ static const struct pci_device_id pciidlist[] = {  	INTEL_AML_KBL_GT2_IDS(&intel_kabylake_gt2_info),  	INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info),  	INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info), +	INTEL_CFL_H_GT1_IDS(&intel_coffeelake_gt1_info),  	INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info),  	
INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info),  	INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info), diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f4e447437d75..ede54fdc1676 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2801,6 +2801,9 @@ enum i915_power_well_id {  #define GEN6_RCS_PWR_FSM _MMIO(0x22ac)  #define GEN9_RCS_FE_FSM2 _MMIO(0x22a4) +#define GEN10_CACHE_MODE_SS			_MMIO(0xe420) +#define   FLOAT_BLEND_OPTIMIZATION_ENABLE	(1 << 4) +  /* Fuse readout registers for GT */  #define HSW_PAVP_FUSE1			_MMIO(0x911C)  #define   HSW_F1_EU_DIS_SHIFT		16 @@ -4895,6 +4898,7 @@ enum {  # define TV_OVERSAMPLE_NONE		(2 << 18)  /* Selects 8x oversampling */  # define TV_OVERSAMPLE_8X		(3 << 18) +# define TV_OVERSAMPLE_MASK		(3 << 18)  /* Selects progressive mode rather than interlaced */  # define TV_PROGRESSIVE			(1 << 17)  /* Sets the colorburst to PAL mode.  Required for non-M PAL modes. */ @@ -5709,6 +5713,12 @@ enum {  #define   PIPEMISC_DITHER_TYPE_SP	(0 << 2)  #define PIPEMISC(pipe)			_MMIO_PIPE2(pipe, _PIPE_MISC_A) +/* Skylake+ pipe bottom (background) color */ +#define _SKL_BOTTOM_COLOR_A		0x70034 +#define   SKL_BOTTOM_COLOR_GAMMA_ENABLE	(1 << 31) +#define   SKL_BOTTOM_COLOR_CSC_ENABLE	(1 << 30) +#define SKL_BOTTOM_COLOR(pipe)		_MMIO_PIPE2(pipe, _SKL_BOTTOM_COLOR_A) +  #define VLV_DPFLIPSTAT				_MMIO(VLV_DISPLAY_BASE + 0x70028)  #define   PIPEB_LINE_COMPARE_INT_EN		(1 << 29)  #define   PIPEB_HLINE_INT_EN			(1 << 28) @@ -9553,7 +9563,7 @@ enum skl_power_gate {  #define _MG_PLL3_ENABLE		0x46038  #define _MG_PLL4_ENABLE		0x4603C  /* Bits are the same as DPLL0_ENABLE */ -#define MG_PLL_ENABLE(port)	_MMIO_PORT((port) - PORT_C, _MG_PLL1_ENABLE, \ +#define MG_PLL_ENABLE(tc_port)	_MMIO_PORT((tc_port), _MG_PLL1_ENABLE, \  					   _MG_PLL2_ENABLE)  #define _MG_REFCLKIN_CTL_PORT1				0x16892C @@ -9562,9 +9572,9 @@ enum skl_power_gate {  #define _MG_REFCLKIN_CTL_PORT4				0x16B92C  #define   MG_REFCLKIN_CTL_OD_2_MUX(x)			((x) << 8)  #define   MG_REFCLKIN_CTL_OD_2_MUX_MASK			(0x7 << 8) -#define MG_REFCLKIN_CTL(port) _MMIO_PORT((port) - PORT_C, \ -					 _MG_REFCLKIN_CTL_PORT1, \ -					 _MG_REFCLKIN_CTL_PORT2) +#define MG_REFCLKIN_CTL(tc_port) _MMIO_PORT((tc_port), \ +					    _MG_REFCLKIN_CTL_PORT1, \ +					    _MG_REFCLKIN_CTL_PORT2)  #define _MG_CLKTOP2_CORECLKCTL1_PORT1			0x1688D8  #define _MG_CLKTOP2_CORECLKCTL1_PORT2			0x1698D8 @@ -9574,9 +9584,9 @@ enum skl_power_gate {  #define   MG_CLKTOP2_CORECLKCTL1_B_DIVRATIO_MASK	(0xff << 16)  #define   MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(x)		((x) << 8)  #define   MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK	(0xff << 8) -#define MG_CLKTOP2_CORECLKCTL1(port) _MMIO_PORT((port) - PORT_C, \ -						_MG_CLKTOP2_CORECLKCTL1_PORT1, \ -						_MG_CLKTOP2_CORECLKCTL1_PORT2) +#define MG_CLKTOP2_CORECLKCTL1(tc_port) _MMIO_PORT((tc_port), \ +						   _MG_CLKTOP2_CORECLKCTL1_PORT1, \ +						   _MG_CLKTOP2_CORECLKCTL1_PORT2)  #define _MG_CLKTOP2_HSCLKCTL_PORT1			0x1688D4  #define _MG_CLKTOP2_HSCLKCTL_PORT2			0x1698D4 @@ -9594,9 +9604,9 @@ enum skl_power_gate {  #define   MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(x)		((x) << 8)  #define   MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_SHIFT		8  #define   MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK		(0xf << 8) -#define MG_CLKTOP2_HSCLKCTL(port) _MMIO_PORT((port) - PORT_C, \ -					     _MG_CLKTOP2_HSCLKCTL_PORT1, \ -					     _MG_CLKTOP2_HSCLKCTL_PORT2) +#define MG_CLKTOP2_HSCLKCTL(tc_port) _MMIO_PORT((tc_port), \ +						_MG_CLKTOP2_HSCLKCTL_PORT1, \ +						_MG_CLKTOP2_HSCLKCTL_PORT2)  
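
For reference, an illustrative sketch (not part of the patch) of the addressing pattern the MG_* PLL macro hunks in this file rely on: each hunk switches the index passed to _MMIO_PORT() from "(port) - PORT_C" to an already zero-based tc_port, so callers index Type-C ports directly instead of every call site carrying the "- PORT_C" offset. Assuming _MMIO_PORT() resolves addresses in the usual two-anchor _PICK_EVEN style (base0 + index * (base1 - base0)), the standalone C sketch below shows the resulting address arithmetic. The helper names (PICK_EVEN, mg_pll_enable_addr) and the 0x46030/0x46034 anchor values (inferred from the _MG_PLL3/4_ENABLE offsets shown above) are assumptions for illustration, not the driver's actual definitions.

#include <stdio.h>

/* Two-anchor register picking, assumed to mirror _PICK_EVEN()/_MMIO_PORT(). */
#define PICK_EVEN(idx, a, b)	((a) + (idx) * ((b) - (a)))

#define MG_PLL1_ENABLE_ADDR	0x46030u	/* assumed; PLL3/PLL4 above are 0x46038/0x4603C */
#define MG_PLL2_ENABLE_ADDR	0x46034u	/* assumed */

/* Address of MG_PLL_ENABLE for a zero-based Type-C port index. */
static unsigned int mg_pll_enable_addr(unsigned int tc_port)
{
	return PICK_EVEN(tc_port, MG_PLL1_ENABLE_ADDR, MG_PLL2_ENABLE_ADDR);
}

int main(void)
{
	unsigned int tc_port;

	/* tc_port 0..3 -> 0x46030, 0x46034, 0x46038, 0x4603C */
	for (tc_port = 0; tc_port < 4; tc_port++)
		printf("MG_PLL_ENABLE(%u) = 0x%05x\n",
		       tc_port, mg_pll_enable_addr(tc_port));
	return 0;
}
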
#define _MG_PLL_DIV0_PORT1				0x168A00  #define _MG_PLL_DIV0_PORT2				0x169A00 @@ -9608,8 +9618,8 @@ enum skl_power_gate {  #define   MG_PLL_DIV0_FBDIV_FRAC(x)			((x) << 8)  #define   MG_PLL_DIV0_FBDIV_INT_MASK			(0xff << 0)  #define   MG_PLL_DIV0_FBDIV_INT(x)			((x) << 0) -#define MG_PLL_DIV0(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV0_PORT1, \ -				     _MG_PLL_DIV0_PORT2) +#define MG_PLL_DIV0(tc_port) _MMIO_PORT((tc_port), _MG_PLL_DIV0_PORT1, \ +					_MG_PLL_DIV0_PORT2)  #define _MG_PLL_DIV1_PORT1				0x168A04  #define _MG_PLL_DIV1_PORT2				0x169A04 @@ -9623,8 +9633,8 @@ enum skl_power_gate {  #define   MG_PLL_DIV1_NDIVRATIO(x)			((x) << 4)  #define   MG_PLL_DIV1_FBPREDIV_MASK			(0xf << 0)  #define   MG_PLL_DIV1_FBPREDIV(x)			((x) << 0) -#define MG_PLL_DIV1(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV1_PORT1, \ -				     _MG_PLL_DIV1_PORT2) +#define MG_PLL_DIV1(tc_port) _MMIO_PORT((tc_port), _MG_PLL_DIV1_PORT1, \ +					_MG_PLL_DIV1_PORT2)  #define _MG_PLL_LF_PORT1				0x168A08  #define _MG_PLL_LF_PORT2				0x169A08 @@ -9636,8 +9646,8 @@ enum skl_power_gate {  #define   MG_PLL_LF_GAINCTRL(x)				((x) << 16)  #define   MG_PLL_LF_INT_COEFF(x)			((x) << 8)  #define   MG_PLL_LF_PROP_COEFF(x)			((x) << 0) -#define MG_PLL_LF(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_LF_PORT1, \ -				   _MG_PLL_LF_PORT2) +#define MG_PLL_LF(tc_port) _MMIO_PORT((tc_port), _MG_PLL_LF_PORT1, \ +				      _MG_PLL_LF_PORT2)  #define _MG_PLL_FRAC_LOCK_PORT1				0x168A0C  #define _MG_PLL_FRAC_LOCK_PORT2				0x169A0C @@ -9649,9 +9659,9 @@ enum skl_power_gate {  #define   MG_PLL_FRAC_LOCK_DCODITHEREN			(1 << 10)  #define   MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN		(1 << 8)  #define   MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(x)		((x) << 0) -#define MG_PLL_FRAC_LOCK(port) _MMIO_PORT((port) - PORT_C, \ -					  _MG_PLL_FRAC_LOCK_PORT1, \ -					  _MG_PLL_FRAC_LOCK_PORT2) +#define MG_PLL_FRAC_LOCK(tc_port) _MMIO_PORT((tc_port), \ +					     _MG_PLL_FRAC_LOCK_PORT1, \ +					     _MG_PLL_FRAC_LOCK_PORT2)  #define _MG_PLL_SSC_PORT1				0x168A10  #define _MG_PLL_SSC_PORT2				0x169A10 @@ -9663,8 +9673,8 @@ enum skl_power_gate {  #define   MG_PLL_SSC_STEPNUM(x)				((x) << 10)  #define   MG_PLL_SSC_FLLEN				(1 << 9)  #define   MG_PLL_SSC_STEPSIZE(x)			((x) << 0) -#define MG_PLL_SSC(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_SSC_PORT1, \ -				    _MG_PLL_SSC_PORT2) +#define MG_PLL_SSC(tc_port) _MMIO_PORT((tc_port), _MG_PLL_SSC_PORT1, \ +				       _MG_PLL_SSC_PORT2)  #define _MG_PLL_BIAS_PORT1				0x168A14  #define _MG_PLL_BIAS_PORT2				0x169A14 @@ -9683,8 +9693,8 @@ enum skl_power_gate {  #define   MG_PLL_BIAS_VREF_RDAC_MASK			(0x7 << 5)  #define   MG_PLL_BIAS_IREFTRIM(x)			((x) << 0)  #define   MG_PLL_BIAS_IREFTRIM_MASK			(0x1f << 0) -#define MG_PLL_BIAS(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_BIAS_PORT1, \ -				     _MG_PLL_BIAS_PORT2) +#define MG_PLL_BIAS(tc_port) _MMIO_PORT((tc_port), _MG_PLL_BIAS_PORT1, \ +					_MG_PLL_BIAS_PORT2)  #define _MG_PLL_TDC_COLDST_BIAS_PORT1			0x168A18  #define _MG_PLL_TDC_COLDST_BIAS_PORT2			0x169A18 @@ -9695,9 +9705,9 @@ enum skl_power_gate {  #define   MG_PLL_TDC_COLDST_COLDSTART			(1 << 16)  #define   MG_PLL_TDC_TDCOVCCORR_EN			(1 << 2)  #define   MG_PLL_TDC_TDCSEL(x)				((x) << 0) -#define MG_PLL_TDC_COLDST_BIAS(port) _MMIO_PORT((port) - PORT_C, \ -						_MG_PLL_TDC_COLDST_BIAS_PORT1, \ -						_MG_PLL_TDC_COLDST_BIAS_PORT2) +#define MG_PLL_TDC_COLDST_BIAS(tc_port) _MMIO_PORT((tc_port), \ +						   _MG_PLL_TDC_COLDST_BIAS_PORT1, \ +						   _MG_PLL_TDC_COLDST_BIAS_PORT2)  #define _CNL_DPLL0_CFGCR0		0x6C000  #define 
_CNL_DPLL1_CFGCR0		0x6C080 diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f941e40fd373..9ed5baf157a3 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -60,7 +60,7 @@ static bool i915_fence_signaled(struct dma_fence *fence)  static bool i915_fence_enable_signaling(struct dma_fence *fence)  { -	return intel_engine_enable_signaling(to_request(fence), true); +	return i915_request_enable_breadcrumb(to_request(fence));  }  static signed long i915_fence_wait(struct dma_fence *fence, @@ -182,10 +182,11 @@ static void free_capture_list(struct i915_request *request)  static void __retire_engine_request(struct intel_engine_cs *engine,  				    struct i915_request *rq)  { -	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n", +	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n",  		  __func__, engine->name,  		  rq->fence.context, rq->fence.seqno,  		  rq->global_seqno, +		  hwsp_seqno(rq),  		  intel_engine_get_seqno(engine));  	GEM_BUG_ON(!i915_request_completed(rq)); @@ -198,10 +199,11 @@ static void __retire_engine_request(struct intel_engine_cs *engine,  	spin_unlock(&engine->timeline.lock);  	spin_lock(&rq->lock); +	i915_request_mark_complete(rq);  	if (!i915_request_signaled(rq))  		dma_fence_signal_locked(&rq->fence);  	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) -		intel_engine_cancel_signaling(rq); +		i915_request_cancel_breadcrumb(rq);  	if (rq->waitboost) {  		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));  		atomic_dec(&rq->i915->gt_pm.rps.num_waiters); @@ -244,10 +246,11 @@ static void i915_request_retire(struct i915_request *request)  {  	struct i915_gem_active *active, *next; -	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", +	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",  		  request->engine->name,  		  request->fence.context, request->fence.seqno,  		  request->global_seqno, +		  hwsp_seqno(request),  		  intel_engine_get_seqno(request->engine));  	lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -307,10 +310,11 @@ void i915_request_retire_upto(struct i915_request *rq)  	struct intel_ring *ring = rq->ring;  	struct i915_request *tmp; -	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", +	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",  		  rq->engine->name,  		  rq->fence.context, rq->fence.seqno,  		  rq->global_seqno, +		  hwsp_seqno(rq),  		  intel_engine_get_seqno(rq->engine));  	lockdep_assert_held(&rq->i915->drm.struct_mutex); @@ -329,7 +333,7 @@ void i915_request_retire_upto(struct i915_request *rq)  static u32 timeline_get_seqno(struct i915_timeline *tl)  { -	return ++tl->seqno; +	return tl->seqno += 1 + tl->has_initial_breadcrumb;  }  static void move_to_timeline(struct i915_request *request, @@ -355,10 +359,11 @@ void __i915_request_submit(struct i915_request *request)  	struct intel_engine_cs *engine = request->engine;  	u32 seqno; -	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n", +	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n",  		  engine->name,  		  request->fence.context, request->fence.seqno,  		  engine->timeline.seqno + 1, +		  hwsp_seqno(request),  		  intel_engine_get_seqno(engine));  	GEM_BUG_ON(!irqs_disabled()); @@ -372,20 +377,21 @@ void __i915_request_submit(struct i915_request *request)  	/* We may be recursing from the signal callback of another i915 fence */  	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); +	
GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); +	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);  	request->global_seqno = seqno; -	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) -		intel_engine_enable_signaling(request, false); +	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && +	    !i915_request_enable_breadcrumb(request)) +		intel_engine_queue_breadcrumbs(engine);  	spin_unlock(&request->lock); -	engine->emit_breadcrumb(request, -				request->ring->vaddr + request->postfix); +	engine->emit_fini_breadcrumb(request, +				     request->ring->vaddr + request->postfix);  	/* Transfer from per-context onto the global per-engine timeline */  	move_to_timeline(request, &engine->timeline);  	trace_i915_request_execute(request); - -	wake_up_all(&request->execute);  }  void i915_request_submit(struct i915_request *request) @@ -405,10 +411,11 @@ void __i915_request_unsubmit(struct i915_request *request)  {  	struct intel_engine_cs *engine = request->engine; -	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n", +	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n",  		  engine->name,  		  request->fence.context, request->fence.seqno,  		  request->global_seqno, +		  hwsp_seqno(request),  		  intel_engine_get_seqno(engine));  	GEM_BUG_ON(!irqs_disabled()); @@ -427,7 +434,9 @@ void __i915_request_unsubmit(struct i915_request *request)  	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);  	request->global_seqno = 0;  	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) -		intel_engine_cancel_signaling(request); +		i915_request_cancel_breadcrumb(request); +	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); +	clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);  	spin_unlock(&request->lock);  	/* Transfer back from the global per-engine timeline to per-context */ @@ -616,6 +625,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)  	rq->ring = ce->ring;  	rq->timeline = ce->ring->timeline;  	GEM_BUG_ON(rq->timeline == &engine->timeline); +	rq->hwsp_seqno = rq->timeline->hwsp_seqno;  	spin_lock_init(&rq->lock);  	dma_fence_init(&rq->fence, @@ -626,13 +636,11 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)  	/* We bump the ref for the fence chain */  	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); -	init_waitqueue_head(&rq->execute);  	i915_sched_node_init(&rq->sched);  	/* No zalloc, must clear what we need by hand */  	rq->global_seqno = 0; -	rq->signaling.wait.seqno = 0;  	rq->file_priv = NULL;  	rq->batch = NULL;  	rq->capture_list = NULL; @@ -650,7 +658,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)  	 * around inside i915_request_add() there is sufficient space at  	 * the beginning of the ring as well.  	 */ -	rq->reserved_space = 2 * engine->emit_breadcrumb_sz * sizeof(u32); +	rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32);  	/*  	 * Record the position of the start of the request so that @@ -901,7 +909,7 @@ void i915_request_add(struct i915_request *request)  	 * GPU processing the request, we never over-estimate the  	 * position of the ring's HEAD.  	 
*/ -	cs = intel_ring_begin(request, engine->emit_breadcrumb_sz); +	cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw);  	GEM_BUG_ON(IS_ERR(cs));  	request->postfix = intel_ring_offset(request, cs); @@ -1023,13 +1031,10 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)  	return this_cpu != cpu;  } -static bool __i915_spin_request(const struct i915_request *rq, -				u32 seqno, int state, unsigned long timeout_us) +static bool __i915_spin_request(const struct i915_request * const rq, +				int state, unsigned long timeout_us)  { -	struct intel_engine_cs *engine = rq->engine; -	unsigned int irq, cpu; - -	GEM_BUG_ON(!seqno); +	unsigned int cpu;  	/*  	 * Only wait for the request if we know it is likely to complete. @@ -1037,12 +1042,12 @@ static bool __i915_spin_request(const struct i915_request *rq,  	 * We don't track the timestamps around requests, nor the average  	 * request length, so we do not have a good indicator that this  	 * request will complete within the timeout. What we do know is the -	 * order in which requests are executed by the engine and so we can -	 * tell if the request has started. If the request hasn't started yet, -	 * it is a fair assumption that it will not complete within our -	 * relatively short timeout. +	 * order in which requests are executed by the context and so we can +	 * tell if the request has been started. If the request is not even +	 * running yet, it is a fair assumption that it will not complete +	 * within our relatively short timeout.  	 */ -	if (!intel_engine_has_started(engine, seqno)) +	if (!i915_request_is_running(rq))  		return false;  	/* @@ -1056,20 +1061,10 @@ static bool __i915_spin_request(const struct i915_request *rq,  	 * takes to sleep on a request, on the order of a microsecond.  	 */ -	irq = READ_ONCE(engine->breadcrumbs.irq_count);  	timeout_us += local_clock_us(&cpu);  	do { -		if (intel_engine_has_completed(engine, seqno)) -			return seqno == i915_request_global_seqno(rq); - -		/* -		 * Seqno are meant to be ordered *before* the interrupt. If -		 * we see an interrupt without a corresponding seqno advance, -		 * assume we won't see one in the near future but require -		 * the engine->seqno_barrier() to fixup coherency. -		 */ -		if (READ_ONCE(engine->breadcrumbs.irq_count) != irq) -			break; +		if (i915_request_completed(rq)) +			return true;  		if (signal_pending_state(state, current))  			break; @@ -1083,16 +1078,16 @@ static bool __i915_spin_request(const struct i915_request *rq,  	return false;  } -static bool __i915_wait_request_check_and_reset(struct i915_request *request) -{ -	struct i915_gpu_error *error = &request->i915->gpu_error; +struct request_wait { +	struct dma_fence_cb cb; +	struct task_struct *tsk; +}; -	if (likely(!i915_reset_handoff(error))) -		return false; +static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb) +{ +	struct request_wait *wait = container_of(cb, typeof(*wait), cb); -	__set_current_state(TASK_RUNNING); -	i915_reset(request->i915, error->stalled_mask, error->reason); -	return true; +	wake_up_process(wait->tsk);  }  /** @@ -1120,17 +1115,9 @@ long i915_request_wait(struct i915_request *rq,  {  	const int state = flags & I915_WAIT_INTERRUPTIBLE ?  		
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; -	wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue; -	DEFINE_WAIT_FUNC(reset, default_wake_function); -	DEFINE_WAIT_FUNC(exec, default_wake_function); -	struct intel_wait wait; +	struct request_wait wait;  	might_sleep(); -#if IS_ENABLED(CONFIG_LOCKDEP) -	GEM_BUG_ON(debug_locks && -		   !!lockdep_is_held(&rq->i915->drm.struct_mutex) != -		   !!(flags & I915_WAIT_LOCKED)); -#endif  	GEM_BUG_ON(timeout < 0);  	if (i915_request_completed(rq)) @@ -1141,57 +1128,23 @@ long i915_request_wait(struct i915_request *rq,  	trace_i915_request_wait_begin(rq, flags); -	add_wait_queue(&rq->execute, &exec); -	if (flags & I915_WAIT_LOCKED) -		add_wait_queue(errq, &reset); +	/* Optimistic short spin before touching IRQs */ +	if (__i915_spin_request(rq, state, 5)) +		goto out; -	intel_wait_init(&wait);  	if (flags & I915_WAIT_PRIORITY)  		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); -restart: -	do { -		set_current_state(state); -		if (intel_wait_update_request(&wait, rq)) -			break; - -		if (flags & I915_WAIT_LOCKED && -		    __i915_wait_request_check_and_reset(rq)) -			continue; - -		if (signal_pending_state(state, current)) { -			timeout = -ERESTARTSYS; -			goto complete; -		} - -		if (!timeout) { -			timeout = -ETIME; -			goto complete; -		} - -		timeout = io_schedule_timeout(timeout); -	} while (1); - -	GEM_BUG_ON(!intel_wait_has_seqno(&wait)); -	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); - -	/* Optimistic short spin before touching IRQs */ -	if (__i915_spin_request(rq, wait.seqno, state, 5)) -		goto complete; +	wait.tsk = current; +	if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake)) +		goto out; -	set_current_state(state); -	if (intel_engine_add_wait(rq->engine, &wait)) -		/* -		 * In order to check that we haven't missed the interrupt -		 * as we enabled it, we need to kick ourselves to do a -		 * coherent check on the seqno before we sleep. -		 */ -		goto wakeup; +	for (;;) { +		set_current_state(state); -	if (flags & I915_WAIT_LOCKED) -		__i915_wait_request_check_and_reset(rq); +		if (i915_request_completed(rq)) +			break; -	for (;;) {  		if (signal_pending_state(state, current)) {  			timeout = -ERESTARTSYS;  			break; @@ -1203,50 +1156,13 @@ restart:  		}  		timeout = io_schedule_timeout(timeout); - -		if (intel_wait_complete(&wait) && -		    intel_wait_check_request(&wait, rq)) -			break; - -		set_current_state(state); - -wakeup: -		if (i915_request_completed(rq)) -			break; - -		/* -		 * If the GPU is hung, and we hold the lock, reset the GPU -		 * and then check for completion. On a full reset, the engine's -		 * HW seqno will be advanced passed us and we are complete. -		 * If we do a partial reset, we have to wait for the GPU to -		 * resume and update the breadcrumb. -		 * -		 * If we don't hold the mutex, we can just wait for the worker -		 * to come along and update the breadcrumb (either directly -		 * itself, or indirectly by recovering the GPU). 
-		 */ -		if (flags & I915_WAIT_LOCKED && -		    __i915_wait_request_check_and_reset(rq)) -			continue; - -		/* Only spin if we know the GPU is processing this request */ -		if (__i915_spin_request(rq, wait.seqno, state, 2)) -			break; - -		if (!intel_wait_check_request(&wait, rq)) { -			intel_engine_remove_wait(rq->engine, &wait); -			goto restart; -		}  	} - -	intel_engine_remove_wait(rq->engine, &wait); -complete:  	__set_current_state(TASK_RUNNING); -	if (flags & I915_WAIT_LOCKED) -		remove_wait_queue(errq, &reset); -	remove_wait_queue(&rq->execute, &exec); -	trace_i915_request_wait_end(rq); +	dma_fence_remove_callback(&rq->fence, &wait.cb); + +out: +	trace_i915_request_wait_end(rq);  	return timeout;  } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index c0f084ca4f29..3cffb96203b9 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -38,23 +38,34 @@ struct drm_i915_gem_object;  struct i915_request;  struct i915_timeline; -struct intel_wait { -	struct rb_node node; -	struct task_struct *tsk; -	struct i915_request *request; -	u32 seqno; -}; - -struct intel_signal_node { -	struct intel_wait wait; -	struct list_head link; -}; -  struct i915_capture_list {  	struct i915_capture_list *next;  	struct i915_vma *vma;  }; +enum { +	/* +	 * I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW. +	 * +	 * Set by __i915_request_submit() on handing over to HW, and cleared +	 * by __i915_request_unsubmit() if we preempt this request. +	 * +	 * Finally cleared for consistency on retiring the request, when +	 * we know the HW is no longer running this request. +	 * +	 * See i915_request_is_active() +	 */ +	I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS, + +	/* +	 * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list +	 * +	 * Internal bookkeeping used by the breadcrumb code to track when +	 * a request is on the various signal_list. +	 */ +	I915_FENCE_FLAG_SIGNAL, +}; +  /**   * Request queue structure.   * @@ -97,7 +108,7 @@ struct i915_request {  	struct intel_context *hw_context;  	struct intel_ring *ring;  	struct i915_timeline *timeline; -	struct intel_signal_node signaling; +	struct list_head signal_link;  	/*  	 * The rcu epoch of when this request was allocated. Used to judiciously @@ -116,7 +127,6 @@ struct i915_request {  	 */  	struct i915_sw_fence submit;  	wait_queue_entry_t submitq; -	wait_queue_head_t execute;  	/*  	 * A list of everyone we wait upon, and everyone who waits upon us. @@ -130,6 +140,13 @@ struct i915_request {  	struct i915_sched_node sched;  	struct i915_dependency dep; +	/* +	 * A convenience pointer to the current breadcrumb value stored in +	 * the HW status page (or our timeline's local equivalent). The full +	 * path would be rq->hw_context->ring->timeline->hwsp_seqno. +	 */ +	const u32 *hwsp_seqno; +  	/**  	 * GEM sequence number associated with this request on the  	 * global execution timeline. It is zero when the request is not @@ -248,7 +265,7 @@ i915_request_put(struct i915_request *rq)   * that it has passed the global seqno and the global seqno is unchanged   * after the read, it is indeed complete).   
*/ -static u32 +static inline u32  i915_request_global_seqno(const struct i915_request *request)  {  	return READ_ONCE(request->global_seqno); @@ -270,6 +287,10 @@ void i915_request_skip(struct i915_request *request, int error);  void __i915_request_unsubmit(struct i915_request *request);  void i915_request_unsubmit(struct i915_request *request); +/* Note: part of the intel_breadcrumbs family */ +bool i915_request_enable_breadcrumb(struct i915_request *request); +void i915_request_cancel_breadcrumb(struct i915_request *request); +  long i915_request_wait(struct i915_request *rq,  		       unsigned int flags,  		       long timeout) @@ -282,13 +303,14 @@ long i915_request_wait(struct i915_request *rq,  static inline bool i915_request_signaled(const struct i915_request *rq)  { +	/* The request may live longer than its HWSP, so check flags first! */  	return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags);  } -static inline bool intel_engine_has_started(struct intel_engine_cs *engine, -					    u32 seqno); -static inline bool intel_engine_has_completed(struct intel_engine_cs *engine, -					      u32 seqno); +static inline bool i915_request_is_active(const struct i915_request *rq) +{ +	return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); +}  /**   * Returns true if seq1 is later than seq2. @@ -298,6 +320,40 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)  	return (s32)(seq1 - seq2) >= 0;  } +static inline u32 __hwsp_seqno(const struct i915_request *rq) +{ +	return READ_ONCE(*rq->hwsp_seqno); +} + +/** + * hwsp_seqno - the current breadcrumb value in the HW status page + * @rq: the request, to chase the relevant HW status page + * + * The emphasis in naming here is that hwsp_seqno() is not a property of the + * request, but an indication of the current HW state (associated with this + * request). Its value will change as the GPU executes more requests. + * + * Returns the current breadcrumb value in the associated HW status page (or + * the local timeline's equivalent) for this request. The request itself + * has the associated breadcrumb value of rq->fence.seqno, when the HW + * status page has that breadcrumb or later, this request is complete. + */ +static inline u32 hwsp_seqno(const struct i915_request *rq) +{ +	u32 seqno; + +	rcu_read_lock(); /* the HWSP may be freed at runtime */ +	seqno = __hwsp_seqno(rq); +	rcu_read_unlock(); + +	return seqno; +} + +static inline bool __i915_request_has_started(const struct i915_request *rq) +{ +	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); +} +  /**   * i915_request_started - check if the request has begun being executed   * @rq: the request @@ -309,32 +365,40 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)   */  static inline bool i915_request_started(const struct i915_request *rq)  { -	u32 seqno; +	if (i915_request_signaled(rq)) +		return true; -	seqno = i915_request_global_seqno(rq); -	if (!seqno) /* not yet submitted to HW */ -		return false; - -	return intel_engine_has_started(rq->engine, seqno); +	/* Remember: started but may have since been preempted! */ +	return __i915_request_has_started(rq);  } -static inline bool -__i915_request_completed(const struct i915_request *rq, u32 seqno) +/** + * i915_request_is_running - check if the request may actually be executing + * @rq: the request + * + * Returns true if the request is currently submitted to hardware, has passed + * its start point (i.e. the context is setup and not busywaiting). 
Note that + * it may no longer be running by the time the function returns! + */ +static inline bool i915_request_is_running(const struct i915_request *rq)  { -	GEM_BUG_ON(!seqno); -	return intel_engine_has_completed(rq->engine, seqno) && -		seqno == i915_request_global_seqno(rq); +	if (!i915_request_is_active(rq)) +		return false; + +	return __i915_request_has_started(rq);  }  static inline bool i915_request_completed(const struct i915_request *rq)  { -	u32 seqno; +	if (i915_request_signaled(rq)) +		return true; -	seqno = i915_request_global_seqno(rq); -	if (!seqno) -		return false; +	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno); +} -	return __i915_request_completed(rq, seqno); +static inline void i915_request_mark_complete(struct i915_request *rq) +{ +	rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */  }  void i915_retire_requests(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 342d9ee42601..4462007a681c 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -5,6 +5,7 @@   */  #include <linux/sched/mm.h> +#include <linux/stop_machine.h>  #include "i915_drv.h"  #include "i915_gpu_error.h" @@ -12,27 +13,33 @@  #include "intel_guc.h" +#define RESET_MAX_RETRIES 3 + +/* XXX How to handle concurrent GGTT updates using tiling registers? */ +#define RESET_UNDER_STOP_MACHINE 0 +  static void engine_skip_context(struct i915_request *rq)  {  	struct intel_engine_cs *engine = rq->engine;  	struct i915_gem_context *hung_ctx = rq->gem_context;  	struct i915_timeline *timeline = rq->timeline; -	unsigned long flags; +	lockdep_assert_held(&engine->timeline.lock);  	GEM_BUG_ON(timeline == &engine->timeline); -	spin_lock_irqsave(&engine->timeline.lock, flags);  	spin_lock(&timeline->lock); -	list_for_each_entry_continue(rq, &engine->timeline.requests, link) -		if (rq->gem_context == hung_ctx) -			i915_request_skip(rq, -EIO); +	if (i915_request_is_active(rq)) { +		list_for_each_entry_continue(rq, +					     &engine->timeline.requests, link) +			if (rq->gem_context == hung_ctx) +				i915_request_skip(rq, -EIO); +	}  	list_for_each_entry(rq, &timeline->requests, link)  		i915_request_skip(rq, -EIO);  	spin_unlock(&timeline->lock); -	spin_unlock_irqrestore(&engine->timeline.lock, flags);  }  static void client_mark_guilty(struct drm_i915_file_private *file_priv, @@ -59,7 +66,7 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv,  	}  } -static void context_mark_guilty(struct i915_gem_context *ctx) +static bool context_mark_guilty(struct i915_gem_context *ctx)  {  	unsigned int score;  	bool banned, bannable; @@ -72,7 +79,7 @@ static void context_mark_guilty(struct i915_gem_context *ctx)  	/* Cool contexts don't accumulate client ban score */  	if (!bannable) -		return; +		return false;  	if (banned) {  		DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", @@ -83,6 +90,8 @@ static void context_mark_guilty(struct i915_gem_context *ctx)  	if (!IS_ERR_OR_NULL(ctx->file_priv))  		client_mark_guilty(ctx->file_priv, ctx); + +	return banned;  }  static void context_mark_innocent(struct i915_gem_context *ctx) @@ -90,6 +99,21 @@ static void context_mark_innocent(struct i915_gem_context *ctx)  	atomic_inc(&ctx->active_count);  } +void i915_reset_request(struct i915_request *rq, bool guilty) +{ +	lockdep_assert_held(&rq->engine->timeline.lock); +	GEM_BUG_ON(i915_request_completed(rq)); + +	if (guilty) { +		i915_request_skip(rq, -EIO); +		if 
(context_mark_guilty(rq->gem_context)) +			engine_skip_context(rq); +	} else { +		dma_fence_set_error(&rq->fence, -EAGAIN); +		context_mark_innocent(rq->gem_context); +	} +} +  static void gen3_stop_engine(struct intel_engine_cs *engine)  {  	struct drm_i915_private *dev_priv = engine->i915; @@ -144,14 +168,14 @@ static int i915_do_reset(struct drm_i915_private *i915,  	/* Assert reset for at least 20 usec, and wait for acknowledgement. */  	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); -	usleep_range(50, 200); -	err = wait_for(i915_in_reset(pdev), 500); +	udelay(50); +	err = wait_for_atomic(i915_in_reset(pdev), 50);  	/* Clear the reset request. */  	pci_write_config_byte(pdev, I915_GDRST, 0); -	usleep_range(50, 200); +	udelay(50);  	if (!err) -		err = wait_for(!i915_in_reset(pdev), 500); +		err = wait_for_atomic(!i915_in_reset(pdev), 50);  	return err;  } @@ -171,7 +195,7 @@ static int g33_do_reset(struct drm_i915_private *i915,  	struct pci_dev *pdev = i915->drm.pdev;  	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); -	return wait_for(g4x_reset_complete(pdev), 500); +	return wait_for_atomic(g4x_reset_complete(pdev), 50);  }  static int g4x_do_reset(struct drm_i915_private *dev_priv, @@ -182,13 +206,13 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv,  	int ret;  	/* WaVcpClkGateDisableForMediaReset:ctg,elk */ -	I915_WRITE(VDECCLK_GATE_D, -		   I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); -	POSTING_READ(VDECCLK_GATE_D); +	I915_WRITE_FW(VDECCLK_GATE_D, +		      I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); +	POSTING_READ_FW(VDECCLK_GATE_D);  	pci_write_config_byte(pdev, I915_GDRST,  			      GRDOM_MEDIA | GRDOM_RESET_ENABLE); -	ret =  wait_for(g4x_reset_complete(pdev), 500); +	ret =  wait_for_atomic(g4x_reset_complete(pdev), 50);  	if (ret) {  		DRM_DEBUG_DRIVER("Wait for media reset failed\n");  		goto out; @@ -196,7 +220,7 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv,  	pci_write_config_byte(pdev, I915_GDRST,  			      GRDOM_RENDER | GRDOM_RESET_ENABLE); -	ret =  wait_for(g4x_reset_complete(pdev), 500); +	ret =  wait_for_atomic(g4x_reset_complete(pdev), 50);  	if (ret) {  		DRM_DEBUG_DRIVER("Wait for render reset failed\n");  		goto out; @@ -205,9 +229,9 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv,  out:  	pci_write_config_byte(pdev, I915_GDRST, 0); -	I915_WRITE(VDECCLK_GATE_D, -		   I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); -	POSTING_READ(VDECCLK_GATE_D); +	I915_WRITE_FW(VDECCLK_GATE_D, +		      I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); +	POSTING_READ_FW(VDECCLK_GATE_D);  	return ret;  } @@ -218,27 +242,29 @@ static int ironlake_do_reset(struct drm_i915_private *dev_priv,  {  	int ret; -	I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); -	ret = intel_wait_for_register(dev_priv, -				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, -				      500); +	I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); +	ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, +					   ILK_GRDOM_RESET_ENABLE, 0, +					   5000, 0, +					   NULL);  	if (ret) {  		DRM_DEBUG_DRIVER("Wait for render reset failed\n");  		goto out;  	} -	I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); -	ret = intel_wait_for_register(dev_priv, -				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, -				      500); +	I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); +	ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, +					   
ILK_GRDOM_RESET_ENABLE, 0, +					   5000, 0, +					   NULL);  	if (ret) {  		DRM_DEBUG_DRIVER("Wait for media reset failed\n");  		goto out;  	}  out: -	I915_WRITE(ILK_GDSR, 0); -	POSTING_READ(ILK_GDSR); +	I915_WRITE_FW(ILK_GDSR, 0); +	POSTING_READ_FW(ILK_GDSR);  	return ret;  } @@ -527,32 +553,21 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)  int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)  { -	reset_func reset = intel_get_gpu_reset(i915); +	const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1; +	reset_func reset; +	int ret = -ETIMEDOUT;  	int retry; -	int ret; -	/* -	 * We want to perform per-engine reset from atomic context (e.g. -	 * softirq), which imposes the constraint that we cannot sleep. -	 * However, experience suggests that spending a bit of time waiting -	 * for a reset helps in various cases, so for a full-device reset -	 * we apply the opposite rule and wait if we want to. As we should -	 * always follow up a failed per-engine reset with a full device reset, -	 * being a little faster, stricter and more error prone for the -	 * atomic case seems an acceptable compromise. -	 * -	 * Unfortunately this leads to a bimodal routine, when the goal was -	 * to have a single reset function that worked for resetting any -	 * number of engines simultaneously. -	 */ -	might_sleep_if(engine_mask == ALL_ENGINES); +	reset = intel_get_gpu_reset(i915); +	if (!reset) +		return -ENODEV;  	/*  	 * If the power well sleeps during the reset, the reset  	 * request may be dropped and never completes (causing -EIO).  	 */  	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); -	for (retry = 0; retry < 3; retry++) { +	for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {  		/*  		 * We stop engines, otherwise we might get failed reset and a  		 * dead gpu (on elk). Also as modern gpu as kbl can suffer @@ -569,15 +584,10 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)  		 */  		i915_stop_engines(i915, engine_mask); -		ret = -ENODEV; -		if (reset) { -			GEM_TRACE("engine_mask=%x\n", engine_mask); -			ret = reset(i915, engine_mask, retry); -		} -		if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES) -			break; - -		cond_resched(); +		GEM_TRACE("engine_mask=%x\n", engine_mask); +		preempt_disable(); +		ret = reset(i915, engine_mask, retry); +		preempt_enable();  	}  	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); @@ -586,6 +596,9 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)  bool intel_has_gpu_reset(struct drm_i915_private *i915)  { +	if (USES_GUC(i915)) +		return false; +  	return intel_get_gpu_reset(i915);  } @@ -613,11 +626,8 @@ int intel_reset_guc(struct drm_i915_private *i915)   * Ensure irq handler finishes, and not run again.   * Also return the active request so that we only search for it once.   */ -static struct i915_request * -reset_prepare_engine(struct intel_engine_cs *engine) +static void reset_prepare_engine(struct intel_engine_cs *engine)  { -	struct i915_request *rq; -  	/*  	 * During the reset sequence, we must prevent the engine from  	 * entering RC6. As the context state is undefined until we restart @@ -626,162 +636,85 @@ reset_prepare_engine(struct intel_engine_cs *engine)  	 * GPU state upon resume, i.e. fail to restart after a reset.  	 */  	intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); - -	rq = engine->reset.prepare(engine); -	if (rq && rq->fence.error == -EIO) -		rq = ERR_PTR(-EIO); /* Previous reset failed! 
*/ - -	return rq; +	engine->reset.prepare(engine);  } -static int reset_prepare(struct drm_i915_private *i915) +static void reset_prepare(struct drm_i915_private *i915)  {  	struct intel_engine_cs *engine; -	struct i915_request *rq;  	enum intel_engine_id id; -	int err = 0; - -	for_each_engine(engine, i915, id) { -		rq = reset_prepare_engine(engine); -		if (IS_ERR(rq)) { -			err = PTR_ERR(rq); -			continue; -		} -		engine->hangcheck.active_request = rq; -	} +	for_each_engine(engine, i915, id) +		reset_prepare_engine(engine); -	i915_gem_revoke_fences(i915);  	intel_uc_sanitize(i915); - -	return err;  } -/* Returns the request if it was guilty of the hang */ -static struct i915_request * -reset_request(struct intel_engine_cs *engine, -	      struct i915_request *rq, -	      bool stalled) +static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)  { +	struct intel_engine_cs *engine; +	enum intel_engine_id id; +	int err; +  	/* -	 * The guilty request will get skipped on a hung engine. -	 * -	 * Users of client default contexts do not rely on logical -	 * state preserved between batches so it is safe to execute -	 * queued requests following the hang. Non default contexts -	 * rely on preserved state, so skipping a batch loses the -	 * evolution of the state and it needs to be considered corrupted. -	 * Executing more queued batches on top of corrupted state is -	 * risky. But we take the risk by trying to advance through -	 * the queued requests in order to make the client behaviour -	 * more predictable around resets, by not throwing away random -	 * amount of batches it has prepared for execution. Sophisticated -	 * clients can use gem_reset_stats_ioctl and dma fence status -	 * (exported via sync_file info ioctl on explicit fences) to observe -	 * when it loses the context state and should rebuild accordingly. -	 * -	 * The context ban, and ultimately the client ban, mechanism are safety -	 * valves if client submission ends up resulting in nothing more than -	 * subsequent hangs. +	 * Everything depends on having the GTT running, so we need to start +	 * there.  	 */ +	err = i915_ggtt_enable_hw(i915); +	if (err) +		return err; -	if (i915_request_completed(rq)) { -		GEM_TRACE("%s pardoned global=%d (fence %llx:%lld), current %d\n", -			  engine->name, rq->global_seqno, -			  rq->fence.context, rq->fence.seqno, -			  intel_engine_get_seqno(engine)); -		stalled = false; -	} - -	if (stalled) { -		context_mark_guilty(rq->gem_context); -		i915_request_skip(rq, -EIO); +	for_each_engine(engine, i915, id) +		intel_engine_reset(engine, stalled_mask & ENGINE_MASK(id)); -		/* If this context is now banned, skip all pending requests. */ -		if (i915_gem_context_is_banned(rq->gem_context)) -			engine_skip_context(rq); -	} else { -		/* -		 * Since this is not the hung engine, it may have advanced -		 * since the hang declaration. Double check by refinding -		 * the active request at the time of the reset. 
-		 */ -		rq = i915_gem_find_active_request(engine); -		if (rq) { -			unsigned long flags; - -			context_mark_innocent(rq->gem_context); -			dma_fence_set_error(&rq->fence, -EAGAIN); - -			/* Rewind the engine to replay the incomplete rq */ -			spin_lock_irqsave(&engine->timeline.lock, flags); -			rq = list_prev_entry(rq, link); -			if (&rq->link == &engine->timeline.requests) -				rq = NULL; -			spin_unlock_irqrestore(&engine->timeline.lock, flags); -		} -	} +	i915_gem_restore_fences(i915); -	return rq; +	return err;  } -static void reset_engine(struct intel_engine_cs *engine, -			 struct i915_request *rq, -			 bool stalled) +static void reset_finish_engine(struct intel_engine_cs *engine)  { -	if (rq) -		rq = reset_request(engine, rq, stalled); - -	/* Setup the CS to resume from the breadcrumb of the hung request */ -	engine->reset.reset(engine, rq); +	engine->reset.finish(engine); +	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);  } -static void gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask) +struct i915_gpu_restart { +	struct work_struct work; +	struct drm_i915_private *i915; +}; + +static void restart_work(struct work_struct *work)  { +	struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work); +	struct drm_i915_private *i915 = arg->i915;  	struct intel_engine_cs *engine;  	enum intel_engine_id id; +	intel_wakeref_t wakeref; -	lockdep_assert_held(&i915->drm.struct_mutex); - -	i915_retire_requests(i915); +	wakeref = intel_runtime_pm_get(i915); +	mutex_lock(&i915->drm.struct_mutex); +	WRITE_ONCE(i915->gpu_error.restart, NULL);  	for_each_engine(engine, i915, id) { -		struct intel_context *ce; - -		reset_engine(engine, -			     engine->hangcheck.active_request, -			     stalled_mask & ENGINE_MASK(id)); -		ce = fetch_and_zero(&engine->last_retired_context); -		if (ce) -			intel_context_unpin(ce); +		struct i915_request *rq;  		/*  		 * Ostensibily, we always want a context loaded for powersaving,  		 * so if the engine is idle after the reset, send a request  		 * to load our scratch kernel_context. -		 * -		 * More mysteriously, if we leave the engine idle after a reset, -		 * the next userspace batch may hang, with what appears to be -		 * an incoherent read by the CS (presumably stale TLB). An -		 * empty request appears sufficient to paper over the glitch.  		 
*/ -		if (intel_engine_is_idle(engine)) { -			struct i915_request *rq; +		if (!intel_engine_is_idle(engine)) +			continue; -			rq = i915_request_alloc(engine, i915->kernel_context); -			if (!IS_ERR(rq)) -				i915_request_add(rq); -		} +		rq = i915_request_alloc(engine, i915->kernel_context); +		if (!IS_ERR(rq)) +			i915_request_add(rq);  	} -	i915_gem_restore_fences(i915); -} - -static void reset_finish_engine(struct intel_engine_cs *engine) -{ -	engine->reset.finish(engine); +	mutex_unlock(&i915->drm.struct_mutex); +	intel_runtime_pm_put(i915, wakeref); -	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); +	kfree(arg);  }  static void reset_finish(struct drm_i915_private *i915) @@ -789,27 +722,49 @@ static void reset_finish(struct drm_i915_private *i915)  	struct intel_engine_cs *engine;  	enum intel_engine_id id; -	lockdep_assert_held(&i915->drm.struct_mutex); - -	for_each_engine(engine, i915, id) { -		engine->hangcheck.active_request = NULL; +	for_each_engine(engine, i915, id)  		reset_finish_engine(engine); +} + +static void reset_restart(struct drm_i915_private *i915) +{ +	struct i915_gpu_restart *arg; + +	/* +	 * Following the reset, ensure that we always reload context for +	 * powersaving, and to correct engine->last_retired_context. Since +	 * this requires us to submit a request, queue a worker to do that +	 * task for us to evade any locking here. +	 */ +	if (READ_ONCE(i915->gpu_error.restart)) +		return; + +	arg = kmalloc(sizeof(*arg), GFP_KERNEL); +	if (arg) { +		arg->i915 = i915; +		INIT_WORK(&arg->work, restart_work); + +		WRITE_ONCE(i915->gpu_error.restart, arg); +		queue_work(i915->wq, &arg->work);  	}  }  static void nop_submit_request(struct i915_request *request)  { +	struct intel_engine_cs *engine = request->engine;  	unsigned long flags;  	GEM_TRACE("%s fence %llx:%lld -> -EIO\n", -		  request->engine->name, -		  request->fence.context, request->fence.seqno); +		  engine->name, request->fence.context, request->fence.seqno);  	dma_fence_set_error(&request->fence, -EIO); -	spin_lock_irqsave(&request->engine->timeline.lock, flags); +	spin_lock_irqsave(&engine->timeline.lock, flags);  	__i915_request_submit(request); -	intel_engine_write_global_seqno(request->engine, request->global_seqno); -	spin_unlock_irqrestore(&request->engine->timeline.lock, flags); +	i915_request_mark_complete(request); +	intel_engine_write_global_seqno(engine, request->global_seqno); +	spin_unlock_irqrestore(&engine->timeline.lock, flags); + +	intel_engine_queue_breadcrumbs(engine);  }  void i915_gem_set_wedged(struct drm_i915_private *i915) @@ -864,7 +819,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)  	for_each_engine(engine, i915, id) {  		reset_finish_engine(engine); -		intel_engine_wakeup(engine); +		intel_engine_signal_breadcrumbs(engine);  	}  	smp_mb__before_atomic(); @@ -882,8 +837,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)  	struct i915_timeline *tl;  	bool ret = false; -	lockdep_assert_held(&i915->drm.struct_mutex); -  	if (!test_bit(I915_WEDGED, &error->flags))  		return true; @@ -904,11 +857,12 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)  	 *  	 * No more can be submitted until we reset the wedged bit.  	 
*/ -	list_for_each_entry(tl, &i915->gt.timelines, link) { +	mutex_lock(&i915->gt.timelines.mutex); +	list_for_each_entry(tl, &i915->gt.timelines.active_list, link) {  		struct i915_request *rq; +		long timeout; -		rq = i915_gem_active_peek(&tl->last_request, -					  &i915->drm.struct_mutex); +		rq = i915_gem_active_get_unlocked(&tl->last_request);  		if (!rq)  			continue; @@ -923,12 +877,15 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)  		 * and when the seqno passes the fence, the signaler  		 * then signals the fence waking us up).  		 */ -		if (dma_fence_default_wait(&rq->fence, true, -					   MAX_SCHEDULE_TIMEOUT) < 0) +		timeout = dma_fence_default_wait(&rq->fence, true, +						 MAX_SCHEDULE_TIMEOUT); +		i915_request_put(rq); +		if (timeout < 0) { +			mutex_unlock(&i915->gt.timelines.mutex);  			goto unlock; +		}  	} -	i915_retire_requests(i915); -	GEM_BUG_ON(i915->gt.active_requests); +	mutex_unlock(&i915->gt.timelines.mutex);  	intel_engines_sanitize(i915, false); @@ -942,7 +899,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)  	 * context and do not require stop_machine().  	 */  	intel_engines_reset_default_submission(i915); -	i915_gem_contexts_lost(i915);  	GEM_TRACE("end\n"); @@ -955,6 +911,52 @@ unlock:  	return ret;  } +struct __i915_reset { +	struct drm_i915_private *i915; +	unsigned int stalled_mask; +}; + +static int __i915_reset__BKL(void *data) +{ +	struct __i915_reset *arg = data; +	int err; + +	err = intel_gpu_reset(arg->i915, ALL_ENGINES); +	if (err) +		return err; + +	return gt_reset(arg->i915, arg->stalled_mask); +} + +#if RESET_UNDER_STOP_MACHINE +/* + * XXX An alternative to using stop_machine would be to park only the + * processes that have a GGTT mmap. By remote parking the threads (SIGSTOP) + * we should be able to prevent their memmory accesses via the lost fence + * registers over the course of the reset without the potential recursive + * of mutexes between the pagefault handler and reset. + * + * See igt/gem_mmap_gtt/hang + */ +#define __do_reset(fn, arg) stop_machine(fn, arg, NULL) +#else +#define __do_reset(fn, arg) fn(arg) +#endif + +static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask) +{ +	struct __i915_reset arg = { i915, stalled_mask }; +	int err, i; + +	err = __do_reset(__i915_reset__BKL, &arg); +	for (i = 0; err && i < RESET_MAX_RETRIES; i++) { +		msleep(100); +		err = __do_reset(__i915_reset__BKL, &arg); +	} + +	return err; +} +  /**   * i915_reset - reset chip after a hang   * @i915: #drm_i915_private to reset @@ -980,31 +982,22 @@ void i915_reset(struct drm_i915_private *i915,  {  	struct i915_gpu_error *error = &i915->gpu_error;  	int ret; -	int i;  	GEM_TRACE("flags=%lx\n", error->flags);  	might_sleep(); -	lockdep_assert_held(&i915->drm.struct_mutex);  	assert_rpm_wakelock_held(i915);  	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); -	if (!test_bit(I915_RESET_HANDOFF, &error->flags)) -		return; -  	/* Clear any previous failed attempts at recovery. Time to try again. 
*/  	if (!i915_gem_unset_wedged(i915)) -		goto wakeup; +		return;  	if (reason)  		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);  	error->reset_count++; -	ret = reset_prepare(i915); -	if (ret) { -		dev_err(i915->drm.dev, "GPU recovery failed\n"); -		goto taint; -	} +	reset_prepare(i915);  	if (!intel_has_gpu_reset(i915)) {  		if (i915_modparams.reset) @@ -1014,32 +1007,11 @@ void i915_reset(struct drm_i915_private *i915,  		goto error;  	} -	for (i = 0; i < 3; i++) { -		ret = intel_gpu_reset(i915, ALL_ENGINES); -		if (ret == 0) -			break; - -		msleep(100); -	} -	if (ret) { +	if (do_reset(i915, stalled_mask)) {  		dev_err(i915->drm.dev, "Failed to reset chip\n");  		goto taint;  	} -	/* Ok, now get things going again... */ - -	/* -	 * Everything depends on having the GTT running, so we need to start -	 * there. -	 */ -	ret = i915_ggtt_enable_hw(i915); -	if (ret) { -		DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n", -			  ret); -		goto error; -	} - -	gt_reset(i915, stalled_mask);  	intel_overlay_reset(i915);  	/* @@ -1061,9 +1033,8 @@ void i915_reset(struct drm_i915_private *i915,  finish:  	reset_finish(i915); -wakeup: -	clear_bit(I915_RESET_HANDOFF, &error->flags); -	wake_up_bit(&error->flags, I915_RESET_HANDOFF); +	if (!i915_terminally_wedged(error)) +		reset_restart(i915);  	return;  taint: @@ -1082,7 +1053,6 @@ taint:  	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);  error:  	i915_gem_set_wedged(i915); -	i915_retire_requests(i915);  	goto finish;  } @@ -1108,18 +1078,12 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *i915,  int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)  {  	struct i915_gpu_error *error = &engine->i915->gpu_error; -	struct i915_request *active_request;  	int ret;  	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);  	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); -	active_request = reset_prepare_engine(engine); -	if (IS_ERR_OR_NULL(active_request)) { -		/* Either the previous reset failed, or we pardon the reset. */ -		ret = PTR_ERR(active_request); -		goto out; -	} +	reset_prepare_engine(engine);  	if (msg)  		dev_notice(engine->i915->drm.dev, @@ -1143,7 +1107,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)  	 * active request and can drop it, adjust head to skip the offending  	 * request to resume executing remaining requests in the queue.  	 */ -	reset_engine(engine, active_request, true); +	intel_engine_reset(engine, true);  	/*  	 * The engine and its registers (and workarounds in case of render) @@ -1180,30 +1144,7 @@ static void i915_reset_device(struct drm_i915_private *i915,  	i915_wedge_on_timeout(&w, i915, 5 * HZ) {  		intel_prepare_reset(i915); -		error->reason = reason; -		error->stalled_mask = engine_mask; - -		/* Signal that locked waiters should reset the GPU */ -		smp_mb__before_atomic(); -		set_bit(I915_RESET_HANDOFF, &error->flags); -		wake_up_all(&error->wait_queue); - -		/* -		 * Wait for anyone holding the lock to wakeup, without -		 * blocking indefinitely on struct_mutex. 
-		 */ -		do { -			if (mutex_trylock(&i915->drm.struct_mutex)) { -				i915_reset(i915, engine_mask, reason); -				mutex_unlock(&i915->drm.struct_mutex); -			} -		} while (wait_on_bit_timeout(&error->flags, -					     I915_RESET_HANDOFF, -					     TASK_UNINTERRUPTIBLE, -					     1)); - -		error->stalled_mask = 0; -		error->reason = NULL; +		i915_reset(i915, engine_mask, reason);  		intel_finish_reset(i915);  	} @@ -1359,6 +1300,25 @@ out:  	intel_runtime_pm_put(i915, wakeref);  } +bool i915_reset_flush(struct drm_i915_private *i915) +{ +	int err; + +	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); + +	flush_workqueue(i915->wq); +	GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart)); + +	mutex_lock(&i915->drm.struct_mutex); +	err = i915_gem_wait_for_idle(i915, +				     I915_WAIT_LOCKED | +				     I915_WAIT_FOR_IDLE_BOOST, +				     MAX_SCHEDULE_TIMEOUT); +	mutex_unlock(&i915->drm.struct_mutex); + +	return !err; +} +  static void i915_wedge_me(struct work_struct *work)  {  	struct i915_wedge_me *w = container_of(work, typeof(*w), work.work); diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h index b6a519bde67d..f2d347f319df 100644 --- a/drivers/gpu/drm/i915/i915_reset.h +++ b/drivers/gpu/drm/i915/i915_reset.h @@ -29,6 +29,9 @@ void i915_reset(struct drm_i915_private *i915,  int i915_reset_engine(struct intel_engine_cs *engine,  		      const char *reason); +void i915_reset_request(struct i915_request *rq, bool guilty); +bool i915_reset_flush(struct drm_i915_private *i915); +  bool intel_has_gpu_reset(struct drm_i915_private *i915);  bool intel_has_reset_engine(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 340faea6c08a..d01683167c77 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -127,8 +127,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)  	return rb_entry(rb, struct i915_priolist, node);  } -static void assert_priolists(struct intel_engine_execlists * const execlists, -			     long queue_priority) +static void assert_priolists(struct intel_engine_execlists * const execlists)  {  	struct rb_node *rb;  	long last_prio, i; @@ -139,7 +138,7 @@ static void assert_priolists(struct intel_engine_execlists * const execlists,  	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=  		   rb_first(&execlists->queue.rb_root)); -	last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1; +	last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1;  	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {  		const struct i915_priolist *p = to_priolist(rb); @@ -166,7 +165,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)  	int idx, i;  	lockdep_assert_held(&engine->timeline.lock); -	assert_priolists(execlists, INT_MAX); +	assert_priolists(execlists);  	/* buckets sorted from highest [in slot 0] to lowest priority */  	idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1; @@ -239,6 +238,18 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)  	return engine;  } +static bool inflight(const struct i915_request *rq, +		     const struct intel_engine_cs *engine) +{ +	const struct i915_request *active; + +	if (!i915_request_is_active(rq)) +		return false; + +	active = port_request(engine->execlists.port); +	return active->hw_context == rq->hw_context; +} +  static void __i915_schedule(struct i915_request *rq,  			    const struct i915_sched_attr 
*attr)  { @@ -328,6 +339,7 @@ static void __i915_schedule(struct i915_request *rq,  		INIT_LIST_HEAD(&dep->dfs_link);  		engine = sched_lock_engine(node, engine); +		lockdep_assert_held(&engine->timeline.lock);  		/* Recheck after acquiring the engine->timeline.lock */  		if (prio <= node->attr.priority || node_signaled(node)) @@ -353,20 +365,19 @@ static void __i915_schedule(struct i915_request *rq,  				continue;  		} -		if (prio <= engine->execlists.queue_priority) +		if (prio <= engine->execlists.queue_priority_hint)  			continue; +		engine->execlists.queue_priority_hint = prio; +  		/*  		 * If we are already the currently executing context, don't  		 * bother evaluating if we should preempt ourselves.  		 */ -		if (node_to_request(node)->global_seqno && -		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno, -				      node_to_request(node)->global_seqno)) +		if (inflight(node_to_request(node), engine))  			continue;  		/* Defer (tasklet) submission until after all of our updates. */ -		engine->execlists.queue_priority = prio;  		tasklet_hi_schedule(&engine->execlists.tasklet);  	} diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index a73472dd12fd..207e21b478f2 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -31,6 +31,7 @@ struct i915_selftest {  	unsigned long timeout_jiffies;  	unsigned int timeout_ms;  	unsigned int random_seed; +	char *filter;  	int mock;  	int live;  }; diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 4667cc08c416..5ea3af393ffe 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -9,25 +9,155 @@  #include "i915_timeline.h"  #include "i915_syncmap.h" -void i915_timeline_init(struct drm_i915_private *i915, -			struct i915_timeline *timeline, -			const char *name) +struct i915_timeline_hwsp { +	struct i915_vma *vma; +	struct list_head free_link; +	u64 free_bitmap; +}; + +static inline struct i915_timeline_hwsp * +i915_timeline_hwsp(const struct i915_timeline *tl) +{ +	return tl->hwsp_ggtt->private; +} + +static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915) +{ +	struct drm_i915_gem_object *obj; +	struct i915_vma *vma; + +	obj = i915_gem_object_create_internal(i915, PAGE_SIZE); +	if (IS_ERR(obj)) +		return ERR_CAST(obj); + +	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + +	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); +	if (IS_ERR(vma)) +		i915_gem_object_put(obj); + +	return vma; +} + +static struct i915_vma * +hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)  { -	lockdep_assert_held(&i915->drm.struct_mutex); +	struct drm_i915_private *i915 = timeline->i915; +	struct i915_gt_timelines *gt = &i915->gt.timelines; +	struct i915_timeline_hwsp *hwsp; + +	BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE); + +	spin_lock(>->hwsp_lock); + +	/* hwsp_free_list only contains HWSP that have available cachelines */ +	hwsp = list_first_entry_or_null(>->hwsp_free_list, +					typeof(*hwsp), free_link); +	if (!hwsp) { +		struct i915_vma *vma; + +		spin_unlock(>->hwsp_lock); + +		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL); +		if (!hwsp) +			return ERR_PTR(-ENOMEM); + +		vma = __hwsp_alloc(i915); +		if (IS_ERR(vma)) { +			kfree(hwsp); +			return vma; +		} + +		vma->private = hwsp; +		hwsp->vma = vma; +		hwsp->free_bitmap = ~0ull; + +		spin_lock(>->hwsp_lock); +		list_add(&hwsp->free_link, >->hwsp_free_list); +	} + +	
GEM_BUG_ON(!hwsp->free_bitmap); +	*cacheline = __ffs64(hwsp->free_bitmap); +	hwsp->free_bitmap &= ~BIT_ULL(*cacheline); +	if (!hwsp->free_bitmap) +		list_del(&hwsp->free_link); + +	spin_unlock(>->hwsp_lock); + +	GEM_BUG_ON(hwsp->vma->private != hwsp); +	return hwsp->vma; +} + +static void hwsp_free(struct i915_timeline *timeline) +{ +	struct i915_gt_timelines *gt = &timeline->i915->gt.timelines; +	struct i915_timeline_hwsp *hwsp; + +	hwsp = i915_timeline_hwsp(timeline); +	if (!hwsp) /* leave global HWSP alone! */ +		return; + +	spin_lock(>->hwsp_lock); + +	/* As a cacheline becomes available, publish the HWSP on the freelist */ +	if (!hwsp->free_bitmap) +		list_add_tail(&hwsp->free_link, >->hwsp_free_list); + +	hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES); + +	/* And if no one is left using it, give the page back to the system */ +	if (hwsp->free_bitmap == ~0ull) { +		i915_vma_put(hwsp->vma); +		list_del(&hwsp->free_link); +		kfree(hwsp); +	} + +	spin_unlock(>->hwsp_lock); +} + +int i915_timeline_init(struct drm_i915_private *i915, +		       struct i915_timeline *timeline, +		       const char *name, +		       struct i915_vma *hwsp) +{ +	void *vaddr;  	/*  	 * Ideally we want a set of engines on a single leaf as we expect  	 * to mostly be tracking synchronisation between engines. It is not  	 * a huge issue if this is not the case, but we may want to mitigate  	 * any page crossing penalties if they become an issue. +	 * +	 * Called during early_init before we know how many engines there are.  	 */  	BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); +	timeline->i915 = i915;  	timeline->name = name; +	timeline->pin_count = 0; +	timeline->has_initial_breadcrumb = !hwsp; -	list_add(&timeline->link, &i915->gt.timelines); +	timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; +	if (!hwsp) { +		unsigned int cacheline; + +		hwsp = hwsp_alloc(timeline, &cacheline); +		if (IS_ERR(hwsp)) +			return PTR_ERR(hwsp); + +		timeline->hwsp_offset = cacheline * CACHELINE_BYTES; +	} +	timeline->hwsp_ggtt = i915_vma_get(hwsp); + +	vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); +	if (IS_ERR(vaddr)) { +		hwsp_free(timeline); +		i915_vma_put(hwsp); +		return PTR_ERR(vaddr); +	} -	/* Called during early_init before we know how many engines there are */ +	timeline->hwsp_seqno = +		memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);  	timeline->fence_context = dma_fence_context_alloc(1); @@ -37,6 +167,40 @@ void i915_timeline_init(struct drm_i915_private *i915,  	INIT_LIST_HEAD(&timeline->requests);  	i915_syncmap_init(&timeline->sync); + +	return 0; +} + +void i915_timelines_init(struct drm_i915_private *i915) +{ +	struct i915_gt_timelines *gt = &i915->gt.timelines; + +	mutex_init(>->mutex); +	INIT_LIST_HEAD(>->active_list); + +	spin_lock_init(>->hwsp_lock); +	INIT_LIST_HEAD(>->hwsp_free_list); + +	/* via i915_gem_wait_for_idle() */ +	i915_gem_shrinker_taints_mutex(i915, >->mutex); +} + +static void timeline_add_to_active(struct i915_timeline *tl) +{ +	struct i915_gt_timelines *gt = &tl->i915->gt.timelines; + +	mutex_lock(>->mutex); +	list_add(&tl->link, >->active_list); +	mutex_unlock(>->mutex); +} + +static void timeline_remove_from_active(struct i915_timeline *tl) +{ +	struct i915_gt_timelines *gt = &tl->i915->gt.timelines; + +	mutex_lock(>->mutex); +	list_del(&tl->link); +	mutex_unlock(>->mutex);  }  /** @@ -51,11 +215,11 @@ void i915_timeline_init(struct drm_i915_private *i915,   */  void i915_timelines_park(struct drm_i915_private *i915)  { +	struct i915_gt_timelines *gt = 
&i915->gt.timelines;  	struct i915_timeline *timeline; -	lockdep_assert_held(&i915->drm.struct_mutex); - -	list_for_each_entry(timeline, &i915->gt.timelines, link) { +	mutex_lock(>->mutex); +	list_for_each_entry(timeline, >->active_list, link) {  		/*  		 * All known fences are completed so we can scrap  		 * the current sync point tracking and start afresh, @@ -64,32 +228,87 @@ void i915_timelines_park(struct drm_i915_private *i915)  		 */  		i915_syncmap_free(&timeline->sync);  	} +	mutex_unlock(>->mutex);  }  void i915_timeline_fini(struct i915_timeline *timeline)  { +	GEM_BUG_ON(timeline->pin_count);  	GEM_BUG_ON(!list_empty(&timeline->requests));  	i915_syncmap_free(&timeline->sync); +	hwsp_free(timeline); -	list_del(&timeline->link); +	i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); +	i915_vma_put(timeline->hwsp_ggtt);  }  struct i915_timeline * -i915_timeline_create(struct drm_i915_private *i915, const char *name) +i915_timeline_create(struct drm_i915_private *i915, +		     const char *name, +		     struct i915_vma *global_hwsp)  {  	struct i915_timeline *timeline; +	int err;  	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);  	if (!timeline)  		return ERR_PTR(-ENOMEM); -	i915_timeline_init(i915, timeline, name); +	err = i915_timeline_init(i915, timeline, name, global_hwsp); +	if (err) { +		kfree(timeline); +		return ERR_PTR(err); +	} +  	kref_init(&timeline->kref);  	return timeline;  } +int i915_timeline_pin(struct i915_timeline *tl) +{ +	int err; + +	if (tl->pin_count++) +		return 0; +	GEM_BUG_ON(!tl->pin_count); + +	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH); +	if (err) +		goto unpin; + +	tl->hwsp_offset = +		i915_ggtt_offset(tl->hwsp_ggtt) + +		offset_in_page(tl->hwsp_offset); + +	timeline_add_to_active(tl); + +	return 0; + +unpin: +	tl->pin_count = 0; +	return err; +} + +void i915_timeline_unpin(struct i915_timeline *tl) +{ +	GEM_BUG_ON(!tl->pin_count); +	if (--tl->pin_count) +		return; + +	timeline_remove_from_active(tl); + +	/* +	 * Since this timeline is idle, all bariers upon which we were waiting +	 * must also be complete and so we can discard the last used barriers +	 * without loss of information. +	 */ +	i915_syncmap_free(&tl->sync); + +	__i915_vma_unpin(tl->hwsp_ggtt); +} +  void __i915_timeline_free(struct kref *kref)  {  	struct i915_timeline *timeline = @@ -99,6 +318,16 @@ void __i915_timeline_free(struct kref *kref)  	kfree(timeline);  } +void i915_timelines_fini(struct drm_i915_private *i915) +{ +	struct i915_gt_timelines *gt = &i915->gt.timelines; + +	GEM_BUG_ON(!list_empty(>->active_list)); +	GEM_BUG_ON(!list_empty(>->hwsp_free_list)); + +	mutex_destroy(>->mutex); +} +  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)  #include "selftests/mock_timeline.c"  #include "selftests/i915_timeline.c" diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 38c1e15e927a..8caeb66d1cd5 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -32,6 +32,9 @@  #include "i915_syncmap.h"  #include "i915_utils.h" +struct i915_vma; +struct i915_timeline_hwsp; +  struct i915_timeline {  	u64 fence_context;  	u32 seqno; @@ -40,6 +43,13 @@ struct i915_timeline {  #define TIMELINE_CLIENT 0 /* default subclass */  #define TIMELINE_ENGINE 1 +	unsigned int pin_count; +	const u32 *hwsp_seqno; +	struct i915_vma *hwsp_ggtt; +	u32 hwsp_offset; + +	bool has_initial_breadcrumb; +  	/**  	 * List of breadcrumbs associated with GPU requests currently  	 * outstanding. 
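
The hwsp_alloc()/hwsp_free() pair added to i915_timeline.c above carves each status page into 64 cacheline-sized slots, tracks them with a per-page u64 free bitmap, and keeps a list of pages that still have slots available; a page with no users left is handed back to the system. The same bookkeeping can be shown as a minimal user-space sketch (single-threaded, so the hwsp_lock is omitted; the slab/slot names and the __builtin_ctzll() stand-in for __ffs64() are illustrative, not the driver code):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define SLOT_SIZE  64u                 /* one "cacheline" per slot          */
#define PAGE_SZ    4096u               /* 4096 / 64 = 64 slots per page     */

struct slab {                          /* stand-in for i915_timeline_hwsp   */
	struct slab *next;             /* free-list link                    */
	uint64_t free_bitmap;          /* bit set => slot still available   */
	unsigned char page[PAGE_SZ];
};

static struct slab *free_list;         /* slabs with at least one free slot */

static void unlink_slab(struct slab *s)
{
	struct slab **pp = &free_list;

	while (*pp && *pp != s)
		pp = &(*pp)->next;
	if (*pp)
		*pp = s->next;
}

static struct slab *slot_alloc(unsigned int *offset)
{
	struct slab *s = free_list;

	if (!s) {                      /* no partially used slab: grow      */
		s = calloc(1, sizeof(*s));
		if (!s)
			return NULL;
		s->free_bitmap = ~0ull;
		s->next = free_list;
		free_list = s;
	}

	unsigned int slot = (unsigned int)__builtin_ctzll(s->free_bitmap);

	s->free_bitmap &= ~(1ull << slot);
	if (!s->free_bitmap)           /* slab exhausted: hide it           */
		unlink_slab(s);

	*offset = slot * SLOT_SIZE;
	return s;
}

static void slot_free(struct slab *s, unsigned int offset)
{
	if (!s->free_bitmap) {         /* slab has space again: republish   */
		s->next = free_list;
		free_list = s;
	}
	s->free_bitmap |= 1ull << (offset / SLOT_SIZE);

	if (s->free_bitmap == ~0ull) { /* completely idle: drop the page    */
		unlink_slab(s);
		free(s);
	}
}

int main(void)
{
	unsigned int off;
	struct slab *s = slot_alloc(&off);

	printf("got slot at page offset %u\n", off);
	slot_free(s, off);
	return 0;
}

Keeping exhausted pages off the free list is what lets the fast path find a usable page in O(1); only fully idle pages are ever freed.
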
@@ -66,13 +76,15 @@ struct i915_timeline {  	struct list_head link;  	const char *name; +	struct drm_i915_private *i915;  	struct kref kref;  }; -void i915_timeline_init(struct drm_i915_private *i915, -			struct i915_timeline *tl, -			const char *name); +int i915_timeline_init(struct drm_i915_private *i915, +		       struct i915_timeline *tl, +		       const char *name, +		       struct i915_vma *hwsp);  void i915_timeline_fini(struct i915_timeline *tl);  static inline void @@ -95,7 +107,9 @@ i915_timeline_set_subclass(struct i915_timeline *timeline,  }  struct i915_timeline * -i915_timeline_create(struct drm_i915_private *i915, const char *name); +i915_timeline_create(struct drm_i915_private *i915, +		     const char *name, +		     struct i915_vma *global_hwsp);  static inline struct i915_timeline *  i915_timeline_get(struct i915_timeline *timeline) @@ -134,6 +148,11 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,  	return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno);  } +int i915_timeline_pin(struct i915_timeline *tl); +void i915_timeline_unpin(struct i915_timeline *tl); + +void i915_timelines_init(struct drm_i915_private *i915);  void i915_timelines_park(struct drm_i915_private *i915); +void i915_timelines_fini(struct drm_i915_private *i915);  #endif diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 43da14f08dc0..eab313c3163c 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -752,31 +752,6 @@ trace_i915_request_out(struct i915_request *rq)  #endif  #endif -TRACE_EVENT(intel_engine_notify, -	    TP_PROTO(struct intel_engine_cs *engine, bool waiters), -	    TP_ARGS(engine, waiters), - -	    TP_STRUCT__entry( -			     __field(u32, dev) -			     __field(u16, class) -			     __field(u16, instance) -			     __field(u32, seqno) -			     __field(bool, waiters) -			     ), - -	    TP_fast_assign( -			   __entry->dev = engine->i915->drm.primary->index; -			   __entry->class = engine->uabi_class; -			   __entry->instance = engine->instance; -			   __entry->seqno = intel_engine_get_seqno(engine); -			   __entry->waiters = waiters; -			   ), - -	    TP_printk("dev=%u, engine=%u:%u, seqno=%u, waiters=%u", -		      __entry->dev, __entry->class, __entry->instance, -		      __entry->seqno, __entry->waiters) -); -  DEFINE_EVENT(i915_request, i915_request_retire,  	    TP_PROTO(struct i915_request *rq),  	    TP_ARGS(rq) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 5b4d78cdb4ca..d83b8ad5f859 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -79,9 +79,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)  	if (--vma->active_count)  		return; -	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); -	list_move_tail(&vma->vm_link, &vma->vm->inactive_list); -  	GEM_BUG_ON(!i915_gem_object_is_active(obj));  	if (--obj->active_count)  		return; @@ -190,33 +187,56 @@ vma_create(struct drm_i915_gem_object *obj,  								i915_gem_object_get_stride(obj));  		GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); -		/* -		 * We put the GGTT vma at the start of the vma-list, followed -		 * by the ppGGTT vma. 
This allows us to break early when -		 * iterating over only the GGTT vma for an object, see -		 * for_each_ggtt_vma() -		 */  		vma->flags |= I915_VMA_GGTT; -		list_add(&vma->obj_link, &obj->vma_list); -	} else { -		list_add_tail(&vma->obj_link, &obj->vma_list);  	} +	spin_lock(&obj->vma.lock); +  	rb = NULL; -	p = &obj->vma_tree.rb_node; +	p = &obj->vma.tree.rb_node;  	while (*p) {  		struct i915_vma *pos; +		long cmp;  		rb = *p;  		pos = rb_entry(rb, struct i915_vma, obj_node); -		if (i915_vma_compare(pos, vm, view) < 0) + +		/* +		 * If the view already exists in the tree, another thread +		 * already created a matching vma, so return the older instance +		 * and dispose of ours. +		 */ +		cmp = i915_vma_compare(pos, vm, view); +		if (cmp == 0) { +			spin_unlock(&obj->vma.lock); +			kmem_cache_free(vm->i915->vmas, vma); +			return pos; +		} + +		if (cmp < 0)  			p = &rb->rb_right;  		else  			p = &rb->rb_left;  	}  	rb_link_node(&vma->obj_node, rb, p); -	rb_insert_color(&vma->obj_node, &obj->vma_tree); +	rb_insert_color(&vma->obj_node, &obj->vma.tree); + +	if (i915_vma_is_ggtt(vma)) +		/* +		 * We put the GGTT vma at the start of the vma-list, followed +		 * by the ppGGTT vma. This allows us to break early when +		 * iterating over only the GGTT vma for an object, see +		 * for_each_ggtt_vma() +		 */ +		list_add(&vma->obj_link, &obj->vma.list); +	else +		list_add_tail(&vma->obj_link, &obj->vma.list); + +	spin_unlock(&obj->vma.lock); + +	mutex_lock(&vm->mutex);  	list_add(&vma->vm_link, &vm->unbound_list); +	mutex_unlock(&vm->mutex);  	return vma; @@ -232,7 +252,7 @@ vma_lookup(struct drm_i915_gem_object *obj,  {  	struct rb_node *rb; -	rb = obj->vma_tree.rb_node; +	rb = obj->vma.tree.rb_node;  	while (rb) {  		struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node);  		long cmp; @@ -272,16 +292,18 @@ i915_vma_instance(struct drm_i915_gem_object *obj,  {  	struct i915_vma *vma; -	lockdep_assert_held(&obj->base.dev->struct_mutex);  	GEM_BUG_ON(view && !i915_is_ggtt(vm));  	GEM_BUG_ON(vm->closed); +	spin_lock(&obj->vma.lock);  	vma = vma_lookup(obj, vm, view); -	if (!vma) +	spin_unlock(&obj->vma.lock); + +	/* vma_create() will resolve the race if another creates the vma */ +	if (unlikely(!vma))  		vma = vma_create(obj, vm, view);  	GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); -	GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma);  	return vma;  } @@ -659,7 +681,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)  	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));  	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level)); -	list_move_tail(&vma->vm_link, &vma->vm->inactive_list); +	mutex_lock(&vma->vm->mutex); +	list_move_tail(&vma->vm_link, &vma->vm->bound_list); +	mutex_unlock(&vma->vm->mutex);  	if (vma->obj) {  		struct drm_i915_gem_object *obj = vma->obj; @@ -692,8 +716,10 @@ i915_vma_remove(struct i915_vma *vma)  	vma->ops->clear_pages(vma); +	mutex_lock(&vma->vm->mutex);  	drm_mm_remove_node(&vma->node);  	list_move_tail(&vma->vm_link, &vma->vm->unbound_list); +	mutex_unlock(&vma->vm->mutex);  	/*  	 * Since the unbound list is global, only move to that list if @@ -804,10 +830,18 @@ static void __i915_vma_destroy(struct i915_vma *vma)  	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); -	list_del(&vma->obj_link); +	mutex_lock(&vma->vm->mutex);  	list_del(&vma->vm_link); -	if (vma->obj) -		rb_erase(&vma->obj_node, &vma->obj->vma_tree); +	mutex_unlock(&vma->vm->mutex); + +	if (vma->obj) { +		struct drm_i915_gem_object *obj = 
vma->obj; + +		spin_lock(&obj->vma.lock); +		list_del(&vma->obj_link); +		rb_erase(&vma->obj_node, &vma->obj->vma.tree); +		spin_unlock(&obj->vma.lock); +	}  	rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {  		GEM_BUG_ON(i915_gem_active_isset(&iter->base)); @@ -1003,10 +1037,8 @@ int i915_vma_move_to_active(struct i915_vma *vma,  	 * add the active reference first and queue for it to be dropped  	 * *last*.  	 */ -	if (!i915_gem_active_isset(active) && !vma->active_count++) { -		list_move_tail(&vma->vm_link, &vma->vm->active_list); +	if (!i915_gem_active_isset(active) && !vma->active_count++)  		obj->active_count++; -	}  	i915_gem_active_set(active, rq);  	GEM_BUG_ON(!i915_vma_is_active(vma));  	GEM_BUG_ON(!obj->active_count); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 4f7c1c7599f4..5793abe509a2 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -71,29 +71,42 @@ struct i915_vma {  	unsigned int open_count;  	unsigned long flags;  	/** -	 * How many users have pinned this object in GTT space. The following -	 * users can each hold at most one reference: pwrite/pread, execbuffer -	 * (objects are not allowed multiple times for the same batchbuffer), -	 * and the framebuffer code. When switching/pageflipping, the -	 * framebuffer code has at most two buffers pinned per crtc. +	 * How many users have pinned this object in GTT space.  	 * -	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 -	 * bits with absolutely no headroom. So use 4 bits. +	 * This is a tightly bound, fairly small number of users, so we +	 * stuff inside the flags field so that we can both check for overflow +	 * and detect a no-op i915_vma_pin() in a single check, while also +	 * pinning the vma. +	 * +	 * The worst case display setup would have the same vma pinned for +	 * use on each plane on each crtc, while also building the next atomic +	 * state and holding a pin for the length of the cleanup queue. In the +	 * future, the flip queue may be increased from 1. +	 * Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84 +	 * +	 * For GEM, the number of concurrent users for pwrite/pread is +	 * unbounded. For execbuffer, it is currently one but will in future +	 * be extended to allow multiple clients to pin vma concurrently. +	 * +	 * We also use suballocated pages, with each suballocation claiming +	 * its own pin on the shared vma. At present, this is limited to +	 * exclusive cachelines of a single page, so a maximum of 64 possible +	 * users.  	 
*/ -#define I915_VMA_PIN_MASK 0xf -#define I915_VMA_PIN_OVERFLOW	BIT(5) +#define I915_VMA_PIN_MASK 0xff +#define I915_VMA_PIN_OVERFLOW	BIT(8)  	/** Flags and address space this VMA is bound to */ -#define I915_VMA_GLOBAL_BIND	BIT(6) -#define I915_VMA_LOCAL_BIND	BIT(7) +#define I915_VMA_GLOBAL_BIND	BIT(9) +#define I915_VMA_LOCAL_BIND	BIT(10)  #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) -#define I915_VMA_GGTT		BIT(8) -#define I915_VMA_CAN_FENCE	BIT(9) -#define I915_VMA_CLOSED		BIT(10) -#define I915_VMA_USERFAULT_BIT	11 +#define I915_VMA_GGTT		BIT(11) +#define I915_VMA_CAN_FENCE	BIT(12) +#define I915_VMA_CLOSED		BIT(13) +#define I915_VMA_USERFAULT_BIT	14  #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT) -#define I915_VMA_GGTT_WRITE	BIT(12) +#define I915_VMA_GGTT_WRITE	BIT(15)  	unsigned int active_count;  	struct rb_root active; @@ -425,7 +438,7 @@ void i915_vma_parked(struct drm_i915_private *i915);   * or the list is empty ofc.   */  #define for_each_ggtt_vma(V, OBJ) \ -	list_for_each_entry(V, &(OBJ)->vma_list, obj_link)		\ +	list_for_each_entry(V, &(OBJ)->vma.list, obj_link)		\  		for_each_until(!i915_vma_is_ggtt(V))  #endif diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 9a2fdc77ebcb..a1a263026574 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -110,41 +110,39 @@ intel_plane_destroy_state(struct drm_plane *plane,  }  int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state, -					struct intel_crtc_state *crtc_state, +					struct intel_crtc_state *new_crtc_state,  					const struct intel_plane_state *old_plane_state, -					struct intel_plane_state *intel_state) +					struct intel_plane_state *new_plane_state)  { -	struct drm_plane *plane = intel_state->base.plane; -	struct drm_plane_state *state = &intel_state->base; -	struct intel_plane *intel_plane = to_intel_plane(plane); +	struct intel_plane *plane = to_intel_plane(new_plane_state->base.plane);  	int ret; -	crtc_state->active_planes &= ~BIT(intel_plane->id); -	crtc_state->nv12_planes &= ~BIT(intel_plane->id); -	intel_state->base.visible = false; +	new_crtc_state->active_planes &= ~BIT(plane->id); +	new_crtc_state->nv12_planes &= ~BIT(plane->id); +	new_plane_state->base.visible = false; -	/* If this is a cursor plane, no further checks are needed. 
*/ -	if (!intel_state->base.crtc && !old_plane_state->base.crtc) +	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)  		return 0; -	ret = intel_plane->check_plane(crtc_state, intel_state); +	ret = plane->check_plane(new_crtc_state, new_plane_state);  	if (ret)  		return ret;  	/* FIXME pre-g4x don't work like this */ -	if (state->visible) -		crtc_state->active_planes |= BIT(intel_plane->id); +	if (new_plane_state->base.visible) +		new_crtc_state->active_planes |= BIT(plane->id); -	if (state->visible && state->fb->format->format == DRM_FORMAT_NV12) -		crtc_state->nv12_planes |= BIT(intel_plane->id); +	if (new_plane_state->base.visible && +	    new_plane_state->base.fb->format->format == DRM_FORMAT_NV12) +		new_crtc_state->nv12_planes |= BIT(plane->id); -	if (state->visible || old_plane_state->base.visible) -		crtc_state->update_planes |= BIT(intel_plane->id); +	if (new_plane_state->base.visible || old_plane_state->base.visible) +		new_crtc_state->update_planes |= BIT(plane->id);  	return intel_plane_atomic_calc_changes(old_crtc_state, -					       &crtc_state->base, +					       &new_crtc_state->base,  					       old_plane_state, -					       state); +					       &new_plane_state->base);  }  static int intel_plane_atomic_check(struct drm_plane *plane, diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 561a4f9f044c..b508d8a735e0 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1663,6 +1663,13 @@ init_vbt_missing_defaults(struct drm_i915_private *dev_priv)  		struct ddi_vbt_port_info *info =  			&dev_priv->vbt.ddi_port_info[port]; +		/* +		 * VBT has the TypeC mode (native,TBT/USB) and we don't want +		 * to detect it. +		 */ +		if (intel_port_is_tc(dev_priv, port)) +			continue; +  		info->supports_dvi = (port != PORT_A && port != PORT_E);  		info->supports_hdmi = info->supports_dvi;  		info->supports_dp = (port != PORT_E); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index b58915b8708b..cacaa1d04d17 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -29,168 +29,146 @@  #define task_asleep(tsk) ((tsk)->state & TASK_NORMAL && !(tsk)->on_rq) -static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) +static void irq_enable(struct intel_engine_cs *engine)  { -	struct intel_wait *wait; -	unsigned int result = 0; - -	lockdep_assert_held(&b->irq_lock); - -	wait = b->irq_wait; -	if (wait) { -		/* -		 * N.B. Since task_asleep() and ttwu are not atomic, the -		 * waiter may actually go to sleep after the check, causing -		 * us to suppress a valid wakeup. We prefer to reduce the -		 * number of false positive missed_breadcrumb() warnings -		 * at the expense of a few false negatives, as it it easy -		 * to trigger a false positive under heavy load. Enough -		 * signal should remain from genuine missed_breadcrumb() -		 * for us to detect in CI. 
-		 */ -		bool was_asleep = task_asleep(wait->tsk); - -		result = ENGINE_WAKEUP_WAITER; -		if (wake_up_process(wait->tsk) && was_asleep) -			result |= ENGINE_WAKEUP_ASLEEP; -	} +	if (!engine->irq_enable) +		return; -	return result; +	/* Caller disables interrupts */ +	spin_lock(&engine->i915->irq_lock); +	engine->irq_enable(engine); +	spin_unlock(&engine->i915->irq_lock);  } -unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) +static void irq_disable(struct intel_engine_cs *engine)  { -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	unsigned long flags; -	unsigned int result; - -	spin_lock_irqsave(&b->irq_lock, flags); -	result = __intel_breadcrumbs_wakeup(b); -	spin_unlock_irqrestore(&b->irq_lock, flags); - -	return result; -} +	if (!engine->irq_disable) +		return; -static unsigned long wait_timeout(void) -{ -	return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); +	/* Caller disables interrupts */ +	spin_lock(&engine->i915->irq_lock); +	engine->irq_disable(engine); +	spin_unlock(&engine->i915->irq_lock);  } -static noinline void missed_breadcrumb(struct intel_engine_cs *engine) +static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)  { -	if (GEM_SHOW_DEBUG()) { -		struct drm_printer p = drm_debug_printer(__func__); +	lockdep_assert_held(&b->irq_lock); -		intel_engine_dump(engine, &p, -				  "%s missed breadcrumb at %pS\n", -				  engine->name, __builtin_return_address(0)); -	} +	GEM_BUG_ON(!b->irq_enabled); +	if (!--b->irq_enabled) +		irq_disable(container_of(b, +					 struct intel_engine_cs, +					 breadcrumbs)); -	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); +	b->irq_armed = false;  } -static void intel_breadcrumbs_hangcheck(struct timer_list *t) +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)  { -	struct intel_engine_cs *engine = -		from_timer(engine, t, breadcrumbs.hangcheck);  	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	unsigned int irq_count;  	if (!b->irq_armed)  		return; -	irq_count = READ_ONCE(b->irq_count); -	if (b->hangcheck_interrupts != irq_count) { -		b->hangcheck_interrupts = irq_count; -		mod_timer(&b->hangcheck, wait_timeout()); -		return; -	} +	spin_lock_irq(&b->irq_lock); +	if (b->irq_armed) +		__intel_breadcrumbs_disarm_irq(b); +	spin_unlock_irq(&b->irq_lock); +} -	/* We keep the hangcheck timer alive until we disarm the irq, even -	 * if there are no waiters at present. -	 * -	 * If the waiter was currently running, assume it hasn't had a chance -	 * to process the pending interrupt (e.g, low priority task on a loaded -	 * system) and wait until it sleeps before declaring a missed interrupt. -	 * -	 * If the waiter was asleep (and not even pending a wakeup), then we -	 * must have missed an interrupt as the GPU has stopped advancing -	 * but we still have a waiter. Assuming all batches complete within -	 * DRM_I915_HANGCHECK_JIFFIES [1.5s]! 
-	 */ -	if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) { -		missed_breadcrumb(engine); -		mod_timer(&b->fake_irq, jiffies + 1); -	} else { -		mod_timer(&b->hangcheck, wait_timeout()); -	} +static inline bool __request_completed(const struct i915_request *rq) +{ +	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);  } -static void intel_breadcrumbs_fake_irq(struct timer_list *t) +bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)  { -	struct intel_engine_cs *engine = -		from_timer(engine, t, breadcrumbs.fake_irq);  	struct intel_breadcrumbs *b = &engine->breadcrumbs; +	struct intel_context *ce, *cn; +	struct list_head *pos, *next; +	LIST_HEAD(signal); -	/* -	 * The timer persists in case we cannot enable interrupts, -	 * or if we have previously seen seqno/interrupt incoherency -	 * ("missed interrupt" syndrome, better known as a "missed breadcrumb"). -	 * Here the worker will wake up every jiffie in order to kick the -	 * oldest waiter to do the coherent seqno check. -	 */ +	spin_lock(&b->irq_lock); -	spin_lock_irq(&b->irq_lock); -	if (b->irq_armed && !__intel_breadcrumbs_wakeup(b)) -		__intel_engine_disarm_breadcrumbs(engine); -	spin_unlock_irq(&b->irq_lock); -	if (!b->irq_armed) -		return; +	if (b->irq_armed && list_empty(&b->signalers)) +		__intel_breadcrumbs_disarm_irq(b); -	/* If the user has disabled the fake-irq, restore the hangchecking */ -	if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) { -		mod_timer(&b->hangcheck, wait_timeout()); -		return; +	list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { +		GEM_BUG_ON(list_empty(&ce->signals)); + +		list_for_each_safe(pos, next, &ce->signals) { +			struct i915_request *rq = +				list_entry(pos, typeof(*rq), signal_link); + +			if (!__request_completed(rq)) +				break; + +			GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, +					     &rq->fence.flags)); +			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + +			/* +			 * We may race with direct invocation of +			 * dma_fence_signal(), e.g. i915_request_retire(), +			 * in which case we can skip processing it ourselves. +			 */ +			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, +				     &rq->fence.flags)) +				continue; + +			/* +			 * Queue for execution after dropping the signaling +			 * spinlock as the callback chain may end up adding +			 * more signalers to the same context or engine. +			 */ +			i915_request_get(rq); +			list_add_tail(&rq->signal_link, &signal); +		} + +		/* +		 * We process the list deletion in bulk, only using a list_add +		 * (not list_move) above but keeping the status of +		 * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit. 
+		 */ +		if (!list_is_first(pos, &ce->signals)) { +			/* Advance the list to the first incomplete request */ +			__list_del_many(&ce->signals, pos); +			if (&ce->signals == pos) /* now empty */ +				list_del_init(&ce->signal_link); +		}  	} -	mod_timer(&b->fake_irq, jiffies + 1); -} +	spin_unlock(&b->irq_lock); -static void irq_enable(struct intel_engine_cs *engine) -{ -	if (!engine->irq_enable) -		return; +	list_for_each_safe(pos, next, &signal) { +		struct i915_request *rq = +			list_entry(pos, typeof(*rq), signal_link); -	/* Caller disables interrupts */ -	spin_lock(&engine->i915->irq_lock); -	engine->irq_enable(engine); -	spin_unlock(&engine->i915->irq_lock); +		dma_fence_signal(&rq->fence); +		i915_request_put(rq); +	} + +	return !list_empty(&signal);  } -static void irq_disable(struct intel_engine_cs *engine) +bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)  { -	if (!engine->irq_disable) -		return; +	bool result; -	/* Caller disables interrupts */ -	spin_lock(&engine->i915->irq_lock); -	engine->irq_disable(engine); -	spin_unlock(&engine->i915->irq_lock); +	local_irq_disable(); +	result = intel_engine_breadcrumbs_irq(engine); +	local_irq_enable(); + +	return result;  } -void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) +static void signal_irq_work(struct irq_work *work)  { -	struct intel_breadcrumbs *b = &engine->breadcrumbs; - -	lockdep_assert_held(&b->irq_lock); -	GEM_BUG_ON(b->irq_wait); -	GEM_BUG_ON(!b->irq_armed); - -	GEM_BUG_ON(!b->irq_enabled); -	if (!--b->irq_enabled) -		irq_disable(engine); +	struct intel_engine_cs *engine = +		container_of(work, typeof(*engine), breadcrumbs.irq_work); -	b->irq_armed = false; +	intel_engine_breadcrumbs_irq(engine);  }  void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine) @@ -215,77 +193,14 @@ void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine)  	spin_unlock_irq(&b->irq_lock);  } -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	struct intel_wait *wait, *n; - -	if (!b->irq_armed) -		return; - -	/* -	 * We only disarm the irq when we are idle (all requests completed), -	 * so if the bottom-half remains asleep, it missed the request -	 * completion. -	 */ -	if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) -		missed_breadcrumb(engine); - -	spin_lock_irq(&b->rb_lock); - -	spin_lock(&b->irq_lock); -	b->irq_wait = NULL; -	if (b->irq_armed) -		__intel_engine_disarm_breadcrumbs(engine); -	spin_unlock(&b->irq_lock); - -	rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) { -		GEM_BUG_ON(!intel_engine_signaled(engine, wait->seqno)); -		RB_CLEAR_NODE(&wait->node); -		wake_up_process(wait->tsk); -	} -	b->waiters = RB_ROOT; - -	spin_unlock_irq(&b->rb_lock); -} - -static bool use_fake_irq(const struct intel_breadcrumbs *b) -{ -	const struct intel_engine_cs *engine = -		container_of(b, struct intel_engine_cs, breadcrumbs); - -	if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) -		return false; - -	/* -	 * Only start with the heavy weight fake irq timer if we have not -	 * seen any interrupts since enabling it the first time. If the -	 * interrupts are still arriving, it means we made a mistake in our -	 * engine->seqno_barrier(), a timing error that should be transient -	 * and unlikely to reoccur. 
-	 */ -	return READ_ONCE(b->irq_count) == b->hangcheck_interrupts; -} - -static void enable_fake_irq(struct intel_breadcrumbs *b) -{ -	/* Ensure we never sleep indefinitely */ -	if (!b->irq_enabled || use_fake_irq(b)) -		mod_timer(&b->fake_irq, jiffies + 1); -	else -		mod_timer(&b->hangcheck, wait_timeout()); -} - -static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) +static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)  {  	struct intel_engine_cs *engine =  		container_of(b, struct intel_engine_cs, breadcrumbs); -	struct drm_i915_private *i915 = engine->i915; -	bool enabled;  	lockdep_assert_held(&b->irq_lock);  	if (b->irq_armed) -		return false; +		return;  	/*  	 * The breadcrumb irq will be disarmed on the interrupt after the @@ -303,548 +218,130 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)  	 * the driver is idle) we disarm the breadcrumbs.  	 */ -	/* No interrupts? Kick the waiter every jiffie! */ -	enabled = false; -	if (!b->irq_enabled++ && -	    !test_bit(engine->id, &i915->gpu_error.test_irq_rings)) { +	if (!b->irq_enabled++)  		irq_enable(engine); -		enabled = true; -	} - -	enable_fake_irq(b); -	return enabled; -} - -static inline struct intel_wait *to_wait(struct rb_node *node) -{ -	return rb_entry(node, struct intel_wait, node);  } -static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, -					      struct intel_wait *wait) -{ -	lockdep_assert_held(&b->rb_lock); -	GEM_BUG_ON(b->irq_wait == wait); - -	/* -	 * This request is completed, so remove it from the tree, mark it as -	 * complete, and *then* wake up the associated task. N.B. when the -	 * task wakes up, it will find the empty rb_node, discern that it -	 * has already been removed from the tree and skip the serialisation -	 * of the b->rb_lock and b->irq_lock. This means that the destruction -	 * of the intel_wait is not serialised with the interrupt handler -	 * by the waiter - it must instead be serialised by the caller. -	 */ -	rb_erase(&wait->node, &b->waiters); -	RB_CLEAR_NODE(&wait->node); - -	if (wait->tsk->state != TASK_RUNNING) -		wake_up_process(wait->tsk); /* implicit smp_wmb() */ -} - -static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine, -					    struct rb_node *next) +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)  {  	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	spin_lock(&b->irq_lock); -	GEM_BUG_ON(!b->irq_armed); -	GEM_BUG_ON(!b->irq_wait); -	b->irq_wait = to_wait(next); -	spin_unlock(&b->irq_lock); - -	/* We always wake up the next waiter that takes over as the bottom-half -	 * as we may delegate not only the irq-seqno barrier to the next waiter -	 * but also the task of waking up concurrent waiters. -	 */ -	if (next) -		wake_up_process(to_wait(next)->tsk); -} - -static bool __intel_engine_add_wait(struct intel_engine_cs *engine, -				    struct intel_wait *wait) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	struct rb_node **p, *parent, *completed; -	bool first, armed; -	u32 seqno; - -	GEM_BUG_ON(!wait->seqno); - -	/* Insert the request into the retirement ordered list -	 * of waiters by walking the rbtree. If we are the oldest -	 * seqno in the tree (the first to be retired), then -	 * set ourselves as the bottom-half. 
-	 * -	 * As we descend the tree, prune completed branches since we hold the -	 * spinlock we know that the first_waiter must be delayed and can -	 * reduce some of the sequential wake up latency if we take action -	 * ourselves and wake up the completed tasks in parallel. Also, by -	 * removing stale elements in the tree, we may be able to reduce the -	 * ping-pong between the old bottom-half and ourselves as first-waiter. -	 */ -	armed = false; -	first = true; -	parent = NULL; -	completed = NULL; -	seqno = intel_engine_get_seqno(engine); - -	 /* If the request completed before we managed to grab the spinlock, -	  * return now before adding ourselves to the rbtree. We let the -	  * current bottom-half handle any pending wakeups and instead -	  * try and get out of the way quickly. -	  */ -	if (i915_seqno_passed(seqno, wait->seqno)) { -		RB_CLEAR_NODE(&wait->node); -		return first; -	} - -	p = &b->waiters.rb_node; -	while (*p) { -		parent = *p; -		if (wait->seqno == to_wait(parent)->seqno) { -			/* We have multiple waiters on the same seqno, select -			 * the highest priority task (that with the smallest -			 * task->prio) to serve as the bottom-half for this -			 * group. -			 */ -			if (wait->tsk->prio > to_wait(parent)->tsk->prio) { -				p = &parent->rb_right; -				first = false; -			} else { -				p = &parent->rb_left; -			} -		} else if (i915_seqno_passed(wait->seqno, -					     to_wait(parent)->seqno)) { -			p = &parent->rb_right; -			if (i915_seqno_passed(seqno, to_wait(parent)->seqno)) -				completed = parent; -			else -				first = false; -		} else { -			p = &parent->rb_left; -		} -	} -	rb_link_node(&wait->node, parent, p); -	rb_insert_color(&wait->node, &b->waiters); - -	if (first) { -		spin_lock(&b->irq_lock); -		b->irq_wait = wait; -		/* After assigning ourselves as the new bottom-half, we must -		 * perform a cursory check to prevent a missed interrupt. -		 * Either we miss the interrupt whilst programming the hardware, -		 * or if there was a previous waiter (for a later seqno) they -		 * may be woken instead of us (due to the inherent race -		 * in the unlocked read of b->irq_seqno_bh in the irq handler) -		 * and so we miss the wake up. -		 */ -		armed = __intel_breadcrumbs_enable_irq(b); -		spin_unlock(&b->irq_lock); -	} - -	if (completed) { -		/* Advance the bottom-half (b->irq_wait) before we wake up -		 * the waiters who may scribble over their intel_wait -		 * just as the interrupt handler is dereferencing it via -		 * b->irq_wait. -		 */ -		if (!first) { -			struct rb_node *next = rb_next(completed); -			GEM_BUG_ON(next == &wait->node); -			__intel_breadcrumbs_next(engine, next); -		} - -		do { -			struct intel_wait *crumb = to_wait(completed); -			completed = rb_prev(completed); -			__intel_breadcrumbs_finish(b, crumb); -		} while (completed); -	} - -	GEM_BUG_ON(!b->irq_wait); -	GEM_BUG_ON(!b->irq_armed); -	GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node); +	spin_lock_init(&b->irq_lock); +	INIT_LIST_HEAD(&b->signalers); -	return armed; +	init_irq_work(&b->irq_work, signal_irq_work);  } -bool intel_engine_add_wait(struct intel_engine_cs *engine, -			   struct intel_wait *wait) +void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)  {  	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	bool armed; - -	spin_lock_irq(&b->rb_lock); -	armed = __intel_engine_add_wait(engine, wait); -	spin_unlock_irq(&b->rb_lock); -	if (armed) -		return armed; - -	/* Make the caller recheck if its request has already started. 
*/ -	return intel_engine_has_started(engine, wait->seqno); -} +	unsigned long flags; -static inline bool chain_wakeup(struct rb_node *rb, int priority) -{ -	return rb && to_wait(rb)->tsk->prio <= priority; -} +	spin_lock_irqsave(&b->irq_lock, flags); -static inline int wakeup_priority(struct intel_breadcrumbs *b, -				  struct task_struct *tsk) -{ -	if (tsk == b->signaler) -		return INT_MIN; +	if (b->irq_enabled) +		irq_enable(engine);  	else -		return tsk->prio; -} - -static void __intel_engine_remove_wait(struct intel_engine_cs *engine, -				       struct intel_wait *wait) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; - -	lockdep_assert_held(&b->rb_lock); - -	if (RB_EMPTY_NODE(&wait->node)) -		goto out; - -	if (b->irq_wait == wait) { -		const int priority = wakeup_priority(b, wait->tsk); -		struct rb_node *next; - -		/* We are the current bottom-half. Find the next candidate, -		 * the first waiter in the queue on the remaining oldest -		 * request. As multiple seqnos may complete in the time it -		 * takes us to wake up and find the next waiter, we have to -		 * wake up that waiter for it to perform its own coherent -		 * completion check. -		 */ -		next = rb_next(&wait->node); -		if (chain_wakeup(next, priority)) { -			/* If the next waiter is already complete, -			 * wake it up and continue onto the next waiter. So -			 * if have a small herd, they will wake up in parallel -			 * rather than sequentially, which should reduce -			 * the overall latency in waking all the completed -			 * clients. -			 * -			 * However, waking up a chain adds extra latency to -			 * the first_waiter. This is undesirable if that -			 * waiter is a high priority task. -			 */ -			u32 seqno = intel_engine_get_seqno(engine); - -			while (i915_seqno_passed(seqno, to_wait(next)->seqno)) { -				struct rb_node *n = rb_next(next); - -				__intel_breadcrumbs_finish(b, to_wait(next)); -				next = n; -				if (!chain_wakeup(next, priority)) -					break; -			} -		} - -		__intel_breadcrumbs_next(engine, next); -	} else { -		GEM_BUG_ON(rb_first(&b->waiters) == &wait->node); -	} - -	GEM_BUG_ON(RB_EMPTY_NODE(&wait->node)); -	rb_erase(&wait->node, &b->waiters); -	RB_CLEAR_NODE(&wait->node); +		irq_disable(engine); -out: -	GEM_BUG_ON(b->irq_wait == wait); -	GEM_BUG_ON(rb_first(&b->waiters) != -		   (b->irq_wait ? &b->irq_wait->node : NULL)); +	spin_unlock_irqrestore(&b->irq_lock, flags);  } -void intel_engine_remove_wait(struct intel_engine_cs *engine, -			      struct intel_wait *wait) +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)  { -	struct intel_breadcrumbs *b = &engine->breadcrumbs; - -	/* Quick check to see if this waiter was already decoupled from -	 * the tree by the bottom-half to avoid contention on the spinlock -	 * by the herd. 
-	 */ -	if (RB_EMPTY_NODE(&wait->node)) { -		GEM_BUG_ON(READ_ONCE(b->irq_wait) == wait); -		return; -	} - -	spin_lock_irq(&b->rb_lock); -	__intel_engine_remove_wait(engine, wait); -	spin_unlock_irq(&b->rb_lock);  } -static void signaler_set_rtpriority(void) +bool i915_request_enable_breadcrumb(struct i915_request *rq)  { -	 struct sched_param param = { .sched_priority = 1 }; - -	 sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); -} +	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; -static int intel_breadcrumbs_signaler(void *arg) -{ -	struct intel_engine_cs *engine = arg; -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	struct i915_request *rq, *n; +	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); -	/* Install ourselves with high priority to reduce signalling latency */ -	signaler_set_rtpriority(); +	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) +		return true; -	do { -		bool do_schedule = true; -		LIST_HEAD(list); -		u32 seqno; +	spin_lock(&b->irq_lock); +	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) && +	    !__request_completed(rq)) { +		struct intel_context *ce = rq->hw_context; +		struct list_head *pos; -		set_current_state(TASK_INTERRUPTIBLE); -		if (list_empty(&b->signals)) -			goto sleep; +		__intel_breadcrumbs_arm_irq(b);  		/* -		 * We are either woken up by the interrupt bottom-half, -		 * or by a client adding a new signaller. In both cases, -		 * the GPU seqno may have advanced beyond our oldest signal. -		 * If it has, propagate the signal, remove the waiter and -		 * check again with the next oldest signal. Otherwise we -		 * need to wait for a new interrupt from the GPU or for -		 * a new client. +		 * We keep the seqno in retirement order, so we can break +		 * inside intel_engine_breadcrumbs_irq as soon as we've passed +		 * the last completed request (or seen a request that hasn't +		 * event started). We could iterate the timeline->requests list, +		 * but keeping a separate signalers_list has the advantage of +		 * hopefully being much smaller than the full list and so +		 * provides faster iteration and detection when there are no +		 * more interrupts required for this context. +		 * +		 * We typically expect to add new signalers in order, so we +		 * start looking for our insertion point from the tail of +		 * the list.  		 
*/ -		seqno = intel_engine_get_seqno(engine); - -		spin_lock_irq(&b->rb_lock); -		list_for_each_entry_safe(rq, n, &b->signals, signaling.link) { -			u32 this = rq->signaling.wait.seqno; - -			GEM_BUG_ON(!rq->signaling.wait.seqno); +		list_for_each_prev(pos, &ce->signals) { +			struct i915_request *it = +				list_entry(pos, typeof(*it), signal_link); -			if (!i915_seqno_passed(seqno, this)) +			if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))  				break; - -			if (likely(this == i915_request_global_seqno(rq))) { -				__intel_engine_remove_wait(engine, -							   &rq->signaling.wait); - -				rq->signaling.wait.seqno = 0; -				__list_del_entry(&rq->signaling.link); - -				if (!i915_request_signaled(rq)) { -					list_add_tail(&rq->signaling.link, -						      &list); -					i915_request_get(rq); -				} -			}  		} -		spin_unlock_irq(&b->rb_lock); - -		if (!list_empty(&list)) { -			local_bh_disable(); -			list_for_each_entry_safe(rq, n, &list, signaling.link) { -				dma_fence_signal(&rq->fence); -				GEM_BUG_ON(!i915_request_completed(rq)); -				i915_request_put(rq); -			} -			local_bh_enable(); /* kick start the tasklets */ - -			/* -			 * If the engine is saturated we may be continually -			 * processing completed requests. This angers the -			 * NMI watchdog if we never let anything else -			 * have access to the CPU. Let's pretend to be nice -			 * and relinquish the CPU if we burn through the -			 * entire RT timeslice! -			 */ -			do_schedule = need_resched(); -		} - -		if (unlikely(do_schedule)) { -sleep: -			if (kthread_should_park()) -				kthread_parkme(); +		list_add(&rq->signal_link, pos); +		if (pos == &ce->signals) /* catch transitions from empty list */ +			list_move_tail(&ce->signal_link, &b->signalers); -			if (unlikely(kthread_should_stop())) -				break; - -			schedule(); -		} -	} while (1); -	__set_current_state(TASK_RUNNING); - -	return 0; -} - -static void insert_signal(struct intel_breadcrumbs *b, -			  struct i915_request *request, -			  const u32 seqno) -{ -	struct i915_request *iter; - -	lockdep_assert_held(&b->rb_lock); - -	/* -	 * A reasonable assumption is that we are called to add signals -	 * in sequence, as the requests are submitted for execution and -	 * assigned a global_seqno. This will be the case for the majority -	 * of internally generated signals (inter-engine signaling). -	 * -	 * Out of order waiters triggering random signaling enabling will -	 * be more problematic, but hopefully rare enough and the list -	 * small enough that the O(N) insertion sort is not an issue. -	 */ - -	list_for_each_entry_reverse(iter, &b->signals, signaling.link) -		if (i915_seqno_passed(seqno, iter->signaling.wait.seqno)) -			break; - -	list_add(&request->signaling.link, &iter->signaling.link); -} - -bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup) -{ -	struct intel_engine_cs *engine = request->engine; -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	struct intel_wait *wait = &request->signaling.wait; -	u32 seqno; - -	/* -	 * Note that we may be called from an interrupt handler on another -	 * device (e.g. nouveau signaling a fence completion causing us -	 * to submit a request, and so enable signaling). As such, -	 * we need to make sure that all other users of b->rb_lock protect -	 * against interrupts, i.e. use spin_lock_irqsave. 
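
The retirement-ordered, tail-first insertion used by i915_request_enable_breadcrumb() above can be illustrated outside the driver. Below is a minimal sketch in plain C with a hand-rolled circular list and a seqno_passed() helper standing in for i915_seqno_passed(); it shows the technique only and is not the driver's code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct signal_node {
	uint32_t seqno;
	struct signal_node *prev, *next;
};

/* "a" is at or after "b", tolerating u32 wraparound (cf. i915_seqno_passed()) */
static bool seqno_passed(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) >= 0;
}

/*
 * Walk the retirement-ordered list from the tail (new signalers usually
 * arrive in order) and link @rq after the newest entry it has passed;
 * if it has passed none, it ends up at the front of the list.
 */
static void insert_signal(struct signal_node *head, struct signal_node *rq)
{
	struct signal_node *pos;

	for (pos = head->prev; pos != head; pos = pos->prev)
		if (seqno_passed(rq->seqno, pos->seqno))
			break;

	rq->prev = pos;
	rq->next = pos->next;
	pos->next->prev = rq;
	pos->next = rq;
}

int main(void)
{
	struct signal_node head = { 0, &head, &head };
	struct signal_node a = { .seqno = 1 }, b = { .seqno = 2 }, c = { .seqno = 3 };

	insert_signal(&head, &a);
	insert_signal(&head, &c);
	insert_signal(&head, &b); /* out of order, still lands between a and c */

	for (struct signal_node *pos = head.next; pos != &head; pos = pos->next)
		printf("%u\n", (unsigned int)pos->seqno); /* prints 1 2 3 */

	return 0;
}

On the interrupt side, this ordering is what lets the walk over ce->signals stop at the first request that has not yet completed, as the comment above notes.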
-	 */ - -	/* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ -	GEM_BUG_ON(!irqs_disabled()); -	lockdep_assert_held(&request->lock); - -	seqno = i915_request_global_seqno(request); -	if (!seqno) /* will be enabled later upon execution */ -		return true; - -	GEM_BUG_ON(wait->seqno); -	wait->tsk = b->signaler; -	wait->request = request; -	wait->seqno = seqno; - -	/* -	 * Add ourselves into the list of waiters, but registering our -	 * bottom-half as the signaller thread. As per usual, only the oldest -	 * waiter (not just signaller) is tasked as the bottom-half waking -	 * up all completed waiters after the user interrupt. -	 * -	 * If we are the oldest waiter, enable the irq (after which we -	 * must double check that the seqno did not complete). -	 */ -	spin_lock(&b->rb_lock); -	insert_signal(b, request, seqno); -	wakeup &= __intel_engine_add_wait(engine, wait); -	spin_unlock(&b->rb_lock); - -	if (wakeup) { -		wake_up_process(b->signaler); -		return !intel_wait_complete(wait); +		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);  	} +	spin_unlock(&b->irq_lock); -	return true; +	return !__request_completed(rq);  } -void intel_engine_cancel_signaling(struct i915_request *request) +void i915_request_cancel_breadcrumb(struct i915_request *rq)  { -	struct intel_engine_cs *engine = request->engine; -	struct intel_breadcrumbs *b = &engine->breadcrumbs; - -	GEM_BUG_ON(!irqs_disabled()); -	lockdep_assert_held(&request->lock); +	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; -	if (!READ_ONCE(request->signaling.wait.seqno)) +	if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))  		return; -	spin_lock(&b->rb_lock); -	__intel_engine_remove_wait(engine, &request->signaling.wait); -	if (fetch_and_zero(&request->signaling.wait.seqno)) -		__list_del_entry(&request->signaling.link); -	spin_unlock(&b->rb_lock); -} - -int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	struct task_struct *tsk; - -	spin_lock_init(&b->rb_lock); -	spin_lock_init(&b->irq_lock); - -	timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0); -	timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0); - -	INIT_LIST_HEAD(&b->signals); - -	/* Spawn a thread to provide a common bottom-half for all signals. -	 * As this is an asynchronous interface we cannot steal the current -	 * task for handling the bottom-half to the user interrupt, therefore -	 * we create a thread to do the coherent seqno dance after the -	 * interrupt and then signal the waitqueue (via the dma-buf/fence). 
-	 */ -	tsk = kthread_run(intel_breadcrumbs_signaler, engine, -			  "i915/signal:%d", engine->id); -	if (IS_ERR(tsk)) -		return PTR_ERR(tsk); - -	b->signaler = tsk; - -	return 0; -} - -static void cancel_fake_irq(struct intel_engine_cs *engine) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; - -	del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */ -	del_timer_sync(&b->hangcheck); -	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); -} - -void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	unsigned long flags; - -	spin_lock_irqsave(&b->irq_lock, flags); +	spin_lock(&b->irq_lock); +	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { +		struct intel_context *ce = rq->hw_context; -	/* -	 * Leave the fake_irq timer enabled (if it is running), but clear the -	 * bit so that it turns itself off on its next wake up and goes back -	 * to the long hangcheck interval if still required. -	 */ -	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); +		list_del(&rq->signal_link); +		if (list_empty(&ce->signals)) +			list_del_init(&ce->signal_link); -	if (b->irq_enabled) -		irq_enable(engine); -	else -		irq_disable(engine); - -	spin_unlock_irqrestore(&b->irq_lock, flags); +		clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); +	} +	spin_unlock(&b->irq_lock);  } -void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, +				    struct drm_printer *p)  {  	struct intel_breadcrumbs *b = &engine->breadcrumbs; +	struct intel_context *ce; +	struct i915_request *rq; -	/* The engines should be idle and all requests accounted for! */ -	WARN_ON(READ_ONCE(b->irq_wait)); -	WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); -	WARN_ON(!list_empty(&b->signals)); +	if (list_empty(&b->signalers)) +		return; -	if (!IS_ERR_OR_NULL(b->signaler)) -		kthread_stop(b->signaler); +	drm_printf(p, "Signals:\n"); -	cancel_fake_irq(engine); +	spin_lock_irq(&b->irq_lock); +	list_for_each_entry(ce, &b->signalers, signal_link) { +		list_for_each_entry(rq, &ce->signals, signal_link) { +			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n", +				   rq->fence.context, rq->fence.seqno, +				   i915_request_completed(rq) ? "!" : +				   i915_request_started(rq) ? 
"*" : +				   "", +				   jiffies_to_msecs(jiffies - rq->emitted_jiffies)); +		} +	} +	spin_unlock_irq(&b->irq_lock);  } - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/intel_breadcrumbs.c" -#endif diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index bc7589656a8f..4b0044cdcf1a 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -605,48 +605,48 @@ void intel_color_load_luts(struct intel_crtc_state *crtc_state)  	dev_priv->display.load_luts(crtc_state);  } +static int check_lut_size(const struct drm_property_blob *lut, int expected) +{ +	int len; + +	if (!lut) +		return 0; + +	len = drm_color_lut_size(lut); +	if (len != expected) { +		DRM_DEBUG_KMS("Invalid LUT size; got %d, expected %d\n", +			      len, expected); +		return -EINVAL; +	} + +	return 0; +} +  int intel_color_check(struct intel_crtc_state *crtc_state)  {  	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); -	size_t gamma_length, degamma_length; -	uint32_t tests = DRM_COLOR_LUT_NON_DECREASING; +	int gamma_length, degamma_length; +	u32 gamma_tests, degamma_tests;  	degamma_length = INTEL_INFO(dev_priv)->color.degamma_lut_size;  	gamma_length = INTEL_INFO(dev_priv)->color.gamma_lut_size; +	degamma_tests = INTEL_INFO(dev_priv)->color.degamma_lut_tests; +	gamma_tests = INTEL_INFO(dev_priv)->color.gamma_lut_tests; -	/* -	 * All of our platforms mandate that the degamma curve be -	 * non-decreasing.  Additionally, GLK and gen11 only accept a single -	 * value for red, green, and blue in the degamma table.  Make sure -	 * userspace didn't try to pass us something we can't handle. -	 * -	 * We don't have any extra hardware constraints on the gamma table, -	 * so no need to explicitly check it. -	 */ -	if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) -		tests |= DRM_COLOR_LUT_EQUAL_CHANNELS; +	/* Always allow legacy gamma LUT with no further checking. */ +	if (crtc_state_is_legacy_gamma(crtc_state)) +		return 0; -	if (drm_color_lut_check(crtc_state->base.degamma_lut, tests) != 0) +	if (check_lut_size(crtc_state->base.degamma_lut, degamma_length) || +	    check_lut_size(crtc_state->base.gamma_lut, gamma_length))  		return -EINVAL; -	/* -	 * We allow both degamma & gamma luts at the right size or -	 * NULL. -	 */ -	if ((!crtc_state->base.degamma_lut || -	     drm_color_lut_size(crtc_state->base.degamma_lut) == degamma_length) && -	    (!crtc_state->base.gamma_lut || -	     drm_color_lut_size(crtc_state->base.gamma_lut) == gamma_length)) -		return 0; +	if (drm_color_lut_check(crtc_state->base.degamma_lut, degamma_tests) || +	    drm_color_lut_check(crtc_state->base.gamma_lut, gamma_tests)) +		return -EINVAL; -	/* -	 * We also allow no degamma lut/ctm and a gamma lut at the legacy -	 * size (256 entries). 
-	 */ -	if (crtc_state_is_legacy_gamma(crtc_state)) -		return 0; -	return -EINVAL; +	return 0;  }  void intel_color_init(struct intel_crtc *crtc) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index acd94354afc8..ca705546a0ab 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -995,7 +995,7 @@ static u32 hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll)  	}  } -static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, +static u32 icl_pll_to_ddi_clk_sel(struct intel_encoder *encoder,  				  const struct intel_crtc_state *crtc_state)  {  	const struct intel_shared_dpll *pll = crtc_state->shared_dpll; @@ -1004,10 +1004,11 @@ static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder,  	switch (id) {  	default: +		/* +		 * DPLL_ID_ICL_DPLL0 and DPLL_ID_ICL_DPLL1 should not be used +		 * here, so do warn if this get passed in +		 */  		MISSING_CASE(id); -		/* fall through */ -	case DPLL_ID_ICL_DPLL0: -	case DPLL_ID_ICL_DPLL1:  		return DDI_CLK_SEL_NONE;  	case DPLL_ID_ICL_TBTPLL:  		switch (clock) { @@ -1021,7 +1022,7 @@ static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder,  			return DDI_CLK_SEL_TBT_810;  		default:  			MISSING_CASE(clock); -			break; +			return DDI_CLK_SEL_NONE;  		}  	case DPLL_ID_ICL_MGPLL1:  	case DPLL_ID_ICL_MGPLL2: @@ -1391,16 +1392,17 @@ static int icl_calc_tbt_pll_link(struct drm_i915_private *dev_priv,  static int icl_calc_mg_pll_link(struct drm_i915_private *dev_priv,  				enum port port)  { +	enum tc_port tc_port = intel_port_to_tc(dev_priv, port);  	u32 mg_pll_div0, mg_clktop_hsclkctl;  	u32 m1, m2_int, m2_frac, div1, div2, refclk;  	u64 tmp;  	refclk = dev_priv->cdclk.hw.ref; -	mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); -	mg_clktop_hsclkctl = I915_READ(MG_CLKTOP2_HSCLKCTL(port)); +	mg_pll_div0 = I915_READ(MG_PLL_DIV0(tc_port)); +	mg_clktop_hsclkctl = I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port)); -	m1 = I915_READ(MG_PLL_DIV1(port)) & MG_PLL_DIV1_FBPREDIV_MASK; +	m1 = I915_READ(MG_PLL_DIV1(tc_port)) & MG_PLL_DIV1_FBPREDIV_MASK;  	m2_int = mg_pll_div0 & MG_PLL_DIV0_FBDIV_INT_MASK;  	m2_frac = (mg_pll_div0 & MG_PLL_DIV0_FRACNEN_H) ?  		  (mg_pll_div0 & MG_PLL_DIV0_FBDIV_FRAC_MASK) >> @@ -2868,7 +2870,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,  	if (IS_ICELAKE(dev_priv)) {  		if (!intel_port_is_combophy(dev_priv, port))  			I915_WRITE(DDI_CLK_SEL(port), -				   icl_pll_to_ddi_pll_sel(encoder, crtc_state)); +				   icl_pll_to_ddi_clk_sel(encoder, crtc_state));  	} else if (IS_CANNONLAKE(dev_priv)) {  		/* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. 
*/  		val = I915_READ(DPCLKA_CFGCR0); diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 957c6527f76b..7bf09cef591a 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -189,6 +189,8 @@ struct intel_device_info {  	struct color_luts {  		u16 degamma_lut_size;  		u16 gamma_lut_size; +		u32 degamma_lut_tests; +		u32 gamma_lut_tests;  	} color;  }; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f6f0d78436e3..df7a7a310f2f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1758,6 +1758,35 @@ enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc)  		return crtc->pipe;  } +static u32 intel_crtc_max_vblank_count(const struct intel_crtc_state *crtc_state) +{ +	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + +	/* +	 * On i965gm the hardware frame counter reads +	 * zero when the TV encoder is enabled :( +	 */ +	if (IS_I965GM(dev_priv) && +	    (crtc_state->output_types & BIT(INTEL_OUTPUT_TVOUT))) +		return 0; + +	if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) +		return 0xffffffff; /* full 32 bit counter */ +	else if (INTEL_GEN(dev_priv) >= 3) +		return 0xffffff; /* only 24 bits of frame count */ +	else +		return 0; /* Gen2 doesn't have a hardware frame counter */ +} + +static void intel_crtc_vblank_on(const struct intel_crtc_state *crtc_state) +{ +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + +	drm_crtc_set_max_vblank_count(&crtc->base, +				      intel_crtc_max_vblank_count(crtc_state)); +	drm_crtc_vblank_on(&crtc->base); +} +  static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state)  {  	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); @@ -1810,7 +1839,7 @@ static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state)  	 * when it's derived from the timestamps. So let's wait for the  	 * pipe to start properly before we call drm_crtc_vblank_on()  	 */ -	if (dev_priv->drm.max_vblank_count == 0) +	if (intel_crtc_max_vblank_count(new_crtc_state) == 0)  		intel_wait_for_pipe_scanline_moving(crtc);  } @@ -3901,6 +3930,16 @@ static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_sta  		else if (old_crtc_state->pch_pfit.enabled)  			ironlake_pfit_disable(old_crtc_state);  	} + +	/* +	 * We don't (yet) allow userspace to control the pipe background color, +	 * so force it to black, but apply pipe gamma and CSC so that its +	 * handling will match how we program our planes. 
+	 */ +	if (INTEL_GEN(dev_priv) >= 9) +		I915_WRITE(SKL_BOTTOM_COLOR(crtc->pipe), +			   SKL_BOTTOM_COLOR_GAMMA_ENABLE | +			   SKL_BOTTOM_COLOR_CSC_ENABLE);  }  static void intel_fdi_normal_train(struct intel_crtc *crtc) @@ -5678,7 +5717,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config,  		ironlake_pch_enable(old_intel_state, pipe_config);  	assert_vblank_disabled(crtc); -	drm_crtc_vblank_on(crtc); +	intel_crtc_vblank_on(pipe_config);  	intel_encoders_enable(crtc, pipe_config, old_state); @@ -5832,7 +5871,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config,  		intel_ddi_set_vc_payload_alloc(pipe_config, true);  	assert_vblank_disabled(crtc); -	drm_crtc_vblank_on(crtc); +	intel_crtc_vblank_on(pipe_config);  	intel_encoders_enable(crtc, pipe_config, old_state); @@ -6171,7 +6210,7 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config,  	intel_enable_pipe(pipe_config);  	assert_vblank_disabled(crtc); -	drm_crtc_vblank_on(crtc); +	intel_crtc_vblank_on(pipe_config);  	intel_encoders_enable(crtc, pipe_config, old_state);  } @@ -6230,7 +6269,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config,  	intel_enable_pipe(pipe_config);  	assert_vblank_disabled(crtc); -	drm_crtc_vblank_on(crtc); +	intel_crtc_vblank_on(pipe_config);  	intel_encoders_enable(crtc, pipe_config, old_state);  } @@ -9416,7 +9455,7 @@ static void icelake_get_ddi_pll(struct drm_i915_private *dev_priv,  		if (WARN_ON(!intel_dpll_is_combophy(id)))  			return;  	} else if (intel_port_is_tc(dev_priv, port)) { -		id = icl_port_to_mg_pll_id(port); +		id = icl_tc_port_to_pll_id(intel_port_to_tc(dev_priv, port));  	} else {  		WARN(1, "Invalid port %x\n", port);  		return; @@ -11690,6 +11729,23 @@ pipe_config_err(bool adjust, const char *name, const char *format, ...)  	va_end(args);  } +static bool fastboot_enabled(struct drm_i915_private *dev_priv) +{ +	if (i915_modparams.fastboot != -1) +		return i915_modparams.fastboot; + +	/* Enable fastboot by default on Skylake and newer */ +	if (INTEL_GEN(dev_priv) >= 9) +		return true; + +	/* Enable fastboot by default on VLV and CHV */ +	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) +		return true; + +	/* Disabled by default on all others */ +	return false; +} +  static bool  intel_pipe_config_compare(struct drm_i915_private *dev_priv,  			  struct intel_crtc_state *current_config, @@ -11701,7 +11757,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,  		(current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) &&  		!(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); -	if (fixup_inherited && !i915_modparams.fastboot) { +	if (fixup_inherited && !fastboot_enabled(dev_priv)) {  		DRM_DEBUG_KMS("initial modeset and fastboot not set\n");  		ret = false;  	} @@ -12778,8 +12834,9 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,  u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc)  {  	struct drm_device *dev = crtc->base.dev; +	struct drm_vblank_crtc *vblank = &dev->vblank[drm_crtc_index(&crtc->base)]; -	if (!dev->max_vblank_count) +	if (!vblank->max_vblank_count)  		return (u32)drm_crtc_accurate_vblank_count(&crtc->base);  	return dev->driver->get_vblank_counter(dev, crtc->pipe); @@ -14327,8 +14384,10 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv)  		/*  		 * On some ICL SKUs port F is not present. No strap bits for  		 * this, so rely on VBT. +		 * Work around broken VBTs on SKUs known to have no port F.  		 
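
A note on the fastboot_enabled() helper added above: i915.fastboot effectively becomes tri-state. An explicit i915.fastboot=0 or i915.fastboot=1 on the kernel command line still forces the behaviour off or on everywhere, while -1 (presumably the new module-parameter default, changed outside this hunk) defers to the per-platform choice shown here: enabled on Skylake and newer and on Valleyview/Cherryview, disabled elsewhere.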
*/ -		if (intel_bios_is_port_present(dev_priv, PORT_F)) +		if (IS_ICL_WITH_PORT_F(dev_priv) && +		    intel_bios_is_port_present(dev_priv, PORT_F))  			intel_ddi_init(dev_priv, PORT_F);  		icl_dsi_init(dev_priv); @@ -14680,14 +14739,6 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,  	drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); -	if (fb->format->format == DRM_FORMAT_NV12 && -	    (fb->width < SKL_MIN_YUV_420_SRC_W || -	     fb->height < SKL_MIN_YUV_420_SRC_H || -	     (fb->width % 4) != 0 || (fb->height % 4) != 0)) { -		DRM_DEBUG_KMS("src dimensions not correct for NV12\n"); -		goto err; -	} -  	for (i = 0; i < fb->format->num_planes; i++) {  		u32 stride_alignment; @@ -15457,6 +15508,15 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc,  			    plane->base.type != DRM_PLANE_TYPE_PRIMARY)  				intel_plane_disable_noatomic(crtc, plane);  		} + +		/* +		 * Disable any background color set by the BIOS, but enable the +		 * gamma and CSC to match how we program our planes. +		 */ +		if (INTEL_GEN(dev_priv) >= 9) +			I915_WRITE(SKL_BOTTOM_COLOR(crtc->pipe), +				   SKL_BOTTOM_COLOR_GAMMA_ENABLE | +				   SKL_BOTTOM_COLOR_CSC_ENABLE);  	}  	/* Adjust the state of the output pipe according to whether we @@ -15493,16 +15553,45 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc,  	}  } +static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) +{ +	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + +	/* +	 * Some SNB BIOSen (eg. ASUS K53SV) are known to misprogram +	 * the hardware when a high res displays plugged in. DPLL P +	 * divider is zero, and the pipe timings are bonkers. We'll +	 * try to disable everything in that case. +	 * +	 * FIXME would be nice to be able to sanitize this state +	 * without several WARNs, but for now let's take the easy +	 * road. +	 */ +	return IS_GEN(dev_priv, 6) && +		crtc_state->base.active && +		crtc_state->shared_dpll && +		crtc_state->port_clock == 0; +} +  static void intel_sanitize_encoder(struct intel_encoder *encoder)  {  	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);  	struct intel_connector *connector; +	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); +	struct intel_crtc_state *crtc_state = crtc ? +		to_intel_crtc_state(crtc->base.state) : NULL;  	/* We need to check both for a crtc link (meaning that the  	 * encoder is active and trying to read from a pipe) and the  	 * pipe itself being active. */ -	bool has_active_crtc = encoder->base.crtc && -		to_intel_crtc(encoder->base.crtc)->active; +	bool has_active_crtc = crtc_state && +		crtc_state->base.active; + +	if (crtc_state && has_bogus_dpll_config(crtc_state)) { +		DRM_DEBUG_KMS("BIOS has misprogrammed the hardware. Disabling pipe %c\n", +			      pipe_name(crtc->pipe)); +		has_active_crtc = false; +	}  	connector = intel_encoder_find_connector(encoder);  	if (connector && !has_active_crtc) { @@ -15513,16 +15602,25 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder)  		/* Connector is active, but has no active pipe. This is  		 * fallout from our resume register restoring. Disable  		 * the encoder manually again. 
*/ -		if (encoder->base.crtc) { -			struct drm_crtc_state *crtc_state = encoder->base.crtc->state; +		if (crtc_state) { +			struct drm_encoder *best_encoder;  			DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n",  				      encoder->base.base.id,  				      encoder->base.name); + +			/* avoid oopsing in case the hooks consult best_encoder */ +			best_encoder = connector->base.state->best_encoder; +			connector->base.state->best_encoder = &encoder->base; +  			if (encoder->disable) -				encoder->disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); +				encoder->disable(encoder, crtc_state, +						 connector->base.state);  			if (encoder->post_disable) -				encoder->post_disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); +				encoder->post_disable(encoder, crtc_state, +						      connector->base.state); + +			connector->base.state->best_encoder = best_encoder;  		}  		encoder->base.crtc = NULL; @@ -15894,10 +15992,12 @@ intel_modeset_setup_hw_state(struct drm_device *dev,  	 * waits, so we need vblank interrupts restored beforehand.  	 */  	for_each_intel_crtc(&dev_priv->drm, crtc) { +		crtc_state = to_intel_crtc_state(crtc->base.state); +  		drm_crtc_vblank_reset(&crtc->base); -		if (crtc->base.state->active) -			drm_crtc_vblank_on(&crtc->base); +		if (crtc_state->base.active) +			intel_crtc_vblank_on(crtc_state);  	}  	intel_sanitize_plane_mapping(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 606f54dde086..0a42d11c4c33 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -247,7 +247,7 @@ intel_find_shared_dpll(struct intel_crtc *crtc,  		       enum intel_dpll_id range_max)  {  	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); -	struct intel_shared_dpll *pll; +	struct intel_shared_dpll *pll, *unused_pll = NULL;  	struct intel_shared_dpll_state *shared_dpll;  	enum intel_dpll_id i; @@ -257,8 +257,11 @@ intel_find_shared_dpll(struct intel_crtc *crtc,  		pll = &dev_priv->shared_dplls[i];  		/* Only want to check enabled timings first */ -		if (shared_dpll[i].crtc_mask == 0) +		if (shared_dpll[i].crtc_mask == 0) { +			if (!unused_pll) +				unused_pll = pll;  			continue; +		}  		if (memcmp(&crtc_state->dpll_hw_state,  			   &shared_dpll[i].hw_state, @@ -273,14 +276,11 @@ intel_find_shared_dpll(struct intel_crtc *crtc,  	}  	/* Ok no matching timings, maybe there's a free one? 
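
A quick illustration of why remembering unused_pll during the first pass is equivalent to the second loop removed just below: with three DPLLs in the range where only the first is in use and none of the free ones matches the requested timings, both versions end up allocating the second DPLL, since it is the first free entry encountered in range order.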
*/ -	for (i = range_min; i <= range_max; i++) { -		pll = &dev_priv->shared_dplls[i]; -		if (shared_dpll[i].crtc_mask == 0) { -			DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n", -				      crtc->base.base.id, crtc->base.name, -				      pll->info->name); -			return pll; -		} +	if (unused_pll) { +		DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n", +			      crtc->base.base.id, crtc->base.name, +			      unused_pll->info->name); +		return unused_pll;  	}  	return NULL; @@ -2639,14 +2639,14 @@ int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv,  	return link_clock;  } -static enum port icl_mg_pll_id_to_port(enum intel_dpll_id id) +static enum tc_port icl_pll_id_to_tc_port(enum intel_dpll_id id)  { -	return id - DPLL_ID_ICL_MGPLL1 + PORT_C; +	return id - DPLL_ID_ICL_MGPLL1;  } -enum intel_dpll_id icl_port_to_mg_pll_id(enum port port) +enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port)  { -	return port - PORT_C + DPLL_ID_ICL_MGPLL1; +	return tc_port + DPLL_ID_ICL_MGPLL1;  }  bool intel_dpll_is_combophy(enum intel_dpll_id id) @@ -2925,7 +2925,10 @@ icl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state,  			ret = icl_calc_dpll_state(crtc_state, encoder, clock,  						  &pll_state);  		} else { -			min = icl_port_to_mg_pll_id(port); +			enum tc_port tc_port; + +			tc_port = intel_port_to_tc(dev_priv, port); +			min = icl_tc_port_to_pll_id(tc_port);  			max = min;  			ret = icl_calc_mg_pll_state(crtc_state, encoder, clock,  						    &pll_state); @@ -2959,12 +2962,8 @@ static i915_reg_t icl_pll_id_to_enable_reg(enum intel_dpll_id id)  		return CNL_DPLL_ENABLE(id);  	else if (id == DPLL_ID_ICL_TBTPLL)  		return TBT_PLL_ENABLE; -	else -		/* -		 * TODO: Make MG_PLL macros use -		 * tc port id instead of port id -		 */ -		return MG_PLL_ENABLE(icl_mg_pll_id_to_port(id)); + +	return MG_PLL_ENABLE(icl_pll_id_to_tc_port(id));  }  static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, @@ -2974,7 +2973,6 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv,  	const enum intel_dpll_id id = pll->info->id;  	intel_wakeref_t wakeref;  	bool ret = false; -	enum port port;  	u32 val;  	wakeref = intel_display_power_get_if_enabled(dev_priv, @@ -2991,32 +2989,33 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv,  		hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id));  		hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id));  	} else { -		port = icl_mg_pll_id_to_port(id); -		hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(port)); +		enum tc_port tc_port = icl_pll_id_to_tc_port(id); + +		hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(tc_port));  		hw_state->mg_refclkin_ctl &= MG_REFCLKIN_CTL_OD_2_MUX_MASK;  		hw_state->mg_clktop2_coreclkctl1 = -			I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); +			I915_READ(MG_CLKTOP2_CORECLKCTL1(tc_port));  		hw_state->mg_clktop2_coreclkctl1 &=  			MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK;  		hw_state->mg_clktop2_hsclkctl = -			I915_READ(MG_CLKTOP2_HSCLKCTL(port)); +			I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port));  		hw_state->mg_clktop2_hsclkctl &=  			MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK |  			MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK |  			MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK |  			MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK; -		hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); -		hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(port)); -		hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(port)); -		hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(port)); -		hw_state->mg_pll_ssc = 
I915_READ(MG_PLL_SSC(port)); +		hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(tc_port)); +		hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(tc_port)); +		hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(tc_port)); +		hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(tc_port)); +		hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(tc_port)); -		hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(port)); +		hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(tc_port));  		hw_state->mg_pll_tdc_coldst_bias = -			I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); +			I915_READ(MG_PLL_TDC_COLDST_BIAS(tc_port));  		if (dev_priv->cdclk.hw.ref == 38400) {  			hw_state->mg_pll_tdc_coldst_bias_mask = MG_PLL_TDC_COLDST_COLDSTART; @@ -3051,7 +3050,7 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv,  			     struct intel_shared_dpll *pll)  {  	struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; -	enum port port = icl_mg_pll_id_to_port(pll->info->id); +	enum tc_port tc_port = icl_pll_id_to_tc_port(pll->info->id);  	u32 val;  	/* @@ -3060,41 +3059,41 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv,  	 * during the calc/readout phase if the mask depends on some other HW  	 * state like refclk, see icl_calc_mg_pll_state().  	 */ -	val = I915_READ(MG_REFCLKIN_CTL(port)); +	val = I915_READ(MG_REFCLKIN_CTL(tc_port));  	val &= ~MG_REFCLKIN_CTL_OD_2_MUX_MASK;  	val |= hw_state->mg_refclkin_ctl; -	I915_WRITE(MG_REFCLKIN_CTL(port), val); +	I915_WRITE(MG_REFCLKIN_CTL(tc_port), val); -	val = I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); +	val = I915_READ(MG_CLKTOP2_CORECLKCTL1(tc_port));  	val &= ~MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK;  	val |= hw_state->mg_clktop2_coreclkctl1; -	I915_WRITE(MG_CLKTOP2_CORECLKCTL1(port), val); +	I915_WRITE(MG_CLKTOP2_CORECLKCTL1(tc_port), val); -	val = I915_READ(MG_CLKTOP2_HSCLKCTL(port)); +	val = I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port));  	val &= ~(MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK |  		 MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK |  		 MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK |  		 MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK);  	val |= hw_state->mg_clktop2_hsclkctl; -	I915_WRITE(MG_CLKTOP2_HSCLKCTL(port), val); +	I915_WRITE(MG_CLKTOP2_HSCLKCTL(tc_port), val); -	I915_WRITE(MG_PLL_DIV0(port), hw_state->mg_pll_div0); -	I915_WRITE(MG_PLL_DIV1(port), hw_state->mg_pll_div1); -	I915_WRITE(MG_PLL_LF(port), hw_state->mg_pll_lf); -	I915_WRITE(MG_PLL_FRAC_LOCK(port), hw_state->mg_pll_frac_lock); -	I915_WRITE(MG_PLL_SSC(port), hw_state->mg_pll_ssc); +	I915_WRITE(MG_PLL_DIV0(tc_port), hw_state->mg_pll_div0); +	I915_WRITE(MG_PLL_DIV1(tc_port), hw_state->mg_pll_div1); +	I915_WRITE(MG_PLL_LF(tc_port), hw_state->mg_pll_lf); +	I915_WRITE(MG_PLL_FRAC_LOCK(tc_port), hw_state->mg_pll_frac_lock); +	I915_WRITE(MG_PLL_SSC(tc_port), hw_state->mg_pll_ssc); -	val = I915_READ(MG_PLL_BIAS(port)); +	val = I915_READ(MG_PLL_BIAS(tc_port));  	val &= ~hw_state->mg_pll_bias_mask;  	val |= hw_state->mg_pll_bias; -	I915_WRITE(MG_PLL_BIAS(port), val); +	I915_WRITE(MG_PLL_BIAS(tc_port), val); -	val = I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); +	val = I915_READ(MG_PLL_TDC_COLDST_BIAS(tc_port));  	val &= ~hw_state->mg_pll_tdc_coldst_bias_mask;  	val |= hw_state->mg_pll_tdc_coldst_bias; -	I915_WRITE(MG_PLL_TDC_COLDST_BIAS(port), val); +	I915_WRITE(MG_PLL_TDC_COLDST_BIAS(tc_port), val); -	POSTING_READ(MG_PLL_TDC_COLDST_BIAS(port)); +	POSTING_READ(MG_PLL_TDC_COLDST_BIAS(tc_port));  }  static void icl_pll_enable(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h 
b/drivers/gpu/drm/i915/intel_dpll_mgr.h index e96e79413b54..40e8391a92f2 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -344,7 +344,7 @@ void intel_dpll_dump_hw_state(struct drm_i915_private *dev_priv,  int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv,  			       u32 pll_id);  int cnl_hdmi_pll_ref_clock(struct drm_i915_private *dev_priv); -enum intel_dpll_id icl_port_to_mg_pll_id(enum port port); +enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port);  bool intel_dpll_is_combophy(enum intel_dpll_id id);  #endif /* _INTEL_DPLL_MGR_H_ */ diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 85b913ea6e80..90ba5436370e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -630,9 +630,11 @@ struct intel_crtc_scaler_state {  };  /* drm_mode->private_flags */ -#define I915_MODE_FLAG_INHERITED 1 +#define I915_MODE_FLAG_INHERITED (1<<0)  /* Flag to get scanline using frame time stamps */  #define I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP (1<<1) +/* Flag to use the scanline counter instead of the pixel counter */ +#define I915_MODE_FLAG_USE_SCANLINE_COUNTER (1<<2)  struct intel_pipe_wm {  	struct intel_wm_level wm[5]; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 2f3c71f6d313..71c01eb13af1 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -458,12 +458,6 @@ cleanup:  void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)  {  	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - -	/* After manually advancing the seqno, fake the interrupt in case -	 * there are any waiters for that seqno. -	 */ -	intel_engine_wakeup(engine); -  	GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);  } @@ -480,53 +474,67 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)  	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));  	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); -	execlists->queue_priority = INT_MIN; +	execlists->queue_priority_hint = INT_MIN;  	execlists->queue = RB_ROOT_CACHED;  } -/** - * intel_engines_setup_common - setup engine state not requiring hw access - * @engine: Engine to setup. - * - * Initializes @engine@ structure members shared between legacy and execlists - * submission modes which do not require hardware access. - * - * Typically done early in the submission mode specific engine setup stage. 
- */ -void intel_engine_setup_common(struct intel_engine_cs *engine) -{ -	i915_timeline_init(engine->i915, &engine->timeline, engine->name); -	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); - -	intel_engine_init_execlist(engine); -	intel_engine_init_hangcheck(engine); -	intel_engine_init_batch_pool(engine); -	intel_engine_init_cmd_parser(engine); -} -  static void cleanup_status_page(struct intel_engine_cs *engine)  { +	struct i915_vma *vma; +  	/* Prevent writes into HWSP after returning the page to the system */  	intel_engine_set_hwsp_writemask(engine, ~0u); -	if (HWS_NEEDS_PHYSICAL(engine->i915)) { -		void *addr = fetch_and_zero(&engine->status_page.page_addr); +	vma = fetch_and_zero(&engine->status_page.vma); +	if (!vma) +		return; -		__free_page(virt_to_page(addr)); -	} +	if (!HWS_NEEDS_PHYSICAL(engine->i915)) +		i915_vma_unpin(vma); + +	i915_gem_object_unpin_map(vma->obj); +	__i915_gem_object_release_unless_active(vma->obj); +} + +static int pin_ggtt_status_page(struct intel_engine_cs *engine, +				struct i915_vma *vma) +{ +	unsigned int flags; + +	flags = PIN_GLOBAL; +	if (!HAS_LLC(engine->i915)) +		/* +		 * On g33, we cannot place HWS above 256MiB, so +		 * restrict its pinning to the low mappable arena. +		 * Though this restriction is not documented for +		 * gen4, gen5, or byt, they also behave similarly +		 * and hang if the HWS is placed at the top of the +		 * GTT. To generalise, it appears that all !llc +		 * platforms have issues with us placing the HWS +		 * above the mappable region (even though we never +		 * actually map it). +		 */ +		flags |= PIN_MAPPABLE; +	else +		flags |= PIN_HIGH; -	i915_vma_unpin_and_release(&engine->status_page.vma, -				   I915_VMA_RELEASE_MAP); +	return i915_vma_pin(vma, 0, 0, flags);  }  static int init_status_page(struct intel_engine_cs *engine)  {  	struct drm_i915_gem_object *obj;  	struct i915_vma *vma; -	unsigned int flags;  	void *vaddr;  	int ret; +	/* +	 * Though the HWS register does support 36bit addresses, historically +	 * we have had hangs and corruption reported due to wild writes if +	 * the HWS is placed above 4G. We only allow objects to be allocated +	 * in GFP_DMA32 for i965, and no earlier physical address users had +	 * access to more than 4G. +	 */  	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);  	if (IS_ERR(obj)) {  		DRM_ERROR("Failed to allocate status page\n"); @@ -543,59 +551,67 @@ static int init_status_page(struct intel_engine_cs *engine)  		goto err;  	} -	flags = PIN_GLOBAL; -	if (!HAS_LLC(engine->i915)) -		/* On g33, we cannot place HWS above 256MiB, so -		 * restrict its pinning to the low mappable arena. -		 * Though this restriction is not documented for -		 * gen4, gen5, or byt, they also behave similarly -		 * and hang if the HWS is placed at the top of the -		 * GTT. To generalise, it appears that all !llc -		 * platforms have issues with us placing the HWS -		 * above the mappable region (even though we never -		 * actually map it). 
-		 */ -		flags |= PIN_MAPPABLE; -	else -		flags |= PIN_HIGH; -	ret = i915_vma_pin(vma, 0, 0, flags); -	if (ret) -		goto err; -  	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);  	if (IS_ERR(vaddr)) {  		ret = PTR_ERR(vaddr); -		goto err_unpin; +		goto err;  	} +	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);  	engine->status_page.vma = vma; -	engine->status_page.ggtt_offset = i915_ggtt_offset(vma); -	engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); + +	if (!HWS_NEEDS_PHYSICAL(engine->i915)) { +		ret = pin_ggtt_status_page(engine, vma); +		if (ret) +			goto err_unpin; +	} +  	return 0;  err_unpin: -	i915_vma_unpin(vma); +	i915_gem_object_unpin_map(obj);  err:  	i915_gem_object_put(obj);  	return ret;  } -static int init_phys_status_page(struct intel_engine_cs *engine) +/** + * intel_engines_setup_common - setup engine state not requiring hw access + * @engine: Engine to setup. + * + * Initializes @engine@ structure members shared between legacy and execlists + * submission modes which do not require hardware access. + * + * Typically done early in the submission mode specific engine setup stage. + */ +int intel_engine_setup_common(struct intel_engine_cs *engine)  { -	struct page *page; +	int err; -	/* -	 * Though the HWS register does support 36bit addresses, historically -	 * we have had hangs and corruption reported due to wild writes if -	 * the HWS is placed above 4G. -	 */ -	page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO); -	if (!page) -		return -ENOMEM; +	err = init_status_page(engine); +	if (err) +		return err; + +	err = i915_timeline_init(engine->i915, +				 &engine->timeline, +				 engine->name, +				 engine->status_page.vma); +	if (err) +		goto err_hwsp; + +	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); -	engine->status_page.page_addr = page_address(page); +	intel_engine_init_breadcrumbs(engine); +	intel_engine_init_execlist(engine); +	intel_engine_init_hangcheck(engine); +	intel_engine_init_batch_pool(engine); +	intel_engine_init_cmd_parser(engine);  	return 0; + +err_hwsp: +	cleanup_status_page(engine); +	return err;  }  static void __intel_context_unpin(struct i915_gem_context *ctx, @@ -604,6 +620,56 @@ static void __intel_context_unpin(struct i915_gem_context *ctx,  	intel_context_unpin(to_intel_context(ctx, engine));  } +struct measure_breadcrumb { +	struct i915_request rq; +	struct i915_timeline timeline; +	struct intel_ring ring; +	u32 cs[1024]; +}; + +static int measure_breadcrumb_dw(struct intel_engine_cs *engine) +{ +	struct measure_breadcrumb *frame; +	int dw = -ENOMEM; + +	GEM_BUG_ON(!engine->i915->gt.scratch); + +	frame = kzalloc(sizeof(*frame), GFP_KERNEL); +	if (!frame) +		return -ENOMEM; + +	if (i915_timeline_init(engine->i915, +			       &frame->timeline, "measure", +			       engine->status_page.vma)) +		goto out_frame; + +	INIT_LIST_HEAD(&frame->ring.request_list); +	frame->ring.timeline = &frame->timeline; +	frame->ring.vaddr = frame->cs; +	frame->ring.size = sizeof(frame->cs); +	frame->ring.effective_size = frame->ring.size; +	intel_ring_update_space(&frame->ring); + +	frame->rq.i915 = engine->i915; +	frame->rq.engine = engine; +	frame->rq.ring = &frame->ring; +	frame->rq.timeline = &frame->timeline; + +	dw = i915_timeline_pin(&frame->timeline); +	if (dw < 0) +		goto out_timeline; + +	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; + +	i915_timeline_unpin(&frame->timeline); + +out_timeline: +	i915_timeline_fini(&frame->timeline); +out_frame: +	kfree(frame); +	return dw; +} +  /**   * 
intel_engines_init_common - initialize cengine state which might require hw access   * @engine: Engine to initialize. @@ -646,21 +712,14 @@ int intel_engine_init_common(struct intel_engine_cs *engine)  		}  	} -	ret = intel_engine_init_breadcrumbs(engine); -	if (ret) +	ret = measure_breadcrumb_dw(engine); +	if (ret < 0)  		goto err_unpin_preempt; -	if (HWS_NEEDS_PHYSICAL(i915)) -		ret = init_phys_status_page(engine); -	else -		ret = init_status_page(engine); -	if (ret) -		goto err_breadcrumbs; +	engine->emit_fini_breadcrumb_dw = ret;  	return 0; -err_breadcrumbs: -	intel_engine_fini_breadcrumbs(engine);  err_unpin_preempt:  	if (i915->preempt_context)  		__intel_context_unpin(i915->preempt_context, engine); @@ -1071,10 +1130,8 @@ void intel_engines_sanitize(struct drm_i915_private *i915, bool force)  	if (!reset_engines(i915) && !force)  		return; -	for_each_engine(engine, i915, id) { -		if (engine->reset.reset) -			engine->reset.reset(engine, NULL); -	} +	for_each_engine(engine, i915, id) +		intel_engine_reset(engine, false);  }  /** @@ -1110,7 +1167,7 @@ void intel_engines_park(struct drm_i915_private *i915)  		}  		/* Must be reset upon idling, or we may miss the busy wakeup. */ -		GEM_BUG_ON(engine->execlists.queue_priority != INT_MIN); +		GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);  		if (engine->park)  			engine->park(engine); @@ -1226,10 +1283,14 @@ static void print_request(struct drm_printer *m,  	x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); -	drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n", +	drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n",  		   prefix,  		   rq->global_seqno, -		   i915_request_completed(rq) ? "!" : "", +		   i915_request_completed(rq) ? "!" : +		   i915_request_started(rq) ? "*" : +		   "", +		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, +			    &rq->fence.flags) ?  "+" : "",  		   rq->fence.context, rq->fence.seqno,  		   buf,  		   jiffies_to_msecs(jiffies - rq->emitted_jiffies), @@ -1320,7 +1381,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,  	}  	if (HAS_EXECLISTS(dev_priv)) { -		const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; +		const u32 *hws = +			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];  		unsigned int idx;  		u8 read, write; @@ -1363,9 +1425,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,  				char hdr[80];  				snprintf(hdr, sizeof(hdr), -					 "\t\tELSP[%d] count=%d, ring->start=%08x, rq: ", +					 "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x}, rq: ",  					 idx, count, -					 i915_ggtt_offset(rq->ring->vma)); +					 i915_ggtt_offset(rq->ring->vma), +					 rq->timeline->hwsp_offset);  				print_request(m, rq, hdr);  			} else {  				drm_printf(m, "\t\tELSP[%d] idle\n", idx); @@ -1420,12 +1483,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,  		       struct drm_printer *m,  		       const char *header, ...)  
{ -	struct intel_breadcrumbs * const b = &engine->breadcrumbs;  	struct i915_gpu_error * const error = &engine->i915->gpu_error;  	struct i915_request *rq;  	intel_wakeref_t wakeref; -	unsigned long flags; -	struct rb_node *rb;  	if (header) {  		va_list ap; @@ -1475,6 +1535,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,  			   rq->ring->emit);  		drm_printf(m, "\t\tring->space:  0x%08x\n",  			   rq->ring->space); +		drm_printf(m, "\t\tring->hwsp:   0x%08x\n", +			   rq->timeline->hwsp_offset);  		print_request_ring(m, rq);  	} @@ -1491,21 +1553,12 @@ void intel_engine_dump(struct intel_engine_cs *engine,  	intel_execlists_show_requests(engine, m, print_request, 8); -	spin_lock_irqsave(&b->rb_lock, flags); -	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { -		struct intel_wait *w = rb_entry(rb, typeof(*w), node); - -		drm_printf(m, "\t%s [%d:%c] waiting for %x\n", -			   w->tsk->comm, w->tsk->pid, -			   task_state_to_char(w->tsk), -			   w->seqno); -	} -	spin_unlock_irqrestore(&b->rb_lock, flags); -  	drm_printf(m, "HWSP:\n"); -	hexdump(m, engine->status_page.page_addr, PAGE_SIZE); +	hexdump(m, engine->status_page.addr, PAGE_SIZE);  	drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); + +	intel_engine_print_breadcrumbs(engine, m);  }  static u8 user_class_map[] = { diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h index 105e2a9e874a..b96a31bc1080 100644 --- a/drivers/gpu/drm/i915/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/intel_gpu_commands.h @@ -112,7 +112,6 @@  #define   MI_MEM_VIRTUAL	(1 << 22) /* 945,g33,965 */  #define   MI_USE_GGTT		(1 << 22) /* g4x+ */  #define MI_STORE_DWORD_INDEX	MI_INSTR(0x21, 1) -#define   MI_STORE_DWORD_INDEX_SHIFT 2  /*   * Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM:   * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index ab1c49b106f2..8bc8aa54aa35 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -81,6 +81,12 @@   *   */ +static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) +{ +	return (i915_ggtt_offset(engine->status_page.vma) + +		I915_GEM_HWS_PREEMPT_ADDR); +} +  static inline struct i915_priolist *to_priolist(struct rb_node *rb)  {  	return rb_entry(rb, struct i915_priolist, node); @@ -623,6 +629,8 @@ static void inject_preempt_context(struct work_struct *work)  				       EXECLISTS_ACTIVE_PREEMPT);  		tasklet_schedule(&engine->execlists.tasklet);  	} + +	(void)I915_SELFTEST_ONLY(engine->execlists.preempt_hang.count++);  }  /* @@ -666,7 +674,7 @@ static void complete_preempt_context(struct intel_engine_cs *engine)  	execlists_unwind_incomplete_requests(execlists);  	wait_for_guc_preempt_report(engine); -	intel_write_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX, 0); +	intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, 0);  }  /** @@ -731,7 +739,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)  		if (intel_engine_has_preemption(engine)) {  			struct guc_preempt_work *preempt_work =  				&engine->i915->guc.preempt_work[engine->id]; -			int prio = execlists->queue_priority; +			int prio = execlists->queue_priority_hint;  			if (__execlists_need_preempt(prio, port_prio(port))) {  				execlists_set_active(execlists, @@ -777,7 +785,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)  			
kmem_cache_free(engine->i915->priorities, p);  	}  done: -	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; +	execlists->queue_priority_hint = +		rb ? to_priolist(rb)->priority : INT_MIN;  	if (submit)  		port_assign(port, last);  	if (last) @@ -824,7 +833,7 @@ static void guc_submission_tasklet(unsigned long data)  	}  	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) && -	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) == +	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) ==  	    GUC_PREEMPT_FINISHED)  		complete_preempt_context(engine); @@ -834,8 +843,7 @@ static void guc_submission_tasklet(unsigned long data)  	spin_unlock_irqrestore(&engine->timeline.lock, flags);  } -static struct i915_request * -guc_reset_prepare(struct intel_engine_cs *engine) +static void guc_reset_prepare(struct intel_engine_cs *engine)  {  	struct intel_engine_execlists * const execlists = &engine->execlists; @@ -861,8 +869,6 @@ guc_reset_prepare(struct intel_engine_cs *engine)  	 */  	if (engine->i915->guc.preempt_wq)  		flush_workqueue(engine->i915->guc.preempt_wq); - -	return i915_gem_find_active_request(engine);  }  /* diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 741441daae32..a219c796e56d 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -25,6 +25,17 @@  #include "i915_drv.h"  #include "i915_reset.h" +struct hangcheck { +	u64 acthd; +	u32 seqno; +	enum intel_engine_hangcheck_action action; +	unsigned long action_timestamp; +	int deadlock; +	struct intel_instdone instdone; +	bool wedged:1; +	bool stalled:1; +}; +  static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)  {  	u32 tmp = current_instdone | *old_instdone; @@ -119,25 +130,22 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)  }  static void hangcheck_load_sample(struct intel_engine_cs *engine, -				  struct intel_engine_hangcheck *hc) +				  struct hangcheck *hc)  {  	hc->acthd = intel_engine_get_active_head(engine);  	hc->seqno = intel_engine_get_seqno(engine);  }  static void hangcheck_store_sample(struct intel_engine_cs *engine, -				   const struct intel_engine_hangcheck *hc) +				   const struct hangcheck *hc)  {  	engine->hangcheck.acthd = hc->acthd;  	engine->hangcheck.seqno = hc->seqno; -	engine->hangcheck.action = hc->action; -	engine->hangcheck.stalled = hc->stalled; -	engine->hangcheck.wedged = hc->wedged;  }  static enum intel_engine_hangcheck_action  hangcheck_get_action(struct intel_engine_cs *engine, -		     const struct intel_engine_hangcheck *hc) +		     const struct hangcheck *hc)  {  	if (engine->hangcheck.seqno != hc->seqno)  		return ENGINE_ACTIVE_SEQNO; @@ -149,7 +157,7 @@ hangcheck_get_action(struct intel_engine_cs *engine,  }  static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, -					struct intel_engine_hangcheck *hc) +					struct hangcheck *hc)  {  	unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT; @@ -265,19 +273,21 @@ static void i915_hangcheck_elapsed(struct work_struct *work)  	intel_uncore_arm_unclaimed_mmio_detection(dev_priv);  	for_each_engine(engine, dev_priv, id) { -		struct intel_engine_hangcheck hc; +		struct hangcheck hc; + +		intel_engine_signal_breadcrumbs(engine);  		hangcheck_load_sample(engine, &hc);  		hangcheck_accumulate_sample(engine, &hc);  		hangcheck_store_sample(engine, &hc); -		if (engine->hangcheck.stalled) { +		if (hc.stalled) {  			hung |= intel_engine_flag(engine);  			if (hc.action != 
ENGINE_DEAD)  				stuck |= intel_engine_flag(engine);  		} -		if (engine->hangcheck.wedged) +		if (hc.wedged)  			wedged |= intel_engine_flag(engine);  	} diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8aa8a4862543..a9eb0211ce77 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -136,6 +136,7 @@  #include <drm/i915_drm.h>  #include "i915_drv.h"  #include "i915_gem_render_state.h" +#include "i915_reset.h"  #include "i915_vgpu.h"  #include "intel_lrc_reg.h"  #include "intel_mocs.h" @@ -171,6 +172,12 @@ static void execlists_init_reg_state(u32 *reg_state,  				     struct intel_engine_cs *engine,  				     struct intel_ring *ring); +static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) +{ +	return (i915_ggtt_offset(engine->status_page.vma) + +		I915_GEM_HWS_INDEX_ADDR); +} +  static inline struct i915_priolist *to_priolist(struct rb_node *rb)  {  	return rb_entry(rb, struct i915_priolist, node); @@ -181,13 +188,90 @@ static inline int rq_prio(const struct i915_request *rq)  	return rq->sched.attr.priority;  } +static int queue_prio(const struct intel_engine_execlists *execlists) +{ +	struct i915_priolist *p; +	struct rb_node *rb; + +	rb = rb_first_cached(&execlists->queue); +	if (!rb) +		return INT_MIN; + +	/* +	 * As the priolist[] are inverted, with the highest priority in [0], +	 * we have to flip the index value to become priority. +	 */ +	p = to_priolist(rb); +	return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); +} +  static inline bool need_preempt(const struct intel_engine_cs *engine, -				const struct i915_request *last, -				int prio) +				const struct i915_request *rq)  { -	return (intel_engine_has_preemption(engine) && -		__execlists_need_preempt(prio, rq_prio(last)) && -		!i915_request_completed(last)); +	const int last_prio = rq_prio(rq); + +	if (!intel_engine_has_preemption(engine)) +		return false; + +	if (i915_request_completed(rq)) +		return false; + +	/* +	 * Check if the current priority hint merits a preemption attempt. +	 * +	 * We record the highest value priority we saw during rescheduling +	 * prior to this dequeue, therefore we know that if it is strictly +	 * less than the current tail of ESLP[0], we do not need to force +	 * a preempt-to-idle cycle. +	 * +	 * However, the priority hint is a mere hint that we may need to +	 * preempt. If that hint is stale or we may be trying to preempt +	 * ourselves, ignore the request. +	 */ +	if (!__execlists_need_preempt(engine->execlists.queue_priority_hint, +				      last_prio)) +		return false; + +	/* +	 * Check against the first request in ELSP[1], it will, thanks to the +	 * power of PI, be the highest priority of that context. +	 */ +	if (!list_is_last(&rq->link, &engine->timeline.requests) && +	    rq_prio(list_next_entry(rq, link)) > last_prio) +		return true; + +	/* +	 * If the inflight context did not trigger the preemption, then maybe +	 * it was the set of queued requests? Pick the highest priority in +	 * the queue (the first active priolist) and see if it deserves to be +	 * running instead of ELSP[0]. +	 * +	 * The highest priority request in the queue can not be either +	 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same +	 * context, it's priority would not exceed ELSP[0] aka last_prio. 
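The index-to-priority flip in queue_prio() above can be checked with a small stand-alone program. The sketch below is not driver code; it assumes four internal sub-levels per user priority (a shift of 2) purely for illustration, and shows how the first set bit of p->used maps back onto the effective priority, priolist[0] being the highest sub-level.

#include <stdio.h>
#include <strings.h>	/* ffs() */

/* Assumed for illustration only; the real shift lives in the driver. */
#define USER_PRIORITY_SHIFT 2

/*
 * priolist[] is indexed with the highest internal sub-level at [0], so the
 * effective priority of the best pending request is recovered by flipping
 * the first used index back into the low bits.
 */
static int queue_prio(int base_priority, unsigned int used)
{
	return ((base_priority + 1) << USER_PRIORITY_SHIFT) - ffs(used);
}

int main(void)
{
	/* Only the highest sub-level (index 0) is occupied. */
	printf("%d\n", queue_prio(1, 0x1));	/* (1 + 1) * 4 - 1 = 7 */
	/* Only the lowest sub-level (index 3) is occupied. */
	printf("%d\n", queue_prio(1, 0x8));	/* 8 - 4 = 4 */
	return 0;
}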
+	 */ +	return queue_prio(&engine->execlists) > last_prio; +} + +__maybe_unused static inline bool +assert_priority_queue(const struct intel_engine_execlists *execlists, +		      const struct i915_request *prev, +		      const struct i915_request *next) +{ +	if (!prev) +		return true; + +	/* +	 * Without preemption, the prev may refer to the still active element +	 * which we refuse to let go. +	 * +	 * Even with preemption, there are times when we think it is better not +	 * to preempt and leave an ostensibly lower priority request in flight. +	 */ +	if (port_request(execlists->port) == prev) +		return true; + +	return rq_prio(prev) >= rq_prio(next);  }  /* @@ -264,7 +348,8 @@ static void unwind_wa_tail(struct i915_request *rq)  	assert_ring_tail_valid(rq->ring, rq->tail);  } -static void __unwind_incomplete_requests(struct intel_engine_cs *engine) +static struct i915_request * +__unwind_incomplete_requests(struct intel_engine_cs *engine)  {  	struct i915_request *rq, *rn, *active = NULL;  	struct list_head *uninitialized_var(pl); @@ -306,6 +391,8 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)  		list_move_tail(&active->sched.link,  			       i915_sched_lookup_priolist(engine, prio));  	} + +	return active;  }  void @@ -436,11 +523,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)  			desc = execlists_update_context(rq);  			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); -			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", +			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",  				  engine->name, n,  				  port[n].context_id, count,  				  rq->global_seqno,  				  rq->fence.context, rq->fence.seqno, +				  hwsp_seqno(rq),  				  intel_engine_get_seqno(engine),  				  rq_prio(rq));  		} else { @@ -512,6 +600,8 @@ static void inject_preempt_context(struct intel_engine_cs *engine)  	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);  	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); + +	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);  }  static void complete_preempt_context(struct intel_engine_execlists *execlists) @@ -580,7 +670,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)  		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))  			return; -		if (need_preempt(engine, last, execlists->queue_priority)) { +		if (need_preempt(engine, last)) {  			inject_preempt_context(engine);  			return;  		} @@ -613,7 +703,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)  		 * WaIdleLiteRestore:bdw,skl  		 * Apply the wa NOOPs to prevent  		 * ring:HEAD == rq:TAIL as we resubmit the -		 * request. See gen8_emit_breadcrumb() for +		 * request. See gen8_emit_fini_breadcrumb() for  		 * where we prepare the padding after the  		 * end of the request.  		 */ @@ -626,8 +716,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)  		int i;  		priolist_for_each_request_consume(rq, rn, p, i) { -			GEM_BUG_ON(last && -				   need_preempt(engine, last, rq_prio(rq))); +			GEM_BUG_ON(!assert_priority_queue(execlists, last, rq));  			/*  			 * Can we combine this request with the current port? @@ -688,20 +777,20 @@ done:  	/*  	 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.  	 
* -	 * We choose queue_priority such that if we add a request of greater +	 * We choose the priority hint such that if we add a request of greater  	 * priority than this, we kick the submission tasklet to decide on  	 * the right order of submitting the requests to hardware. We must  	 * also be prepared to reorder requests as they are in-flight on the -	 * HW. We derive the queue_priority then as the first "hole" in +	 * HW. We derive the priority hint then as the first "hole" in  	 * the HW submission ports and if there are no available slots,  	 * the priority of the lowest executing request, i.e. last.  	 *  	 * When we do receive a higher priority request ready to run from the -	 * user, see queue_request(), the queue_priority is bumped to that +	 * user, see queue_request(), the priority hint is bumped to that  	 * request triggering preemption on the next dequeue (or subsequent  	 * interrupt for secondary ports).  	 */ -	execlists->queue_priority = +	execlists->queue_priority_hint =  		port != execlists->port ? rq_prio(last) : INT_MIN;  	if (submit) { @@ -732,11 +821,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)  	while (num_ports-- && port_isset(port)) {  		struct i915_request *rq = port_request(port); -		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n", +		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n",  			  rq->engine->name,  			  (unsigned int)(port - execlists->port),  			  rq->global_seqno,  			  rq->fence.context, rq->fence.seqno, +			  hwsp_seqno(rq),  			  intel_engine_get_seqno(rq->engine));  		GEM_BUG_ON(!execlists->active); @@ -820,10 +910,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)  	list_for_each_entry(rq, &engine->timeline.requests, link) {  		GEM_BUG_ON(!rq->global_seqno); -		if (i915_request_signaled(rq)) -			continue; +		if (!i915_request_signaled(rq)) +			dma_fence_set_error(&rq->fence, -EIO); -		dma_fence_set_error(&rq->fence, -EIO); +		i915_request_mark_complete(rq);  	}  	/* Flush the queued requests to the timeline list (for retiring). */ @@ -833,9 +923,9 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)  		priolist_for_each_request_consume(rq, rn, p, i) {  			list_del_init(&rq->sched.link); - -			dma_fence_set_error(&rq->fence, -EIO);  			__i915_request_submit(rq); +			dma_fence_set_error(&rq->fence, -EIO); +			i915_request_mark_complete(rq);  		}  		rb_erase_cached(&p->node, &execlists->queue); @@ -849,7 +939,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)  	/* Remaining _unready_ requests will be nop'ed when submitted */ -	execlists->queue_priority = INT_MIN; +	execlists->queue_priority_hint = INT_MIN;  	execlists->queue = RB_ROOT_CACHED;  	GEM_BUG_ON(port_isset(execlists->port)); @@ -872,6 +962,8 @@ static void process_csb(struct intel_engine_cs *engine)  	const u32 * const buf = execlists->csb_status;  	u8 head, tail; +	lockdep_assert_held(&engine->timeline.lock); +  	/*  	 * Note that csb_write, csb_status may be either in HWSP or mmio.  	 * When reading from the csb_write mmio register, we have to be @@ -960,12 +1052,13 @@ static void process_csb(struct intel_engine_cs *engine)  						EXECLISTS_ACTIVE_USER));  		rq = port_unpack(port, &count); -		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", +		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",  			  engine->name,  			  port->context_id, count,  			  rq ? 
rq->global_seqno : 0,  			  rq ? rq->fence.context : 0,  			  rq ? rq->fence.seqno : 0, +			  rq ? hwsp_seqno(rq) : 0,  			  intel_engine_get_seqno(engine),  			  rq ? rq_prio(rq) : 0); @@ -1079,8 +1172,8 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)  static void submit_queue(struct intel_engine_cs *engine, int prio)  { -	if (prio > engine->execlists.queue_priority) { -		engine->execlists.queue_priority = prio; +	if (prio > engine->execlists.queue_priority_hint) { +		engine->execlists.queue_priority_hint = prio;  		__submit_queue_imm(engine);  	}  } @@ -1173,6 +1266,24 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)  	return i915_vma_pin(vma, 0, 0, flags);  } +static u32 make_rpcs(struct drm_i915_private *dev_priv); + +static void +__execlists_update_reg_state(struct intel_engine_cs *engine, +			     struct intel_context *ce) +{ +	u32 *regs = ce->lrc_reg_state; +	struct intel_ring *ring = ce->ring; + +	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma); +	regs[CTX_RING_HEAD + 1] = ring->head; +	regs[CTX_RING_TAIL + 1] = ring->tail; + +	/* RPCS */ +	if (engine->class == RENDER_CLASS) +		regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915); +} +  static struct intel_context *  __execlists_context_pin(struct intel_engine_cs *engine,  			struct i915_gem_context *ctx, @@ -1211,10 +1322,8 @@ __execlists_context_pin(struct intel_engine_cs *engine,  	GEM_BUG_ON(!intel_ring_offset_valid(ce->ring, ce->ring->head));  	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; -	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = -		i915_ggtt_offset(ce->ring->vma); -	ce->lrc_reg_state[CTX_RING_HEAD + 1] = ce->ring->head; -	ce->lrc_reg_state[CTX_RING_TAIL + 1] = ce->ring->tail; + +	__execlists_update_reg_state(engine, ce);  	ce->state->obj->pin_global++;  	i915_gem_context_get(ctx); @@ -1254,6 +1363,34 @@ execlists_context_pin(struct intel_engine_cs *engine,  	return __execlists_context_pin(engine, ctx, ce);  } +static int gen8_emit_init_breadcrumb(struct i915_request *rq) +{ +	u32 *cs; + +	GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb); + +	cs = intel_ring_begin(rq, 6); +	if (IS_ERR(cs)) +		return PTR_ERR(cs); + +	/* +	 * Check if we have been preempted before we even get started. +	 * +	 * After this point i915_request_started() reports true, even if +	 * we get preempted and so are no longer running. 
+	 */ +	*cs++ = MI_ARB_CHECK; +	*cs++ = MI_NOOP; + +	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; +	*cs++ = rq->timeline->hwsp_offset; +	*cs++ = 0; +	*cs++ = rq->fence.seqno - 1; + +	intel_ring_advance(rq, cs); +	return 0; +} +  static int emit_pdps(struct i915_request *rq)  {  	const struct intel_engine_cs * const engine = rq->engine; @@ -1679,7 +1816,7 @@ static void enable_execlists(struct intel_engine_cs *engine)  		   _MASKED_BIT_DISABLE(STOP_RING));  	I915_WRITE(RING_HWS_PGA(engine->mmio_base), -		   engine->status_page.ggtt_offset); +		   i915_ggtt_offset(engine->status_page.vma));  	POSTING_READ(RING_HWS_PGA(engine->mmio_base));  } @@ -1716,11 +1853,9 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)  	return 0;  } -static struct i915_request * -execlists_reset_prepare(struct intel_engine_cs *engine) +static void execlists_reset_prepare(struct intel_engine_cs *engine)  {  	struct intel_engine_execlists * const execlists = &engine->execlists; -	struct i915_request *request, *active;  	unsigned long flags;  	GEM_TRACE("%s: depth<-%d\n", engine->name, @@ -1736,59 +1871,21 @@ execlists_reset_prepare(struct intel_engine_cs *engine)  	 * prevents the race.  	 */  	__tasklet_disable_sync_once(&execlists->tasklet); +	GEM_BUG_ON(!reset_in_progress(execlists)); +	/* And flush any current direct submission. */  	spin_lock_irqsave(&engine->timeline.lock, flags); - -	/* -	 * We want to flush the pending context switches, having disabled -	 * the tasklet above, we can assume exclusive access to the execlists. -	 * For this allows us to catch up with an inflight preemption event, -	 * and avoid blaming an innocent request if the stall was due to the -	 * preemption itself. -	 */ -	process_csb(engine); - -	/* -	 * The last active request can then be no later than the last request -	 * now in ELSP[0]. So search backwards from there, so that if the GPU -	 * has advanced beyond the last CSB update, it will be pardoned. -	 */ -	active = NULL; -	request = port_request(execlists->port); -	if (request) { -		/* -		 * Prevent the breadcrumb from advancing before we decide -		 * which request is currently active. -		 */ -		intel_engine_stop_cs(engine); - -		list_for_each_entry_from_reverse(request, -						 &engine->timeline.requests, -						 link) { -			if (__i915_request_completed(request, -						     request->global_seqno)) -				break; - -			active = request; -		} -	} - +	process_csb(engine); /* drain preemption events */  	spin_unlock_irqrestore(&engine->timeline.lock, flags); - -	return active;  } -static void execlists_reset(struct intel_engine_cs *engine, -			    struct i915_request *request) +static void execlists_reset(struct intel_engine_cs *engine, bool stalled)  {  	struct intel_engine_execlists * const execlists = &engine->execlists; +	struct i915_request *rq;  	unsigned long flags;  	u32 *regs; -	GEM_TRACE("%s request global=%d, current=%d\n", -		  engine->name, request ? request->global_seqno : 0, -		  intel_engine_get_seqno(engine)); -  	spin_lock_irqsave(&engine->timeline.lock, flags);  	/* @@ -1803,12 +1900,18 @@ static void execlists_reset(struct intel_engine_cs *engine,  	execlists_cancel_port_requests(execlists);  	/* Push back any incomplete requests for replay after the reset. 
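The gen8_emit_init_breadcrumb() added above stores rq->fence.seqno - 1 into the request's timeline HWSP slot before the payload runs, which is what lets "has this request started?" be answered even if the request is preempted immediately afterwards. The sketch below is only a conceptual model of that check, with made-up names, using the usual wrap-safe signed comparison on 32-bit seqnos.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Conceptual model: a request counts as started once the value in its
 * timeline HWSP has reached fence.seqno - 1, i.e. once the CS has executed
 * the init breadcrumb emitted ahead of the payload.  The signed subtraction
 * keeps the comparison correct across u32 wrap-around.
 */
static bool request_started(uint32_t hwsp_value, uint32_t fence_seqno)
{
	return (int32_t)(hwsp_value - (fence_seqno - 1)) >= 0;
}

int main(void)
{
	printf("%d\n", request_started(41, 42));	/* breadcrumb ran: 1 */
	printf("%d\n", request_started(40, 42));	/* not reached yet: 0 */
	return 0;
}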
*/ -	__unwind_incomplete_requests(engine); +	rq = __unwind_incomplete_requests(engine);  	/* Following the reset, we need to reload the CSB read/write pointers */  	reset_csb_pointers(&engine->execlists); -	spin_unlock_irqrestore(&engine->timeline.lock, flags); +	GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", +		  engine->name, +		  rq ? rq->global_seqno : 0, +		  intel_engine_get_seqno(engine), +		  yesno(stalled)); +	if (!rq) +		goto out_unlock;  	/*  	 * If the request was innocent, we leave the request in the ELSP @@ -1821,8 +1924,9 @@ static void execlists_reset(struct intel_engine_cs *engine,  	 * and have to at least restore the RING register in the context  	 * image back to the expected values to skip over the guilty request.  	 */ -	if (!request || request->fence.error != -EIO) -		return; +	i915_reset_request(rq, stalled); +	if (!stalled) +		goto out_unlock;  	/*  	 * We want a simple context + ring to execute the breadcrumb update. @@ -1832,25 +1936,22 @@ static void execlists_reset(struct intel_engine_cs *engine,  	 * future request will be after userspace has had the opportunity  	 * to recreate its own state.  	 */ -	regs = request->hw_context->lrc_reg_state; +	regs = rq->hw_context->lrc_reg_state;  	if (engine->pinned_default_state) {  		memcpy(regs, /* skip restoring the vanilla PPHWSP */  		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,  		       engine->context_size - PAGE_SIZE);  	} -	execlists_init_reg_state(regs, -				 request->gem_context, engine, request->ring);  	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ -	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma); - -	request->ring->head = intel_ring_wrap(request->ring, request->postfix); -	regs[CTX_RING_HEAD + 1] = request->ring->head; +	rq->ring->head = intel_ring_wrap(rq->ring, rq->postfix); +	intel_ring_update_space(rq->ring); -	intel_ring_update_space(request->ring); +	execlists_init_reg_state(regs, rq->gem_context, engine, rq->ring); +	__execlists_update_reg_state(engine, rq->hw_context); -	/* Reset WaIdleLiteRestore:bdw,skl as well */ -	unwind_wa_tail(request); +out_unlock: +	spin_unlock_irqrestore(&engine->timeline.lock, flags);  }  static void execlists_reset_finish(struct intel_engine_cs *engine) @@ -1863,6 +1964,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)  	 * to sleep before we restart and reload a context.  	 *  	 */ +	GEM_BUG_ON(!reset_in_progress(execlists));  	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))  		execlists->tasklet.func(execlists->tasklet.data); @@ -2035,53 +2137,62 @@ static int gen8_emit_flush_render(struct i915_request *request,   * used as a workaround for not being allowed to do lite   * restore with HEAD==TAIL (WaIdleLiteRestore).   */ -static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)  {  	/* Ensure there's always at least one preemption point per-request. */  	*cs++ = MI_ARB_CHECK;  	*cs++ = MI_NOOP;  	request->wa_tail = intel_ring_offset(request, cs); + +	return cs;  } -static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)  {  	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. 
*/  	BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); -	cs = gen8_emit_ggtt_write(cs, request->global_seqno, +	cs = gen8_emit_ggtt_write(cs, +				  request->fence.seqno, +				  request->timeline->hwsp_offset); + +	cs = gen8_emit_ggtt_write(cs, +				  request->global_seqno,  				  intel_hws_seqno_address(request->engine)); +  	*cs++ = MI_USER_INTERRUPT;  	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; +  	request->tail = intel_ring_offset(request, cs);  	assert_ring_tail_valid(request->ring, request->tail); -	gen8_emit_wa_tail(request, cs); +	return gen8_emit_wa_tail(request, cs);  } -static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; -static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)  { -	/* We're using qword write, seqno should be aligned to 8 bytes. */ -	BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); -  	cs = gen8_emit_ggtt_write_rcs(cs, -				      request->global_seqno, -				      intel_hws_seqno_address(request->engine), +				      request->fence.seqno, +				      request->timeline->hwsp_offset,  				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |  				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |  				      PIPE_CONTROL_DC_FLUSH_ENABLE |  				      PIPE_CONTROL_FLUSH_ENABLE |  				      PIPE_CONTROL_CS_STALL); +	cs = gen8_emit_ggtt_write_rcs(cs, +				      request->global_seqno, +				      intel_hws_seqno_address(request->engine), +				      PIPE_CONTROL_CS_STALL); +  	*cs++ = MI_USER_INTERRUPT;  	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;  	request->tail = intel_ring_offset(request, cs);  	assert_ring_tail_valid(request->ring, request->tail); -	gen8_emit_wa_tail(request, cs); +	return gen8_emit_wa_tail(request, cs);  } -static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS;  static int gen8_init_rcs_context(struct i915_request *rq)  { @@ -2173,8 +2284,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)  	engine->request_alloc = execlists_request_alloc;  	engine->emit_flush = gen8_emit_flush; -	engine->emit_breadcrumb = gen8_emit_breadcrumb; -	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz; +	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; +	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;  	engine->set_default_submission = intel_execlists_set_default_submission; @@ -2213,10 +2324,14 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)  	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;  } -static void +static int  logical_ring_setup(struct intel_engine_cs *engine)  { -	intel_engine_setup_common(engine); +	int err; + +	err = intel_engine_setup_common(engine); +	if (err) +		return err;  	/* Intentionally left blank. 
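One consequence of having the breadcrumb emitters return the advanced dword pointer, as the hunks above do, is that the hard-coded *_sz constants can be dropped: the size of an emitted breadcrumb falls out of pointer arithmetic. The fragment below only illustrates that pattern with invented command values; it is not the driver's code.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;

/* Illustrative emitter: returns the pointer advanced past what it wrote. */
static u32 *emit_fini_breadcrumb(u32 *cs)
{
	*cs++ = 0x12000000;	/* stand-in for a flush/store command */
	*cs++ = 0xdeadbeef;	/* stand-in for the seqno payload */
	*cs++ = 0x01000000;	/* stand-in for a user interrupt */
	*cs++ = 0;		/* padding/noop */
	return cs;
}

int main(void)
{
	u32 scratch[64];
	u32 *end = emit_fini_breadcrumb(scratch);

	/* The emitted size falls out of the pointer difference. */
	printf("breadcrumb is %zu dwords\n", (size_t)(end - scratch));
	return 0;
}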
*/  	engine->buffer = NULL; @@ -2226,6 +2341,8 @@ logical_ring_setup(struct intel_engine_cs *engine)  	logical_ring_default_vfuncs(engine);  	logical_ring_default_irqs(engine); + +	return 0;  }  static int logical_ring_init(struct intel_engine_cs *engine) @@ -2260,10 +2377,10 @@ static int logical_ring_init(struct intel_engine_cs *engine)  	}  	execlists->csb_status = -		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; +		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];  	execlists->csb_write = -		&engine->status_page.page_addr[intel_hws_csb_write_index(i915)]; +		&engine->status_page.addr[intel_hws_csb_write_index(i915)];  	reset_csb_pointers(execlists); @@ -2274,13 +2391,14 @@ int logical_render_ring_init(struct intel_engine_cs *engine)  {  	int ret; -	logical_ring_setup(engine); +	ret = logical_ring_setup(engine); +	if (ret) +		return ret;  	/* Override some for render ring. */  	engine->init_context = gen8_init_rcs_context;  	engine->emit_flush = gen8_emit_flush_render; -	engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs; -	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz; +	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;  	ret = logical_ring_init(engine);  	if (ret) @@ -2304,7 +2422,11 @@ int logical_render_ring_init(struct intel_engine_cs *engine)  int logical_xcs_ring_init(struct intel_engine_cs *engine)  { -	logical_ring_setup(engine); +	int err; + +	err = logical_ring_setup(engine); +	if (err) +		return err;  	return logical_ring_init(engine);  } @@ -2534,8 +2656,7 @@ static void execlists_init_reg_state(u32 *regs,  	if (rcs) {  		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); -		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, -			make_rpcs(dev_priv)); +		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);  		i915_oa_init_reg_state(engine, ctx, regs);  	} @@ -2638,7 +2759,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,  		goto error_deref_obj;  	} -	timeline = i915_timeline_create(ctx->i915, ctx->name); +	timeline = i915_timeline_create(ctx->i915, ctx->name, NULL);  	if (IS_ERR(timeline)) {  		ret = PTR_ERR(timeline);  		goto error_deref_obj; @@ -2696,12 +2817,8 @@ void intel_lr_context_resume(struct drm_i915_private *i915)  			intel_ring_reset(ce->ring, 0); -			if (ce->pin_count) { /* otherwise done in context_pin */ -				u32 *regs = ce->lrc_reg_state; - -				regs[CTX_RING_HEAD + 1] = ce->ring->head; -				regs[CTX_RING_TAIL + 1] = ce->ring->tail; -			} +			if (ce->pin_count) /* otherwise done in context_pin */ +				__execlists_update_reg_state(engine, ce);  		}  	}  } @@ -2740,7 +2857,9 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,  	last = NULL;  	count = 0; -	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); +	if (execlists->queue_priority_hint != INT_MIN) +		drm_printf(m, "\t\tQueue priority hint: %d\n", +			   execlists->queue_priority_hint);  	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {  		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);  		int i; diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index e976c5ce5479..331e7a678fb7 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -28,48 +28,60 @@  struct drm_i915_mocs_entry {  	u32 control_value;  	u16 l3cc_value; +	u16 used;  };  struct drm_i915_mocs_table { -	u32 size; +	unsigned int size; +	unsigned int n_entries;  	const struct drm_i915_mocs_entry *table;  };  /* Defines for the tables 
(XXX_MOCS_0 - XXX_MOCS_63) */ -#define LE_CACHEABILITY(value)	((value) << 0) -#define LE_TGT_CACHE(value)	((value) << 2) +#define _LE_CACHEABILITY(value)	((value) << 0) +#define _LE_TGT_CACHE(value)	((value) << 2)  #define LE_LRUM(value)		((value) << 4)  #define LE_AOM(value)		((value) << 6)  #define LE_RSC(value)		((value) << 7)  #define LE_SCC(value)		((value) << 8)  #define LE_PFM(value)		((value) << 11)  #define LE_SCF(value)		((value) << 14) +#define LE_COS(value)		((value) << 15) +#define LE_SSE(value)		((value) << 17)  /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */  #define L3_ESC(value)		((value) << 0)  #define L3_SCC(value)		((value) << 1) -#define L3_CACHEABILITY(value)	((value) << 4) +#define _L3_CACHEABILITY(value)	((value) << 4)  /* Helper defines */  #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */ +#define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */  /* (e)LLC caching options */ -#define LE_PAGETABLE		0 -#define LE_UC			1 -#define LE_WT			2 -#define LE_WB			3 - -/* L3 caching options */ -#define L3_DIRECT		0 -#define L3_UC			1 -#define L3_RESERVED		2 -#define L3_WB			3 +#define LE_0_PAGETABLE		_LE_CACHEABILITY(0) +#define LE_1_UC			_LE_CACHEABILITY(1) +#define LE_2_WT			_LE_CACHEABILITY(2) +#define LE_3_WB			_LE_CACHEABILITY(3)  /* Target cache */ -#define LE_TC_PAGETABLE		0 -#define LE_TC_LLC		1 -#define LE_TC_LLC_ELLC		2 -#define LE_TC_LLC_ELLC_ALT	3 +#define LE_TC_0_PAGETABLE	_LE_TGT_CACHE(0) +#define LE_TC_1_LLC		_LE_TGT_CACHE(1) +#define LE_TC_2_LLC_ELLC	_LE_TGT_CACHE(2) +#define LE_TC_3_LLC_ELLC_ALT	_LE_TGT_CACHE(3) + +/* L3 caching options */ +#define L3_0_DIRECT		_L3_CACHEABILITY(0) +#define L3_1_UC			_L3_CACHEABILITY(1) +#define L3_2_RESERVED		_L3_CACHEABILITY(2) +#define L3_3_WB			_L3_CACHEABILITY(3) + +#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ +	[__idx] = { \ +		.control_value = __control_value, \ +		.l3cc_value = __l3cc_value, \ +		.used = 1, \ +	}  /*   * MOCS tables @@ -80,85 +92,147 @@ struct drm_i915_mocs_table {   * LNCFCMOCS0 - LNCFCMOCS32 registers.   *   * These tables are intended to be kept reasonably consistent across - * platforms. However some of the fields are not applicable to all of - * them. + * HW platforms, and for ICL+, be identical across OSes. To achieve + * that, for Icelake and above, list of entries is published as part + * of bspec.   *   * Entries not part of the following tables are undefined as far as   * userspace is concerned and shouldn't be relied upon.  For the time - * being they will be implicitly initialized to the strictest caching - * configuration (uncached) to guarantee forwards compatibility with - * userspace programs written against more recent kernels providing - * additional MOCS entries. + * being they will be initialized to PTE.   * - * NOTE: These tables MUST start with being uncached and the length - *       MUST be less than 63 as the last two registers are reserved - *       by the hardware.  These tables are part of the kernel ABI and - *       may only be updated incrementally by adding entries at the - *       end. + * The last two entries are reserved by the hardware. For ICL+ they + * should be initialized according to bspec and never used, for older + * platforms they should never be written to. + * + * NOTE: These tables are part of bspec and defined as part of hardware + *       interface for ICL+. For older platforms, they are part of kernel + *       ABI. 
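With the raw field shifts collected into named macros above, the packed register values are easy to reconstruct by hand. The stand-alone snippet below copies the relevant definitions from this hunk and prints the control and L3CC words for a gen9-style writeback entry; the resulting 0x0000003b / 0x0030 match the raw values the old hand-written tables carried in their comments.

#include <stdio.h>

/* Field encodings copied from the table definitions above. */
#define _LE_CACHEABILITY(value)	((value) << 0)
#define _LE_TGT_CACHE(value)	((value) << 2)
#define LE_LRUM(value)		((value) << 4)

#define _L3_CACHEABILITY(value)	((value) << 4)

#define LE_3_WB			_LE_CACHEABILITY(3)
#define LE_TC_2_LLC_ELLC	_LE_TGT_CACHE(2)
#define L3_3_WB			_L3_CACHEABILITY(3)

int main(void)
{
	/* gen9 cached entry: writeback, LLC/eLLC target, LRU age 3. */
	unsigned int control = LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3);
	unsigned int l3cc = L3_3_WB;

	printf("control_value = 0x%08x\n", control);	/* 0x0000003b */
	printf("l3cc_value    = 0x%04x\n", l3cc);	/* 0x0030 */
	return 0;
}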
It is expected that, for specific hardware platform, existing + *       entries will remain constant and the table will only be updated by + *       adding new entries, filling unused positions.   */ +#define GEN9_MOCS_ENTRIES \ +	MOCS_ENTRY(I915_MOCS_UNCACHED, \ +		   LE_1_UC | LE_TC_2_LLC_ELLC, \ +		   L3_1_UC), \ +	MOCS_ENTRY(I915_MOCS_PTE, \ +		   LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \ +		   L3_3_WB) +  static const struct drm_i915_mocs_entry skylake_mocs_table[] = { -	[I915_MOCS_UNCACHED] = { -	  /* 0x00000009 */ -	  .control_value = LE_CACHEABILITY(LE_UC) | -			   LE_TGT_CACHE(LE_TC_LLC_ELLC) | -			   LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | -			   LE_PFM(0) | LE_SCF(0), - -	  /* 0x0010 */ -	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), -	}, -	[I915_MOCS_PTE] = { -	  /* 0x00000038 */ -	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) | -			   LE_TGT_CACHE(LE_TC_LLC_ELLC) | -			   LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | -			   LE_PFM(0) | LE_SCF(0), -	  /* 0x0030 */ -	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), -	}, -	[I915_MOCS_CACHED] = { -	  /* 0x0000003b */ -	  .control_value = LE_CACHEABILITY(LE_WB) | -			   LE_TGT_CACHE(LE_TC_LLC_ELLC) | -			   LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | -			   LE_PFM(0) | LE_SCF(0), -	  /* 0x0030 */ -	  .l3cc_value =   L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), -	}, +	GEN9_MOCS_ENTRIES, +	MOCS_ENTRY(I915_MOCS_CACHED, +		   LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), +		   L3_3_WB)  };  /* NOTE: the LE_TGT_CACHE is not used on Broxton */  static const struct drm_i915_mocs_entry broxton_mocs_table[] = { -	[I915_MOCS_UNCACHED] = { -	  /* 0x00000009 */ -	  .control_value = LE_CACHEABILITY(LE_UC) | -			   LE_TGT_CACHE(LE_TC_LLC_ELLC) | -			   LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | -			   LE_PFM(0) | LE_SCF(0), - -	  /* 0x0010 */ -	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), -	}, -	[I915_MOCS_PTE] = { -	  /* 0x00000038 */ -	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) | -			   LE_TGT_CACHE(LE_TC_LLC_ELLC) | -			   LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | -			   LE_PFM(0) | LE_SCF(0), - -	  /* 0x0030 */ -	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), -	}, -	[I915_MOCS_CACHED] = { -	  /* 0x00000039 */ -	  .control_value = LE_CACHEABILITY(LE_UC) | -			   LE_TGT_CACHE(LE_TC_LLC_ELLC) | -			   LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | -			   LE_PFM(0) | LE_SCF(0), - -	  /* 0x0030 */ -	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), -	}, +	GEN9_MOCS_ENTRIES, +	MOCS_ENTRY(I915_MOCS_CACHED, +		   LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3), +		   L3_3_WB) +}; + +#define GEN11_MOCS_ENTRIES \ +	/* Base - Uncached (Deprecated) */ \ +	MOCS_ENTRY(I915_MOCS_UNCACHED, \ +		   LE_1_UC | LE_TC_1_LLC, \ +		   L3_1_UC), \ +	/* Base - L3 + LeCC:PAT (Deprecated) */ \ +	MOCS_ENTRY(I915_MOCS_PTE, \ +		   LE_0_PAGETABLE | LE_TC_1_LLC, \ +		   L3_3_WB), \ +	/* Base - L3 + LLC */ \ +	MOCS_ENTRY(2, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ +		   L3_3_WB), \ +	/* Base - Uncached */ \ +	MOCS_ENTRY(3, \ +		   LE_1_UC | LE_TC_1_LLC, \ +		   L3_1_UC), \ +	/* Base - L3 */ \ +	MOCS_ENTRY(4, \ +		   LE_1_UC | LE_TC_1_LLC, \ +		   L3_3_WB), \ +	/* Base - LLC */ \ +	MOCS_ENTRY(5, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ +		   L3_1_UC), \ +	/* Age 0 - LLC */ \ +	MOCS_ENTRY(6, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ +		   L3_1_UC), \ +	/* Age 0 - L3 + LLC */ \ +	MOCS_ENTRY(7, \ +		   LE_3_WB | LE_TC_1_LLC | 
LE_LRUM(1), \ +		   L3_3_WB), \ +	/* Age: Don't Chg. - LLC */ \ +	MOCS_ENTRY(8, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ +		   L3_1_UC), \ +	/* Age: Don't Chg. - L3 + LLC */ \ +	MOCS_ENTRY(9, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ +		   L3_3_WB), \ +	/* No AOM - LLC */ \ +	MOCS_ENTRY(10, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ +		   L3_1_UC), \ +	/* No AOM - L3 + LLC */ \ +	MOCS_ENTRY(11, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ +		   L3_3_WB), \ +	/* No AOM; Age 0 - LLC */ \ +	MOCS_ENTRY(12, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ +		   L3_1_UC), \ +	/* No AOM; Age 0 - L3 + LLC */ \ +	MOCS_ENTRY(13, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ +		   L3_3_WB), \ +	/* No AOM; Age:DC - LLC */ \ +	MOCS_ENTRY(14, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ +		   L3_1_UC), \ +	/* No AOM; Age:DC - L3 + LLC */ \ +	MOCS_ENTRY(15, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ +		   L3_3_WB), \ +	/* Self-Snoop - L3 + LLC */ \ +	MOCS_ENTRY(18, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ +		   L3_3_WB), \ +	/* Skip Caching - L3 + LLC(12.5%) */ \ +	MOCS_ENTRY(19, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \ +		   L3_3_WB), \ +	/* Skip Caching - L3 + LLC(25%) */ \ +	MOCS_ENTRY(20, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \ +		   L3_3_WB), \ +	/* Skip Caching - L3 + LLC(50%) */ \ +	MOCS_ENTRY(21, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \ +		   L3_3_WB), \ +	/* Skip Caching - L3 + LLC(75%) */ \ +	MOCS_ENTRY(22, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \ +		   L3_3_WB), \ +	/* Skip Caching - L3 + LLC(87.5%) */ \ +	MOCS_ENTRY(23, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \ +		   L3_3_WB), \ +	/* HW Reserved - SW program but never use */ \ +	MOCS_ENTRY(62, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ +		   L3_1_UC), \ +	/* HW Reserved - SW program but never use */ \ +	MOCS_ENTRY(63, \ +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ +		   L3_1_UC) + +static const struct drm_i915_mocs_entry icelake_mocs_table[] = { +	GEN11_MOCS_ENTRIES  };  /** @@ -178,13 +252,19 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,  {  	bool result = false; -	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) || -	    IS_ICELAKE(dev_priv)) { +	if (IS_ICELAKE(dev_priv)) { +		table->size  = ARRAY_SIZE(icelake_mocs_table); +		table->table = icelake_mocs_table; +		table->n_entries = GEN11_NUM_MOCS_ENTRIES; +		result = true; +	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {  		table->size  = ARRAY_SIZE(skylake_mocs_table); +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;  		table->table = skylake_mocs_table;  		result = true;  	} else if (IS_GEN9_LP(dev_priv)) {  		table->size  = ARRAY_SIZE(broxton_mocs_table); +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;  		table->table = broxton_mocs_table;  		result = true;  	} else { @@ -226,6 +306,19 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index)  	}  } +/* + * Get control_value from MOCS entry taking into account when it's not used: + * I915_MOCS_PTE's value is returned in this case. 
+ */ +static u32 get_entry_control(const struct drm_i915_mocs_table *table, +			     unsigned int index) +{ +	if (table->table[index].used) +		return table->table[index].control_value; + +	return table->table[I915_MOCS_PTE].control_value; +} +  /**   * intel_mocs_init_engine() - emit the mocs control table   * @engine:	The engine for whom to emit the registers. @@ -238,27 +331,23 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)  	struct drm_i915_private *dev_priv = engine->i915;  	struct drm_i915_mocs_table table;  	unsigned int index; +	u32 unused_value;  	if (!get_mocs_settings(dev_priv, &table))  		return; -	GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES); - -	for (index = 0; index < table.size; index++) -		I915_WRITE(mocs_register(engine->id, index), -			   table.table[index].control_value); - -	/* -	 * Ok, now set the unused entries to uncached. These entries -	 * are officially undefined and no contract for the contents -	 * and settings is given for these entries. -	 * -	 * Entry 0 in the table is uncached - so we are just writing -	 * that value to all the used entries. -	 */ -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) -		I915_WRITE(mocs_register(engine->id, index), -			   table.table[0].control_value); +	/* Set unused values to PTE */ +	unused_value = table.table[I915_MOCS_PTE].control_value; + +	for (index = 0; index < table.size; index++) { +		u32 value = get_entry_control(&table, index); + +		I915_WRITE(mocs_register(engine->id, index), value); +	} + +	/* All remaining entries are also unused */ +	for (; index < table.n_entries; index++) +		I915_WRITE(mocs_register(engine->id, index), unused_value);  }  /** @@ -276,33 +365,32 @@ static int emit_mocs_control_table(struct i915_request *rq,  {  	enum intel_engine_id engine = rq->engine->id;  	unsigned int index; +	u32 unused_value;  	u32 *cs; -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) +	if (GEM_WARN_ON(table->size > table->n_entries))  		return -ENODEV; -	cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); +	/* Set unused values to PTE */ +	unused_value = table->table[I915_MOCS_PTE].control_value; + +	cs = intel_ring_begin(rq, 2 + 2 * table->n_entries);  	if (IS_ERR(cs))  		return PTR_ERR(cs); -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES); +	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);  	for (index = 0; index < table->size; index++) { +		u32 value = get_entry_control(table, index); +  		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); -		*cs++ = table->table[index].control_value; +		*cs++ = value;  	} -	/* -	 * Ok, now set the unused entries to uncached. These entries -	 * are officially undefined and no contract for the contents -	 * and settings is given for these entries. -	 * -	 * Entry 0 in the table is uncached - so we are just writing -	 * that value to all the used entries. -	 */ -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { +	/* All remaining entries are also unused */ +	for (; index < table->n_entries; index++) {  		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); -		*cs++ = table->table[0].control_value; +		*cs++ = unused_value;  	}  	*cs++ = MI_NOOP; @@ -311,12 +399,24 @@ static int emit_mocs_control_table(struct i915_request *rq,  	return 0;  } +/* + * Get l3cc_value from MOCS entry taking into account when it's not used: + * I915_MOCS_PTE's value is returned in this case. 
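The new used flag and n_entries bound change how the registers get programmed: intel_mocs_init_engine() above writes the explicitly defined entries, and every hole as well as every trailing index up to n_entries falls back to the PTE entry's value. Below is a small stand-alone model of that fill pattern, with an array standing in for the MMIO writes and arbitrary stand-in values.

#include <stdio.h>

#define N_ENTRIES 8	/* stands in for GEN9/GEN11_NUM_MOCS_ENTRIES */
#define MOCS_PTE 1	/* plays the role of I915_MOCS_PTE */

struct mocs_entry {
	unsigned int control_value;
	int used;
};

int main(void)
{
	/* Arbitrary stand-in values; index 2 is deliberately left undefined. */
	const struct mocs_entry table[] = {
		[0] = { 0x11, 1 },
		[MOCS_PTE] = { 0x22, 1 },
		[3] = { 0x44, 1 },
	};
	const unsigned int size = sizeof(table) / sizeof(table[0]);
	unsigned int regs[N_ENTRIES];
	unsigned int i;

	/* Defined entries keep their value, holes fall back to the PTE entry. */
	for (i = 0; i < size; i++)
		regs[i] = table[i].used ? table[i].control_value
					: table[MOCS_PTE].control_value;

	/* Everything beyond the table, up to n_entries, is also set to PTE. */
	for (; i < N_ENTRIES; i++)
		regs[i] = table[MOCS_PTE].control_value;

	for (i = 0; i < N_ENTRIES; i++)
		printf("MOCS[%u] = 0x%02x\n", i, regs[i]);
	return 0;
}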
+ */ +static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, +			  unsigned int index) +{ +	if (table->table[index].used) +		return table->table[index].l3cc_value; + +	return table->table[I915_MOCS_PTE].l3cc_value; +} +  static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,  			       u16 low,  			       u16 high)  { -	return table->table[low].l3cc_value | -	       table->table[high].l3cc_value << 16; +	return low | high << 16;  }  /** @@ -333,38 +433,43 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,  static int emit_mocs_l3cc_table(struct i915_request *rq,  				const struct drm_i915_mocs_table *table)  { +	u16 unused_value;  	unsigned int i;  	u32 *cs; -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) +	if (GEM_WARN_ON(table->size > table->n_entries))  		return -ENODEV; -	cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES); +	/* Set unused values to PTE */ +	unused_value = table->table[I915_MOCS_PTE].l3cc_value; + +	cs = intel_ring_begin(rq, 2 + table->n_entries);  	if (IS_ERR(cs))  		return PTR_ERR(cs); -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); +	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2); + +	for (i = 0; i < table->size / 2; i++) { +		u16 low = get_entry_l3cc(table, 2 * i); +		u16 high = get_entry_l3cc(table, 2 * i + 1); -	for (i = 0; i < table->size/2; i++) {  		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); -		*cs++ = l3cc_combine(table, 2 * i, 2 * i + 1); +		*cs++ = l3cc_combine(table, low, high);  	} +	/* Odd table size - 1 left over */  	if (table->size & 0x01) { -		/* Odd table size - 1 left over */ +		u16 low = get_entry_l3cc(table, 2 * i); +  		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); -		*cs++ = l3cc_combine(table, 2 * i, 0); +		*cs++ = l3cc_combine(table, low, unused_value);  		i++;  	} -	/* -	 * Now set the rest of the table to uncached - use entry 0 as -	 * this will be uncached. Leave the last pair uninitialised as -	 * they are reserved by the hardware. -	 */ -	for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { +	/* All remaining entries are also unused */ +	for (; i < table->n_entries / 2; i++) {  		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); -		*cs++ = l3cc_combine(table, 0, 0); +		*cs++ = l3cc_combine(table, unused_value, unused_value);  	}  	*cs++ = MI_NOOP; @@ -391,26 +496,35 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)  {  	struct drm_i915_mocs_table table;  	unsigned int i; +	u16 unused_value;  	if (!get_mocs_settings(dev_priv, &table))  		return; -	for (i = 0; i < table.size/2; i++) -		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 2*i, 2*i+1)); +	/* Set unused values to PTE */ +	unused_value = table.table[I915_MOCS_PTE].l3cc_value; + +	for (i = 0; i < table.size / 2; i++) { +		u16 low = get_entry_l3cc(&table, 2 * i); +		u16 high = get_entry_l3cc(&table, 2 * i + 1); + +		I915_WRITE(GEN9_LNCFCMOCS(i), +			   l3cc_combine(&table, low, high)); +	}  	/* Odd table size - 1 left over */  	if (table.size & 0x01) { -		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 2*i, 0)); +		u16 low = get_entry_l3cc(&table, 2 * i); + +		I915_WRITE(GEN9_LNCFCMOCS(i), +			   l3cc_combine(&table, low, unused_value));  		i++;  	} -	/* -	 * Now set the rest of the table to uncached - use entry 0 as -	 * this will be uncached. Leave the last pair as initialised as -	 * they are reserved by the hardware. 
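Each GEN9_LNCFCMOCS register carries two 16-bit L3CC entries, so l3cc_combine() above simply packs an even/odd pair, and an odd-sized table gets its trailing entry paired with the unused (PTE) value. A small stand-alone illustration, using the 0x0010/0x0030 encodings seen in the tables earlier in this file:

#include <stdint.h>
#include <stdio.h>

/* Two 16-bit L3CC values share one 32-bit LNCFCMOCS register. */
static uint32_t l3cc_combine(uint16_t low, uint16_t high)
{
	return low | (uint32_t)high << 16;
}

int main(void)
{
	uint16_t uc = 0x0010;	/* uncached L3CC encoding */
	uint16_t wb = 0x0030;	/* writeback L3CC encoding */

	/* Entries 0 and 1 land in the same register, low half first. */
	printf("0x%08x\n", l3cc_combine(uc, wb));	/* 0x00300010 */

	/* An odd trailing entry is paired with the unused (PTE) value. */
	printf("0x%08x\n", l3cc_combine(wb, wb));	/* 0x00300030 */
	return 0;
}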
-	 */ -	for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++) -		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0)); +	/* All remaining entries are also unused */ +	for (; i < table.n_entries / 2; i++) +		I915_WRITE(GEN9_LNCFCMOCS(i), +			   l3cc_combine(&table, unused_value, unused_value));  }  /** diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index c300e5787b3c..a9238fd07e30 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -480,8 +480,6 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv)  	if (!overlay)  		return; -	intel_overlay_release_old_vid(overlay); -  	overlay->old_xscale = 0;  	overlay->old_yscale = 0;  	overlay->crtc = NULL; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fdc28a3d2936..ed9786241307 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3631,14 +3631,9 @@ static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)   * FIXME: We still don't have the proper code detect if we need to apply the WA,   * so assume we'll always need it in order to avoid underruns.   */ -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) +static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv)  { -	struct drm_i915_private *dev_priv = to_i915(state->base.dev); - -	if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) -		return true; - -	return false; +	return IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv);  }  static bool @@ -3670,25 +3665,25 @@ intel_enable_sagv(struct drm_i915_private *dev_priv)  	if (dev_priv->sagv_status == I915_SAGV_ENABLED)  		return 0; -	DRM_DEBUG_KMS("Enabling the SAGV\n"); +	DRM_DEBUG_KMS("Enabling SAGV\n");  	mutex_lock(&dev_priv->pcu_lock);  	ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,  				      GEN9_SAGV_ENABLE); -	/* We don't need to wait for the SAGV when enabling */ +	/* We don't need to wait for SAGV when enabling */  	mutex_unlock(&dev_priv->pcu_lock);  	/*  	 * Some skl systems, pre-release machines in particular, -	 * don't actually have an SAGV. +	 * don't actually have SAGV.  	 */  	if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {  		DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");  		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;  		return 0;  	} else if (ret < 0) { -		DRM_ERROR("Failed to enable the SAGV\n"); +		DRM_ERROR("Failed to enable SAGV\n");  		return ret;  	} @@ -3707,7 +3702,7 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)  	if (dev_priv->sagv_status == I915_SAGV_DISABLED)  		return 0; -	DRM_DEBUG_KMS("Disabling the SAGV\n"); +	DRM_DEBUG_KMS("Disabling SAGV\n");  	mutex_lock(&dev_priv->pcu_lock);  	/* bspec says to keep retrying for at least 1 ms */ @@ -3719,14 +3714,14 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)  	/*  	 * Some skl systems, pre-release machines in particular, -	 * don't actually have an SAGV. +	 * don't actually have SAGV.  	 
*/  	if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {  		DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");  		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;  		return 0;  	} else if (ret < 0) { -		DRM_ERROR("Failed to disable the SAGV (%d)\n", ret); +		DRM_ERROR("Failed to disable SAGV (%d)\n", ret);  		return ret;  	} @@ -3757,7 +3752,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state)  		sagv_block_time_us = 10;  	/* -	 * SKL+ workaround: bspec recommends we disable the SAGV when we have +	 * SKL+ workaround: bspec recommends we disable SAGV when we have  	 * more then one pipe enabled  	 *  	 * If there are no active CRTCs, no additional checks need be performed @@ -3790,7 +3785,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state)  		latency = dev_priv->wm.skl_latency[level]; -		if (skl_needs_memory_bw_wa(intel_state) && +		if (skl_needs_memory_bw_wa(dev_priv) &&  		    plane->base.state->fb->modifier ==  		    I915_FORMAT_MOD_X_TILED)  			latency += 15; @@ -3798,7 +3793,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state)  		/*  		 * If any of the planes on this pipe don't enable wm levels that  		 * incur memory latencies higher than sagv_block_time_us we -		 * can't enable the SAGV. +		 * can't enable SAGV.  		 */  		if (latency < sagv_block_time_us)  			return false; @@ -3827,8 +3822,13 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,  	/*  	 * 12GB/s is maximum BW supported by single DBuf slice. +	 * +	 * FIXME dbuf slice code is broken: +	 * - must wait for planes to stop using the slice before powering it off +	 * - plane straddling both slices is illegal in multi-pipe scenarios +	 * - should validate we stay within the hw bandwidth limits  	 */ -	if (num_active > 1 || total_data_bw >= GBps(12)) { +	if (0 && (num_active > 1 || total_data_bw >= GBps(12))) {  		ddb->enabled_slices = 2;  	} else {  		ddb->enabled_slices = 1; @@ -4371,8 +4371,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,  				continue;  			wm = &cstate->wm.skl.optimal.planes[plane_id]; -			blocks += wm->wm[level].plane_res_b; -			blocks += wm->uv_wm[level].plane_res_b; +			blocks += wm->wm[level].min_ddb_alloc; +			blocks += wm->uv_wm[level].min_ddb_alloc;  		}  		if (blocks < alloc_size) { @@ -4413,7 +4413,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,  		extra = min_t(u16, alloc_size,  			      DIV64_U64_ROUND_UP(alloc_size * rate,  						 total_data_rate)); -		total[plane_id] = wm->wm[level].plane_res_b + extra; +		total[plane_id] = wm->wm[level].min_ddb_alloc + extra;  		alloc_size -= extra;  		total_data_rate -= rate; @@ -4424,7 +4424,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,  		extra = min_t(u16, alloc_size,  			      DIV64_U64_ROUND_UP(alloc_size * rate,  						 total_data_rate)); -		uv_total[plane_id] = wm->uv_wm[level].plane_res_b + extra; +		uv_total[plane_id] = wm->uv_wm[level].min_ddb_alloc + extra;  		alloc_size -= extra;  		total_data_rate -= rate;  	} @@ -4477,7 +4477,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,  	 */  	for_each_plane_id_on_crtc(intel_crtc, plane_id) {  		wm = &cstate->wm.skl.optimal.planes[plane_id]; -		if (wm->trans_wm.plane_res_b > total[plane_id]) +		if (wm->trans_wm.plane_res_b >= total[plane_id])  			memset(&wm->trans_wm, 0, sizeof(wm->trans_wm));  	} @@ -4579,9 +4579,6 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,  	const struct drm_plane_state *pstate = &intel_pstate->base;  	const struct drm_framebuffer *fb = pstate->fb;  	u32 
interm_pbpl; -	struct intel_atomic_state *state = -		to_intel_atomic_state(cstate->base.state); -	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);  	/* only NV12 format has two planes */  	if (color_plane == 1 && fb->format->format != DRM_FORMAT_NV12) { @@ -4617,7 +4614,7 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,  							     intel_pstate);  	if (INTEL_GEN(dev_priv) >= 11 && -	    fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8) +	    fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 1)  		wp->dbuf_block_size = 256;  	else  		wp->dbuf_block_size = 512; @@ -4642,7 +4639,7 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,  		wp->y_min_scanlines = 4;  	} -	if (apply_memory_bw_wa) +	if (skl_needs_memory_bw_wa(dev_priv))  		wp->y_min_scanlines *= 2;  	wp->plane_bytes_per_line = wp->width * wp->cpp; @@ -4674,6 +4671,15 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,  	return 0;  } +static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level) +{ +	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) +		return true; + +	/* The number of lines are ignored for the level 0 watermark. */ +	return level > 0; +} +  static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,  				 const struct intel_plane_state *intel_pstate,  				 int level, @@ -4686,10 +4692,10 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,  	u32 latency = dev_priv->wm.skl_latency[level];  	uint_fixed_16_16_t method1, method2;  	uint_fixed_16_16_t selected_result; -	u32 res_blocks, res_lines; -	struct intel_atomic_state *state = -		to_intel_atomic_state(cstate->base.state); -	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); +	u32 res_blocks, res_lines, min_ddb_alloc = 0; + +	if (latency == 0) +		return;  	/* Display WA #1141: kbl,cfl */  	if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) || @@ -4697,7 +4703,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,  	    dev_priv->ipc_enabled)  		latency += 4; -	if (apply_memory_bw_wa && wp->x_tiled) +	if (skl_needs_memory_bw_wa(dev_priv) && wp->x_tiled)  		latency += 15;  	method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate, @@ -4756,8 +4762,28 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,  		}  	} -	/* The number of lines are ignored for the level 0 watermark. 
*/ -	if (level > 0 && res_lines > 31) +	if (INTEL_GEN(dev_priv) >= 11) { +		if (wp->y_tiled) { +			int extra_lines; + +			if (res_lines % wp->y_min_scanlines == 0) +				extra_lines = wp->y_min_scanlines; +			else +				extra_lines = wp->y_min_scanlines * 2 - +					res_lines % wp->y_min_scanlines; + +			min_ddb_alloc = mul_round_up_u32_fixed16(res_lines + extra_lines, +								 wp->plane_blocks_per_line); +		} else { +			min_ddb_alloc = res_blocks + +				DIV_ROUND_UP(res_blocks, 10); +		} +	} + +	if (!skl_wm_has_lines(dev_priv, level)) +		res_lines = 0; + +	if (res_lines > 31)  		return;  	/* @@ -4768,6 +4794,8 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,  	 */  	result->plane_res_b = res_blocks;  	result->plane_res_l = res_lines; +	/* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here */ +	result->min_ddb_alloc = max(min_ddb_alloc, res_blocks) + 1;  	result->plane_en = true;  } @@ -4801,15 +4829,10 @@ skl_compute_linetime_wm(const struct intel_crtc_state *cstate)  	u32 linetime_wm;  	linetime_us = intel_get_linetime_us(cstate); - -	if (is_fixed16_zero(linetime_us)) -		return 0; -  	linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us)); -	/* Display WA #1135: bxt:ALL GLK:ALL */ -	if ((IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) && -	    dev_priv->ipc_enabled) +	/* Display WA #1135: BXT:ALL GLK:ALL */ +	if (IS_GEN9_LP(dev_priv) && dev_priv->ipc_enabled)  		linetime_wm /= 2;  	return linetime_wm; @@ -5118,6 +5141,23 @@ static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv,  	return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm);  } +static bool skl_pipe_wm_equals(struct intel_crtc *crtc, +			       const struct skl_pipe_wm *wm1, +			       const struct skl_pipe_wm *wm2) +{ +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); +	enum plane_id plane_id; + +	for_each_plane_id_on_crtc(crtc, plane_id) { +		if (!skl_plane_wm_equals(dev_priv, +					 &wm1->planes[plane_id], +					 &wm2->planes[plane_id])) +			return false; +	} + +	return wm1->linetime == wm2->linetime; +} +  static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,  					   const struct skl_ddb_entry *b)  { @@ -5144,16 +5184,14 @@ static int skl_update_pipe_wm(struct intel_crtc_state *cstate,  			      struct skl_pipe_wm *pipe_wm, /* out */  			      bool *changed /* out */)  { +	struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);  	int ret;  	ret = skl_build_pipe_wm(cstate, pipe_wm);  	if (ret)  		return ret; -	if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm))) -		*changed = false; -	else -		*changed = true; +	*changed = !skl_pipe_wm_equals(crtc, old_pipe_wm, pipe_wm);  	return 0;  } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e39e483d8d16..b889b27f8aeb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -33,6 +33,7 @@  #include "i915_drv.h"  #include "i915_gem_render_state.h" +#include "i915_reset.h"  #include "i915_trace.h"  #include "intel_drv.h"  #include "intel_workarounds.h" @@ -42,6 +43,12 @@   */  #define LEGACY_REQUEST_SIZE 200 +static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) +{ +	return (i915_ggtt_offset(engine->status_page.vma) + +		I915_GEM_HWS_INDEX_ADDR); +} +  static unsigned int __intel_ring_space(unsigned int head,  				       unsigned int tail,  				       unsigned int size) @@ -299,7 +306,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)  	return 0; 
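The gen11 min_ddb_alloc computation introduced in the skl_compute_plane_wm() hunk above is easier to follow with concrete numbers. The stand-alone sketch below mirrors that arithmetic with plain integers and a double standing in for the driver's fixed-point plane_blocks_per_line; the input values are invented for illustration.

#include <math.h>
#include <stdio.h>

/*
 * Simplified model of the gen11 minimum DDB allocation for a Y-tiled plane:
 * pad the watermark lines past the next y_min_scanlines boundary, then
 * convert lines to blocks, rounding up.
 */
static unsigned int min_ddb_alloc_ytiled(unsigned int res_lines,
					 unsigned int y_min_scanlines,
					 double plane_blocks_per_line)
{
	unsigned int extra_lines;

	if (res_lines % y_min_scanlines == 0)
		extra_lines = y_min_scanlines;
	else
		extra_lines = 2 * y_min_scanlines -
			      res_lines % y_min_scanlines;

	return (unsigned int)ceil((res_lines + extra_lines) *
				  plane_blocks_per_line);
}

int main(void)
{
	/* 10 lines, 4-line tiles, 2.5 blocks per line -> 16 * 2.5 = 40 blocks */
	printf("%u\n", min_ddb_alloc_ytiled(10, 4, 2.5));

	/* Linear/X-tiled case: blocks plus a rounded-up 10% margin. */
	unsigned int res_blocks = 37;
	printf("%u\n", res_blocks + (res_blocks + 9) / 10);	/* 37 + 4 = 41 */
	return 0;
}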
 } -static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  {  	/* First we do the gen6_emit_post_sync_nonzero_flush w/a */  	*cs++ = GFX_OP_PIPE_CONTROL(4); @@ -319,6 +326,11 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  		 PIPE_CONTROL_DC_FLUSH_ENABLE |  		 PIPE_CONTROL_QW_WRITE |  		 PIPE_CONTROL_CS_STALL); +	*cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; +	*cs++ = rq->fence.seqno; + +	*cs++ = GFX_OP_PIPE_CONTROL(4); +	*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;  	*cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;  	*cs++ = rq->global_seqno; @@ -327,8 +339,9 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  	rq->tail = intel_ring_offset(rq, cs);  	assert_ring_tail_valid(rq->ring, rq->tail); + +	return cs;  } -static const int gen6_rcs_emit_breadcrumb_sz = 14;  static int  gen7_render_ring_cs_stall_wa(struct i915_request *rq) @@ -409,7 +422,7 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)  	return 0;  } -static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  {  	*cs++ = GFX_OP_PIPE_CONTROL(4);  	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | @@ -419,6 +432,13 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  		 PIPE_CONTROL_QW_WRITE |  		 PIPE_CONTROL_GLOBAL_GTT_IVB |  		 PIPE_CONTROL_CS_STALL); +	*cs++ = rq->timeline->hwsp_offset; +	*cs++ = rq->fence.seqno; + +	*cs++ = GFX_OP_PIPE_CONTROL(4); +	*cs++ = (PIPE_CONTROL_QW_WRITE | +		 PIPE_CONTROL_GLOBAL_GTT_IVB | +		 PIPE_CONTROL_CS_STALL);  	*cs++ = intel_hws_seqno_address(rq->engine);  	*cs++ = rq->global_seqno; @@ -427,34 +447,52 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  	rq->tail = intel_ring_offset(rq, cs);  	assert_ring_tail_valid(rq->ring, rq->tail); + +	return cs;  } -static const int gen7_rcs_emit_breadcrumb_sz = 6; -static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  { -	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; -	*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; +	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); +	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + +	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; +	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; +	*cs++ = rq->fence.seqno; + +	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; +	*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;  	*cs++ = rq->global_seqno; +  	*cs++ = MI_USER_INTERRUPT; +	*cs++ = MI_NOOP;  	rq->tail = intel_ring_offset(rq, cs);  	assert_ring_tail_valid(rq->ring, rq->tail); + +	return cs;  } -static const int gen6_xcs_emit_breadcrumb_sz = 4;  #define GEN7_XCS_WA 32 -static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  {  	int i; -	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; -	*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; +	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); +	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + +	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; +	*cs++ = 
I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; +	*cs++ = rq->fence.seqno; + +	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; +	*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;  	*cs++ = rq->global_seqno;  	for (i = 0; i < GEN7_XCS_WA; i++) {  		*cs++ = MI_STORE_DWORD_INDEX; -		*cs++ = I915_GEM_HWS_INDEX_ADDR; -		*cs++ = rq->global_seqno; +		*cs++ = I915_GEM_HWS_SEQNO_ADDR; +		*cs++ = rq->fence.seqno;  	}  	*cs++ = MI_FLUSH_DW; @@ -462,12 +500,12 @@ static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)  	*cs++ = 0;  	*cs++ = MI_USER_INTERRUPT; -	*cs++ = MI_NOOP;  	rq->tail = intel_ring_offset(rq, cs);  	assert_ring_tail_valid(rq->ring, rq->tail); + +	return cs;  } -static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;  #undef GEN7_XCS_WA  static void set_hwstam(struct intel_engine_cs *engine, u32 mask) @@ -498,12 +536,17 @@ static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)  	I915_WRITE(HWS_PGA, addr);  } -static void ring_setup_phys_status_page(struct intel_engine_cs *engine) +static struct page *status_page(struct intel_engine_cs *engine)  { -	struct page *page = virt_to_page(engine->status_page.page_addr); -	phys_addr_t phys = PFN_PHYS(page_to_pfn(page)); +	struct drm_i915_gem_object *obj = engine->status_page.vma->obj; -	set_hws_pga(engine, phys); +	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); +	return sg_page(obj->mm.pages->sgl); +} + +static void ring_setup_phys_status_page(struct intel_engine_cs *engine) +{ +	set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));  	set_hwstam(engine, ~0u);  } @@ -570,7 +613,7 @@ static void flush_cs_tlb(struct intel_engine_cs *engine)  static void ring_setup_status_page(struct intel_engine_cs *engine)  { -	set_hwsp(engine, engine->status_page.ggtt_offset); +	set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));  	set_hwstam(engine, ~0u);  	flush_cs_tlb(engine); @@ -700,59 +743,87 @@ static int init_ring_common(struct intel_engine_cs *engine)  	}  	/* Papering over lost _interrupts_ immediately following the restart */ -	intel_engine_wakeup(engine); +	intel_engine_queue_breadcrumbs(engine);  out:  	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);  	return ret;  } -static struct i915_request *reset_prepare(struct intel_engine_cs *engine) +static void reset_prepare(struct intel_engine_cs *engine)  {  	intel_engine_stop_cs(engine); -	return i915_gem_find_active_request(engine);  } -static void skip_request(struct i915_request *rq) +static void reset_ring(struct intel_engine_cs *engine, bool stalled)  { -	void *vaddr = rq->ring->vaddr; +	struct i915_timeline *tl = &engine->timeline; +	struct i915_request *pos, *rq; +	unsigned long flags;  	u32 head; -	head = rq->infix; -	if (rq->postfix < head) { -		memset32(vaddr + head, MI_NOOP, -			 (rq->ring->size - head) / sizeof(u32)); -		head = 0; +	rq = NULL; +	spin_lock_irqsave(&tl->lock, flags); +	list_for_each_entry(pos, &tl->requests, link) { +		if (!i915_request_completed(pos)) { +			rq = pos; +			break; +		}  	} -	memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32)); -} - -static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq) -{ -	GEM_TRACE("%s request global=%d, current=%d\n", -		  engine->name, rq ? rq->global_seqno : 0, -		  intel_engine_get_seqno(engine)); +	GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", +		  engine->name, +		  rq ? 
rq->global_seqno : 0, +		  intel_engine_get_seqno(engine), +		  yesno(stalled));  	/* -	 * Try to restore the logical GPU state to match the continuation -	 * of the request queue. If we skip the context/PD restore, then -	 * the next request may try to execute assuming that its context -	 * is valid and loaded on the GPU and so may try to access invalid -	 * memory, prompting repeated GPU hangs. +	 * The guilty request will get skipped on a hung engine.  	 * -	 * If the request was guilty, we still restore the logical state -	 * in case the next request requires it (e.g. the aliasing ppgtt), -	 * but skip over the hung batch. +	 * Users of client default contexts do not rely on logical +	 * state preserved between batches so it is safe to execute +	 * queued requests following the hang. Non default contexts +	 * rely on preserved state, so skipping a batch loses the +	 * evolution of the state and it needs to be considered corrupted. +	 * Executing more queued batches on top of corrupted state is +	 * risky. But we take the risk by trying to advance through +	 * the queued requests in order to make the client behaviour +	 * more predictable around resets, by not throwing away random +	 * amount of batches it has prepared for execution. Sophisticated +	 * clients can use gem_reset_stats_ioctl and dma fence status +	 * (exported via sync_file info ioctl on explicit fences) to observe +	 * when it loses the context state and should rebuild accordingly.  	 * -	 * If the request was innocent, we try to replay the request with -	 * the restored context. +	 * The context ban, and ultimately the client ban, mechanism are safety +	 * valves if client submission ends up resulting in nothing more than +	 * subsequent hangs.  	 */ +  	if (rq) { -		/* If the rq hung, jump to its breadcrumb and skip the batch */ -		rq->ring->head = intel_ring_wrap(rq->ring, rq->head); -		if (rq->fence.error == -EIO) -			skip_request(rq); +		/* +		 * Try to restore the logical GPU state to match the +		 * continuation of the request queue. If we skip the +		 * context/PD restore, then the next request may try to execute +		 * assuming that its context is valid and loaded on the GPU and +		 * so may try to access invalid memory, prompting repeated GPU +		 * hangs. +		 * +		 * If the request was guilty, we still restore the logical +		 * state in case the next request requires it (e.g. the +		 * aliasing ppgtt), but skip over the hung batch. +		 * +		 * If the request was innocent, we try to replay the request +		 * with the restored context. 
+		 */ +		i915_reset_request(rq, stalled); + +		GEM_BUG_ON(rq->ring != engine->buffer); +		head = rq->head; +	} else { +		head = engine->buffer->tail;  	} +	engine->buffer->head = intel_ring_wrap(engine->buffer, head); + +	spin_unlock_irqrestore(&tl->lock, flags);  }  static void reset_finish(struct intel_engine_cs *engine) @@ -836,10 +907,10 @@ static void cancel_requests(struct intel_engine_cs *engine)  	list_for_each_entry(request, &engine->timeline.requests, link) {  		GEM_BUG_ON(!request->global_seqno); -		if (i915_request_signaled(request)) -			continue; +		if (!i915_request_signaled(request)) +			dma_fence_set_error(&request->fence, -EIO); -		dma_fence_set_error(&request->fence, -EIO); +		i915_request_mark_complete(request);  	}  	intel_write_status_page(engine, @@ -861,29 +932,43 @@ static void i9xx_submit_request(struct i915_request *request)  			intel_ring_set_tail(request->ring, request->tail));  } -static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)  { +	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); +	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); +  	*cs++ = MI_FLUSH;  	*cs++ = MI_STORE_DWORD_INDEX; +	*cs++ = I915_GEM_HWS_SEQNO_ADDR; +	*cs++ = rq->fence.seqno; + +	*cs++ = MI_STORE_DWORD_INDEX;  	*cs++ = I915_GEM_HWS_INDEX_ADDR;  	*cs++ = rq->global_seqno;  	*cs++ = MI_USER_INTERRUPT; -	*cs++ = MI_NOOP;  	rq->tail = intel_ring_offset(rq, cs);  	assert_ring_tail_valid(rq->ring, rq->tail); + +	return cs;  } -static const int i9xx_emit_breadcrumb_sz = 6;  #define GEN5_WA_STORES 8 /* must be at least 1! */ -static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)  {  	int i; +	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); +	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); +  	*cs++ = MI_FLUSH; +	*cs++ = MI_STORE_DWORD_INDEX; +	*cs++ = I915_GEM_HWS_SEQNO_ADDR; +	*cs++ = rq->fence.seqno; +  	BUILD_BUG_ON(GEN5_WA_STORES < 1);  	for (i = 0; i < GEN5_WA_STORES; i++) {  		*cs++ = MI_STORE_DWORD_INDEX; @@ -892,11 +977,13 @@ static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)  	}  	*cs++ = MI_USER_INTERRUPT; +	*cs++ = MI_NOOP;  	rq->tail = intel_ring_offset(rq, cs);  	assert_ring_tail_valid(rq->ring, rq->tail); + +	return cs;  } -static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2;  #undef GEN5_WA_STORES  static void @@ -1123,6 +1210,10 @@ int intel_ring_pin(struct intel_ring *ring)  	GEM_BUG_ON(ring->vaddr); +	ret = i915_timeline_pin(ring->timeline); +	if (ret) +		return ret; +  	flags = PIN_GLOBAL;  	/* Ring wraparound at offset 0 sometimes hangs. No idea why. 
*/ @@ -1139,28 +1230,32 @@ int intel_ring_pin(struct intel_ring *ring)  		else  			ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);  		if (unlikely(ret)) -			return ret; +			goto unpin_timeline;  	}  	ret = i915_vma_pin(vma, 0, 0, flags);  	if (unlikely(ret)) -		return ret; +		goto unpin_timeline;  	if (i915_vma_is_map_and_fenceable(vma))  		addr = (void __force *)i915_vma_pin_iomap(vma);  	else  		addr = i915_gem_object_pin_map(vma->obj, map); -	if (IS_ERR(addr)) -		goto err; +	if (IS_ERR(addr)) { +		ret = PTR_ERR(addr); +		goto unpin_ring; +	}  	vma->obj->pin_global++;  	ring->vaddr = addr;  	return 0; -err: +unpin_ring:  	i915_vma_unpin(vma); -	return PTR_ERR(addr); +unpin_timeline: +	i915_timeline_unpin(ring->timeline); +	return ret;  }  void intel_ring_reset(struct intel_ring *ring, u32 tail) @@ -1189,6 +1284,8 @@ void intel_ring_unpin(struct intel_ring *ring)  	ring->vma->obj->pin_global--;  	i915_vma_unpin(ring->vma); + +	i915_timeline_unpin(ring->timeline);  }  static struct i915_vma * @@ -1499,13 +1596,18 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)  	struct intel_ring *ring;  	int err; -	intel_engine_setup_common(engine); +	err = intel_engine_setup_common(engine); +	if (err) +		return err; -	timeline = i915_timeline_create(engine->i915, engine->name); +	timeline = i915_timeline_create(engine->i915, +					engine->name, +					engine->status_page.vma);  	if (IS_ERR(timeline)) {  		err = PTR_ERR(timeline);  		goto err;  	} +	GEM_BUG_ON(timeline->has_initial_breadcrumb);  	ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);  	i915_timeline_put(timeline); @@ -1525,6 +1627,8 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)  	if (err)  		goto err_unpin; +	GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma); +  	return 0;  err_unpin: @@ -1857,6 +1961,7 @@ static int ring_request_alloc(struct i915_request *request)  	int ret;  	GEM_BUG_ON(!request->hw_context->pin_count); +	GEM_BUG_ON(request->timeline->has_initial_breadcrumb);  	/*  	 * Flush enough space to reduce the likelihood of waiting after @@ -2193,12 +2298,14 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,  	engine->context_pin = intel_ring_context_pin;  	engine->request_alloc = ring_request_alloc; -	engine->emit_breadcrumb = i9xx_emit_breadcrumb; -	engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; -	if (IS_GEN(dev_priv, 5)) { -		engine->emit_breadcrumb = gen5_emit_breadcrumb; -		engine->emit_breadcrumb_sz = gen5_emit_breadcrumb_sz; -	} +	/* +	 * Using a global execution timeline; the previous final breadcrumb is +	 * equivalent to our next initial bread so we can elide +	 * engine->emit_init_breadcrumb(). 
+	 */ +	engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb; +	if (IS_GEN(dev_priv, 5)) +		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;  	engine->set_default_submission = i9xx_set_default_submission; @@ -2227,13 +2334,11 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)  	if (INTEL_GEN(dev_priv) >= 7) {  		engine->init_context = intel_rcs_ctx_init;  		engine->emit_flush = gen7_render_ring_flush; -		engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb; -		engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz; +		engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;  	} else if (IS_GEN(dev_priv, 6)) {  		engine->init_context = intel_rcs_ctx_init;  		engine->emit_flush = gen6_render_ring_flush; -		engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb; -		engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz; +		engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;  	} else if (IS_GEN(dev_priv, 5)) {  		engine->emit_flush = gen4_render_ring_flush;  	} else { @@ -2269,13 +2374,10 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)  		engine->emit_flush = gen6_bsd_ring_flush;  		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; -		if (IS_GEN(dev_priv, 6)) { -			engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; -			engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; -		} else { -			engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; -			engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; -		} +		if (IS_GEN(dev_priv, 6)) +			engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; +		else +			engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;  	} else {  		engine->emit_flush = bsd_ring_flush;  		if (IS_GEN(dev_priv, 5)) @@ -2298,13 +2400,10 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)  	engine->emit_flush = gen6_ring_flush;  	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; -	if (IS_GEN(dev_priv, 6)) { -		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; -		engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; -	} else { -		engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; -		engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; -	} +	if (IS_GEN(dev_priv, 6)) +		engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; +	else +		engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;  	return intel_init_ring_buffer(engine);  } @@ -2322,8 +2421,7 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)  	engine->irq_enable = hsw_vebox_irq_enable;  	engine->irq_disable = hsw_vebox_irq_disable; -	engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; -	engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; +	engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;  	return intel_init_ring_buffer(engine);  } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c3ef0f9bf321..34d0a148e664 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,7 @@  #include <drm/drm_util.h>  #include <linux/hashtable.h> +#include <linux/irq_work.h>  #include <linux/seqlock.h>  #include "i915_gem_batch_pool.h" @@ -32,8 +33,7 @@ struct i915_sched_attr;  struct intel_hw_status_page {  	struct i915_vma *vma; -	u32 *page_addr; -	u32 ggtt_offset; +	u32 *addr;  };  #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) @@ -120,13 +120,8 @@ struct intel_instdone {  struct intel_engine_hangcheck {  	u64 acthd;  	u32 seqno; -	enum intel_engine_hangcheck_action action;  	unsigned long action_timestamp; 
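/*
 * Editorial sketch, not part of the patch: the emit_breadcrumb/_sz pair is
 * replaced by emit_fini_breadcrumb(), which returns the advanced dword
 * pointer, plus an emit_fini_breadcrumb_dw count derived from it. A minimal
 * userspace illustration of that pattern follows; the opcodes and names are
 * invented stand-ins.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;

/* hypothetical emitter: writes its commands and returns the advanced cursor */
static u32 *emit_fini(u32 *cs, u32 seqno)
{
	*cs++ = 0x04000000;	/* stand-in for a FLUSH opcode */
	*cs++ = 0x10800000;	/* stand-in for a STORE_DWORD opcode */
	*cs++ = 0x40;		/* destination offset */
	*cs++ = seqno;		/* value to store */
	*cs++ = 0x01000000;	/* stand-in for a USER_INTERRUPT opcode */
	*cs++ = 0;		/* NOOP padding to an even dword count */
	return cs;
}

int main(void)
{
	u32 ring[32];
	u32 *start = ring;
	u32 *end = emit_fini(start, 1234);

	/*
	 * The caller measures the emitted length from the returned pointer
	 * instead of trusting a separately maintained *_sz constant that can
	 * drift out of sync with the emitter.
	 */
	printf("emitted %td dwords\n", end - start);
	return 0;
}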
-	int deadlock;  	struct intel_instdone instdone; -	struct i915_request *active_request; -	bool stalled:1; -	bool wedged:1;  };  struct intel_ring { @@ -209,6 +204,7 @@ struct i915_priolist {  struct st_preempt_hang {  	struct completion completion; +	unsigned int count;  	bool inject_hang;  }; @@ -299,14 +295,18 @@ struct intel_engine_execlists {  	unsigned int port_mask;  	/** -	 * @queue_priority: Highest pending priority. +	 * @queue_priority_hint: Highest pending priority.  	 *  	 * When we add requests into the queue, or adjust the priority of  	 * executing requests, we compute the maximum priority of those  	 * pending requests. We can then use this value to determine if  	 * we need to preempt the executing requests to service the queue. +	 * However, since the we may have recorded the priority of an inflight +	 * request we wanted to preempt but since completed, at the time of +	 * dequeuing the priority hint may no longer may match the highest +	 * available request priority.  	 */ -	int queue_priority; +	int queue_priority_hint;  	/**  	 * @queue: queue of requests, in priority lists @@ -382,22 +382,14 @@ struct intel_engine_cs {  	 * the overhead of waking that client is much preferred.  	 */  	struct intel_breadcrumbs { -		spinlock_t irq_lock; /* protects irq_*; irqsafe */ -		struct intel_wait *irq_wait; /* oldest waiter by retirement */ +		spinlock_t irq_lock; +		struct list_head signalers; -		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ -		struct rb_root waiters; /* sorted by retirement, priority */ -		struct list_head signals; /* sorted by retirement */ -		struct task_struct *signaler; /* used for fence signalling */ +		struct irq_work irq_work; /* for use from inside irq_lock */ -		struct timer_list fake_irq; /* used after a missed interrupt */ -		struct timer_list hangcheck; /* detect missed interrupts */ - -		unsigned int hangcheck_interrupts;  		unsigned int irq_enabled; -		unsigned int irq_count; -		bool irq_armed : 1; +		bool irq_armed;  	} breadcrumbs;  	struct { @@ -444,9 +436,8 @@ struct intel_engine_cs {  	int		(*init_hw)(struct intel_engine_cs *engine);  	struct { -		struct i915_request *(*prepare)(struct intel_engine_cs *engine); -		void (*reset)(struct intel_engine_cs *engine, -			      struct i915_request *rq); +		void (*prepare)(struct intel_engine_cs *engine); +		void (*reset)(struct intel_engine_cs *engine, bool stalled);  		void (*finish)(struct intel_engine_cs *engine);  	} reset; @@ -470,8 +461,10 @@ struct intel_engine_cs {  					 unsigned int dispatch_flags);  #define I915_DISPATCH_SECURE BIT(0)  #define I915_DISPATCH_PINNED BIT(1) -	void		(*emit_breadcrumb)(struct i915_request *rq, u32 *cs); -	int		emit_breadcrumb_sz; +	int		 (*emit_init_breadcrumb)(struct i915_request *rq); +	u32		*(*emit_fini_breadcrumb)(struct i915_request *rq, +						 u32 *cs); +	unsigned int	emit_fini_breadcrumb_dw;  	/* Pass the request to the hardware queue (e.g. directly into  	 * the legacy ringbuffer or to the end of an execlist). @@ -677,7 +670,7 @@ static inline u32  intel_read_status_page(const struct intel_engine_cs *engine, int reg)  {  	/* Ensure that the compiler doesn't optimize away the load. 
*/ -	return READ_ONCE(engine->status_page.page_addr[reg]); +	return READ_ONCE(engine->status_page.addr[reg]);  }  static inline void @@ -690,12 +683,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)  	 */  	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {  		mb(); -		clflush(&engine->status_page.page_addr[reg]); -		engine->status_page.page_addr[reg] = value; -		clflush(&engine->status_page.page_addr[reg]); +		clflush(&engine->status_page.addr[reg]); +		engine->status_page.addr[reg] = value; +		clflush(&engine->status_page.addr[reg]);  		mb();  	} else { -		WRITE_ONCE(engine->status_page.page_addr[reg], value); +		WRITE_ONCE(engine->status_page.addr[reg], value);  	}  } @@ -716,11 +709,13 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)   * The area from dword 0x30 to 0x3ff is available for driver usage.   */  #define I915_GEM_HWS_INDEX		0x30 -#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -#define I915_GEM_HWS_PREEMPT_INDEX	0x32 -#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -#define I915_GEM_HWS_SCRATCH_INDEX	0x40 -#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +#define I915_GEM_HWS_INDEX_ADDR		(I915_GEM_HWS_INDEX * sizeof(u32)) +#define I915_GEM_HWS_PREEMPT		0x32 +#define I915_GEM_HWS_PREEMPT_ADDR	(I915_GEM_HWS_PREEMPT * sizeof(u32)) +#define I915_GEM_HWS_SEQNO		0x40 +#define I915_GEM_HWS_SEQNO_ADDR		(I915_GEM_HWS_SEQNO * sizeof(u32)) +#define I915_GEM_HWS_SCRATCH		0x80 +#define I915_GEM_HWS_SCRATCH_ADDR	(I915_GEM_HWS_SCRATCH * sizeof(u32))  #define I915_HWS_CSB_BUF0_INDEX		0x10  #define I915_HWS_CSB_WRITE_INDEX	0x1f @@ -825,7 +820,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)  void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno); -void intel_engine_setup_common(struct intel_engine_cs *engine); +int intel_engine_setup_common(struct intel_engine_cs *engine);  int intel_engine_init_common(struct intel_engine_cs *engine);  void intel_engine_cleanup_common(struct intel_engine_cs *engine); @@ -883,93 +878,29 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,  void intel_engine_get_instdone(struct intel_engine_cs *engine,  			       struct intel_instdone *instdone); -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) -{ -	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR; -} - -static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) -{ -	return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR; -} - -/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ -int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); - -static inline void intel_wait_init(struct intel_wait *wait) -{ -	wait->tsk = current; -	wait->request = NULL; -} - -static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) -{ -	wait->tsk = current; -	wait->seqno = seqno; -} - -static inline bool intel_wait_has_seqno(const struct intel_wait *wait) -{ -	return wait->seqno; -} - -static inline bool -intel_wait_update_seqno(struct intel_wait *wait, u32 seqno) -{ -	wait->seqno = seqno; -	return intel_wait_has_seqno(wait); -} - -static inline bool -intel_wait_update_request(struct intel_wait *wait, -			  const struct i915_request *rq) -{ -	return intel_wait_update_seqno(wait, i915_request_global_seqno(rq)); -} - -static inline bool -intel_wait_check_seqno(const 
struct intel_wait *wait, u32 seqno) -{ -	return wait->seqno == seqno; -} - -static inline bool -intel_wait_check_request(const struct intel_wait *wait, -			 const struct i915_request *rq) -{ -	return intel_wait_check_seqno(wait, i915_request_global_seqno(rq)); -} +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -static inline bool intel_wait_complete(const struct intel_wait *wait) -{ -	return RB_EMPTY_NODE(&wait->node); -} +void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); +void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); -bool intel_engine_add_wait(struct intel_engine_cs *engine, -			   struct intel_wait *wait); -void intel_engine_remove_wait(struct intel_engine_cs *engine, -			      struct intel_wait *wait); -bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup); -void intel_engine_cancel_signaling(struct i915_request *request); +bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); -static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) +static inline void +intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)  { -	return READ_ONCE(engine->breadcrumbs.irq_wait); +	irq_work_queue(&engine->breadcrumbs.irq_work);  } -unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); -#define ENGINE_WAKEUP_WAITER BIT(0) -#define ENGINE_WAKEUP_ASLEEP BIT(1) - -void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); -void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); - -void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); +bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);  void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);  void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, +				    struct drm_printer *p); +  static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)  {  	memset(batch, 0, 6 * sizeof(u32)); @@ -1018,6 +949,13 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)  	return cs;  } +static inline void intel_engine_reset(struct intel_engine_cs *engine, +				      bool stalled) +{ +	if (engine->reset.reset) +		engine->reset.reset(engine, stalled); +} +  void intel_engines_sanitize(struct drm_i915_private *i915, bool force);  bool intel_engine_is_idle(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index b02d3d9809e3..cd42e81f8a90 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -493,7 +493,7 @@ skl_program_plane(struct intel_plane *plane,  	keymax = (key->max_value & 0xffffff) | PLANE_KEYMAX_ALPHA(alpha); -	keymsk = key->channel_mask & 0x3ffffff; +	keymsk = key->channel_mask & 0x7ffffff;  	if (alpha < 0xff)  		keymsk |= PLANE_KEYMSK_ALPHA_ENABLE; diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index bd5536f0ec92..3924c4944e1f 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -306,7 +306,7 @@ struct tv_mode {  	u32 clock;  	u16 refresh; /* in millihertz (for precision) */ -	u32 oversample; +	u8 oversample;  	u8 hsync_end;  	u16 hblank_start, hblank_end, htotal;  	
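/*
 * Editorial sketch, not part of the patch: the breadcrumbs structure above now
 * carries a struct irq_work, and intel_engine_queue_breadcrumbs() simply calls
 * irq_work_queue() so the signaling walk is deferred out of the interrupt
 * handler's hot path. A schematic kernel-side sketch of that pattern follows;
 * only init_irq_work(), irq_work_queue() and container_of() are real kernel
 * APIs, the my_* names are invented and this does not build in userspace.
 */
#include <linux/irq_work.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct my_breadcrumbs {
	spinlock_t irq_lock;
	struct list_head signalers;
	struct irq_work irq_work;
};

/* runs shortly after being queued, outside the original interrupt handler */
static void my_signal_breadcrumbs(struct irq_work *work)
{
	struct my_breadcrumbs *b =
		container_of(work, struct my_breadcrumbs, irq_work);
	unsigned long flags;

	spin_lock_irqsave(&b->irq_lock, flags);
	/* walk b->signalers and signal any completed fences here */
	spin_unlock_irqrestore(&b->irq_lock, flags);
}

static void my_breadcrumbs_init(struct my_breadcrumbs *b)
{
	spin_lock_init(&b->irq_lock);
	INIT_LIST_HEAD(&b->signalers);
	init_irq_work(&b->irq_work, my_signal_breadcrumbs);
}

/* called from the user-interrupt handler: just kick the deferred work */
static void my_queue_breadcrumbs(struct my_breadcrumbs *b)
{
	irq_work_queue(&b->irq_work);
}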
bool progressive : 1, trilevel_sync : 1, component_only : 1; @@ -339,7 +339,6 @@ struct tv_mode {  	const struct video_levels *composite_levels, *svideo_levels;  	const struct color_conversion *composite_color, *svideo_color;  	const u32 *filter_table; -	u16 max_srcw;  }; @@ -378,8 +377,8 @@ static const struct tv_mode tv_modes[] = {  		.name		= "NTSC-M",  		.clock		= 108000,  		.refresh	= 59940, -		.oversample	= TV_OVERSAMPLE_8X, -		.component_only = 0, +		.oversample	= 8, +		.component_only = false,  		/* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */  		.hsync_end	= 64,		    .hblank_end		= 124, @@ -421,8 +420,8 @@ static const struct tv_mode tv_modes[] = {  		.name		= "NTSC-443",  		.clock		= 108000,  		.refresh	= 59940, -		.oversample	= TV_OVERSAMPLE_8X, -		.component_only = 0, +		.oversample	= 8, +		.component_only = false,  		/* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 4.43MHz */  		.hsync_end	= 64,		    .hblank_end		= 124,  		.hblank_start	= 836,		    .htotal		= 857, @@ -463,8 +462,8 @@ static const struct tv_mode tv_modes[] = {  		.name		= "NTSC-J",  		.clock		= 108000,  		.refresh	= 59940, -		.oversample	= TV_OVERSAMPLE_8X, -		.component_only = 0, +		.oversample	= 8, +		.component_only = false,  		/* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */  		.hsync_end	= 64,		    .hblank_end		= 124, @@ -506,8 +505,8 @@ static const struct tv_mode tv_modes[] = {  		.name		= "PAL-M",  		.clock		= 108000,  		.refresh	= 59940, -		.oversample	= TV_OVERSAMPLE_8X, -		.component_only = 0, +		.oversample	= 8, +		.component_only = false,  		/* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */  		.hsync_end	= 64,		  .hblank_end		= 124, @@ -550,8 +549,8 @@ static const struct tv_mode tv_modes[] = {  		.name	    = "PAL-N",  		.clock		= 108000,  		.refresh	= 50000, -		.oversample	= TV_OVERSAMPLE_8X, -		.component_only = 0, +		.oversample	= 8, +		.component_only = false,  		.hsync_end	= 64,		    .hblank_end		= 128,  		.hblank_start = 844,	    .htotal		= 863, @@ -595,8 +594,8 @@ static const struct tv_mode tv_modes[] = {  		.name	    = "PAL",  		.clock		= 108000,  		.refresh	= 50000, -		.oversample	= TV_OVERSAMPLE_8X, -		.component_only = 0, +		.oversample	= 8, +		.component_only = false,  		.hsync_end	= 64,		    .hblank_end		= 142,  		.hblank_start	= 844,	    .htotal		= 863, @@ -635,10 +634,10 @@ static const struct tv_mode tv_modes[] = {  	},  	{  		.name       = "480p", -		.clock		= 107520, +		.clock		= 108000,  		.refresh	= 59940, -		.oversample     = TV_OVERSAMPLE_4X, -		.component_only = 1, +		.oversample     = 4, +		.component_only = true,  		.hsync_end      = 64,               .hblank_end         = 122,  		.hblank_start   = 842,              .htotal             = 857, @@ -659,10 +658,10 @@ static const struct tv_mode tv_modes[] = {  	},  	{  		.name       = "576p", -		.clock		= 107520, +		.clock		= 108000,  		.refresh	= 50000, -		.oversample     = TV_OVERSAMPLE_4X, -		.component_only = 1, +		.oversample     = 4, +		.component_only = true,  		.hsync_end      = 64,               .hblank_end         = 139,  		.hblank_start   = 859,              .htotal             = 863, @@ -683,10 +682,10 @@ static const struct tv_mode tv_modes[] = {  	},  	{  		.name       = "720p@60Hz", -		.clock		= 148800, +		.clock		= 148500,  		.refresh	= 60000, -		.oversample     = TV_OVERSAMPLE_2X, -		.component_only = 1, +		.oversample     = 2, +		.component_only = true,  		.hsync_end      = 80,               .hblank_end         = 300,  		.hblank_start   = 1580,             
.htotal             = 1649, @@ -707,10 +706,10 @@ static const struct tv_mode tv_modes[] = {  	},  	{  		.name       = "720p@50Hz", -		.clock		= 148800, +		.clock		= 148500,  		.refresh	= 50000, -		.oversample     = TV_OVERSAMPLE_2X, -		.component_only = 1, +		.oversample     = 2, +		.component_only = true,  		.hsync_end      = 80,               .hblank_end         = 300,  		.hblank_start   = 1580,             .htotal             = 1979, @@ -728,14 +727,13 @@ static const struct tv_mode tv_modes[] = {  		.burst_ena      = false,  		.filter_table = filter_table, -		.max_srcw = 800  	},  	{  		.name       = "1080i@50Hz", -		.clock		= 148800, +		.clock		= 148500,  		.refresh	= 50000, -		.oversample     = TV_OVERSAMPLE_2X, -		.component_only = 1, +		.oversample     = 2, +		.component_only = true,  		.hsync_end      = 88,               .hblank_end         = 235,  		.hblank_start   = 2155,             .htotal             = 2639, @@ -758,10 +756,10 @@ static const struct tv_mode tv_modes[] = {  	},  	{  		.name       = "1080i@60Hz", -		.clock		= 148800, +		.clock		= 148500,  		.refresh	= 60000, -		.oversample     = TV_OVERSAMPLE_2X, -		.component_only = 1, +		.oversample     = 2, +		.component_only = true,  		.hsync_end      = 88,               .hblank_end         = 235,  		.hblank_start   = 2155,             .htotal             = 2199, @@ -782,8 +780,115 @@ static const struct tv_mode tv_modes[] = {  		.filter_table = filter_table,  	}, + +	{ +		.name       = "1080p@30Hz", +		.clock		= 148500, +		.refresh	= 30000, +		.oversample     = 2, +		.component_only = true, + +		.hsync_end      = 88,               .hblank_end         = 235, +		.hblank_start   = 2155,             .htotal             = 2199, + +		.progressive	= true,		    .trilevel_sync = true, + +		.vsync_start_f1 = 8,               .vsync_start_f2     = 8, +		.vsync_len      = 10, + +		.veq_ena	= false,	.veq_start_f1	= 0, +		.veq_start_f2	= 0,		    .veq_len		= 0, + +		.vi_end_f1      = 44,               .vi_end_f2          = 44, +		.nbr_end        = 1079, + +		.burst_ena      = false, + +		.filter_table = filter_table, +	}, + +	{ +		.name       = "1080p@50Hz", +		.clock		= 148500, +		.refresh	= 50000, +		.oversample     = 1, +		.component_only = true, + +		.hsync_end      = 88,               .hblank_end         = 235, +		.hblank_start   = 2155,             .htotal             = 2639, + +		.progressive	= true,		    .trilevel_sync = true, + +		.vsync_start_f1 = 8,               .vsync_start_f2     = 8, +		.vsync_len      = 10, + +		.veq_ena	= false,	.veq_start_f1	= 0, +		.veq_start_f2	= 0,		    .veq_len		= 0, + +		.vi_end_f1      = 44,               .vi_end_f2          = 44, +		.nbr_end        = 1079, + +		.burst_ena      = false, + +		.filter_table = filter_table, +	}, + +	{ +		.name       = "1080p@60Hz", +		.clock		= 148500, +		.refresh	= 60000, +		.oversample     = 1, +		.component_only = true, + +		.hsync_end      = 88,               .hblank_end         = 235, +		.hblank_start   = 2155,             .htotal             = 2199, + +		.progressive	= true,		    .trilevel_sync = true, + +		.vsync_start_f1 = 8,               .vsync_start_f2     = 8, +		.vsync_len      = 10, + +		.veq_ena	= false,		    .veq_start_f1	= 0, +		.veq_start_f2	= 0,		    .veq_len		= 0, + +		.vi_end_f1      = 44,               .vi_end_f2          = 44, +		.nbr_end        = 1079, + +		.burst_ena      = false, + +		.filter_table = filter_table, +	}, +}; + +struct intel_tv_connector_state { +	struct drm_connector_state base; + +	/* +	 * May need to override the user 
margins for +	 * gen3 >1024 wide source vertical centering. +	 */ +	struct { +		u16 top, bottom; +	} margins; + +	bool bypass_vfilter;  }; +#define to_intel_tv_connector_state(x) container_of(x, struct intel_tv_connector_state, base) + +static struct drm_connector_state * +intel_tv_connector_duplicate_state(struct drm_connector *connector) +{ +	struct intel_tv_connector_state *state; + +	state = kmemdup(connector->state, sizeof(*state), GFP_KERNEL); +	if (!state) +		return NULL; + +	__drm_atomic_helper_connector_duplicate_state(connector, &state->base); +	return &state->base; +} +  static struct intel_tv *enc_to_tv(struct intel_encoder *encoder)  {  	return container_of(encoder, struct intel_tv, base); @@ -859,14 +964,215 @@ intel_tv_mode_valid(struct drm_connector *connector,  	return MODE_CLOCK_RANGE;  } +static int +intel_tv_mode_vdisplay(const struct tv_mode *tv_mode) +{ +	if (tv_mode->progressive) +		return tv_mode->nbr_end + 1; +	else +		return 2 * (tv_mode->nbr_end + 1); +} + +static void +intel_tv_mode_to_mode(struct drm_display_mode *mode, +		      const struct tv_mode *tv_mode) +{ +	mode->clock = tv_mode->clock / +		(tv_mode->oversample >> !tv_mode->progressive); + +	/* +	 * tv_mode horizontal timings: +	 * +	 * hsync_end +	 *    | hblank_end +	 *    |    | hblank_start +	 *    |    |       | htotal +	 *    |     _______    | +	 *     ____/       \___ +	 * \__/                \ +	 */ +	mode->hdisplay = +		tv_mode->hblank_start - tv_mode->hblank_end; +	mode->hsync_start = mode->hdisplay + +		tv_mode->htotal - tv_mode->hblank_start; +	mode->hsync_end = mode->hsync_start + +		tv_mode->hsync_end; +	mode->htotal = tv_mode->htotal + 1; + +	/* +	 * tv_mode vertical timings: +	 * +	 * vsync_start +	 *    | vsync_end +	 *    |  | vi_end nbr_end +	 *    |  |    |       | +	 *    |  |     _______ +	 * \__    ____/       \ +	 *    \__/ +	 */ +	mode->vdisplay = intel_tv_mode_vdisplay(tv_mode); +	if (tv_mode->progressive) { +		mode->vsync_start = mode->vdisplay + +			tv_mode->vsync_start_f1 + 1; +		mode->vsync_end = mode->vsync_start + +			tv_mode->vsync_len; +		mode->vtotal = mode->vdisplay + +			tv_mode->vi_end_f1 + 1; +	} else { +		mode->vsync_start = mode->vdisplay + +			tv_mode->vsync_start_f1 + 1 + +			tv_mode->vsync_start_f2 + 1; +		mode->vsync_end = mode->vsync_start + +			2 * tv_mode->vsync_len; +		mode->vtotal = mode->vdisplay + +			tv_mode->vi_end_f1 + 1 + +			tv_mode->vi_end_f2 + 1; +	} + +	/* TV has it's own notion of sync and other mode flags, so clear them. */ +	mode->flags = 0; + +	mode->vrefresh = 0; +	mode->vrefresh = drm_mode_vrefresh(mode); + +	snprintf(mode->name, sizeof(mode->name), +		 "%dx%d%c (%s)", +		 mode->hdisplay, mode->vdisplay, +		 tv_mode->progressive ? 
'p' : 'i', +		 tv_mode->name); +} + +static void intel_tv_scale_mode_horiz(struct drm_display_mode *mode, +				      int hdisplay, int left_margin, +				      int right_margin) +{ +	int hsync_start = mode->hsync_start - mode->hdisplay + right_margin; +	int hsync_end = mode->hsync_end - mode->hdisplay + right_margin; +	int new_htotal = mode->htotal * hdisplay / +		(mode->hdisplay - left_margin - right_margin); + +	mode->clock = mode->clock * new_htotal / mode->htotal; + +	mode->hdisplay = hdisplay; +	mode->hsync_start = hdisplay + hsync_start * new_htotal / mode->htotal; +	mode->hsync_end = hdisplay + hsync_end * new_htotal / mode->htotal; +	mode->htotal = new_htotal; +} + +static void intel_tv_scale_mode_vert(struct drm_display_mode *mode, +				     int vdisplay, int top_margin, +				     int bottom_margin) +{ +	int vsync_start = mode->vsync_start - mode->vdisplay + bottom_margin; +	int vsync_end = mode->vsync_end - mode->vdisplay + bottom_margin; +	int new_vtotal = mode->vtotal * vdisplay / +		(mode->vdisplay - top_margin - bottom_margin); + +	mode->clock = mode->clock * new_vtotal / mode->vtotal; + +	mode->vdisplay = vdisplay; +	mode->vsync_start = vdisplay + vsync_start * new_vtotal / mode->vtotal; +	mode->vsync_end = vdisplay + vsync_end * new_vtotal / mode->vtotal; +	mode->vtotal = new_vtotal; +}  static void  intel_tv_get_config(struct intel_encoder *encoder,  		    struct intel_crtc_state *pipe_config)  { +	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); +	struct drm_display_mode *adjusted_mode = +		&pipe_config->base.adjusted_mode; +	struct drm_display_mode mode = {}; +	u32 tv_ctl, hctl1, hctl3, vctl1, vctl2, tmp; +	struct tv_mode tv_mode = {}; +	int hdisplay = adjusted_mode->crtc_hdisplay; +	int vdisplay = adjusted_mode->crtc_vdisplay; +	int xsize, ysize, xpos, ypos; +  	pipe_config->output_types |= BIT(INTEL_OUTPUT_TVOUT); -	pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; +	tv_ctl = I915_READ(TV_CTL); +	hctl1 = I915_READ(TV_H_CTL_1); +	hctl3 = I915_READ(TV_H_CTL_3); +	vctl1 = I915_READ(TV_V_CTL_1); +	vctl2 = I915_READ(TV_V_CTL_2); + +	tv_mode.htotal = (hctl1 & TV_HTOTAL_MASK) >> TV_HTOTAL_SHIFT; +	tv_mode.hsync_end = (hctl1 & TV_HSYNC_END_MASK) >> TV_HSYNC_END_SHIFT; + +	tv_mode.hblank_start = (hctl3 & TV_HBLANK_START_MASK) >> TV_HBLANK_START_SHIFT; +	tv_mode.hblank_end = (hctl3 & TV_HSYNC_END_MASK) >> TV_HBLANK_END_SHIFT; + +	tv_mode.nbr_end = (vctl1 & TV_NBR_END_MASK) >> TV_NBR_END_SHIFT; +	tv_mode.vi_end_f1 = (vctl1 & TV_VI_END_F1_MASK) >> TV_VI_END_F1_SHIFT; +	tv_mode.vi_end_f2 = (vctl1 & TV_VI_END_F2_MASK) >> TV_VI_END_F2_SHIFT; + +	tv_mode.vsync_len = (vctl2 & TV_VSYNC_LEN_MASK) >> TV_VSYNC_LEN_SHIFT; +	tv_mode.vsync_start_f1 = (vctl2 & TV_VSYNC_START_F1_MASK) >> TV_VSYNC_START_F1_SHIFT; +	tv_mode.vsync_start_f2 = (vctl2 & TV_VSYNC_START_F2_MASK) >> TV_VSYNC_START_F2_SHIFT; + +	tv_mode.clock = pipe_config->port_clock; + +	tv_mode.progressive = tv_ctl & TV_PROGRESSIVE; + +	switch (tv_ctl & TV_OVERSAMPLE_MASK) { +	case TV_OVERSAMPLE_8X: +		tv_mode.oversample = 8; +		break; +	case TV_OVERSAMPLE_4X: +		tv_mode.oversample = 4; +		break; +	case TV_OVERSAMPLE_2X: +		tv_mode.oversample = 2; +		break; +	default: +		tv_mode.oversample = 1; +		break; +	} + +	tmp = I915_READ(TV_WIN_POS); +	xpos = tmp >> 16; +	ypos = tmp & 0xffff; + +	tmp = I915_READ(TV_WIN_SIZE); +	xsize = tmp >> 16; +	ysize = tmp & 0xffff; + +	intel_tv_mode_to_mode(&mode, &tv_mode); + +	DRM_DEBUG_KMS("TV mode:\n"); +	drm_mode_debug_printmodeline(&mode); + +	
intel_tv_scale_mode_horiz(&mode, hdisplay, +				  xpos, mode.hdisplay - xsize - xpos); +	intel_tv_scale_mode_vert(&mode, vdisplay, +				 ypos, mode.vdisplay - ysize - ypos); + +	adjusted_mode->crtc_clock = mode.clock; +	if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) +		adjusted_mode->crtc_clock /= 2; + +	/* pixel counter doesn't work on i965gm TV output */ +	if (IS_I965GM(dev_priv)) +		adjusted_mode->private_flags |= +			I915_MODE_FLAG_USE_SCANLINE_COUNTER; +} + +static bool intel_tv_source_too_wide(struct drm_i915_private *dev_priv, +				     int hdisplay) +{ +	return IS_GEN(dev_priv, 3) && hdisplay > 1024; +} + +static bool intel_tv_vert_scaling(const struct drm_display_mode *tv_mode, +				  const struct drm_connector_state *conn_state, +				  int vdisplay) +{ +	return tv_mode->crtc_vdisplay - +		conn_state->tv.margins.top - +		conn_state->tv.margins.bottom != +		vdisplay;  }  static int @@ -874,9 +1180,14 @@ intel_tv_compute_config(struct intel_encoder *encoder,  			struct intel_crtc_state *pipe_config,  			struct drm_connector_state *conn_state)  { +	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); +	struct intel_tv_connector_state *tv_conn_state = +		to_intel_tv_connector_state(conn_state);  	const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state);  	struct drm_display_mode *adjusted_mode =  		&pipe_config->base.adjusted_mode; +	int hdisplay = adjusted_mode->crtc_hdisplay; +	int vdisplay = adjusted_mode->crtc_vdisplay;  	if (!tv_mode)  		return -EINVAL; @@ -885,17 +1196,136 @@ intel_tv_compute_config(struct intel_encoder *encoder,  		return -EINVAL;  	pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; -	adjusted_mode->crtc_clock = tv_mode->clock; +  	DRM_DEBUG_KMS("forcing bpc to 8 for TV\n");  	pipe_config->pipe_bpp = 8*3; -	/* TV has it's own notion of sync and other mode flags, so clear them. */ -	adjusted_mode->flags = 0; +	pipe_config->port_clock = tv_mode->clock; + +	intel_tv_mode_to_mode(adjusted_mode, tv_mode); +	drm_mode_set_crtcinfo(adjusted_mode, 0); + +	if (intel_tv_source_too_wide(dev_priv, hdisplay) || +	    !intel_tv_vert_scaling(adjusted_mode, conn_state, vdisplay)) { +		int extra, top, bottom; + +		extra = adjusted_mode->crtc_vdisplay - vdisplay; + +		if (extra < 0) { +			DRM_DEBUG_KMS("No vertical scaling for >1024 pixel wide modes\n"); +			return -EINVAL; +		} + +		/* Need to turn off the vertical filter and center the image */ + +		/* Attempt to maintain the relative sizes of the margins */ +		top = conn_state->tv.margins.top; +		bottom = conn_state->tv.margins.bottom; + +		if (top + bottom) +			top = extra * top / (top + bottom); +		else +			top = extra / 2; +		bottom = extra - top; + +		tv_conn_state->margins.top = top; +		tv_conn_state->margins.bottom = bottom; + +		tv_conn_state->bypass_vfilter = true; + +		if (!tv_mode->progressive) { +			adjusted_mode->clock /= 2; +			adjusted_mode->crtc_clock /= 2; +			adjusted_mode->flags |= DRM_MODE_FLAG_INTERLACE; +		} +	} else { +		tv_conn_state->margins.top = conn_state->tv.margins.top; +		tv_conn_state->margins.bottom = conn_state->tv.margins.bottom; + +		tv_conn_state->bypass_vfilter = false; +	} + +	DRM_DEBUG_KMS("TV mode:\n"); +	drm_mode_debug_printmodeline(adjusted_mode);  	/* -	 * FIXME: We don't check whether the input mode is actually what we want -	 * or whether userspace is doing something stupid. 
+	 * The pipe scanline counter behaviour looks as follows when +	 * using the TV encoder: +	 * +	 * time -> +	 * +	 * dsl=vtotal-1       |             | +	 *                   ||            || +	 *               ___| |        ___| | +	 *              /     |       /     | +	 *             /      |      /      | +	 * dsl=0   ___/       |_____/       | +	 *        | | |  |  | | +	 *         ^ ^ ^   ^ ^ +	 *         | | |   | pipe vblank/first part of tv vblank +	 *         | | |   bottom margin +	 *         | | active +	 *         | top margin +	 *         remainder of tv vblank +	 * +	 * When the TV encoder is used the pipe wants to run faster +	 * than expected rate. During the active portion the TV +	 * encoder stalls the pipe every few lines to keep it in +	 * check. When the TV encoder reaches the bottom margin the +	 * pipe simply stops. Once we reach the TV vblank the pipe is +	 * no longer stalled and it runs at the max rate (apparently +	 * oversample clock on gen3, cdclk on gen4). Once the pipe +	 * reaches the pipe vtotal the pipe stops for the remainder +	 * of the TV vblank/top margin. The pipe starts up again when +	 * the TV encoder exits the top margin. +	 * +	 * To avoid huge hassles for vblank timestamping we scale +	 * the pipe timings as if the pipe always runs at the average +	 * rate it maintains during the active period. This also +	 * gives us a reasonable guesstimate as to the pixel rate. +	 * Due to the variation in the actual pipe speed the scanline +	 * counter will give us slightly erroneous results during the +	 * TV vblank/margins. But since vtotal was selected such that +	 * it matches the average rate of the pipe during the active +	 * portion the error shouldn't cause any serious grief to +	 * vblank timestamps. +	 * +	 * For posterity here is the empirically derived formula +	 * that gives us the maximum length of the pipe vblank +	 * we can use without causing display corruption. Following +	 * this would allow us to have a ticking scanline counter +	 * everywhere except during the bottom margin (there the +	 * pipe always stops). Ie. this would eliminate the second +	 * flat portion of the above graph. However this would also +	 * complicate vblank timestamping as the pipe vtotal would +	 * no longer match the average rate the pipe runs at during +	 * the active portion. Hence following this formula seems +	 * more trouble that it's worth. 
+	 * +	 * if (IS_GEN(dev_priv, 4)) { +	 *	num = cdclk * (tv_mode->oversample >> !tv_mode->progressive); +	 *	den = tv_mode->clock; +	 * } else { +	 *	num = tv_mode->oversample >> !tv_mode->progressive; +	 *	den = 1; +	 * } +	 * max_pipe_vblank_len ~= +	 *	(num * tv_htotal * (tv_vblank_len + top_margin)) / +	 *	(den * pipe_htotal);  	 */ +	intel_tv_scale_mode_horiz(adjusted_mode, hdisplay, +				  conn_state->tv.margins.left, +				  conn_state->tv.margins.right); +	intel_tv_scale_mode_vert(adjusted_mode, vdisplay, +				 tv_conn_state->margins.top, +				 tv_conn_state->margins.bottom); +	drm_mode_set_crtcinfo(adjusted_mode, 0); +	adjusted_mode->name[0] = '\0'; + +	/* pixel counter doesn't work on i965gm TV output */ +	if (IS_I965GM(dev_priv)) +		adjusted_mode->private_flags |= +			I915_MODE_FLAG_USE_SCANLINE_COUNTER;  	return 0;  } @@ -986,14 +1416,16 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,  	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);  	struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc);  	struct intel_tv *intel_tv = enc_to_tv(encoder); +	const struct intel_tv_connector_state *tv_conn_state = +		to_intel_tv_connector_state(conn_state);  	const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); -	u32 tv_ctl; +	u32 tv_ctl, tv_filter_ctl;  	u32 scctl1, scctl2, scctl3;  	int i, j;  	const struct video_levels *video_levels;  	const struct color_conversion *color_conversion;  	bool burst_ena; -	int xpos = 0x0, ypos = 0x0; +	int xpos, ypos;  	unsigned int xsize, ysize;  	if (!tv_mode) @@ -1029,7 +1461,21 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,  	}  	tv_ctl |= TV_ENC_PIPE_SEL(intel_crtc->pipe); -	tv_ctl |= tv_mode->oversample; + +	switch (tv_mode->oversample) { +	case 8: +		tv_ctl |= TV_OVERSAMPLE_8X; +		break; +	case 4: +		tv_ctl |= TV_OVERSAMPLE_4X; +		break; +	case 2: +		tv_ctl |= TV_OVERSAMPLE_2X; +		break; +	default: +		tv_ctl |= TV_OVERSAMPLE_NONE; +		break; +	}  	if (tv_mode->progressive)  		tv_ctl |= TV_PROGRESSIVE; @@ -1081,19 +1527,20 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,  	assert_pipe_disabled(dev_priv, intel_crtc->pipe);  	/* Filter ctl must be set before TV_WIN_SIZE */ -	I915_WRITE(TV_FILTER_CTL_1, TV_AUTO_SCALE); +	tv_filter_ctl = TV_AUTO_SCALE; +	if (tv_conn_state->bypass_vfilter) +		tv_filter_ctl |= TV_V_FILTER_BYPASS; +	I915_WRITE(TV_FILTER_CTL_1, tv_filter_ctl); +  	xsize = tv_mode->hblank_start - tv_mode->hblank_end; -	if (tv_mode->progressive) -		ysize = tv_mode->nbr_end + 1; -	else -		ysize = 2*tv_mode->nbr_end + 1; +	ysize = intel_tv_mode_vdisplay(tv_mode); -	xpos += conn_state->tv.margins.left; -	ypos += conn_state->tv.margins.top; +	xpos = conn_state->tv.margins.left; +	ypos = tv_conn_state->margins.top;  	xsize -= (conn_state->tv.margins.left +  		  conn_state->tv.margins.right); -	ysize -= (conn_state->tv.margins.top + -		  conn_state->tv.margins.bottom); +	ysize -= (tv_conn_state->margins.top + +		  tv_conn_state->margins.bottom);  	I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos);  	I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize); @@ -1110,23 +1557,6 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,  	I915_WRITE(TV_CTL, tv_ctl);  } -static const struct drm_display_mode reported_modes[] = { -	{ -		.name = "NTSC 480i", -		.clock = 107520, -		.hdisplay = 1280, -		.hsync_start = 1368, -		.hsync_end = 1496, -		.htotal = 1712, - -		.vdisplay = 1024, -		.vsync_start = 1027, -		.vsync_end = 1034, -		.vtotal = 1104, -		.type = DRM_MODE_TYPE_DRIVER, -	}, 
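/*
 * Editorial sketch, not part of the patch: the tv_modes[] table now stores the
 * oversample factor as a plain number (8/4/2/1), intel_tv_pre_enable()
 * translates it into the TV_OVERSAMPLE_* register encoding, and
 * intel_tv_get_config() performs the reverse decode for state readout. A
 * standalone illustration of that encode/decode split; the bit positions and
 * macro values here are invented, not the real register layout.
 */
#include <stdint.h>
#include <stdio.h>

#define OVERSAMPLE_NONE (0u << 18)
#define OVERSAMPLE_2X   (1u << 18)
#define OVERSAMPLE_4X   (2u << 18)
#define OVERSAMPLE_8X   (3u << 18)

/* encode a plain factor from the mode table into the control-register field */
static uint32_t oversample_to_reg(int factor)
{
	switch (factor) {
	case 8: return OVERSAMPLE_8X;
	case 4: return OVERSAMPLE_4X;
	case 2: return OVERSAMPLE_2X;
	default: return OVERSAMPLE_NONE;
	}
}

/* decode the register field back into a factor, as readout code must */
static int reg_to_oversample(uint32_t ctl)
{
	switch (ctl & (3u << 18)) {
	case OVERSAMPLE_8X: return 8;
	case OVERSAMPLE_4X: return 4;
	case OVERSAMPLE_2X: return 2;
	default: return 1;
	}
}

int main(void)
{
	int factor = 8; /* e.g. what the NTSC-M entry holds after this patch */
	uint32_t reg = oversample_to_reg(factor);

	printf("encoded 0x%08x, decoded %d\n", (unsigned)reg,
	       reg_to_oversample(reg));
	return 0;
}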
-}; -  static int  intel_tv_detect_type(struct intel_tv *intel_tv,  		      struct drm_connector *connector) @@ -1233,16 +1663,18 @@ static void intel_tv_find_better_format(struct drm_connector *connector)  	const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);  	int i; -	if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == -		tv_mode->component_only) +	/* Component supports everything so we can keep the current mode */ +	if (intel_tv->type == DRM_MODE_CONNECTOR_Component)  		return; +	/* If the current mode is fine don't change it */ +	if (!tv_mode->component_only) +		return;  	for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { -		tv_mode = tv_modes + i; +		tv_mode = &tv_modes[i]; -		if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == -			tv_mode->component_only) +		if (!tv_mode->component_only)  			break;  	} @@ -1254,7 +1686,6 @@ intel_tv_detect(struct drm_connector *connector,  		struct drm_modeset_acquire_ctx *ctx,  		bool force)  { -	struct drm_display_mode mode;  	struct intel_tv *intel_tv = intel_attached_tv(connector);  	enum drm_connector_status status;  	int type; @@ -1263,13 +1694,11 @@ intel_tv_detect(struct drm_connector *connector,  		      connector->base.id, connector->name,  		      force); -	mode = reported_modes[0]; -  	if (force) {  		struct intel_load_detect_pipe tmp;  		int ret; -		ret = intel_get_load_detect_pipe(connector, &mode, &tmp, ctx); +		ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx);  		if (ret < 0)  			return ret; @@ -1293,84 +1722,85 @@ intel_tv_detect(struct drm_connector *connector,  }  static const struct input_res { -	const char *name; -	int w, h; +	u16 w, h;  } input_res_table[] = { -	{"640x480", 640, 480}, -	{"800x600", 800, 600}, -	{"1024x768", 1024, 768}, -	{"1280x1024", 1280, 1024}, -	{"848x480", 848, 480}, -	{"1280x720", 1280, 720}, -	{"1920x1080", 1920, 1080}, +	{ 640, 480 }, +	{ 800, 600 }, +	{ 1024, 768 }, +	{ 1280, 1024 }, +	{ 848, 480 }, +	{ 1280, 720 }, +	{ 1920, 1080 },  }; -/* - * Chose preferred mode  according to line number of TV format - */ +/* Choose preferred mode according to line number of TV format */ +static bool +intel_tv_is_preferred_mode(const struct drm_display_mode *mode, +			   const struct tv_mode *tv_mode) +{ +	int vdisplay = intel_tv_mode_vdisplay(tv_mode); + +	/* prefer 480 line modes for all SD TV modes */ +	if (vdisplay <= 576) +		vdisplay = 480; + +	return vdisplay == mode->vdisplay; +} +  static void -intel_tv_choose_preferred_modes(const struct tv_mode *tv_mode, -			       struct drm_display_mode *mode_ptr) +intel_tv_set_mode_type(struct drm_display_mode *mode, +		       const struct tv_mode *tv_mode)  { -	if (tv_mode->nbr_end < 480 && mode_ptr->vdisplay == 480) -		mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; -	else if (tv_mode->nbr_end > 480) { -		if (tv_mode->progressive == true && tv_mode->nbr_end < 720) { -			if (mode_ptr->vdisplay == 720) -				mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; -		} else if (mode_ptr->vdisplay == 1080) -				mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; -	} +	mode->type = DRM_MODE_TYPE_DRIVER; + +	if (intel_tv_is_preferred_mode(mode, tv_mode)) +		mode->type |= DRM_MODE_TYPE_PREFERRED;  }  static int  intel_tv_get_modes(struct drm_connector *connector)  { -	struct drm_display_mode *mode_ptr; +	struct drm_i915_private *dev_priv = to_i915(connector->dev);  	const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); -	int j, count = 0; -	u64 tmp; +	int i, count = 0; -	for (j = 0; j < ARRAY_SIZE(input_res_table); -	     j++) { -		const struct input_res 
*input = &input_res_table[j]; -		unsigned int hactive_s = input->w; -		unsigned int vactive_s = input->h; +	for (i = 0; i < ARRAY_SIZE(input_res_table); i++) { +		const struct input_res *input = &input_res_table[i]; +		struct drm_display_mode *mode; -		if (tv_mode->max_srcw && input->w > tv_mode->max_srcw) +		if (input->w > 1024 && +		    !tv_mode->progressive && +		    !tv_mode->component_only)  			continue; -		if (input->w > 1024 && (!tv_mode->progressive -					&& !tv_mode->component_only)) +		/* no vertical scaling with wide sources on gen3 */ +		if (IS_GEN(dev_priv, 3) && input->w > 1024 && +		    input->h > intel_tv_mode_vdisplay(tv_mode))  			continue; -		mode_ptr = drm_mode_create(connector->dev); -		if (!mode_ptr) +		mode = drm_mode_create(connector->dev); +		if (!mode)  			continue; -		strlcpy(mode_ptr->name, input->name, DRM_DISPLAY_MODE_LEN); - -		mode_ptr->hdisplay = hactive_s; -		mode_ptr->hsync_start = hactive_s + 1; -		mode_ptr->hsync_end = hactive_s + 64; -		if (mode_ptr->hsync_end <= mode_ptr->hsync_start) -			mode_ptr->hsync_end = mode_ptr->hsync_start + 1; -		mode_ptr->htotal = hactive_s + 96; - -		mode_ptr->vdisplay = vactive_s; -		mode_ptr->vsync_start = vactive_s + 1; -		mode_ptr->vsync_end = vactive_s + 32; -		if (mode_ptr->vsync_end <= mode_ptr->vsync_start) -			mode_ptr->vsync_end = mode_ptr->vsync_start  + 1; -		mode_ptr->vtotal = vactive_s + 33; - -		tmp = mul_u32_u32(tv_mode->refresh, mode_ptr->vtotal); -		tmp *= mode_ptr->htotal; -		tmp = div_u64(tmp, 1000000); -		mode_ptr->clock = (int) tmp; - -		mode_ptr->type = DRM_MODE_TYPE_DRIVER; -		intel_tv_choose_preferred_modes(tv_mode, mode_ptr); -		drm_mode_probed_add(connector, mode_ptr); + +		/* +		 * We take the TV mode and scale it to look +		 * like it had the expected h/vdisplay. This +		 * provides the most information to userspace +		 * about the actual timings of the mode. We +		 * do ignore the margins though. 
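/*
 * Editorial sketch, not part of the patch: intel_tv_get_modes() now builds its
 * probed modes by taking the real TV timings and stretching them to each entry
 * of input_res_table[], which is what intel_tv_scale_mode_horiz/vert() do. The
 * standalone function below mirrors the horizontal case with zero margins:
 * every horizontal parameter and the pixel clock scale by the same ratio, so
 * the line rate and refresh rate are preserved. The numbers in main() are
 * made up for illustration.
 */
#include <stdio.h>

struct simple_mode {
	int clock;	/* kHz */
	int hdisplay, hsync_start, hsync_end, htotal;
};

static void scale_mode_horiz(struct simple_mode *m, int hdisplay)
{
	int new_htotal = m->htotal * hdisplay / m->hdisplay;

	/* scale the clock first, while htotal still holds the old value */
	m->clock = m->clock * new_htotal / m->htotal;
	m->hsync_start = hdisplay +
		(m->hsync_start - m->hdisplay) * new_htotal / m->htotal;
	m->hsync_end = hdisplay +
		(m->hsync_end - m->hdisplay) * new_htotal / m->htotal;
	m->hdisplay = hdisplay;
	m->htotal = new_htotal;
}

int main(void)
{
	/* made-up 720-wide mode stretched to advertise a 1024-wide source */
	struct simple_mode m = {
		.clock = 13500, .hdisplay = 720,
		.hsync_start = 736, .hsync_end = 800, .htotal = 858,
	};

	scale_mode_horiz(&m, 1024);
	printf("%d/%d/%d/%d @ %d kHz\n",
	       m.hdisplay, m.hsync_start, m.hsync_end, m.htotal, m.clock);
	return 0;
}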
+		 */ +		intel_tv_mode_to_mode(mode, tv_mode); +		if (count == 0) { +			DRM_DEBUG_KMS("TV mode:\n"); +			drm_mode_debug_printmodeline(mode); +		} +		intel_tv_scale_mode_horiz(mode, input->w, 0, 0); +		intel_tv_scale_mode_vert(mode, input->h, 0, 0); +		intel_tv_set_mode_type(mode, tv_mode); + +		drm_mode_set_name(mode); + +		drm_mode_probed_add(connector, mode);  		count++;  	} @@ -1383,7 +1813,7 @@ static const struct drm_connector_funcs intel_tv_connector_funcs = {  	.destroy = intel_connector_destroy,  	.fill_modes = drm_helper_probe_single_connector_modes,  	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, +	.atomic_duplicate_state = intel_tv_connector_duplicate_state,  };  static int intel_tv_atomic_check(struct drm_connector *connector, @@ -1530,11 +1960,15 @@ intel_tv_init(struct drm_i915_private *dev_priv)  	connector->doublescan_allowed = false;  	/* Create TV properties then attach current values */ -	for (i = 0; i < ARRAY_SIZE(tv_modes); i++) +	for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { +		/* 1080p50/1080p60 not supported on gen3 */ +		if (IS_GEN(dev_priv, 3) && +		    tv_modes[i].oversample == 1) +			break; +  		tv_format_names[i] = tv_modes[i].name; -	drm_mode_create_tv_properties(dev, -				      ARRAY_SIZE(tv_modes), -				      tv_format_names); +	} +	drm_mode_create_tv_properties(dev, i, tv_format_names);  	drm_object_attach_property(&connector->base, dev->mode_config.tv_mode_property,  				   state->tv.mode); diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 3210ad4e08f7..15f4a6dee5aa 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -142,7 +142,8 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)  }  static void -__wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) +wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, +		   u32 val)  {  	struct i915_wa wa = {  		.reg = reg, @@ -153,16 +154,32 @@ __wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)  	_wa_add(wal, &wa);  } -#define WA_REG(addr, mask, val) __wa_add(wal, (addr), (mask), (val)) +static void +wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ +	wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val)); +} + +static void +wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ +	wa_write_masked_or(wal, reg, ~0, val); +} + +static void +wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ +	wa_write_masked_or(wal, reg, val, val); +}  #define WA_SET_BIT_MASKED(addr, mask) \ -	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) +	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))  #define WA_CLR_BIT_MASKED(addr, mask) \ -	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) +	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))  #define WA_SET_FIELD_MASKED(addr, mask, value) \ -	WA_REG(addr, (mask), _MASKED_FIELD(mask, value)) +	wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))  static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine)  { @@ -532,6 +549,12 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)  	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))  		WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,  				  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); + +	/* WaEnableFloatBlendOptimization:icl */ +	wa_write_masked_or(wal, +			   
GEN10_CACHE_MODE_SS, +			   0, /* write-only, so skip validation */ +			   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));  }  void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) @@ -603,43 +626,6 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)  }  static void -wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ -	struct i915_wa wa = { -		.reg = reg, -		.mask = val, -		.val = _MASKED_BIT_ENABLE(val) -	}; - -	_wa_add(wal, &wa); -} - -static void -wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, -		   u32 val) -{ -	struct i915_wa wa = { -		.reg = reg, -		.mask = mask, -		.val = val -	}; - -	_wa_add(wal, &wa); -} - -static void -wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ -	wa_write_masked_or(wal, reg, ~0, val); -} - -static void -wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ -	wa_write_masked_or(wal, reg, val, val); -} - -static void  gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)  {  	/* WaDisableKillLogic:bxt,skl,kbl */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index d0553bc69705..32dce7176f63 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -84,7 +84,7 @@ static int populate_ggtt(struct drm_i915_private *i915,  		return -EINVAL;  	} -	if (list_empty(&i915->ggtt.vm.inactive_list)) { +	if (list_empty(&i915->ggtt.vm.bound_list)) {  		pr_err("No objects on the GGTT inactive list!\n");  		return -EINVAL;  	} @@ -94,11 +94,14 @@ static int populate_ggtt(struct drm_i915_private *i915,  static void unpin_ggtt(struct drm_i915_private *i915)  { +	struct i915_ggtt *ggtt = &i915->ggtt;  	struct i915_vma *vma; -	list_for_each_entry(vma, &i915->ggtt.vm.inactive_list, vm_link) +	mutex_lock(&ggtt->vm.mutex); +	list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)  		if (vma->obj->mm.quirked)  			i915_vma_unpin(vma); +	mutex_unlock(&ggtt->vm.mutex);  }  static void cleanup_objects(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 06bde4a273cb..3850ef4a5ec8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1237,7 +1237,10 @@ static void track_vma_bind(struct i915_vma *vma)  	__i915_gem_object_pin_pages(obj);  	vma->pages = obj->mm.pages; -	list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + +	mutex_lock(&vma->vm->mutex); +	list_move_tail(&vma->vm_link, &vma->vm->bound_list); +	mutex_unlock(&vma->vm->mutex);  }  static int exercise_mock(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index a15713cae3b3..76b4f87fc853 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -13,6 +13,7 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */  selftest(uncore, intel_uncore_live_selftests)  selftest(workarounds, intel_workarounds_live_selftests)  selftest(requests, i915_request_live_selftests) +selftest(timelines, i915_timeline_live_selftests)  selftest(objects, i915_gem_object_live_selftests)  selftest(dmabuf, i915_gem_dmabuf_live_selftests)  selftest(coherency, i915_gem_coherency_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h 
b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 1b70208eeea7..88e5ab586337 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -15,8 +15,7 @@ selftest(scatterlist, scatterlist_mock_selftests)  selftest(syncmap, i915_syncmap_mock_selftests)  selftest(uncore, intel_uncore_mock_selftests)  selftest(engine, intel_engine_cs_mock_selftests) -selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) -selftest(timelines, i915_gem_timeline_mock_selftests) +selftest(timelines, i915_timeline_mock_selftests)  selftest(requests, i915_request_mock_selftests)  selftest(objects, i915_gem_object_mock_selftests)  selftest(dmabuf, i915_gem_dmabuf_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c index 1f415ce47018..716a3f19f030 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.c +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -41,18 +41,37 @@ u64 i915_prandom_u64_state(struct rnd_state *rnd)  	return x;  } -void i915_random_reorder(unsigned int *order, unsigned int count, -			 struct rnd_state *state) +void i915_prandom_shuffle(void *arr, size_t elsz, size_t count, +			  struct rnd_state *state)  { -	unsigned int i, j; +	char stack[128]; + +	if (WARN_ON(elsz > sizeof(stack) || count > U32_MAX)) +		return; + +	if (!elsz || !count) +		return; + +	/* Fisher-Yates shuffle courtesy of Knuth */ +	while (--count) { +		size_t swp; + +		swp = i915_prandom_u32_max_state(count + 1, state); +		if (swp == count) +			continue; -	for (i = 0; i < count; i++) { -		BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32)); -		j = i915_prandom_u32_max_state(count, state); -		swap(order[i], order[j]); +		memcpy(stack, arr + count * elsz, elsz); +		memcpy(arr + count * elsz, arr + swp * elsz, elsz); +		memcpy(arr + swp * elsz, stack, elsz);  	}  } +void i915_random_reorder(unsigned int *order, unsigned int count, +			 struct rnd_state *state) +{ +	i915_prandom_shuffle(order, sizeof(*order), count, state); +} +  unsigned int *i915_random_order(unsigned int count, struct rnd_state *state)  {  	unsigned int *order, i; diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h index 7dffedc501ca..8e1ff9c105b6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.h +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -54,4 +54,7 @@ void i915_random_reorder(unsigned int *order,  			 unsigned int count,  			 struct rnd_state *state); +void i915_prandom_shuffle(void *arr, size_t elsz, size_t count, +			  struct rnd_state *state); +  #endif /* !__I915_SELFTESTS_RANDOM_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 4d4b86b5fa11..6733dc5b6b4c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -25,9 +25,12 @@  #include <linux/prime_numbers.h>  #include "../i915_selftest.h" +#include "i915_random.h"  #include "igt_live_test.h" +#include "lib_sw_fence.h"  #include "mock_context.h" +#include "mock_drm.h"  #include "mock_gem_device.h"  static int igt_add_request(void *arg) @@ -247,6 +250,254 @@ err_context_0:  	return err;  } +struct smoketest { +	struct intel_engine_cs *engine; +	struct i915_gem_context **contexts; +	atomic_long_t num_waits, num_fences; +	int ncontexts, max_batch; +	struct i915_request *(*request_alloc)(struct i915_gem_context *, +					      struct intel_engine_cs *); +}; + +static 
struct i915_request * +__mock_request_alloc(struct i915_gem_context *ctx, +		     struct intel_engine_cs *engine) +{ +	return mock_request(engine, ctx, 0); +} + +static struct i915_request * +__live_request_alloc(struct i915_gem_context *ctx, +		     struct intel_engine_cs *engine) +{ +	return i915_request_alloc(engine, ctx); +} + +static int __igt_breadcrumbs_smoketest(void *arg) +{ +	struct smoketest *t = arg; +	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex; +	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1; +	const unsigned int total = 4 * t->ncontexts + 1; +	unsigned int num_waits = 0, num_fences = 0; +	struct i915_request **requests; +	I915_RND_STATE(prng); +	unsigned int *order; +	int err = 0; + +	/* +	 * A very simple test to catch the most egregious of list handling bugs. +	 * +	 * At its heart, we simply create oodles of requests running across +	 * multiple kthreads and enable signaling on them, for the sole purpose +	 * of stressing our breadcrumb handling. The only inspection we do is +	 * that the fences were marked as signaled. +	 */ + +	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL); +	if (!requests) +		return -ENOMEM; + +	order = i915_random_order(total, &prng); +	if (!order) { +		err = -ENOMEM; +		goto out_requests; +	} + +	while (!kthread_should_stop()) { +		struct i915_sw_fence *submit, *wait; +		unsigned int n, count; + +		submit = heap_fence_create(GFP_KERNEL); +		if (!submit) { +			err = -ENOMEM; +			break; +		} + +		wait = heap_fence_create(GFP_KERNEL); +		if (!wait) { +			i915_sw_fence_commit(submit); +			heap_fence_put(submit); +			err = ENOMEM; +			break; +		} + +		i915_random_reorder(order, total, &prng); +		count = 1 + i915_prandom_u32_max_state(max_batch, &prng); + +		for (n = 0; n < count; n++) { +			struct i915_gem_context *ctx = +				t->contexts[order[n] % t->ncontexts]; +			struct i915_request *rq; + +			mutex_lock(BKL); + +			rq = t->request_alloc(ctx, t->engine); +			if (IS_ERR(rq)) { +				mutex_unlock(BKL); +				err = PTR_ERR(rq); +				count = n; +				break; +			} + +			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, +							       submit, +							       GFP_KERNEL); + +			requests[n] = i915_request_get(rq); +			i915_request_add(rq); + +			mutex_unlock(BKL); + +			if (err >= 0) +				err = i915_sw_fence_await_dma_fence(wait, +								    &rq->fence, +								    0, +								    GFP_KERNEL); + +			if (err < 0) { +				i915_request_put(rq); +				count = n; +				break; +			} +		} + +		i915_sw_fence_commit(submit); +		i915_sw_fence_commit(wait); + +		if (!wait_event_timeout(wait->wait, +					i915_sw_fence_done(wait), +					HZ / 2)) { +			struct i915_request *rq = requests[count - 1]; + +			pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n", +			       count, +			       rq->fence.context, rq->fence.seqno, +			       t->engine->name); +			i915_gem_set_wedged(t->engine->i915); +			GEM_BUG_ON(!i915_request_completed(rq)); +			i915_sw_fence_wait(wait); +			err = -EIO; +		} + +		for (n = 0; n < count; n++) { +			struct i915_request *rq = requests[n]; + +			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, +				      &rq->fence.flags)) { +				pr_err("%llu:%llu was not signaled!\n", +				       rq->fence.context, rq->fence.seqno); +				err = -EINVAL; +			} + +			i915_request_put(rq); +		} + +		heap_fence_put(wait); +		heap_fence_put(submit); + +		if (err < 0) +			break; + +		num_fences += count; +		num_waits++; + +		cond_resched(); +	} + +	atomic_long_add(num_fences, &t->num_fences); +	
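For reference, a minimal standalone sketch of the Fisher-Yates shuffle that i915_prandom_shuffle() implements above and that this smoketest leans on via i915_random_reorder(). It is not part of the patch: it swaps the kernel's struct rnd_state for POSIX rand_r() (modulo bias and all) purely for illustration, and the helper and buffer names are made up.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void shuffle(void *arr, size_t elsz, size_t count, unsigned int *seed)
{
	char tmp[128];

	if (!elsz || !count || elsz > sizeof(tmp))
		return;

	/* walk backwards, swapping each slot with a random earlier (or same) one */
	while (--count) {
		size_t swp = (size_t)rand_r(seed) % (count + 1);

		if (swp == count)
			continue;

		memcpy(tmp, (char *)arr + count * elsz, elsz);
		memcpy((char *)arr + count * elsz, (char *)arr + swp * elsz, elsz);
		memcpy((char *)arr + swp * elsz, tmp, elsz);
	}
}

int main(void)
{
	unsigned int order[] = { 0, 1, 2, 3, 4, 5, 6, 7 };
	unsigned int seed = 42;
	size_t i;

	shuffle(order, sizeof(order[0]), sizeof(order) / sizeof(order[0]), &seed);
	for (i = 0; i < sizeof(order) / sizeof(order[0]); i++)
		printf("%u ", order[i]);
	printf("\n");
	return 0;
}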
atomic_long_add(num_waits, &t->num_waits); + +	kfree(order); +out_requests: +	kfree(requests); +	return err; +} + +static int mock_breadcrumbs_smoketest(void *arg) +{ +	struct drm_i915_private *i915 = arg; +	struct smoketest t = { +		.engine = i915->engine[RCS], +		.ncontexts = 1024, +		.max_batch = 1024, +		.request_alloc = __mock_request_alloc +	}; +	unsigned int ncpus = num_online_cpus(); +	struct task_struct **threads; +	unsigned int n; +	int ret = 0; + +	/* +	 * Smoketest our breadcrumb/signal handling for requests across multiple +	 * threads. A very simple test to only catch the most egregious of bugs. +	 * See __igt_breadcrumbs_smoketest(); +	 */ + +	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL); +	if (!threads) +		return -ENOMEM; + +	t.contexts = +		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL); +	if (!t.contexts) { +		ret = -ENOMEM; +		goto out_threads; +	} + +	mutex_lock(&t.engine->i915->drm.struct_mutex); +	for (n = 0; n < t.ncontexts; n++) { +		t.contexts[n] = mock_context(t.engine->i915, "mock"); +		if (!t.contexts[n]) { +			ret = -ENOMEM; +			goto out_contexts; +		} +	} +	mutex_unlock(&t.engine->i915->drm.struct_mutex); + +	for (n = 0; n < ncpus; n++) { +		threads[n] = kthread_run(__igt_breadcrumbs_smoketest, +					 &t, "igt/%d", n); +		if (IS_ERR(threads[n])) { +			ret = PTR_ERR(threads[n]); +			ncpus = n; +			break; +		} + +		get_task_struct(threads[n]); +	} + +	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); + +	for (n = 0; n < ncpus; n++) { +		int err; + +		err = kthread_stop(threads[n]); +		if (err < 0 && !ret) +			ret = err; + +		put_task_struct(threads[n]); +	} +	pr_info("Completed %lu waits for %lu fence across %d cpus\n", +		atomic_long_read(&t.num_waits), +		atomic_long_read(&t.num_fences), +		ncpus); + +	mutex_lock(&t.engine->i915->drm.struct_mutex); +out_contexts: +	for (n = 0; n < t.ncontexts; n++) { +		if (!t.contexts[n]) +			break; +		mock_context_close(t.contexts[n]); +	} +	mutex_unlock(&t.engine->i915->drm.struct_mutex); +	kfree(t.contexts); +out_threads: +	kfree(threads); + +	return ret; +} +  int i915_request_mock_selftests(void)  {  	static const struct i915_subtest tests[] = { @@ -254,6 +505,7 @@ int i915_request_mock_selftests(void)  		SUBTEST(igt_wait_request),  		SUBTEST(igt_fence_wait),  		SUBTEST(igt_request_rewind), +		SUBTEST(mock_breadcrumbs_smoketest),  	};  	struct drm_i915_private *i915;  	intel_wakeref_t wakeref; @@ -812,6 +1064,178 @@ out_unlock:  	return err;  } +static int +max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ +	struct i915_request *rq; +	int ret; + +	/* +	 * Before execlists, all contexts share the same ringbuffer. With +	 * execlists, each context/engine has a separate ringbuffer and +	 * for the purposes of this test, inexhaustible. +	 * +	 * For the global ringbuffer though, we have to be very careful +	 * that we do not wrap while preventing the execution of requests +	 * with a unsignaled fence. +	 */ +	if (HAS_EXECLISTS(ctx->i915)) +		return INT_MAX; + +	rq = i915_request_alloc(engine, ctx); +	if (IS_ERR(rq)) { +		ret = PTR_ERR(rq); +	} else { +		int sz; + +		ret = rq->ring->size - rq->reserved_space; +		i915_request_add(rq); + +		sz = rq->ring->emit - rq->head; +		if (sz < 0) +			sz += rq->ring->size; +		ret /= sz; +		ret /= 2; /* leave half spare, in case of emergency! 
*/ +	} + +	return ret; +} + +static int live_breadcrumbs_smoketest(void *arg) +{ +	struct drm_i915_private *i915 = arg; +	struct smoketest t[I915_NUM_ENGINES]; +	unsigned int ncpus = num_online_cpus(); +	unsigned long num_waits, num_fences; +	struct intel_engine_cs *engine; +	struct task_struct **threads; +	struct igt_live_test live; +	enum intel_engine_id id; +	intel_wakeref_t wakeref; +	struct drm_file *file; +	unsigned int n; +	int ret = 0; + +	/* +	 * Smoketest our breadcrumb/signal handling for requests across multiple +	 * threads. A very simple test to only catch the most egregious of bugs. +	 * See __igt_breadcrumbs_smoketest(); +	 * +	 * On real hardware this time. +	 */ + +	wakeref = intel_runtime_pm_get(i915); + +	file = mock_file(i915); +	if (IS_ERR(file)) { +		ret = PTR_ERR(file); +		goto out_rpm; +	} + +	threads = kcalloc(ncpus * I915_NUM_ENGINES, +			  sizeof(*threads), +			  GFP_KERNEL); +	if (!threads) { +		ret = -ENOMEM; +		goto out_file; +	} + +	memset(&t[0], 0, sizeof(t[0])); +	t[0].request_alloc = __live_request_alloc; +	t[0].ncontexts = 64; +	t[0].contexts = kmalloc_array(t[0].ncontexts, +				      sizeof(*t[0].contexts), +				      GFP_KERNEL); +	if (!t[0].contexts) { +		ret = -ENOMEM; +		goto out_threads; +	} + +	mutex_lock(&i915->drm.struct_mutex); +	for (n = 0; n < t[0].ncontexts; n++) { +		t[0].contexts[n] = live_context(i915, file); +		if (!t[0].contexts[n]) { +			ret = -ENOMEM; +			goto out_contexts; +		} +	} + +	ret = igt_live_test_begin(&live, i915, __func__, ""); +	if (ret) +		goto out_contexts; + +	for_each_engine(engine, i915, id) { +		t[id] = t[0]; +		t[id].engine = engine; +		t[id].max_batch = max_batches(t[0].contexts[0], engine); +		if (t[id].max_batch < 0) { +			ret = t[id].max_batch; +			mutex_unlock(&i915->drm.struct_mutex); +			goto out_flush; +		} +		/* One ring interleaved between requests from all cpus */ +		t[id].max_batch /= num_online_cpus() + 1; +		pr_debug("Limiting batches to %d requests on %s\n", +			 t[id].max_batch, engine->name); + +		for (n = 0; n < ncpus; n++) { +			struct task_struct *tsk; + +			tsk = kthread_run(__igt_breadcrumbs_smoketest, +					  &t[id], "igt/%d.%d", id, n); +			if (IS_ERR(tsk)) { +				ret = PTR_ERR(tsk); +				mutex_unlock(&i915->drm.struct_mutex); +				goto out_flush; +			} + +			get_task_struct(tsk); +			threads[id * ncpus + n] = tsk; +		} +	} +	mutex_unlock(&i915->drm.struct_mutex); + +	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); + +out_flush: +	num_waits = 0; +	num_fences = 0; +	for_each_engine(engine, i915, id) { +		for (n = 0; n < ncpus; n++) { +			struct task_struct *tsk = threads[id * ncpus + n]; +			int err; + +			if (!tsk) +				continue; + +			err = kthread_stop(tsk); +			if (err < 0 && !ret) +				ret = err; + +			put_task_struct(tsk); +		} + +		num_waits += atomic_long_read(&t[id].num_waits); +		num_fences += atomic_long_read(&t[id].num_fences); +	} +	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", +		num_waits, num_fences, RUNTIME_INFO(i915)->num_rings, ncpus); + +	mutex_lock(&i915->drm.struct_mutex); +	ret = igt_live_test_end(&live) ?: ret; +out_contexts: +	mutex_unlock(&i915->drm.struct_mutex); +	kfree(t[0].contexts); +out_threads: +	kfree(threads); +out_file: +	mock_file_free(i915, file); +out_rpm: +	intel_runtime_pm_put(i915, wakeref); + +	return ret; +} +  int i915_request_live_selftests(struct drm_i915_private *i915)  {  	static const struct i915_subtest tests[] = { @@ -819,6 +1243,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915)  		
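As a side note, the per-thread batch budget that max_batches() and live_breadcrumbs_smoketest() compute above reduces to simple integer arithmetic: usable ring bytes divided by the measured size of one request, halved as a safety margin, then split across one thread per CPU plus one. A toy calculation with made-up numbers (the real values come from rq->ring at runtime):

#include <stdio.h>

int main(void)
{
	unsigned int ring_size = 16 * 1024; /* hypothetical legacy ring size, bytes */
	unsigned int reserved = 192;        /* hypothetical space held back for the final breadcrumb */
	unsigned int request_sz = 256;      /* hypothetical bytes emitted per request */
	unsigned int ncpus = 8;

	unsigned int budget = (ring_size - reserved) / request_sz / 2; /* keep half spare */
	unsigned int per_thread = budget / (ncpus + 1); /* one ring interleaved between all cpus */

	printf("requests in flight per thread: %u\n", per_thread);
	return 0;
}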
SUBTEST(live_all_engines),  		SUBTEST(live_sequential_engines),  		SUBTEST(live_empty_request), +		SUBTEST(live_breadcrumbs_smoketest),  	};  	if (i915_terminally_wedged(&i915->gpu_error)) diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 86c54ea37f48..10ef0e636a24 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -197,6 +197,49 @@ int i915_live_selftests(struct pci_dev *pdev)  	return 0;  } +static bool apply_subtest_filter(const char *caller, const char *name) +{ +	char *filter, *sep, *tok; +	bool result = true; + +	filter = kstrdup(i915_selftest.filter, GFP_KERNEL); +	for (sep = filter; (tok = strsep(&sep, ","));) { +		bool allow = true; +		char *sl; + +		if (*tok == '!') { +			allow = false; +			tok++; +		} + +		if (*tok == '\0') +			continue; + +		sl = strchr(tok, '/'); +		if (sl) { +			*sl++ = '\0'; +			if (strcmp(tok, caller)) { +				if (allow) +					result = false; +				continue; +			} +			tok = sl; +		} + +		if (strcmp(tok, name)) { +			if (allow) +				result = false; +			continue; +		} + +		result = allow; +		break; +	} +	kfree(filter); + +	return result; +} +  int __i915_subtests(const char *caller,  		    const struct i915_subtest *st,  		    unsigned int count, @@ -209,6 +252,9 @@ int __i915_subtests(const char *caller,  		if (signal_pending(current))  			return -EINTR; +		if (!apply_subtest_filter(caller, st->name)) +			continue; +  		pr_debug(DRIVER_NAME ": Running %s/%s\n", caller, st->name);  		GEM_TRACE("Running %s/%s\n", caller, st->name); @@ -244,6 +290,7 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...)  module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400);  module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); +module_param_named(st_filter, i915_selftest.filter, charp, 0400);  module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400);  MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then exit module)"); diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index 19f1c6a5c8fb..12ea69b1a1e5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -4,12 +4,155 @@   * Copyright © 2017-2018 Intel Corporation   */ +#include <linux/prime_numbers.h> +  #include "../i915_selftest.h"  #include "i915_random.h" +#include "igt_flush_test.h"  #include "mock_gem_device.h"  #include "mock_timeline.h" +static struct page *hwsp_page(struct i915_timeline *tl) +{ +	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj; + +	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); +	return sg_page(obj->mm.pages->sgl); +} + +static unsigned long hwsp_cacheline(struct i915_timeline *tl) +{ +	unsigned long address = (unsigned long)page_address(hwsp_page(tl)); + +	return (address + tl->hwsp_offset) / CACHELINE_BYTES; +} + +#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) + +struct mock_hwsp_freelist { +	struct drm_i915_private *i915; +	struct radix_tree_root cachelines; +	struct i915_timeline **history; +	unsigned long count, max; +	struct rnd_state prng; +}; + +enum { +	SHUFFLE = BIT(0), +}; + +static void __mock_hwsp_record(struct mock_hwsp_freelist *state, +			       unsigned int idx, +			       struct i915_timeline *tl) +{ +	tl = xchg(&state->history[idx], tl); +	if (tl) { +		
radix_tree_delete(&state->cachelines, hwsp_cacheline(tl)); +		i915_timeline_put(tl); +	} +} + +static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, +				unsigned int count, +				unsigned int flags) +{ +	struct i915_timeline *tl; +	unsigned int idx; + +	while (count--) { +		unsigned long cacheline; +		int err; + +		tl = i915_timeline_create(state->i915, "mock", NULL); +		if (IS_ERR(tl)) +			return PTR_ERR(tl); + +		cacheline = hwsp_cacheline(tl); +		err = radix_tree_insert(&state->cachelines, cacheline, tl); +		if (err) { +			if (err == -EEXIST) { +				pr_err("HWSP cacheline %lu already used; duplicate allocation!\n", +				       cacheline); +			} +			i915_timeline_put(tl); +			return err; +		} + +		idx = state->count++ % state->max; +		__mock_hwsp_record(state, idx, tl); +	} + +	if (flags & SHUFFLE) +		i915_prandom_shuffle(state->history, +				     sizeof(*state->history), +				     min(state->count, state->max), +				     &state->prng); + +	count = i915_prandom_u32_max_state(min(state->count, state->max), +					   &state->prng); +	while (count--) { +		idx = --state->count % state->max; +		__mock_hwsp_record(state, idx, NULL); +	} + +	return 0; +} + +static int mock_hwsp_freelist(void *arg) +{ +	struct mock_hwsp_freelist state; +	const struct { +		const char *name; +		unsigned int flags; +	} phases[] = { +		{ "linear", 0 }, +		{ "shuffled", SHUFFLE }, +		{ }, +	}, *p; +	unsigned int na; +	int err = 0; + +	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); +	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); + +	state.i915 = mock_gem_device(); +	if (!state.i915) +		return -ENOMEM; + +	/* +	 * Create a bunch of timelines and check that their HWSP do not overlap. +	 * Free some, and try again. +	 */ + +	state.max = PAGE_SIZE / sizeof(*state.history); +	state.count = 0; +	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL); +	if (!state.history) { +		err = -ENOMEM; +		goto err_put; +	} + +	mutex_lock(&state.i915->drm.struct_mutex); +	for (p = phases; p->name; p++) { +		pr_debug("%s(%s)\n", __func__, p->name); +		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { +			err = __mock_hwsp_timeline(&state, na, p->flags); +			if (err) +				goto out; +		} +	} + +out: +	for (na = 0; na < state.max; na++) +		__mock_hwsp_record(&state, na, NULL); +	mutex_unlock(&state.i915->drm.struct_mutex); +	kfree(state.history); +err_put: +	drm_dev_put(&state.i915->drm); +	return err; +} +  struct __igt_sync {  	const char *name;  	u32 seqno; @@ -256,12 +399,331 @@ static int bench_sync(void *arg)  	return 0;  } -int i915_gem_timeline_mock_selftests(void) +int i915_timeline_mock_selftests(void)  {  	static const struct i915_subtest tests[] = { +		SUBTEST(mock_hwsp_freelist),  		SUBTEST(igt_sync),  		SUBTEST(bench_sync),  	};  	return i915_subtests(tests, NULL);  } + +static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) +{ +	u32 *cs; + +	cs = intel_ring_begin(rq, 4); +	if (IS_ERR(cs)) +		return PTR_ERR(cs); + +	if (INTEL_GEN(rq->i915) >= 8) { +		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; +		*cs++ = addr; +		*cs++ = 0; +		*cs++ = value; +	} else if (INTEL_GEN(rq->i915) >= 4) { +		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; +		*cs++ = 0; +		*cs++ = addr; +		*cs++ = value; +	} else { +		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; +		*cs++ = addr; +		*cs++ = value; +		*cs++ = MI_NOOP; +	} + +	intel_ring_advance(rq, cs); + +	return 0; +} + +static struct i915_request * +tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, 
u32 value) +{ +	struct i915_request *rq; +	int err; + +	lockdep_assert_held(&tl->i915->drm.struct_mutex); /* lazy rq refs */ + +	err = i915_timeline_pin(tl); +	if (err) { +		rq = ERR_PTR(err); +		goto out; +	} + +	rq = i915_request_alloc(engine, engine->i915->kernel_context); +	if (IS_ERR(rq)) +		goto out_unpin; + +	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); +	i915_request_add(rq); +	if (err) +		rq = ERR_PTR(err); + +out_unpin: +	i915_timeline_unpin(tl); +out: +	if (IS_ERR(rq)) +		pr_err("Failed to write to timeline!\n"); +	return rq; +} + +static struct i915_timeline * +checked_i915_timeline_create(struct drm_i915_private *i915) +{ +	struct i915_timeline *tl; + +	tl = i915_timeline_create(i915, "live", NULL); +	if (IS_ERR(tl)) +		return tl; + +	if (*tl->hwsp_seqno != tl->seqno) { +		pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", +		       *tl->hwsp_seqno, tl->seqno); +		i915_timeline_put(tl); +		return ERR_PTR(-EINVAL); +	} + +	return tl; +} + +static int live_hwsp_engine(void *arg) +{ +#define NUM_TIMELINES 4096 +	struct drm_i915_private *i915 = arg; +	struct i915_timeline **timelines; +	struct intel_engine_cs *engine; +	enum intel_engine_id id; +	intel_wakeref_t wakeref; +	unsigned long count, n; +	int err = 0; + +	/* +	 * Create a bunch of timelines and check we can write +	 * independently to each of their breadcrumb slots. +	 */ + +	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, +				   sizeof(*timelines), +				   GFP_KERNEL); +	if (!timelines) +		return -ENOMEM; + +	mutex_lock(&i915->drm.struct_mutex); +	wakeref = intel_runtime_pm_get(i915); + +	count = 0; +	for_each_engine(engine, i915, id) { +		if (!intel_engine_can_store_dword(engine)) +			continue; + +		for (n = 0; n < NUM_TIMELINES; n++) { +			struct i915_timeline *tl; +			struct i915_request *rq; + +			tl = checked_i915_timeline_create(i915); +			if (IS_ERR(tl)) { +				err = PTR_ERR(tl); +				goto out; +			} + +			rq = tl_write(tl, engine, count); +			if (IS_ERR(rq)) { +				i915_timeline_put(tl); +				err = PTR_ERR(rq); +				goto out; +			} + +			timelines[count++] = tl; +		} +	} + +out: +	if (igt_flush_test(i915, I915_WAIT_LOCKED)) +		err = -EIO; + +	for (n = 0; n < count; n++) { +		struct i915_timeline *tl = timelines[n]; + +		if (!err && *tl->hwsp_seqno != n) { +			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", +			       n, *tl->hwsp_seqno); +			err = -EINVAL; +		} +		i915_timeline_put(tl); +	} + +	intel_runtime_pm_put(i915, wakeref); +	mutex_unlock(&i915->drm.struct_mutex); + +	kvfree(timelines); + +	return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_alternate(void *arg) +{ +#define NUM_TIMELINES 4096 +	struct drm_i915_private *i915 = arg; +	struct i915_timeline **timelines; +	struct intel_engine_cs *engine; +	enum intel_engine_id id; +	intel_wakeref_t wakeref; +	unsigned long count, n; +	int err = 0; + +	/* +	 * Create a bunch of timelines and check we can write +	 * independently to each of their breadcrumb slots with adjacent +	 * engines. 
+	 */ + +	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, +				   sizeof(*timelines), +				   GFP_KERNEL); +	if (!timelines) +		return -ENOMEM; + +	mutex_lock(&i915->drm.struct_mutex); +	wakeref = intel_runtime_pm_get(i915); + +	count = 0; +	for (n = 0; n < NUM_TIMELINES; n++) { +		for_each_engine(engine, i915, id) { +			struct i915_timeline *tl; +			struct i915_request *rq; + +			if (!intel_engine_can_store_dword(engine)) +				continue; + +			tl = checked_i915_timeline_create(i915); +			if (IS_ERR(tl)) { +				err = PTR_ERR(tl); +				goto out; +			} + +			rq = tl_write(tl, engine, count); +			if (IS_ERR(rq)) { +				i915_timeline_put(tl); +				err = PTR_ERR(rq); +				goto out; +			} + +			timelines[count++] = tl; +		} +	} + +out: +	if (igt_flush_test(i915, I915_WAIT_LOCKED)) +		err = -EIO; + +	for (n = 0; n < count; n++) { +		struct i915_timeline *tl = timelines[n]; + +		if (!err && *tl->hwsp_seqno != n) { +			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", +			       n, *tl->hwsp_seqno); +			err = -EINVAL; +		} +		i915_timeline_put(tl); +	} + +	intel_runtime_pm_put(i915, wakeref); +	mutex_unlock(&i915->drm.struct_mutex); + +	kvfree(timelines); + +	return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_recycle(void *arg) +{ +	struct drm_i915_private *i915 = arg; +	struct intel_engine_cs *engine; +	enum intel_engine_id id; +	intel_wakeref_t wakeref; +	unsigned long count; +	int err = 0; + +	/* +	 * Check seqno writes into one timeline at a time. We expect to +	 * recycle the breadcrumb slot between iterations and neither +	 * want to confuse ourselves or the GPU. +	 */ + +	mutex_lock(&i915->drm.struct_mutex); +	wakeref = intel_runtime_pm_get(i915); + +	count = 0; +	for_each_engine(engine, i915, id) { +		IGT_TIMEOUT(end_time); + +		if (!intel_engine_can_store_dword(engine)) +			continue; + +		do { +			struct i915_timeline *tl; +			struct i915_request *rq; + +			tl = checked_i915_timeline_create(i915); +			if (IS_ERR(tl)) { +				err = PTR_ERR(tl); +				goto out; +			} + +			rq = tl_write(tl, engine, count); +			if (IS_ERR(rq)) { +				i915_timeline_put(tl); +				err = PTR_ERR(rq); +				goto out; +			} + +			if (i915_request_wait(rq, +					      I915_WAIT_LOCKED, +					      HZ / 5) < 0) { +				pr_err("Wait for timeline writes timed out!\n"); +				i915_timeline_put(tl); +				err = -EIO; +				goto out; +			} + +			if (*tl->hwsp_seqno != count) { +				pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", +				       count, *tl->hwsp_seqno); +				err = -EINVAL; +			} + +			i915_timeline_put(tl); +			count++; + +			if (err) +				goto out; + +			i915_timelines_park(i915); /* Encourage recycling! 
*/ +		} while (!__igt_timeout(end_time, NULL)); +	} + +out: +	if (igt_flush_test(i915, I915_WAIT_LOCKED)) +		err = -EIO; +	intel_runtime_pm_put(i915, wakeref); +	mutex_unlock(&i915->drm.struct_mutex); + +	return err; +} + +int i915_timeline_live_selftests(struct drm_i915_private *i915) +{ +	static const struct i915_subtest tests[] = { +		SUBTEST(live_hwsp_recycle), +		SUBTEST(live_hwsp_engine), +		SUBTEST(live_hwsp_alternate), +	}; + +	return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index f0a32edfb9b1..cf1de82741fa 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -672,7 +672,7 @@ static int igt_vma_partial(void *arg)  		}  		count = 0; -		list_for_each_entry(vma, &obj->vma_list, obj_link) +		list_for_each_entry(vma, &obj->vma.list, obj_link)  			count++;  		if (count != nvma) {  			pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n", @@ -701,7 +701,7 @@ static int igt_vma_partial(void *arg)  		i915_vma_unpin(vma);  		count = 0; -		list_for_each_entry(vma, &obj->vma_list, obj_link) +		list_for_each_entry(vma, &obj->vma.list, obj_link)  			count++;  		if (count != nvma) {  			pr_err("(%s) allocated an extra full vma!\n", p->name); diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 5deb485fb942..3e902761cd16 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -35,7 +35,6 @@ int igt_live_test_begin(struct igt_live_test *t,  		return err;  	} -	i915->gpu_error.missed_irq_rings = 0;  	t->reset_global = i915_reset_count(&i915->gpu_error);  	for_each_engine(engine, i915, id) @@ -75,11 +74,5 @@ int igt_live_test_end(struct igt_live_test *t)  		return -EIO;  	} -	if (i915->gpu_error.missed_irq_rings) { -		pr_err("%s(%s): Missed interrupts on engines %lx\n", -		       t->func, t->name, i915->gpu_error.missed_irq_rings); -		return -EIO; -	} -  	return 0;  } diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 0e70df0230b8..9ebd9225684e 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -185,11 +185,6 @@ void igt_spinner_fini(struct igt_spinner *spin)  bool igt_wait_for_spinner(struct igt_spinner *spin, struct i915_request *rq)  { -	if (!wait_event_timeout(rq->execute, -				READ_ONCE(rq->global_seqno), -				msecs_to_jiffies(10))) -		return false; -  	return !(wait_for_us(i915_seqno_passed(hws_seqno(spin, rq),  					       rq->fence.seqno),  			     10) && diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c deleted file mode 100644 index f03b407fdbe2..000000000000 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice 
(including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "../i915_selftest.h" -#include "i915_random.h" - -#include "mock_gem_device.h" -#include "mock_engine.h" - -static int check_rbtree(struct intel_engine_cs *engine, -			const unsigned long *bitmap, -			const struct intel_wait *waiters, -			const int count) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; -	struct rb_node *rb; -	int n; - -	if (&b->irq_wait->node != rb_first(&b->waiters)) { -		pr_err("First waiter does not match first element of wait-tree\n"); -		return -EINVAL; -	} - -	n = find_first_bit(bitmap, count); -	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { -		struct intel_wait *w = container_of(rb, typeof(*w), node); -		int idx = w - waiters; - -		if (!test_bit(idx, bitmap)) { -			pr_err("waiter[%d, seqno=%d] removed but still in wait-tree\n", -			       idx, w->seqno); -			return -EINVAL; -		} - -		if (n != idx) { -			pr_err("waiter[%d, seqno=%d] does not match expected next element in tree [%d]\n", -			       idx, w->seqno, n); -			return -EINVAL; -		} - -		n = find_next_bit(bitmap, count, n + 1); -	} - -	return 0; -} - -static int check_completion(struct intel_engine_cs *engine, -			    const unsigned long *bitmap, -			    const struct intel_wait *waiters, -			    const int count) -{ -	int n; - -	for (n = 0; n < count; n++) { -		if (intel_wait_complete(&waiters[n]) != !!test_bit(n, bitmap)) -			continue; - -		pr_err("waiter[%d, seqno=%d] is %s, but expected %s\n", -		       n, waiters[n].seqno, -		       intel_wait_complete(&waiters[n]) ? "complete" : "active", -		       test_bit(n, bitmap) ? "active" : "complete"); -		return -EINVAL; -	} - -	return 0; -} - -static int check_rbtree_empty(struct intel_engine_cs *engine) -{ -	struct intel_breadcrumbs *b = &engine->breadcrumbs; - -	if (b->irq_wait) { -		pr_err("Empty breadcrumbs still has a waiter\n"); -		return -EINVAL; -	} - -	if (!RB_EMPTY_ROOT(&b->waiters)) { -		pr_err("Empty breadcrumbs, but wait-tree not empty\n"); -		return -EINVAL; -	} - -	return 0; -} - -static int igt_random_insert_remove(void *arg) -{ -	const u32 seqno_bias = 0x1000; -	I915_RND_STATE(prng); -	struct intel_engine_cs *engine = arg; -	struct intel_wait *waiters; -	const int count = 4096; -	unsigned int *order; -	unsigned long *bitmap; -	int err = -ENOMEM; -	int n; - -	mock_engine_reset(engine); - -	waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); -	if (!waiters) -		goto out_engines; - -	bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), -			 GFP_KERNEL); -	if (!bitmap) -		goto out_waiters; - -	order = i915_random_order(count, &prng); -	if (!order) -		goto out_bitmap; - -	for (n = 0; n < count; n++) -		intel_wait_init_for_seqno(&waiters[n], seqno_bias + n); - -	err = check_rbtree(engine, bitmap, waiters, count); -	if (err) -		goto out_order; - -	/* Add and remove waiters into the rbtree in random order. At each -	 * step, we verify that the rbtree is correctly ordered. 
-	 */ -	for (n = 0; n < count; n++) { -		int i = order[n]; - -		intel_engine_add_wait(engine, &waiters[i]); -		__set_bit(i, bitmap); - -		err = check_rbtree(engine, bitmap, waiters, count); -		if (err) -			goto out_order; -	} - -	i915_random_reorder(order, count, &prng); -	for (n = 0; n < count; n++) { -		int i = order[n]; - -		intel_engine_remove_wait(engine, &waiters[i]); -		__clear_bit(i, bitmap); - -		err = check_rbtree(engine, bitmap, waiters, count); -		if (err) -			goto out_order; -	} - -	err = check_rbtree_empty(engine); -out_order: -	kfree(order); -out_bitmap: -	kfree(bitmap); -out_waiters: -	kvfree(waiters); -out_engines: -	mock_engine_flush(engine); -	return err; -} - -static int igt_insert_complete(void *arg) -{ -	const u32 seqno_bias = 0x1000; -	struct intel_engine_cs *engine = arg; -	struct intel_wait *waiters; -	const int count = 4096; -	unsigned long *bitmap; -	int err = -ENOMEM; -	int n, m; - -	mock_engine_reset(engine); - -	waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); -	if (!waiters) -		goto out_engines; - -	bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), -			 GFP_KERNEL); -	if (!bitmap) -		goto out_waiters; - -	for (n = 0; n < count; n++) { -		intel_wait_init_for_seqno(&waiters[n], n + seqno_bias); -		intel_engine_add_wait(engine, &waiters[n]); -		__set_bit(n, bitmap); -	} -	err = check_rbtree(engine, bitmap, waiters, count); -	if (err) -		goto out_bitmap; - -	/* On each step, we advance the seqno so that several waiters are then -	 * complete (we increase the seqno by increasingly larger values to -	 * retire more and more waiters at once). All retired waiters should -	 * be woken and removed from the rbtree, and so that we check. -	 */ -	for (n = 0; n < count; n = m) { -		int seqno = 2 * n; - -		GEM_BUG_ON(find_first_bit(bitmap, count) != n); - -		if (intel_wait_complete(&waiters[n])) { -			pr_err("waiter[%d, seqno=%d] completed too early\n", -			       n, waiters[n].seqno); -			err = -EINVAL; -			goto out_bitmap; -		} - -		/* complete the following waiters */ -		mock_seqno_advance(engine, seqno + seqno_bias); -		for (m = n; m <= seqno; m++) { -			if (m == count) -				break; - -			GEM_BUG_ON(!test_bit(m, bitmap)); -			__clear_bit(m, bitmap); -		} - -		intel_engine_remove_wait(engine, &waiters[n]); -		RB_CLEAR_NODE(&waiters[n].node); - -		err = check_rbtree(engine, bitmap, waiters, count); -		if (err) { -			pr_err("rbtree corrupt after seqno advance to %d\n", -			       seqno + seqno_bias); -			goto out_bitmap; -		} - -		err = check_completion(engine, bitmap, waiters, count); -		if (err) { -			pr_err("completions after seqno advance to %d failed\n", -			       seqno + seqno_bias); -			goto out_bitmap; -		} -	} - -	err = check_rbtree_empty(engine); -out_bitmap: -	kfree(bitmap); -out_waiters: -	kvfree(waiters); -out_engines: -	mock_engine_flush(engine); -	return err; -} - -struct igt_wakeup { -	struct task_struct *tsk; -	atomic_t *ready, *set, *done; -	struct intel_engine_cs *engine; -	unsigned long flags; -#define STOP 0 -#define IDLE 1 -	wait_queue_head_t *wq; -	u32 seqno; -}; - -static bool wait_for_ready(struct igt_wakeup *w) -{ -	DEFINE_WAIT(ready); - -	set_bit(IDLE, &w->flags); -	if (atomic_dec_and_test(w->done)) -		wake_up_var(w->done); - -	if (test_bit(STOP, &w->flags)) -		goto out; - -	for (;;) { -		prepare_to_wait(w->wq, &ready, TASK_INTERRUPTIBLE); -		if (atomic_read(w->ready) == 0) -			break; - -		schedule(); -	} -	finish_wait(w->wq, &ready); - -out: -	clear_bit(IDLE, &w->flags); -	if (atomic_dec_and_test(w->set)) -		
wake_up_var(w->set); - -	return !test_bit(STOP, &w->flags); -} - -static int igt_wakeup_thread(void *arg) -{ -	struct igt_wakeup *w = arg; -	struct intel_wait wait; - -	while (wait_for_ready(w)) { -		GEM_BUG_ON(kthread_should_stop()); - -		intel_wait_init_for_seqno(&wait, w->seqno); -		intel_engine_add_wait(w->engine, &wait); -		for (;;) { -			set_current_state(TASK_UNINTERRUPTIBLE); -			if (i915_seqno_passed(intel_engine_get_seqno(w->engine), -					      w->seqno)) -				break; - -			if (test_bit(STOP, &w->flags)) /* emergency escape */ -				break; - -			schedule(); -		} -		intel_engine_remove_wait(w->engine, &wait); -		__set_current_state(TASK_RUNNING); -	} - -	return 0; -} - -static void igt_wake_all_sync(atomic_t *ready, -			      atomic_t *set, -			      atomic_t *done, -			      wait_queue_head_t *wq, -			      int count) -{ -	atomic_set(set, count); -	atomic_set(ready, 0); -	wake_up_all(wq); - -	wait_var_event(set, !atomic_read(set)); -	atomic_set(ready, count); -	atomic_set(done, count); -} - -static int igt_wakeup(void *arg) -{ -	I915_RND_STATE(prng); -	struct intel_engine_cs *engine = arg; -	struct igt_wakeup *waiters; -	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); -	const int count = 4096; -	const u32 max_seqno = count / 4; -	atomic_t ready, set, done; -	int err = -ENOMEM; -	int n, step; - -	mock_engine_reset(engine); - -	waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); -	if (!waiters) -		goto out_engines; - -	/* Create a large number of threads, each waiting on a random seqno. -	 * Multiple waiters will be waiting for the same seqno. -	 */ -	atomic_set(&ready, count); -	for (n = 0; n < count; n++) { -		waiters[n].wq = &wq; -		waiters[n].ready = &ready; -		waiters[n].set = &set; -		waiters[n].done = &done; -		waiters[n].engine = engine; -		waiters[n].flags = BIT(IDLE); - -		waiters[n].tsk = kthread_run(igt_wakeup_thread, &waiters[n], -					     "i915/igt:%d", n); -		if (IS_ERR(waiters[n].tsk)) -			goto out_waiters; - -		get_task_struct(waiters[n].tsk); -	} - -	for (step = 1; step <= max_seqno; step <<= 1) { -		u32 seqno; - -		/* The waiter threads start paused as we assign them a random -		 * seqno and reset the engine. Once the engine is reset, -		 * we signal that the threads may begin their wait upon their -		 * seqno. -		 */ -		for (n = 0; n < count; n++) { -			GEM_BUG_ON(!test_bit(IDLE, &waiters[n].flags)); -			waiters[n].seqno = -				1 + prandom_u32_state(&prng) % max_seqno; -		} -		mock_seqno_advance(engine, 0); -		igt_wake_all_sync(&ready, &set, &done, &wq, count); - -		/* Simulate the GPU doing chunks of work, with one or more -		 * seqno appearing to finish at the same time. A random number -		 * of threads will be waiting upon the update and hopefully be -		 * woken. -		 */ -		for (seqno = 1; seqno <= max_seqno + step; seqno += step) { -			usleep_range(50, 500); -			mock_seqno_advance(engine, seqno); -		} -		GEM_BUG_ON(intel_engine_get_seqno(engine) < 1 + max_seqno); - -		/* With the seqno now beyond any of the waiting threads, they -		 * should all be woken, see that they are complete and signal -		 * that they are ready for the next test. We wait until all -		 * threads are complete and waiting for us (i.e. not a seqno). 
-		 */ -		if (!wait_var_event_timeout(&done, -					    !atomic_read(&done), 10 * HZ)) { -			pr_err("Timed out waiting for %d remaining waiters\n", -			       atomic_read(&done)); -			err = -ETIMEDOUT; -			break; -		} - -		err = check_rbtree_empty(engine); -		if (err) -			break; -	} - -out_waiters: -	for (n = 0; n < count; n++) { -		if (IS_ERR(waiters[n].tsk)) -			break; - -		set_bit(STOP, &waiters[n].flags); -	} -	mock_seqno_advance(engine, INT_MAX); /* wakeup any broken waiters */ -	igt_wake_all_sync(&ready, &set, &done, &wq, n); - -	for (n = 0; n < count; n++) { -		if (IS_ERR(waiters[n].tsk)) -			break; - -		kthread_stop(waiters[n].tsk); -		put_task_struct(waiters[n].tsk); -	} - -	kvfree(waiters); -out_engines: -	mock_engine_flush(engine); -	return err; -} - -int intel_breadcrumbs_mock_selftests(void) -{ -	static const struct i915_subtest tests[] = { -		SUBTEST(igt_random_insert_remove), -		SUBTEST(igt_insert_complete), -		SUBTEST(igt_wakeup), -	}; -	struct drm_i915_private *i915; -	int err; - -	i915 = mock_gem_device(); -	if (!i915) -		return -ENOMEM; - -	err = i915_subtests(tests, i915->engine[RCS]); -	drm_dev_put(&i915->drm); - -	return err; -} diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 12550b55c42f..7b6f3bea9ef8 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -363,9 +363,7 @@ static int igt_global_reset(void *arg)  	/* Check that we can issue a global GPU reset */  	igt_global_reset_lock(i915); -	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); -	mutex_lock(&i915->drm.struct_mutex);  	reset_count = i915_reset_count(&i915->gpu_error);  	i915_reset(i915, ALL_ENGINES, NULL); @@ -374,9 +372,7 @@ static int igt_global_reset(void *arg)  		pr_err("No GPU reset recorded!\n");  		err = -EINVAL;  	} -	mutex_unlock(&i915->drm.struct_mutex); -	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));  	igt_global_reset_unlock(i915);  	if (i915_terminally_wedged(&i915->gpu_error)) @@ -393,18 +389,16 @@ static int igt_wedged_reset(void *arg)  	/* Check that we can recover a wedged device with a GPU reset */  	igt_global_reset_lock(i915); -	mutex_lock(&i915->drm.struct_mutex);  	wakeref = intel_runtime_pm_get(i915);  	i915_gem_set_wedged(i915); -	GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error)); -	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); +	mutex_lock(&i915->drm.struct_mutex); +	GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error));  	i915_reset(i915, ALL_ENGINES, NULL); -	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); +	mutex_unlock(&i915->drm.struct_mutex);  	intel_runtime_pm_put(i915, wakeref); -	mutex_unlock(&i915->drm.struct_mutex);  	igt_global_reset_unlock(i915);  	return i915_terminally_wedged(&i915->gpu_error) ? 
-EIO : 0; @@ -455,8 +449,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)  		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);  		do { -			u32 seqno = intel_engine_get_seqno(engine); -  			if (active) {  				struct i915_request *rq; @@ -485,8 +477,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)  					break;  				} -				GEM_BUG_ON(!rq->global_seqno); -				seqno = rq->global_seqno - 1;  				i915_request_put(rq);  			} @@ -502,16 +492,15 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)  				break;  			} -			reset_engine_count += active;  			if (i915_reset_engine_count(&i915->gpu_error, engine) != -			    reset_engine_count) { -				pr_err("%s engine reset %srecorded!\n", -				       engine->name, active ? "not " : ""); +			    ++reset_engine_count) { +				pr_err("%s engine reset not recorded!\n", +				       engine->name);  				err = -EINVAL;  				break;  			} -			if (!wait_for_idle(engine)) { +			if (!i915_reset_flush(i915)) {  				struct drm_printer p =  					drm_info_printer(i915->drm.dev); @@ -734,7 +723,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915,  		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);  		do { -			u32 seqno = intel_engine_get_seqno(engine);  			struct i915_request *rq = NULL;  			if (flags & TEST_ACTIVE) { @@ -762,9 +750,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915,  					err = -EIO;  					break;  				} - -				GEM_BUG_ON(!rq->global_seqno); -				seqno = rq->global_seqno - 1;  			}  			err = i915_reset_engine(engine, NULL); @@ -801,10 +786,9 @@ static int __igt_reset_engines(struct drm_i915_private *i915,  		reported = i915_reset_engine_count(&i915->gpu_error, engine);  		reported -= threads[engine->id].resets; -		if (reported != (flags & TEST_ACTIVE ? count : 0)) { -			pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu, expected %lu reported\n", -			       engine->name, test_name, count, reported, -			       (flags & TEST_ACTIVE ? 
count : 0)); +		if (reported != count) { +			pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", +			       engine->name, test_name, count, reported);  			if (!err)  				err = -EINVAL;  		} @@ -903,20 +887,13 @@ static int igt_reset_engines(void *arg)  	return 0;  } -static u32 fake_hangcheck(struct i915_request *rq, u32 mask) +static u32 fake_hangcheck(struct drm_i915_private *i915, u32 mask)  { -	struct i915_gpu_error *error = &rq->i915->gpu_error; -	u32 reset_count = i915_reset_count(error); - -	error->stalled_mask = mask; - -	/* set_bit() must be after we have setup the backchannel (mask) */ -	smp_mb__before_atomic(); -	set_bit(I915_RESET_HANDOFF, &error->flags); +	u32 count = i915_reset_count(&i915->gpu_error); -	wake_up_all(&error->wait_queue); +	i915_reset(i915, mask, NULL); -	return reset_count; +	return count;  }  static int igt_reset_wait(void *arg) @@ -962,7 +939,7 @@ static int igt_reset_wait(void *arg)  		goto out_rq;  	} -	reset_count = fake_hangcheck(rq, ALL_ENGINES); +	reset_count = fake_hangcheck(i915, ALL_ENGINES);  	timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10);  	if (timeout < 0) { @@ -972,7 +949,6 @@ static int igt_reset_wait(void *arg)  		goto out_rq;  	} -	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));  	if (i915_reset_count(&i915->gpu_error) == reset_count) {  		pr_err("No GPU reset recorded!\n");  		err = -EINVAL; @@ -1151,7 +1127,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,  	wait_for_completion(&arg.completion); -	if (wait_for(waitqueue_active(&rq->execute), 10)) { +	if (wait_for(!list_empty(&rq->fence.cb_list), 10)) {  		struct drm_printer p = drm_info_printer(i915->drm.dev);  		pr_err("igt/evict_vma kthread did not wait\n"); @@ -1162,7 +1138,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,  	}  out_reset: -	fake_hangcheck(rq, intel_engine_flag(rq->engine)); +	fake_hangcheck(rq->i915, intel_engine_flag(rq->engine));  	if (tsk) {  		struct igt_wedge_me w; @@ -1341,12 +1317,7 @@ static int igt_reset_queue(void *arg)  				goto fini;  			} -			reset_count = fake_hangcheck(prev, ENGINE_MASK(id)); - -			i915_reset(i915, ENGINE_MASK(id), NULL); - -			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, -					    &i915->gpu_error.flags)); +			reset_count = fake_hangcheck(i915, ENGINE_MASK(id));  			if (prev->fence.error != -EIO) {  				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", @@ -1565,6 +1536,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine,  		pr_err("%s(%s): Failed to start request %llx, at %x\n",  		       __func__, engine->name,  		       rq->fence.seqno, hws_seqno(&h, rq)); +		i915_gem_set_wedged(i915);  		err = -EIO;  	} @@ -1588,7 +1560,6 @@ out:  static void force_reset(struct drm_i915_private *i915)  {  	i915_gem_set_wedged(i915); -	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);  	i915_reset(i915, 0, NULL);  } @@ -1618,6 +1589,26 @@ static int igt_atomic_reset(void *arg)  	if (i915_terminally_wedged(&i915->gpu_error))  		goto unlock; +	if (intel_has_gpu_reset(i915)) { +		const typeof(*phases) *p; + +		for (p = phases; p->name; p++) { +			GEM_TRACE("intel_gpu_reset under %s\n", p->name); + +			p->critical_section_begin(); +			err = intel_gpu_reset(i915, ALL_ENGINES); +			p->critical_section_end(); + +			if (err) { +				pr_err("intel_gpu_reset failed under %s\n", +				       p->name); +				goto out; +			} +		} + +		force_reset(i915); +	} +  	if (intel_has_reset_engine(i915)) {  		struct intel_engine_cs *engine;  		enum 
intel_engine_id id; @@ -1674,6 +1665,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)  	wakeref = intel_runtime_pm_get(i915);  	saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); +	drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */  	err = i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 2b2ecd76c2ac..fb35f53c9ce3 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -268,6 +268,143 @@ err_wedged:  	goto err_ctx_lo;  } +struct preempt_client { +	struct igt_spinner spin; +	struct i915_gem_context *ctx; +}; + +static int preempt_client_init(struct drm_i915_private *i915, +			       struct preempt_client *c) +{ +	c->ctx = kernel_context(i915); +	if (!c->ctx) +		return -ENOMEM; + +	if (igt_spinner_init(&c->spin, i915)) +		goto err_ctx; + +	return 0; + +err_ctx: +	kernel_context_close(c->ctx); +	return -ENOMEM; +} + +static void preempt_client_fini(struct preempt_client *c) +{ +	igt_spinner_fini(&c->spin); +	kernel_context_close(c->ctx); +} + +static int live_suppress_self_preempt(void *arg) +{ +	struct drm_i915_private *i915 = arg; +	struct intel_engine_cs *engine; +	struct i915_sched_attr attr = { +		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) +	}; +	struct preempt_client a, b; +	enum intel_engine_id id; +	intel_wakeref_t wakeref; +	int err = -ENOMEM; + +	/* +	 * Verify that if a preemption request does not cause a change in +	 * the current execution order, the preempt-to-idle injection is +	 * skipped and that we do not accidentally apply it after the CS +	 * completion event. +	 */ + +	if (!HAS_LOGICAL_RING_PREEMPTION(i915)) +		return 0; + +	if (USES_GUC_SUBMISSION(i915)) +		return 0; /* presume black blox */ + +	mutex_lock(&i915->drm.struct_mutex); +	wakeref = intel_runtime_pm_get(i915); + +	if (preempt_client_init(i915, &a)) +		goto err_unlock; +	if (preempt_client_init(i915, &b)) +		goto err_client_a; + +	for_each_engine(engine, i915, id) { +		struct i915_request *rq_a, *rq_b; +		int depth; + +		engine->execlists.preempt_hang.count = 0; + +		rq_a = igt_spinner_create_request(&a.spin, +						  a.ctx, engine, +						  MI_NOOP); +		if (IS_ERR(rq_a)) { +			err = PTR_ERR(rq_a); +			goto err_client_b; +		} + +		i915_request_add(rq_a); +		if (!igt_wait_for_spinner(&a.spin, rq_a)) { +			pr_err("First client failed to start\n"); +			goto err_wedged; +		} + +		for (depth = 0; depth < 8; depth++) { +			rq_b = igt_spinner_create_request(&b.spin, +							  b.ctx, engine, +							  MI_NOOP); +			if (IS_ERR(rq_b)) { +				err = PTR_ERR(rq_b); +				goto err_client_b; +			} +			i915_request_add(rq_b); + +			GEM_BUG_ON(i915_request_completed(rq_a)); +			engine->schedule(rq_a, &attr); +			igt_spinner_end(&a.spin); + +			if (!igt_wait_for_spinner(&b.spin, rq_b)) { +				pr_err("Second client failed to start\n"); +				goto err_wedged; +			} + +			swap(a, b); +			rq_a = rq_b; +		} +		igt_spinner_end(&a.spin); + +		if (engine->execlists.preempt_hang.count) { +			pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n", +			       engine->execlists.preempt_hang.count, +			       depth); +			err = -EINVAL; +			goto err_client_b; +		} + +		if (igt_flush_test(i915, I915_WAIT_LOCKED)) +			goto err_wedged; +	} + +	err = 0; +err_client_b: +	preempt_client_fini(&b); +err_client_a: +	preempt_client_fini(&a); +err_unlock: +	if (igt_flush_test(i915, I915_WAIT_LOCKED)) +		err = -EIO; +	
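The property live_suppress_self_preempt() exercises above can be summarised with a toy model: a preempt-to-idle cycle is only worth injecting when a queued request outranks the one already on the hardware, so boosting the priority of the running request itself must never register as a preemption event. This sketch is not the driver's scheduler; the struct and helpers are invented for illustration.

#include <stdbool.h>
#include <stdio.h>

struct toy_engine {
	int running_prio;   /* priority of the request on the hardware */
	int queue_max_prio; /* highest priority waiting in the queue */
	unsigned int preempt_count;
};

static bool toy_need_preempt(const struct toy_engine *e)
{
	/* only a higher-priority *queued* request justifies preempt-to-idle */
	return e->queue_max_prio > e->running_prio;
}

static void toy_schedule_bump(struct toy_engine *e, int new_prio)
{
	if (new_prio > e->running_prio)
		e->running_prio = new_prio;

	if (toy_need_preempt(e))
		e->preempt_count++; /* would inject a preempt-to-idle cycle */
}

int main(void)
{
	struct toy_engine e = { .running_prio = 0, .queue_max_prio = 0 };

	toy_schedule_bump(&e, 1023); /* bump the running request to max priority */

	printf("preemptions injected: %u (expected 0)\n", e.preempt_count);
	return e.preempt_count ? 1 : 0;
}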
intel_runtime_pm_put(i915, wakeref); +	mutex_unlock(&i915->drm.struct_mutex); +	return err; + +err_wedged: +	igt_spinner_end(&b.spin); +	igt_spinner_end(&a.spin); +	i915_gem_set_wedged(i915); +	err = -EIO; +	goto err_client_b; +} +  static int live_preempt_hang(void *arg)  {  	struct drm_i915_private *i915 = arg; @@ -647,6 +784,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)  		SUBTEST(live_sanitycheck),  		SUBTEST(live_preempt),  		SUBTEST(live_late_preempt), +		SUBTEST(live_suppress_self_preempt),  		SUBTEST(live_preempt_hang),  		SUBTEST(live_preempt_smoke),  	}; diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index a8cac56be835..b15c4f26c593 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -214,7 +214,6 @@ out_put:  static int do_device_reset(struct intel_engine_cs *engine)  { -	set_bit(I915_RESET_HANDOFF, &engine->i915->gpu_error.flags);  	i915_reset(engine->i915, ENGINE_MASK(engine->id), "live_workarounds");  	return 0;  } @@ -394,7 +393,6 @@ static int  live_gpu_reset_gt_engine_workarounds(void *arg)  {  	struct drm_i915_private *i915 = arg; -	struct i915_gpu_error *error = &i915->gpu_error;  	intel_wakeref_t wakeref;  	struct wa_lists lists;  	bool ok; @@ -413,7 +411,6 @@ live_gpu_reset_gt_engine_workarounds(void *arg)  	if (!ok)  		goto out; -	set_bit(I915_RESET_HANDOFF, &error->flags);  	i915_reset(i915, ALL_ENGINES, "live_workarounds");  	ok = verify_gt_engine_wa(i915, &lists, "after reset"); diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c index b26f07b55d86..2bfa72c1654b 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c @@ -76,3 +76,57 @@ void timed_fence_fini(struct timed_fence *tf)  	destroy_timer_on_stack(&tf->timer);  	i915_sw_fence_fini(&tf->fence);  } + +struct heap_fence { +	struct i915_sw_fence fence; +	union { +		struct kref ref; +		struct rcu_head rcu; +	}; +}; + +static int __i915_sw_fence_call +heap_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ +	struct heap_fence *h = container_of(fence, typeof(*h), fence); + +	switch (state) { +	case FENCE_COMPLETE: +		break; + +	case FENCE_FREE: +		heap_fence_put(&h->fence); +	} + +	return NOTIFY_DONE; +} + +struct i915_sw_fence *heap_fence_create(gfp_t gfp) +{ +	struct heap_fence *h; + +	h = kmalloc(sizeof(*h), gfp); +	if (!h) +		return NULL; + +	i915_sw_fence_init(&h->fence, heap_fence_notify); +	refcount_set(&h->ref.refcount, 2); + +	return &h->fence; +} + +static void heap_fence_release(struct kref *ref) +{ +	struct heap_fence *h = container_of(ref, typeof(*h), ref); + +	i915_sw_fence_fini(&h->fence); + +	kfree_rcu(h, rcu); +} + +void heap_fence_put(struct i915_sw_fence *fence) +{ +	struct heap_fence *h = container_of(fence, typeof(*h), fence); + +	kref_put(&h->ref, heap_fence_release); +} diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h index 474aafb92ae1..1f9927e10f3a 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h @@ -39,4 +39,7 @@ struct timed_fence {  void timed_fence_init(struct timed_fence *tf, unsigned long expires);  void timed_fence_fini(struct timed_fence *tf); +struct i915_sw_fence *heap_fence_create(gfp_t gfp); +void heap_fence_put(struct i915_sw_fence *fence); +  #endif /* 
_LIB_SW_FENCE_H_ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 442ec2aeec81..08f0cab02e0f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -30,6 +30,17 @@ struct mock_ring {  	struct i915_timeline timeline;  }; +static void mock_timeline_pin(struct i915_timeline *tl) +{ +	tl->pin_count++; +} + +static void mock_timeline_unpin(struct i915_timeline *tl) +{ +	GEM_BUG_ON(!tl->pin_count); +	tl->pin_count--; +} +  static struct intel_ring *mock_ring(struct intel_engine_cs *engine)  {  	const unsigned long sz = PAGE_SIZE / 2; @@ -39,7 +50,12 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)  	if (!ring)  		return NULL; -	i915_timeline_init(engine->i915, &ring->timeline, engine->name); +	if (i915_timeline_init(engine->i915, +			       &ring->timeline, engine->name, +			       NULL)) { +		kfree(ring); +		return NULL; +	}  	ring->base.size = sz;  	ring->base.effective_size = sz; @@ -70,15 +86,21 @@ static struct mock_request *first_request(struct mock_engine *engine)  static void advance(struct mock_request *request)  {  	list_del_init(&request->link); -	mock_seqno_advance(request->base.engine, request->base.global_seqno); +	intel_engine_write_global_seqno(request->base.engine, +					request->base.global_seqno); +	i915_request_mark_complete(&request->base); +	GEM_BUG_ON(!i915_request_completed(&request->base)); + +	intel_engine_queue_breadcrumbs(request->base.engine);  }  static void hw_delay_complete(struct timer_list *t)  {  	struct mock_engine *engine = from_timer(engine, t, hw_delay);  	struct mock_request *request; +	unsigned long flags; -	spin_lock(&engine->hw_lock); +	spin_lock_irqsave(&engine->hw_lock, flags);  	/* Timer fired, first request is complete */  	request = first_request(engine); @@ -98,11 +120,12 @@ static void hw_delay_complete(struct timer_list *t)  		advance(request);  	} -	spin_unlock(&engine->hw_lock); +	spin_unlock_irqrestore(&engine->hw_lock, flags);  }  static void mock_context_unpin(struct intel_context *ce)  { +	mock_timeline_unpin(ce->ring->timeline);  	i915_gem_context_put(ce->gem_context);  } @@ -124,6 +147,7 @@ mock_context_pin(struct intel_engine_cs *engine,  		 struct i915_gem_context *ctx)  {  	struct intel_context *ce = to_intel_context(ctx, engine); +	int err = -ENOMEM;  	if (ce->pin_count++)  		return ce; @@ -134,13 +158,15 @@ mock_context_pin(struct intel_engine_cs *engine,  			goto err;  	} +	mock_timeline_pin(ce->ring->timeline); +  	ce->ops = &mock_context_ops;  	i915_gem_context_get(ctx);  	return ce;  err:  	ce->pin_count = 0; -	return ERR_PTR(-ENOMEM); +	return ERR_PTR(err);  }  static int mock_request_alloc(struct i915_request *request) @@ -159,9 +185,9 @@ static int mock_emit_flush(struct i915_request *request,  	return 0;  } -static void mock_emit_breadcrumb(struct i915_request *request, -				 u32 *flags) +static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs)  { +	return cs;  }  static void mock_submit_request(struct i915_request *request) @@ -169,11 +195,12 @@ static void mock_submit_request(struct i915_request *request)  	struct mock_request *mock = container_of(request, typeof(*mock), base);  	struct mock_engine *engine =  		container_of(request->engine, typeof(*engine), base); +	unsigned long flags;  	i915_request_submit(request);  	GEM_BUG_ON(!request->global_seqno); -	spin_lock_irq(&engine->hw_lock); +	spin_lock_irqsave(&engine->hw_lock, flags);  	list_add_tail(&mock->link, 
&engine->hw_queue);  	if (mock->link.prev == &engine->hw_queue) {  		if (mock->delay) @@ -181,7 +208,7 @@ static void mock_submit_request(struct i915_request *request)  		else  			advance(mock);  	} -	spin_unlock_irq(&engine->hw_lock); +	spin_unlock_irqrestore(&engine->hw_lock, flags);  }  struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, @@ -200,15 +227,19 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,  	engine->base.i915 = i915;  	snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);  	engine->base.id = id; -	engine->base.status_page.page_addr = (void *)(engine + 1); +	engine->base.status_page.addr = (void *)(engine + 1);  	engine->base.context_pin = mock_context_pin;  	engine->base.request_alloc = mock_request_alloc;  	engine->base.emit_flush = mock_emit_flush; -	engine->base.emit_breadcrumb = mock_emit_breadcrumb; +	engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb;  	engine->base.submit_request = mock_submit_request; -	i915_timeline_init(i915, &engine->base.timeline, engine->base.name); +	if (i915_timeline_init(i915, +			       &engine->base.timeline, +			       engine->base.name, +			       NULL)) +		goto err_free;  	i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE);  	intel_engine_init_breadcrumbs(&engine->base); @@ -226,6 +257,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,  err_breadcrumbs:  	intel_engine_fini_breadcrumbs(&engine->base);  	i915_timeline_fini(&engine->base.timeline); +err_free:  	kfree(engine);  	return NULL;  } @@ -246,7 +278,7 @@ void mock_engine_flush(struct intel_engine_cs *engine)  void mock_engine_reset(struct intel_engine_cs *engine)  { -	intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0); +	intel_engine_write_global_seqno(engine, 0);  }  void mock_engine_free(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h index 133d0c21790d..b9cc3a245f16 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.h +++ b/drivers/gpu/drm/i915/selftests/mock_engine.h @@ -46,10 +46,4 @@ void mock_engine_flush(struct intel_engine_cs *engine);  void mock_engine_reset(struct intel_engine_cs *engine);  void mock_engine_free(struct intel_engine_cs *engine); -static inline void mock_seqno_advance(struct intel_engine_cs *engine, u32 seqno) -{ -	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); -	intel_engine_wakeup(engine); -} -  #endif /* !__MOCK_ENGINE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 5477ad4a7e7d..14ae46fda49f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -58,8 +58,8 @@ static void mock_device_release(struct drm_device *dev)  	i915_gem_contexts_lost(i915);  	mutex_unlock(&i915->drm.struct_mutex); -	cancel_delayed_work_sync(&i915->gt.retire_work); -	cancel_delayed_work_sync(&i915->gt.idle_work); +	drain_delayed_work(&i915->gt.retire_work); +	drain_delayed_work(&i915->gt.idle_work);  	i915_gem_drain_workqueue(i915);  	mutex_lock(&i915->drm.struct_mutex); @@ -68,13 +68,14 @@ static void mock_device_release(struct drm_device *dev)  	i915_gem_contexts_fini(i915);  	mutex_unlock(&i915->drm.struct_mutex); +	i915_timelines_fini(i915); +  	drain_workqueue(i915->wq);  	i915_gem_drain_freed_objects(i915);  	mutex_lock(&i915->drm.struct_mutex);  	mock_fini_ggtt(&i915->ggtt);  	mutex_unlock(&i915->drm.struct_mutex); -	
WARN_ON(!list_empty(&i915->gt.timelines));  	destroy_workqueue(i915->wq); @@ -226,7 +227,8 @@ struct drm_i915_private *mock_gem_device(void)  	if (!i915->priorities)  		goto err_dependencies; -	INIT_LIST_HEAD(&i915->gt.timelines); +	i915_timelines_init(i915); +  	INIT_LIST_HEAD(&i915->gt.active_rings);  	INIT_LIST_HEAD(&i915->gt.closed_vma); @@ -253,6 +255,7 @@ err_context:  	i915_gem_contexts_fini(i915);  err_unlock:  	mutex_unlock(&i915->drm.struct_mutex); +	i915_timelines_fini(i915);  	kmem_cache_destroy(i915->priorities);  err_dependencies:  	kmem_cache_destroy(i915->dependencies); diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c index dcf3b16f5a07..cf39ccd9fc05 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -10,6 +10,7 @@  void mock_timeline_init(struct i915_timeline *timeline, u64 context)  { +	timeline->i915 = NULL;  	timeline->fence_context = context;  	spin_lock_init(&timeline->lock); @@ -24,5 +25,5 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)  void mock_timeline_fini(struct i915_timeline *timeline)  { -	i915_timeline_fini(timeline); +	i915_syncmap_free(&timeline->sync);  } diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index 6affbda6d9cb..d1c662d92ab7 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -96,6 +96,5 @@ enum drm_color_lut_tests {  	DRM_COLOR_LUT_NON_DECREASING = BIT(1),  }; -int drm_color_lut_check(struct drm_property_blob *lut, -			uint32_t tests); +int drm_color_lut_check(const struct drm_property_blob *lut, u32 tests);  #endif diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index df72be7e8b88..d2fad7b0fcf6 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -394,6 +394,9 @@  	INTEL_VGA_DEVICE(0x3E9A, info)  /* SRV GT2 */  /* CFL H */ +#define INTEL_CFL_H_GT1_IDS(info) \ +	INTEL_VGA_DEVICE(0x3E9C, info) +  #define INTEL_CFL_H_GT2_IDS(info) \  	INTEL_VGA_DEVICE(0x3E9B, info), /* Halo GT2 */ \  	INTEL_VGA_DEVICE(0x3E94, info)  /* Halo GT2 */ @@ -426,6 +429,7 @@  #define INTEL_CFL_IDS(info)	   \  	INTEL_CFL_S_GT1_IDS(info), \  	INTEL_CFL_S_GT2_IDS(info), \ +	INTEL_CFL_H_GT1_IDS(info), \  	INTEL_CFL_H_GT2_IDS(info), \  	INTEL_CFL_U_GT2_IDS(info), \  	INTEL_CFL_U_GT3_IDS(info), \  | 

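A note on the heap_fence helpers added to selftests/lib_sw_fence.c above: heap_fence_create() initialises the embedded kref to 2 (refcount_set(&h->ref.refcount, 2)) because the object has two owners, the caller that created the fence and the FENCE_FREE notification handled by heap_fence_notify(); each side drops its reference through heap_fence_put(), and only the final put runs heap_fence_release(), which finalises the fence and frees it with kfree_rcu(). The snippet below is a minimal, userspace-only sketch of that "start at two references" ownership pattern, offered purely for illustration: it is not the i915_sw_fence API, and the names heap_obj, heap_obj_create and heap_obj_put are made up here.

/*
 * Userspace sketch (assumption: plain C, no kernel kref/RCU) of the
 * two-owner refcount pattern used by heap_fence_create() above.
 */
#include <stdio.h>
#include <stdlib.h>

struct heap_obj {
	int refcount;			/* stand-in for struct kref */
};

static struct heap_obj *heap_obj_create(void)
{
	struct heap_obj *h = malloc(sizeof(*h));

	if (!h)
		return NULL;

	/* One reference for the creator, one for the completion callback. */
	h->refcount = 2;
	return h;
}

static void heap_obj_put(struct heap_obj *h)
{
	/* kref_put() analogue: free only when the last owner lets go. */
	if (--h->refcount == 0)
		free(h);
}

int main(void)
{
	struct heap_obj *h = heap_obj_create();

	if (!h)
		return 1;

	heap_obj_put(h);	/* completion side drops its reference */
	heap_obj_put(h);	/* creator drops the final reference */
	printf("object released once both owners dropped their reference\n");
	return 0;
}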