8 files changed, 47 insertions, 63 deletions
diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c
index 7b1e2a549a71..54d96518a131 100644
--- a/drivers/gpu/drm/v3d/v3d_bo.c
+++ b/drivers/gpu/drm/v3d/v3d_bo.c
@@ -227,37 +227,19 @@ v3d_set_mmap_vma_flags(struct vm_area_struct *vma)
 	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
 }
 
-int v3d_gem_fault(struct vm_fault *vmf)
+vm_fault_t v3d_gem_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct drm_gem_object *obj = vma->vm_private_data;
 	struct v3d_bo *bo = to_v3d_bo(obj);
-	unsigned long pfn;
+	pfn_t pfn;
 	pgoff_t pgoff;
-	int ret;
 
 	/* We don't use vmf->pgoff since that has the fake offset: */
 	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
-	pfn = page_to_pfn(bo->pages[pgoff]);
-
-	ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV));
-
-	switch (ret) {
-	case -EAGAIN:
-	case 0:
-	case -ERESTARTSYS:
-	case -EINTR:
-	case -EBUSY:
-		/*
-		 * EBUSY is ok: this just means that another thread
-		 * already did the job.
-		 */
-		return VM_FAULT_NOPAGE;
-	case -ENOMEM:
-		return VM_FAULT_OOM;
-	default:
-		return VM_FAULT_SIGBUS;
-	}
+	pfn = __pfn_to_pfn_t(page_to_pfn(bo->pages[pgoff]), PFN_DEV);
+
+	return vmf_insert_mixed(vma, vmf->address, pfn);
 }
 
 int v3d_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index cdb582043b4f..2a85fa68ffea 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -123,6 +123,7 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct v3d_dev *v3d = to_v3d_dev(dev);
 	struct v3d_file_priv *v3d_priv;
+	struct drm_sched_rq *rq;
 	int i;
 
 	v3d_priv = kzalloc(sizeof(*v3d_priv), GFP_KERNEL);
@@ -132,10 +133,8 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
 	v3d_priv->v3d = v3d;
 
 	for (i = 0; i < V3D_MAX_QUEUES; i++) {
-		drm_sched_entity_init(&v3d->queue[i].sched,
-				      &v3d_priv->sched_entity[i],
-				      &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL],
-				      NULL);
+		rq = &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+		drm_sched_entity_init(&v3d_priv->sched_entity[i], &rq, 1, NULL);
 	}
 
 	file->driver_priv = v3d_priv;
@@ -146,13 +145,11 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
 static void
 v3d_postclose(struct drm_device *dev, struct drm_file *file)
 {
-	struct v3d_dev *v3d = to_v3d_dev(dev);
 	struct v3d_file_priv *v3d_priv = file->driver_priv;
 	enum v3d_queue q;
 
 	for (q = 0; q < V3D_MAX_QUEUES; q++) {
-		drm_sched_entity_fini(&v3d->queue[q].sched,
-				      &v3d_priv->sched_entity[q]);
+		drm_sched_entity_destroy(&v3d_priv->sched_entity[q]);
 	}
 
 	kfree(v3d_priv);
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index a043ac3aae98..e6fed696ad86 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -2,6 +2,7 @@
 /* Copyright (C) 2015-2018 Broadcom */
 
 #include <linux/reservation.h>
+#include <linux/mm_types.h>
 #include <drm/drmP.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_gem.h>
@@ -25,7 +26,6 @@ struct v3d_queue_state {
 
 	u64 fence_context;
 	u64 emit_seqno;
-	u64 finished_seqno;
 };
 
 struct v3d_dev {
@@ -85,6 +85,11 @@ struct v3d_dev {
 	 */
 	struct mutex reset_lock;
 
+	/* Lock taken when creating and pushing the GPU scheduler
+	 * jobs, to keep the sched-fence seqnos in order.
+	 */
+	struct mutex sched_lock;
+
 	struct {
 		u32 num_allocated;
 		u32 pages_allocated;
@@ -179,6 +184,8 @@ struct v3d_job {
 
 	/* GPU virtual addresses of the start/end of the CL job. */
 	u32 start, end;
+
+	u32 timedout_ctca, timedout_ctra;
 };
 
 struct v3d_exec_info {
@@ -248,7 +255,7 @@ int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv);
 int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_priv);
-int v3d_gem_fault(struct vm_fault *vmf);
+vm_fault_t v3d_gem_fault(struct vm_fault *vmf);
 int v3d_mmap(struct file *filp, struct vm_area_struct *vma);
 struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj);
 int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c
index 087d49c8cb12..50bfcf9a8a1a 100644
--- a/drivers/gpu/drm/v3d/v3d_fence.c
+++ b/drivers/gpu/drm/v3d/v3d_fence.c
@@ -35,24 +35,7 @@ static const char *v3d_fence_get_timeline_name(struct dma_fence *fence)
 		return "v3d-render";
 }
 
-static bool v3d_fence_enable_signaling(struct dma_fence *fence)
-{
-	return true;
-}
-
-static bool v3d_fence_signaled(struct dma_fence *fence)
-{
-	struct v3d_fence *f = to_v3d_fence(fence);
-	struct v3d_dev *v3d = to_v3d_dev(f->dev);
-
-	return v3d->queue[f->queue].finished_seqno >= f->seqno;
-}
-
 const struct dma_fence_ops v3d_fence_ops = {
 	.get_driver_name = v3d_fence_get_driver_name,
 	.get_timeline_name = v3d_fence_get_timeline_name,
-	.enable_signaling = v3d_fence_enable_signaling,
-	.signaled = v3d_fence_signaled,
-	.wait = dma_fence_default_wait,
-	.release = dma_fence_free,
 };
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index b513f9189caf..5ce24098a5fd 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -550,9 +550,9 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto fail;
 
+	mutex_lock(&v3d->sched_lock);
 	if (exec->bin.start != exec->bin.end) {
 		ret = drm_sched_job_init(&exec->bin.base,
-					 &v3d->queue[V3D_BIN].sched,
 					 &v3d_priv->sched_entity[V3D_BIN],
 					 v3d_priv);
 		if (ret)
@@ -567,7 +567,6 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	}
 
 	ret = drm_sched_job_init(&exec->render.base,
-				 &v3d->queue[V3D_RENDER].sched,
 				 &v3d_priv->sched_entity[V3D_RENDER],
 				 v3d_priv);
 	if (ret)
@@ -576,6 +575,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	kref_get(&exec->refcount); /* put by scheduler job completion */
 	drm_sched_entity_push_job(&exec->render.base,
 				  &v3d_priv->sched_entity[V3D_RENDER]);
+	mutex_unlock(&v3d->sched_lock);
 
 	v3d_attach_object_fences(exec);
 
@@ -594,6 +594,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	return 0;
 
 fail_unreserve:
+	mutex_unlock(&v3d->sched_lock);
 	v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
 fail:
 	v3d_exec_put(exec);
@@ -615,6 +616,7 @@ v3d_gem_init(struct drm_device *dev)
 	spin_lock_init(&v3d->job_lock);
 	mutex_init(&v3d->bo_lock);
 	mutex_init(&v3d->reset_lock);
+	mutex_init(&v3d->sched_lock);
 
 	/* Note: We don't allocate address 0.  Various bits of HW
 	 * treat 0 as special, such as the occlusion query counters
@@ -650,17 +652,14 @@ void
 v3d_gem_destroy(struct drm_device *dev)
 {
 	struct v3d_dev *v3d = to_v3d_dev(dev);
-	enum v3d_queue q;
 
 	v3d_sched_fini(v3d);
 
 	/* Waiting for exec to finish would need to be done before
 	 * unregistering V3D.
 	 */
-	for (q = 0; q < V3D_MAX_QUEUES; q++) {
-		WARN_ON(v3d->queue[q].emit_seqno !=
-			v3d->queue[q].finished_seqno);
-	}
+	WARN_ON(v3d->bin_job);
+	WARN_ON(v3d->render_job);
 
 	drm_mm_takedown(&v3d->mm);
 
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index 77e1fa046c10..e07514eb11b5 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -87,15 +87,12 @@ v3d_irq(int irq, void *arg)
 	}
 
 	if (intsts & V3D_INT_FLDONE) {
-		v3d->queue[V3D_BIN].finished_seqno++;
 		dma_fence_signal(v3d->bin_job->bin.done_fence);
 		status = IRQ_HANDLED;
 	}
 
 	if (intsts & V3D_INT_FRDONE) {
-		v3d->queue[V3D_RENDER].finished_seqno++;
 		dma_fence_signal(v3d->render_job->render.done_fence);
-
 		status = IRQ_HANDLED;
 	}
 
diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h
index fc13282dfc2f..854046565989 100644
--- a/drivers/gpu/drm/v3d/v3d_regs.h
+++ b/drivers/gpu/drm/v3d/v3d_regs.h
@@ -222,6 +222,7 @@
 #define V3D_CLE_CTNCA(n) (V3D_CLE_CT0CA + 4 * n)
 #define V3D_CLE_CT0RA                                  0x00118
 #define V3D_CLE_CT1RA                                  0x0011c
+#define V3D_CLE_CTNRA(n) (V3D_CLE_CT0RA + 4 * (n)) /* CT<n> return address */
 #define V3D_CLE_CT0LC                                  0x00120
 #define V3D_CLE_CT1LC                                  0x00124
 #define V3D_CLE_CT0PC                                  0x00128
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index b07bece9417d..a5501581d96b 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -14,8 +14,8 @@
  * to the HW only when it has completed the last one, instead of
  * filling up the CT[01]Q FIFOs with jobs.  Similarly, we use
  * v3d_job_dependency() to manage the dependency between bin and
- * render, instead of having the clients submit jobs with using the
- * HW's semaphores to interlock between them.
+ * render, instead of having the clients submit jobs using the HW's
+ * semaphores to interlock between them.
  */
 
 #include <linux/kthread.h>
@@ -114,8 +114,8 @@ static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
 	v3d_invalidate_caches(v3d);
 
 	fence = v3d_fence_create(v3d, q);
-	if (!fence)
-		return fence;
+	if (IS_ERR(fence))
+		return NULL;
 
 	if (job->done_fence)
 		dma_fence_put(job->done_fence);
@@ -153,7 +153,25 @@ v3d_job_timedout(struct drm_sched_job *sched_job)
 	struct v3d_job *job = to_v3d_job(sched_job);
 	struct v3d_exec_info *exec = job->exec;
 	struct v3d_dev *v3d = exec->v3d;
+	enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
 	enum v3d_queue q;
+	u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
+	u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
+
+	/* If the current address or return address has changed, then
+	 * the GPU has probably made progress and we should delay the
+	 * reset.  This could fail if the GPU got in an infinite loop
+	 * in the CL, but that is pretty unlikely outside of an i-g-t
+	 * testcase.
+	 */
+	if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
+		job->timedout_ctca = ctca;
+		job->timedout_ctra = ctra;
+
+		schedule_delayed_work(&job->base.work_tdr,
+				      job->base.sched->timeout);
+		return;
+	}
 
 	mutex_lock(&v3d->reset_lock);