author    Jason Gunthorpe <jgg@mellanox.com>  2018-08-16 23:13:03 +0300
committer Jason Gunthorpe <jgg@mellanox.com>  2018-08-16 23:21:29 +0300
commit    0a3173a5f09bc58a3638ecfd0a80bdbae55e123c (patch)
tree      d6c0bc84863cca54dfbde3b7463e5d49c82af9f1 /drivers/gpu/drm/v3d
parent    92f4e77c85918eab5e5803d7e28ab89a7e6bd3a2 (diff)
parent    5c60a7389d795e001c8748b458eb76e3a5b6008c (diff)
download  linux-0a3173a5f09bc58a3638ecfd0a80bdbae55e123c.tar.xz
Merge branch 'linus/master' into rdma.git for-next
rdma.git merge resolution for the 4.19 merge window

Conflicts:
 drivers/infiniband/core/rdma_core.c
  - Use the rdma code and revise with the new spelling for
    atomic_fetch_add_unless
 drivers/nvme/host/rdma.c
  - Replace max_sge with max_send_sge in new blk code
 drivers/nvme/target/rdma.c
  - Use the blk code and revise to use NULL for ib_post_recv when
    appropriate
  - Replace max_sge with max_recv_sge in new blk code
 net/rds/ib_send.c
  - Use the net code and revise to use NULL for ib_post_recv when
    appropriate

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
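The resolutions above track 4.19 API renames rather than functional changes. As
a minimal, illustrative C sketch of the call-site shapes they refer to (the
example_* helper names are hypothetical; this is not code from the conflicted
files):

    #include <linux/atomic.h>
    #include <linux/kernel.h>
    #include <rdma/ib_verbs.h>

    /* Illustrative sketch only: hypothetical helpers showing the 4.19
     * API shapes named in the resolution notes above.
     */

    /* __atomic_add_unless() was respelled as atomic_fetch_add_unless();
     * it still returns the counter's old value.
     */
    static int example_get_ref_unless_max(atomic_t *refs, int max)
    {
            return atomic_fetch_add_unless(refs, 1, max);
    }

    /* ib_post_recv() now accepts a NULL bad_wr pointer when the caller
     * does not need to know which work request failed.
     */
    static int example_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr)
    {
            return ib_post_recv(qp, wr, NULL);
    }

    /* ib_device_attr.max_sge was split into max_send_sge and max_recv_sge. */
    static int example_sge_limit(struct ib_device *ibdev)
    {
            return min(ibdev->attrs.max_send_sge, ibdev->attrs.max_recv_sge);
    }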
Diffstat (limited to 'drivers/gpu/drm/v3d')
-rw-r--r--  drivers/gpu/drm/v3d/v3d_bo.c     | 28
-rw-r--r--  drivers/gpu/drm/v3d/v3d_drv.c    | 11
-rw-r--r--  drivers/gpu/drm/v3d/v3d_drv.h    | 11
-rw-r--r--  drivers/gpu/drm/v3d/v3d_fence.c  | 17
-rw-r--r--  drivers/gpu/drm/v3d/v3d_gem.c    | 13
-rw-r--r--  drivers/gpu/drm/v3d/v3d_irq.c    |  3
-rw-r--r--  drivers/gpu/drm/v3d/v3d_regs.h   |  1
-rw-r--r--  drivers/gpu/drm/v3d/v3d_sched.c  | 26
8 files changed, 47 insertions(+), 63 deletions(-)
diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c
index 7b1e2a549a71..54d96518a131 100644
--- a/drivers/gpu/drm/v3d/v3d_bo.c
+++ b/drivers/gpu/drm/v3d/v3d_bo.c
@@ -227,37 +227,19 @@ v3d_set_mmap_vma_flags(struct vm_area_struct *vma)
vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
}
-int v3d_gem_fault(struct vm_fault *vmf)
+vm_fault_t v3d_gem_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct drm_gem_object *obj = vma->vm_private_data;
struct v3d_bo *bo = to_v3d_bo(obj);
- unsigned long pfn;
+ pfn_t pfn;
pgoff_t pgoff;
- int ret;
/* We don't use vmf->pgoff since that has the fake offset: */
pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
- pfn = page_to_pfn(bo->pages[pgoff]);
-
- ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV));
-
- switch (ret) {
- case -EAGAIN:
- case 0:
- case -ERESTARTSYS:
- case -EINTR:
- case -EBUSY:
- /*
- * EBUSY is ok: this just means that another thread
- * already did the job.
- */
- return VM_FAULT_NOPAGE;
- case -ENOMEM:
- return VM_FAULT_OOM;
- default:
- return VM_FAULT_SIGBUS;
- }
+ pfn = __pfn_to_pfn_t(page_to_pfn(bo->pages[pgoff]), PFN_DEV);
+
+ return vmf_insert_mixed(vma, vmf->address, pfn);
}
int v3d_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index cdb582043b4f..2a85fa68ffea 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -123,6 +123,7 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
{
struct v3d_dev *v3d = to_v3d_dev(dev);
struct v3d_file_priv *v3d_priv;
+ struct drm_sched_rq *rq;
int i;
v3d_priv = kzalloc(sizeof(*v3d_priv), GFP_KERNEL);
@@ -132,10 +133,8 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
v3d_priv->v3d = v3d;
for (i = 0; i < V3D_MAX_QUEUES; i++) {
- drm_sched_entity_init(&v3d->queue[i].sched,
- &v3d_priv->sched_entity[i],
- &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL],
- NULL);
+ rq = &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+ drm_sched_entity_init(&v3d_priv->sched_entity[i], &rq, 1, NULL);
}
file->driver_priv = v3d_priv;
@@ -146,13 +145,11 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
static void
v3d_postclose(struct drm_device *dev, struct drm_file *file)
{
- struct v3d_dev *v3d = to_v3d_dev(dev);
struct v3d_file_priv *v3d_priv = file->driver_priv;
enum v3d_queue q;
for (q = 0; q < V3D_MAX_QUEUES; q++) {
- drm_sched_entity_fini(&v3d->queue[q].sched,
- &v3d_priv->sched_entity[q]);
+ drm_sched_entity_destroy(&v3d_priv->sched_entity[q]);
}
kfree(v3d_priv);
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index a043ac3aae98..e6fed696ad86 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -2,6 +2,7 @@
/* Copyright (C) 2015-2018 Broadcom */
#include <linux/reservation.h>
+#include <linux/mm_types.h>
#include <drm/drmP.h>
#include <drm/drm_encoder.h>
#include <drm/drm_gem.h>
@@ -25,7 +26,6 @@ struct v3d_queue_state {
u64 fence_context;
u64 emit_seqno;
- u64 finished_seqno;
};
struct v3d_dev {
@@ -85,6 +85,11 @@ struct v3d_dev {
*/
struct mutex reset_lock;
+ /* Lock taken when creating and pushing the GPU scheduler
+ * jobs, to keep the sched-fence seqnos in order.
+ */
+ struct mutex sched_lock;
+
struct {
u32 num_allocated;
u32 pages_allocated;
@@ -179,6 +184,8 @@ struct v3d_job {
/* GPU virtual addresses of the start/end of the CL job. */
u32 start, end;
+
+ u32 timedout_ctca, timedout_ctra;
};
struct v3d_exec_info {
@@ -248,7 +255,7 @@ int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
-int v3d_gem_fault(struct vm_fault *vmf);
+vm_fault_t v3d_gem_fault(struct vm_fault *vmf);
int v3d_mmap(struct file *filp, struct vm_area_struct *vma);
struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj);
int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c
index 087d49c8cb12..50bfcf9a8a1a 100644
--- a/drivers/gpu/drm/v3d/v3d_fence.c
+++ b/drivers/gpu/drm/v3d/v3d_fence.c
@@ -35,24 +35,7 @@ static const char *v3d_fence_get_timeline_name(struct dma_fence *fence)
return "v3d-render";
}
-static bool v3d_fence_enable_signaling(struct dma_fence *fence)
-{
- return true;
-}
-
-static bool v3d_fence_signaled(struct dma_fence *fence)
-{
- struct v3d_fence *f = to_v3d_fence(fence);
- struct v3d_dev *v3d = to_v3d_dev(f->dev);
-
- return v3d->queue[f->queue].finished_seqno >= f->seqno;
-}
-
const struct dma_fence_ops v3d_fence_ops = {
.get_driver_name = v3d_fence_get_driver_name,
.get_timeline_name = v3d_fence_get_timeline_name,
- .enable_signaling = v3d_fence_enable_signaling,
- .signaled = v3d_fence_signaled,
- .wait = dma_fence_default_wait,
- .release = dma_fence_free,
};
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index b513f9189caf..5ce24098a5fd 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -550,9 +550,9 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
if (ret)
goto fail;
+ mutex_lock(&v3d->sched_lock);
if (exec->bin.start != exec->bin.end) {
ret = drm_sched_job_init(&exec->bin.base,
- &v3d->queue[V3D_BIN].sched,
&v3d_priv->sched_entity[V3D_BIN],
v3d_priv);
if (ret)
@@ -567,7 +567,6 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
}
ret = drm_sched_job_init(&exec->render.base,
- &v3d->queue[V3D_RENDER].sched,
&v3d_priv->sched_entity[V3D_RENDER],
v3d_priv);
if (ret)
@@ -576,6 +575,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
kref_get(&exec->refcount); /* put by scheduler job completion */
drm_sched_entity_push_job(&exec->render.base,
&v3d_priv->sched_entity[V3D_RENDER]);
+ mutex_unlock(&v3d->sched_lock);
v3d_attach_object_fences(exec);
@@ -594,6 +594,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
return 0;
fail_unreserve:
+ mutex_unlock(&v3d->sched_lock);
v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
fail:
v3d_exec_put(exec);
@@ -615,6 +616,7 @@ v3d_gem_init(struct drm_device *dev)
spin_lock_init(&v3d->job_lock);
mutex_init(&v3d->bo_lock);
mutex_init(&v3d->reset_lock);
+ mutex_init(&v3d->sched_lock);
/* Note: We don't allocate address 0. Various bits of HW
* treat 0 as special, such as the occlusion query counters
@@ -650,17 +652,14 @@ void
v3d_gem_destroy(struct drm_device *dev)
{
struct v3d_dev *v3d = to_v3d_dev(dev);
- enum v3d_queue q;
v3d_sched_fini(v3d);
/* Waiting for exec to finish would need to be done before
* unregistering V3D.
*/
- for (q = 0; q < V3D_MAX_QUEUES; q++) {
- WARN_ON(v3d->queue[q].emit_seqno !=
- v3d->queue[q].finished_seqno);
- }
+ WARN_ON(v3d->bin_job);
+ WARN_ON(v3d->render_job);
drm_mm_takedown(&v3d->mm);
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index 77e1fa046c10..e07514eb11b5 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -87,15 +87,12 @@ v3d_irq(int irq, void *arg)
}
if (intsts & V3D_INT_FLDONE) {
- v3d->queue[V3D_BIN].finished_seqno++;
dma_fence_signal(v3d->bin_job->bin.done_fence);
status = IRQ_HANDLED;
}
if (intsts & V3D_INT_FRDONE) {
- v3d->queue[V3D_RENDER].finished_seqno++;
dma_fence_signal(v3d->render_job->render.done_fence);
-
status = IRQ_HANDLED;
}
diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h
index fc13282dfc2f..854046565989 100644
--- a/drivers/gpu/drm/v3d/v3d_regs.h
+++ b/drivers/gpu/drm/v3d/v3d_regs.h
@@ -222,6 +222,7 @@
#define V3D_CLE_CTNCA(n) (V3D_CLE_CT0CA + 4 * n)
#define V3D_CLE_CT0RA 0x00118
#define V3D_CLE_CT1RA 0x0011c
+#define V3D_CLE_CTNRA(n) (V3D_CLE_CT0RA + 4 * n)
#define V3D_CLE_CT0LC 0x00120
#define V3D_CLE_CT1LC 0x00124
#define V3D_CLE_CT0PC 0x00128
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index b07bece9417d..a5501581d96b 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -14,8 +14,8 @@
* to the HW only when it has completed the last one, instead of
* filling up the CT[01]Q FIFOs with jobs. Similarly, we use
* v3d_job_dependency() to manage the dependency between bin and
- * render, instead of having the clients submit jobs with using the
- * HW's semaphores to interlock between them.
+ * render, instead of having the clients submit jobs using the HW's
+ * semaphores to interlock between them.
*/
#include <linux/kthread.h>
@@ -114,8 +114,8 @@ static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
v3d_invalidate_caches(v3d);
fence = v3d_fence_create(v3d, q);
- if (!fence)
- return fence;
+ if (IS_ERR(fence))
+ return NULL;
if (job->done_fence)
dma_fence_put(job->done_fence);
@@ -153,7 +153,25 @@ v3d_job_timedout(struct drm_sched_job *sched_job)
struct v3d_job *job = to_v3d_job(sched_job);
struct v3d_exec_info *exec = job->exec;
struct v3d_dev *v3d = exec->v3d;
+ enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
enum v3d_queue q;
+ u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
+ u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
+
+ /* If the current address or return address have changed, then
+ * the GPU has probably made progress and we should delay the
+ * reset. This could fail if the GPU got in an infinite loop
+ * in the CL, but that is pretty unlikely outside of an i-g-t
+ * testcase.
+ */
+ if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
+ job->timedout_ctca = ctca;
+ job->timedout_ctra = ctra;
+
+ schedule_delayed_work(&job->base.work_tdr,
+ job->base.sched->timeout);
+ return;
+ }
mutex_lock(&v3d->reset_lock);