author    Jason Gunthorpe <jgg@mellanox.com>  2018-08-16 23:13:03 +0300
committer Jason Gunthorpe <jgg@mellanox.com>  2018-08-16 23:21:29 +0300
commit    0a3173a5f09bc58a3638ecfd0a80bdbae55e123c (patch)
tree      d6c0bc84863cca54dfbde3b7463e5d49c82af9f1 /drivers/gpu/drm/v3d
parent    92f4e77c85918eab5e5803d7e28ab89a7e6bd3a2 (diff)
parent    5c60a7389d795e001c8748b458eb76e3a5b6008c (diff)
Merge branch 'linus/master' into rdma.git for-next
rdma.git merge resolution for the 4.19 merge window
Conflicts:
drivers/infiniband/core/rdma_core.c
- Use the rdma code and revise with the new spelling for
atomic_fetch_add_unless
drivers/nvme/host/rdma.c
- Replace max_sge with max_send_sge in new blk code
drivers/nvme/target/rdma.c
- Use the blk code and revise to use NULL for ib_post_recv when
appropriate
- Replace max_sge with max_recv_sge in new blk code
net/rds/ib_send.c
- Use the net code and revise to use NULL for ib_post_recv when
appropriate
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
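
The conflict notes above turn on three small interface changes that landed in this window. The fragment below is only an illustrative sketch of the new call forms; the some_counter, qp, recv_wr, and ibdev variables are placeholders for this sketch, not code taken from the conflicting files:

	/* __atomic_add_unless() was renamed to atomic_fetch_add_unless();
	 * it still returns the old value.
	 */
	int old = atomic_fetch_add_unless(&some_counter, 1, 0);

	/* The bad-WR output argument of ib_post_recv() may now be NULL
	 * when the caller does not care which WR failed.
	 */
	ret = ib_post_recv(qp, &recv_wr, NULL);

	/* The single max_sge device attribute was split in two. */
	max_send = ibdev->attrs.max_send_sge;
	max_recv = ibdev->attrs.max_recv_sge;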
Diffstat (limited to 'drivers/gpu/drm/v3d')
-rw-r--r--  drivers/gpu/drm/v3d/v3d_bo.c    | 28
-rw-r--r--  drivers/gpu/drm/v3d/v3d_drv.c   | 11
-rw-r--r--  drivers/gpu/drm/v3d/v3d_drv.h   | 11
-rw-r--r--  drivers/gpu/drm/v3d/v3d_fence.c | 17
-rw-r--r--  drivers/gpu/drm/v3d/v3d_gem.c   | 13
-rw-r--r--  drivers/gpu/drm/v3d/v3d_irq.c   |  3
-rw-r--r--  drivers/gpu/drm/v3d/v3d_regs.h  |  1
-rw-r--r--  drivers/gpu/drm/v3d/v3d_sched.c | 26
8 files changed, 47 insertions, 63 deletions
diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c
index 7b1e2a549a71..54d96518a131 100644
--- a/drivers/gpu/drm/v3d/v3d_bo.c
+++ b/drivers/gpu/drm/v3d/v3d_bo.c
@@ -227,37 +227,19 @@ v3d_set_mmap_vma_flags(struct vm_area_struct *vma)
 	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
 }
 
-int v3d_gem_fault(struct vm_fault *vmf)
+vm_fault_t v3d_gem_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct drm_gem_object *obj = vma->vm_private_data;
 	struct v3d_bo *bo = to_v3d_bo(obj);
-	unsigned long pfn;
+	pfn_t pfn;
 	pgoff_t pgoff;
-	int ret;
 
 	/* We don't use vmf->pgoff since that has the fake offset: */
 	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
-	pfn = page_to_pfn(bo->pages[pgoff]);
-
-	ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV));
-
-	switch (ret) {
-	case -EAGAIN:
-	case 0:
-	case -ERESTARTSYS:
-	case -EINTR:
-	case -EBUSY:
-		/*
-		 * EBUSY is ok: this just means that another thread
-		 * already did the job.
-		 */
-		return VM_FAULT_NOPAGE;
-	case -ENOMEM:
-		return VM_FAULT_OOM;
-	default:
-		return VM_FAULT_SIGBUS;
-	}
+	pfn = __pfn_to_pfn_t(page_to_pfn(bo->pages[pgoff]), PFN_DEV);
+
+	return vmf_insert_mixed(vma, vmf->address, pfn);
 }
 
 int v3d_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index cdb582043b4f..2a85fa68ffea 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -123,6 +123,7 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct v3d_dev *v3d = to_v3d_dev(dev);
 	struct v3d_file_priv *v3d_priv;
+	struct drm_sched_rq *rq;
 	int i;
 
 	v3d_priv = kzalloc(sizeof(*v3d_priv), GFP_KERNEL);
@@ -132,10 +133,8 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
 	v3d_priv->v3d = v3d;
 
 	for (i = 0; i < V3D_MAX_QUEUES; i++) {
-		drm_sched_entity_init(&v3d->queue[i].sched,
-				      &v3d_priv->sched_entity[i],
-				      &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL],
-				      NULL);
+		rq = &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+		drm_sched_entity_init(&v3d_priv->sched_entity[i], &rq, 1, NULL);
 	}
 
 	file->driver_priv = v3d_priv;
@@ -146,13 +145,11 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
 static void
 v3d_postclose(struct drm_device *dev, struct drm_file *file)
 {
-	struct v3d_dev *v3d = to_v3d_dev(dev);
 	struct v3d_file_priv *v3d_priv = file->driver_priv;
 	enum v3d_queue q;
 
 	for (q = 0; q < V3D_MAX_QUEUES; q++) {
-		drm_sched_entity_fini(&v3d->queue[q].sched,
-				      &v3d_priv->sched_entity[q]);
+		drm_sched_entity_destroy(&v3d_priv->sched_entity[q]);
 	}
 
 	kfree(v3d_priv);
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index a043ac3aae98..e6fed696ad86 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -2,6 +2,7 @@
 /* Copyright (C) 2015-2018 Broadcom */
 
 #include <linux/reservation.h>
+#include <linux/mm_types.h>
 #include <drm/drmP.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_gem.h>
@@ -25,7 +26,6 @@ struct v3d_queue_state {
 
 	u64 fence_context;
 	u64 emit_seqno;
-	u64 finished_seqno;
 };
 
 struct v3d_dev {
@@ -85,6 +85,11 @@ struct v3d_dev {
 	 */
 	struct mutex reset_lock;
 
+	/* Lock taken when creating and pushing the GPU scheduler
+	 * jobs, to keep the sched-fence seqnos in order.
+	 */
+	struct mutex sched_lock;
+
 	struct {
 		u32 num_allocated;
 		u32 pages_allocated;
@@ -179,6 +184,8 @@ struct v3d_job {
 
 	/* GPU virtual addresses of the start/end of the CL job. */
 	u32 start, end;
+
+	u32 timedout_ctca, timedout_ctra;
 };
 
 struct v3d_exec_info {
@@ -248,7 +255,7 @@ int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv);
 int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_priv);
-int v3d_gem_fault(struct vm_fault *vmf);
+vm_fault_t v3d_gem_fault(struct vm_fault *vmf);
 int v3d_mmap(struct file *filp, struct vm_area_struct *vma);
 struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj);
 int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c
index 087d49c8cb12..50bfcf9a8a1a 100644
--- a/drivers/gpu/drm/v3d/v3d_fence.c
+++ b/drivers/gpu/drm/v3d/v3d_fence.c
@@ -35,24 +35,7 @@ static const char *v3d_fence_get_timeline_name(struct dma_fence *fence)
 		return "v3d-render";
 }
 
-static bool v3d_fence_enable_signaling(struct dma_fence *fence)
-{
-	return true;
-}
-
-static bool v3d_fence_signaled(struct dma_fence *fence)
-{
-	struct v3d_fence *f = to_v3d_fence(fence);
-	struct v3d_dev *v3d = to_v3d_dev(f->dev);
-
-	return v3d->queue[f->queue].finished_seqno >= f->seqno;
-}
-
 const struct dma_fence_ops v3d_fence_ops = {
 	.get_driver_name = v3d_fence_get_driver_name,
 	.get_timeline_name = v3d_fence_get_timeline_name,
-	.enable_signaling = v3d_fence_enable_signaling,
-	.signaled = v3d_fence_signaled,
-	.wait = dma_fence_default_wait,
-	.release = dma_fence_free,
 };
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index b513f9189caf..5ce24098a5fd 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -550,9 +550,9 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto fail;
 
+	mutex_lock(&v3d->sched_lock);
 	if (exec->bin.start != exec->bin.end) {
 		ret = drm_sched_job_init(&exec->bin.base,
-					 &v3d->queue[V3D_BIN].sched,
 					 &v3d_priv->sched_entity[V3D_BIN],
 					 v3d_priv);
 		if (ret)
@@ -567,7 +567,6 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	}
 
 	ret = drm_sched_job_init(&exec->render.base,
-				 &v3d->queue[V3D_RENDER].sched,
 				 &v3d_priv->sched_entity[V3D_RENDER],
 				 v3d_priv);
 	if (ret)
@@ -576,6 +575,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	kref_get(&exec->refcount); /* put by scheduler job completion */
 	drm_sched_entity_push_job(&exec->render.base,
 				  &v3d_priv->sched_entity[V3D_RENDER]);
+	mutex_unlock(&v3d->sched_lock);
 
 	v3d_attach_object_fences(exec);
 
@@ -594,6 +594,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	return 0;
 
 fail_unreserve:
+	mutex_unlock(&v3d->sched_lock);
 	v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
 fail:
 	v3d_exec_put(exec);
@@ -615,6 +616,7 @@ v3d_gem_init(struct drm_device *dev)
 	spin_lock_init(&v3d->job_lock);
 	mutex_init(&v3d->bo_lock);
 	mutex_init(&v3d->reset_lock);
+	mutex_init(&v3d->sched_lock);
 
 	/* Note: We don't allocate address 0. Various bits of HW
 	 * treat 0 as special, such as the occlusion query counters
@@ -650,17 +652,14 @@ void
 v3d_gem_destroy(struct drm_device *dev)
 {
 	struct v3d_dev *v3d = to_v3d_dev(dev);
-	enum v3d_queue q;
 
 	v3d_sched_fini(v3d);
 
 	/* Waiting for exec to finish would need to be done before
 	 * unregistering V3D.
 	 */
-	for (q = 0; q < V3D_MAX_QUEUES; q++) {
-		WARN_ON(v3d->queue[q].emit_seqno !=
-			v3d->queue[q].finished_seqno);
-	}
+	WARN_ON(v3d->bin_job);
+	WARN_ON(v3d->render_job);
 
 	drm_mm_takedown(&v3d->mm);
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index 77e1fa046c10..e07514eb11b5 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -87,15 +87,12 @@ v3d_irq(int irq, void *arg)
 	}
 
 	if (intsts & V3D_INT_FLDONE) {
-		v3d->queue[V3D_BIN].finished_seqno++;
 		dma_fence_signal(v3d->bin_job->bin.done_fence);
 		status = IRQ_HANDLED;
 	}
 
 	if (intsts & V3D_INT_FRDONE) {
-		v3d->queue[V3D_RENDER].finished_seqno++;
 		dma_fence_signal(v3d->render_job->render.done_fence);
-
 		status = IRQ_HANDLED;
 	}
 
diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h
index fc13282dfc2f..854046565989 100644
--- a/drivers/gpu/drm/v3d/v3d_regs.h
+++ b/drivers/gpu/drm/v3d/v3d_regs.h
@@ -222,6 +222,7 @@
 #define V3D_CLE_CTNCA(n) (V3D_CLE_CT0CA + 4 * n)
 #define V3D_CLE_CT0RA 0x00118
 #define V3D_CLE_CT1RA 0x0011c
+#define V3D_CLE_CTNRA(n) (V3D_CLE_CT0RA + 4 * n)
 #define V3D_CLE_CT0LC 0x00120
 #define V3D_CLE_CT1LC 0x00124
 #define V3D_CLE_CT0PC 0x00128
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index b07bece9417d..a5501581d96b 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -14,8 +14,8 @@
  * to the HW only when it has completed the last one, instead of
  * filling up the CT[01]Q FIFOs with jobs. Similarly, we use
  * v3d_job_dependency() to manage the dependency between bin and
- * render, instead of having the clients submit jobs with using the
- * HW's semaphores to interlock between them.
+ * render, instead of having the clients submit jobs using the HW's
+ * semaphores to interlock between them.
 */
 
 #include <linux/kthread.h>
@@ -114,8 +114,8 @@ static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
 	v3d_invalidate_caches(v3d);
 
 	fence = v3d_fence_create(v3d, q);
-	if (!fence)
-		return fence;
+	if (IS_ERR(fence))
+		return NULL;
 
 	if (job->done_fence)
 		dma_fence_put(job->done_fence);
@@ -153,7 +153,25 @@ v3d_job_timedout(struct drm_sched_job *sched_job)
 	struct v3d_job *job = to_v3d_job(sched_job);
 	struct v3d_exec_info *exec = job->exec;
 	struct v3d_dev *v3d = exec->v3d;
+	enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
 	enum v3d_queue q;
+	u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
+	u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
+
+	/* If the current address or return address have changed, then
+	 * the GPU has probably made progress and we should delay the
+	 * reset. This could fail if the GPU got in an infinite loop
+	 * in the CL, but that is pretty unlikely outside of an i-g-t
+	 * testcase.
+	 */
+	if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
+		job->timedout_ctca = ctca;
+		job->timedout_ctra = ctra;
+
+		schedule_delayed_work(&job->base.work_tdr,
+				      job->base.sched->timeout);
+		return;
+	}
 
 	mutex_lock(&v3d->reset_lock);
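
For the v3d_drv.c hunk above, the underlying change is the new drm_sched_entity_init() signature in this cycle: the entity now takes a list of candidate run queues plus a count instead of a scheduler pointer, and teardown goes through drm_sched_entity_destroy(). Condensed from the hunk itself, using the same variables as in v3d_open():

	/* before */
	drm_sched_entity_init(&v3d->queue[i].sched,
			      &v3d_priv->sched_entity[i],
			      &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL],
			      NULL);

	/* after: a one-entry run-queue list and its length */
	struct drm_sched_rq *rq =
		&v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
	drm_sched_entity_init(&v3d_priv->sched_entity[i], &rq, 1, NULL);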