Diffstat (limited to 'drivers/gpu/drm/msm')
26 files changed, 929 insertions, 567 deletions
diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 52536e7adb95..dc7f3e40850b 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -14,6 +14,7 @@ config DRM_MSM select REGULATOR select DRM_KMS_HELPER select DRM_PANEL + select DRM_SCHED select SHMEM select TMPFS select QCOM_SCM if ARCH_QCOM diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 2c00aa70b708..904535eda0c4 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -90,6 +90,7 @@ msm-y := \ msm_gem_submit.o \ msm_gem_vma.o \ msm_gpu.o \ + msm_gpu_devfreq.o \ msm_iommu.o \ msm_perf.o \ msm_rd.o \ diff --git a/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c b/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c index fc2c905b6c9e..c9d11d57aed6 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c @@ -117,13 +117,13 @@ reset_set(void *data, u64 val) if (a5xx_gpu->pm4_bo) { msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace); - drm_gem_object_put_locked(a5xx_gpu->pm4_bo); + drm_gem_object_put(a5xx_gpu->pm4_bo); a5xx_gpu->pm4_bo = NULL; } if (a5xx_gpu->pfp_bo) { msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace); - drm_gem_object_put_locked(a5xx_gpu->pfp_bo); + drm_gem_object_put(a5xx_gpu->pfp_bo); a5xx_gpu->pfp_bo = NULL; } diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 7a271de9a212..0a93ed1d6b06 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -1415,7 +1415,7 @@ struct a5xx_gpu_state { static int a5xx_crashdumper_init(struct msm_gpu *gpu, struct a5xx_crashdumper *dumper) { - dumper->ptr = msm_gem_kernel_new_locked(gpu->dev, + dumper->ptr = msm_gem_kernel_new(gpu->dev, SZ_1M, MSM_BO_WC, gpu->aspace, &dumper->bo, &dumper->iova); @@ -1517,7 +1517,7 @@ static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu, if (a5xx_crashdumper_run(gpu, &dumper)) { kfree(a5xx_state->hlsqregs); - msm_gem_kernel_put(dumper.bo, gpu->aspace, true); + msm_gem_kernel_put(dumper.bo, gpu->aspace); return; } @@ -1525,7 +1525,7 @@ static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu, memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K), count * sizeof(u32)); - msm_gem_kernel_put(dumper.bo, gpu->aspace, true); + msm_gem_kernel_put(dumper.bo, gpu->aspace); } static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index cdb165236a88..0e63a1429189 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -362,7 +362,7 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu) */ bosize = (cmds_size + (cmds_size / TYPE4_MAX_PAYLOAD) + 1) << 2; - ptr = msm_gem_kernel_new_locked(drm, bosize, + ptr = msm_gem_kernel_new(drm, bosize, MSM_BO_WC | MSM_BO_GPU_READONLY, gpu->aspace, &a5xx_gpu->gpmu_bo, &a5xx_gpu->gpmu_iova); if (IS_ERR(ptr)) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c index ee72510ff8ce..8abc9a2b114a 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -240,7 +240,7 @@ static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu, A5XX_PREEMPT_COUNTER_SIZE, MSM_BO_WC, gpu->aspace, &counters_bo, &counters_iova); if (IS_ERR(counters)) { - msm_gem_kernel_put(bo, gpu->aspace, true); + msm_gem_kernel_put(bo, gpu->aspace); return PTR_ERR(counters); } @@ -272,9 +272,8 @@ void 
a5xx_preempt_fini(struct msm_gpu *gpu) int i; for (i = 0; i < gpu->nr_rings; i++) { - msm_gem_kernel_put(a5xx_gpu->preempt_bo[i], gpu->aspace, true); - msm_gem_kernel_put(a5xx_gpu->preempt_counters_bo[i], - gpu->aspace, true); + msm_gem_kernel_put(a5xx_gpu->preempt_bo[i], gpu->aspace); + msm_gem_kernel_put(a5xx_gpu->preempt_counters_bo[i], gpu->aspace); } } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index b349692219b7..d7cec7f0dde0 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -1129,12 +1129,12 @@ int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu) static void a6xx_gmu_memory_free(struct a6xx_gmu *gmu) { - msm_gem_kernel_put(gmu->hfi.obj, gmu->aspace, false); - msm_gem_kernel_put(gmu->debug.obj, gmu->aspace, false); - msm_gem_kernel_put(gmu->icache.obj, gmu->aspace, false); - msm_gem_kernel_put(gmu->dcache.obj, gmu->aspace, false); - msm_gem_kernel_put(gmu->dummy.obj, gmu->aspace, false); - msm_gem_kernel_put(gmu->log.obj, gmu->aspace, false); + msm_gem_kernel_put(gmu->hfi.obj, gmu->aspace); + msm_gem_kernel_put(gmu->debug.obj, gmu->aspace); + msm_gem_kernel_put(gmu->icache.obj, gmu->aspace); + msm_gem_kernel_put(gmu->dcache.obj, gmu->aspace); + msm_gem_kernel_put(gmu->dummy.obj, gmu->aspace); + msm_gem_kernel_put(gmu->log.obj, gmu->aspace); gmu->aspace->mmu->funcs->detach(gmu->aspace->mmu); msm_gem_address_space_put(gmu->aspace); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 9c5e4618aa0a..989301230f14 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1035,7 +1035,7 @@ static int a6xx_hw_init(struct msm_gpu *gpu) if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) { if (!a6xx_gpu->shadow_bo) { - a6xx_gpu->shadow = msm_gem_kernel_new_locked(gpu->dev, + a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev, sizeof(u32) * gpu->nr_rings, MSM_BO_WC | MSM_BO_MAP_PRIV, gpu->aspace, &a6xx_gpu->shadow_bo, @@ -1477,7 +1477,7 @@ static int a6xx_pm_resume(struct msm_gpu *gpu) if (ret) return ret; - msm_gpu_resume_devfreq(gpu); + msm_devfreq_resume(gpu); a6xx_llc_activate(a6xx_gpu); @@ -1494,7 +1494,7 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) a6xx_llc_deactivate(a6xx_gpu); - devfreq_suspend_device(gpu->devfreq.devfreq); + msm_devfreq_suspend(gpu); ret = a6xx_gmu_stop(a6xx_gpu); if (ret) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index ad4ea0ed5d99..e8f65cd8eca6 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -112,7 +112,7 @@ static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src, static int a6xx_crashdumper_init(struct msm_gpu *gpu, struct a6xx_crashdumper *dumper) { - dumper->ptr = msm_gem_kernel_new_locked(gpu->dev, + dumper->ptr = msm_gem_kernel_new(gpu->dev, SZ_1M, MSM_BO_WC, gpu->aspace, &dumper->bo, &dumper->iova); @@ -961,7 +961,7 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) a6xx_get_clusters(gpu, a6xx_state, dumper); a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper); - msm_gem_kernel_put(dumper->bo, gpu->aspace, true); + msm_gem_kernel_put(dumper->bo, gpu->aspace); } if (snapshot_debugbus) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 9f5a30234b33..748665232d29 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -261,8 +261,8 @@ int 
adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) return ret; } return -EINVAL; - case MSM_PARAM_NR_RINGS: - *value = gpu->nr_rings; + case MSM_PARAM_PRIORITIES: + *value = gpu->nr_rings * NR_SCHED_PRIORITIES; return 0; case MSM_PARAM_PP_PGTABLE: *value = 0; @@ -390,7 +390,7 @@ struct drm_gem_object *adreno_fw_create_bo(struct msm_gpu *gpu, struct drm_gem_object *bo; void *ptr; - ptr = msm_gem_kernel_new_locked(gpu->dev, fw->size - 4, + ptr = msm_gem_kernel_new(gpu->dev, fw->size - 4, MSM_BO_WC | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova); if (IS_ERR(ptr)) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index ed504fe5074f..b466a4af7c3e 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -849,11 +849,11 @@ static void dsi_ctrl_config(struct msm_dsi_host *msm_host, bool enable, if (flags & MIPI_DSI_MODE_VIDEO) { if (flags & MIPI_DSI_MODE_VIDEO_HSE) data |= DSI_VID_CFG0_PULSE_MODE_HSA_HE; - if (flags & MIPI_DSI_MODE_VIDEO_HFP) + if (flags & MIPI_DSI_MODE_VIDEO_NO_HFP) data |= DSI_VID_CFG0_HFP_POWER_STOP; - if (flags & MIPI_DSI_MODE_VIDEO_HBP) + if (flags & MIPI_DSI_MODE_VIDEO_NO_HBP) data |= DSI_VID_CFG0_HBP_POWER_STOP; - if (flags & MIPI_DSI_MODE_VIDEO_HSA) + if (flags & MIPI_DSI_MODE_VIDEO_NO_HSA) data |= DSI_VID_CFG0_HSA_POWER_STOP; /* Always set low power stop mode for BLLP * to let command engine send packets @@ -908,7 +908,7 @@ static void dsi_ctrl_config(struct msm_dsi_host *msm_host, bool enable, DSI_T_CLK_PRE_EXTEND_INC_BY_2_BYTECLK); data = 0; - if (!(flags & MIPI_DSI_MODE_EOT_PACKET)) + if (!(flags & MIPI_DSI_MODE_NO_EOT_PACKET)) data |= DSI_EOT_PACKET_CTRL_TX_EOT_APPEND; dsi_write(msm_host, REG_DSI_EOT_PACKET_CTRL, data); diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 9b8fa2ad0d84..a332b09a5a11 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -14,7 +14,6 @@ #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_ioctl.h> -#include <drm/drm_irq.h> #include <drm/drm_prime.h> #include <drm/drm_of.h> #include <drm/drm_vblank.h> @@ -201,6 +200,71 @@ void msm_rmw(void __iomem *addr, u32 mask, u32 or) msm_writel(val | or, addr); } +static irqreturn_t msm_irq(int irq, void *arg) +{ + struct drm_device *dev = arg; + struct msm_drm_private *priv = dev->dev_private; + struct msm_kms *kms = priv->kms; + + BUG_ON(!kms); + + return kms->funcs->irq(kms); +} + +static void msm_irq_preinstall(struct drm_device *dev) +{ + struct msm_drm_private *priv = dev->dev_private; + struct msm_kms *kms = priv->kms; + + BUG_ON(!kms); + + kms->funcs->irq_preinstall(kms); +} + +static int msm_irq_postinstall(struct drm_device *dev) +{ + struct msm_drm_private *priv = dev->dev_private; + struct msm_kms *kms = priv->kms; + + BUG_ON(!kms); + + if (kms->funcs->irq_postinstall) + return kms->funcs->irq_postinstall(kms); + + return 0; +} + +static int msm_irq_install(struct drm_device *dev, unsigned int irq) +{ + int ret; + + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + msm_irq_preinstall(dev); + + ret = request_irq(irq, msm_irq, 0, dev->driver->name, dev); + if (ret) + return ret; + + ret = msm_irq_postinstall(dev); + if (ret) { + free_irq(irq, dev); + return ret; + } + + return 0; +} + +static void msm_irq_uninstall(struct drm_device *dev) +{ + struct msm_drm_private *priv = dev->dev_private; + struct msm_kms *kms = priv->kms; + + kms->funcs->irq_uninstall(kms); + free_irq(kms->irq, dev); +} + struct msm_vblank_work { struct 
work_struct work; int crtc_id; @@ -265,7 +329,7 @@ static int msm_drm_uninit(struct device *dev) } /* We must cancel and cleanup any pending vblank enable/disable - * work before drm_irq_uninstall() to avoid work re-enabling an + * work before msm_irq_uninstall() to avoid work re-enabling an * irq after uninstall has disabled it. */ @@ -294,7 +358,7 @@ static int msm_drm_uninit(struct device *dev) drm_mode_config_cleanup(ddev); pm_runtime_get_sync(dev); - drm_irq_uninstall(ddev); + msm_irq_uninstall(ddev); pm_runtime_put_sync(dev); if (kms && kms->funcs) @@ -553,7 +617,7 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) if (kms) { pm_runtime_get_sync(dev); - ret = drm_irq_install(ddev, kms->irq); + ret = msm_irq_install(ddev, kms->irq); pm_runtime_put_sync(dev); if (ret < 0) { DRM_DEV_ERROR(dev, "failed to install IRQ handler\n"); @@ -662,43 +726,6 @@ static void msm_postclose(struct drm_device *dev, struct drm_file *file) context_close(ctx); } -static irqreturn_t msm_irq(int irq, void *arg) -{ - struct drm_device *dev = arg; - struct msm_drm_private *priv = dev->dev_private; - struct msm_kms *kms = priv->kms; - BUG_ON(!kms); - return kms->funcs->irq(kms); -} - -static void msm_irq_preinstall(struct drm_device *dev) -{ - struct msm_drm_private *priv = dev->dev_private; - struct msm_kms *kms = priv->kms; - BUG_ON(!kms); - kms->funcs->irq_preinstall(kms); -} - -static int msm_irq_postinstall(struct drm_device *dev) -{ - struct msm_drm_private *priv = dev->dev_private; - struct msm_kms *kms = priv->kms; - BUG_ON(!kms); - - if (kms->funcs->irq_postinstall) - return kms->funcs->irq_postinstall(kms); - - return 0; -} - -static void msm_irq_uninstall(struct drm_device *dev) -{ - struct msm_drm_private *priv = dev->dev_private; - struct msm_kms *kms = priv->kms; - BUG_ON(!kms); - kms->funcs->irq_uninstall(kms); -} - int msm_crtc_enable_vblank(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; @@ -911,6 +938,7 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, ktime_t timeout = to_ktime(args->timeout); struct msm_gpu_submitqueue *queue; struct msm_gpu *gpu = priv->gpu; + struct dma_fence *fence; int ret; if (args->pad) { @@ -925,10 +953,35 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, if (!queue) return -ENOENT; - ret = msm_wait_fence(gpu->rb[queue->prio]->fctx, args->fence, &timeout, - true); + /* + * Map submitqueue scoped "seqno" (which is actually an idr key) + * back to underlying dma-fence + * + * The fence is removed from the fence_idr when the submit is + * retired, so if the fence is not found it means there is nothing + * to wait for + */ + ret = mutex_lock_interruptible(&queue->lock); + if (ret) + return ret; + fence = idr_find(&queue->fence_idr, args->fence); + if (fence) + fence = dma_fence_get_rcu(fence); + mutex_unlock(&queue->lock); + + if (!fence) + return 0; + + ret = dma_fence_wait_timeout(fence, true, timeout_to_jiffies(&timeout)); + if (ret == 0) { + ret = -ETIMEDOUT; + } else if (ret != -ERESTARTSYS) { + ret = 0; + } + dma_fence_put(fence); msm_submitqueue_put(queue); + return ret; } @@ -1025,10 +1078,6 @@ static const struct drm_driver msm_driver = { .open = msm_open, .postclose = msm_postclose, .lastclose = drm_fb_helper_lastclose, - .irq_handler = msm_irq, - .irq_preinstall = msm_irq_preinstall, - .irq_postinstall = msm_irq_postinstall, - .irq_uninstall = msm_irq_uninstall, .dumb_create = msm_gem_dumb_create, .dumb_map_offset = msm_gem_dumb_map_offset, .prime_handle_to_fd = 
drm_gem_prime_handle_to_fd, diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index 227404077e39..67fae60f2fa5 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -169,7 +169,7 @@ struct drm_fb_helper *msm_fbdev_init(struct drm_device *dev) } /* the fw fb could be anywhere in memory */ - ret = drm_aperture_remove_framebuffers(false, "msm"); + ret = drm_aperture_remove_framebuffers(false, dev->driver); if (ret) goto fini; diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c index cd59a5918038..f2cece542c3f 100644 --- a/drivers/gpu/drm/msm/msm_fence.c +++ b/drivers/gpu/drm/msm/msm_fence.c @@ -11,7 +11,8 @@ struct msm_fence_context * -msm_fence_context_alloc(struct drm_device *dev, const char *name) +msm_fence_context_alloc(struct drm_device *dev, volatile uint32_t *fenceptr, + const char *name) { struct msm_fence_context *fctx; @@ -22,7 +23,7 @@ msm_fence_context_alloc(struct drm_device *dev, const char *name) fctx->dev = dev; strncpy(fctx->name, name, sizeof(fctx->name)); fctx->context = dma_fence_context_alloc(1); - init_waitqueue_head(&fctx->event); + fctx->fenceptr = fenceptr; spin_lock_init(&fctx->spinlock); return fctx; @@ -35,46 +36,12 @@ void msm_fence_context_free(struct msm_fence_context *fctx) static inline bool fence_completed(struct msm_fence_context *fctx, uint32_t fence) { - return (int32_t)(fctx->completed_fence - fence) >= 0; -} - -/* legacy path for WAIT_FENCE ioctl: */ -int msm_wait_fence(struct msm_fence_context *fctx, uint32_t fence, - ktime_t *timeout, bool interruptible) -{ - int ret; - - if (fence > fctx->last_fence) { - DRM_ERROR_RATELIMITED("%s: waiting on invalid fence: %u (of %u)\n", - fctx->name, fence, fctx->last_fence); - return -EINVAL; - } - - if (!timeout) { - /* no-wait: */ - ret = fence_completed(fctx, fence) ? 0 : -EBUSY; - } else { - unsigned long remaining_jiffies = timeout_to_jiffies(timeout); - - if (interruptible) - ret = wait_event_interruptible_timeout(fctx->event, - fence_completed(fctx, fence), - remaining_jiffies); - else - ret = wait_event_timeout(fctx->event, - fence_completed(fctx, fence), - remaining_jiffies); - - if (ret == 0) { - DBG("timeout waiting for fence: %u (completed: %u)", - fence, fctx->completed_fence); - ret = -ETIMEDOUT; - } else if (ret != -ERESTARTSYS) { - ret = 0; - } - } - - return ret; + /* + * Note: Check completed_fence first, as fenceptr is in a write-combine + * mapping, so it will be more expensive to read. 
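A note on the fence_completed() rewrite above: with the waitqueue gone, completion is decided purely by comparing 32-bit seqnos, and the new check also peeks at the seqno the GPU writes directly to memory through fenceptr. The comparison relies on the classic wraparound-safe signed-difference idiom. A standalone illustration in plain userspace C (not driver code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Wraparound-safe "completed >= fence" for 32-bit seqnos, as in
 * fence_completed(): the subtraction wraps in unsigned arithmetic and the
 * result is reinterpreted as signed, so the ordering stays correct as long
 * as the two seqnos are within 2^31 of each other.
 */
static bool seqno_passed(uint32_t completed, uint32_t fence)
{
        return (int32_t)(completed - fence) >= 0;
}

int main(void)
{
        printf("%d\n", seqno_passed(100, 99));        /* 1: fence 99 has passed */
        printf("%d\n", seqno_passed(100, 101));       /* 0: fence 101 still pending */
        printf("%d\n", seqno_passed(5, 0xfffffff0u)); /* 1: still correct across wraparound */
        return 0;
}

Checking completed_fence before *fctx->fenceptr also matters for performance, as the comment in the hunk notes: the fenceptr lives in a write-combine mapping, so reads from it are comparatively expensive.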
+ */ + return (int32_t)(fctx->completed_fence - fence) >= 0 || + (int32_t)(*fctx->fenceptr - fence) >= 0; } /* called from workqueue */ @@ -83,8 +50,6 @@ void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence) spin_lock(&fctx->spinlock); fctx->completed_fence = max(fence, fctx->completed_fence); spin_unlock(&fctx->spinlock); - - wake_up_all(&fctx->event); } struct msm_fence { diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h index 2d9af66dcca5..4783db528bcc 100644 --- a/drivers/gpu/drm/msm/msm_fence.h +++ b/drivers/gpu/drm/msm/msm_fence.h @@ -9,23 +9,53 @@ #include "msm_drv.h" +/** + * struct msm_fence_context - fence context for gpu + * + * Each ringbuffer has a single fence context, with the GPU writing an + * incrementing fence seqno at the end of each submit + */ struct msm_fence_context { struct drm_device *dev; + /** name: human readable name for fence timeline */ char name[32]; + /** context: see dma_fence_context_alloc() */ unsigned context; - /* last_fence == completed_fence --> no pending work */ - uint32_t last_fence; /* last assigned fence */ - uint32_t completed_fence; /* last completed fence */ - wait_queue_head_t event; + + /** + * last_fence: + * + * Last assigned fence, incremented each time a fence is created + * on this fence context. If last_fence == completed_fence, + * there is no remaining pending work + */ + uint32_t last_fence; + + /** + * completed_fence: + * + * The last completed fence, updated from the CPU after interrupt + * from GPU + */ + uint32_t completed_fence; + + /** + * fenceptr: + * + * The address that the GPU directly writes with completed fence + * seqno. This can be ahead of completed_fence. We can peek at + * this to see if a fence has already signaled but the CPU hasn't + * gotten around to handling the irq and updating completed_fence + */ + volatile uint32_t *fenceptr; + spinlock_t spinlock; }; struct msm_fence_context * msm_fence_context_alloc(struct drm_device *dev, - const char *name); + volatile uint32_t *fenceptr, const char *name); void msm_fence_context_free(struct msm_fence_context *fctx); -int msm_wait_fence(struct msm_fence_context *fctx, uint32_t fence, - ktime_t *timeout, bool interruptible); void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence); struct dma_fence * msm_fence_alloc(struct msm_fence_context *fctx); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 141178754231..5db07fc287ad 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -131,7 +131,6 @@ static struct page **get_pages(struct drm_gem_object *obj) if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED)) sync_for_device(msm_obj); - GEM_WARN_ON(msm_obj->active_count); update_inactive(msm_obj); } @@ -804,41 +803,6 @@ void msm_gem_vunmap(struct drm_gem_object *obj) msm_obj->vaddr = NULL; } -/* must be called before _move_to_active().. 
*/ -int msm_gem_sync_object(struct drm_gem_object *obj, - struct msm_fence_context *fctx, bool exclusive) -{ - struct dma_resv_list *fobj; - struct dma_fence *fence; - int i, ret; - - fobj = dma_resv_shared_list(obj->resv); - if (!fobj || (fobj->shared_count == 0)) { - fence = dma_resv_excl_fence(obj->resv); - /* don't need to wait on our own fences, since ring is fifo */ - if (fence && (fence->context != fctx->context)) { - ret = dma_fence_wait(fence, true); - if (ret) - return ret; - } - } - - if (!exclusive || !fobj) - return 0; - - for (i = 0; i < fobj->shared_count; i++) { - fence = rcu_dereference_protected(fobj->shared[i], - dma_resv_held(obj->resv)); - if (fence->context != fctx->context) { - ret = dma_fence_wait(fence, true); - if (ret) - return ret; - } - } - - return 0; -} - void msm_gem_active_get(struct drm_gem_object *obj, struct msm_gpu *gpu) { struct msm_gem_object *msm_obj = to_msm_bo(obj); @@ -848,7 +812,6 @@ void msm_gem_active_get(struct drm_gem_object *obj, struct msm_gpu *gpu) GEM_WARN_ON(!msm_gem_is_locked(obj)); GEM_WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED); GEM_WARN_ON(msm_obj->dontneed); - GEM_WARN_ON(!msm_obj->sgt); if (msm_obj->active_count++ == 0) { mutex_lock(&priv->mm_lock); @@ -1062,7 +1025,7 @@ void msm_gem_describe_objects(struct list_head *list, struct seq_file *m) } #endif -/* don't call directly! Use drm_gem_object_put_locked() and friends */ +/* don't call directly! Use drm_gem_object_put() */ void msm_gem_free_object(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); @@ -1169,7 +1132,7 @@ static int msm_gem_new_impl(struct drm_device *dev, case MSM_BO_CACHED_COHERENT: if (priv->has_cached_coherent) break; - /* fallthrough */ + fallthrough; default: DRM_DEV_ERROR(dev->dev, "invalid cache flag: %x\n", (flags & MSM_BO_CACHE_MASK)); @@ -1183,7 +1146,6 @@ static int msm_gem_new_impl(struct drm_device *dev, msm_obj->flags = flags; msm_obj->madv = MSM_MADV_WILLNEED; - INIT_LIST_HEAD(&msm_obj->submit_entry); INIT_LIST_HEAD(&msm_obj->vmas); *obj = &msm_obj->base; @@ -1192,8 +1154,7 @@ static int msm_gem_new_impl(struct drm_device *dev, return 0; } -static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, - uint32_t size, uint32_t flags, bool struct_mutex_locked) +struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32_t flags) { struct msm_drm_private *priv = dev->dev_private; struct msm_gem_object *msm_obj; @@ -1280,26 +1241,10 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, return obj; fail: - if (struct_mutex_locked) { - drm_gem_object_put_locked(obj); - } else { - drm_gem_object_put(obj); - } + drm_gem_object_put(obj); return ERR_PTR(ret); } -struct drm_gem_object *msm_gem_new_locked(struct drm_device *dev, - uint32_t size, uint32_t flags) -{ - return _msm_gem_new(dev, size, flags, true); -} - -struct drm_gem_object *msm_gem_new(struct drm_device *dev, - uint32_t size, uint32_t flags) -{ - return _msm_gem_new(dev, size, flags, false); -} - struct drm_gem_object *msm_gem_import(struct drm_device *dev, struct dma_buf *dmabuf, struct sg_table *sgt) { @@ -1358,12 +1303,12 @@ fail: return ERR_PTR(ret); } -static void *_msm_gem_kernel_new(struct drm_device *dev, uint32_t size, +void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size, uint32_t flags, struct msm_gem_address_space *aspace, - struct drm_gem_object **bo, uint64_t *iova, bool locked) + struct drm_gem_object **bo, uint64_t *iova) { void *vaddr; - struct drm_gem_object *obj = _msm_gem_new(dev, size, flags, 
locked); + struct drm_gem_object *obj = msm_gem_new(dev, size, flags); int ret; if (IS_ERR(obj)) @@ -1387,42 +1332,21 @@ static void *_msm_gem_kernel_new(struct drm_device *dev, uint32_t size, return vaddr; err: - if (locked) - drm_gem_object_put_locked(obj); - else - drm_gem_object_put(obj); + drm_gem_object_put(obj); return ERR_PTR(ret); } -void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size, - uint32_t flags, struct msm_gem_address_space *aspace, - struct drm_gem_object **bo, uint64_t *iova) -{ - return _msm_gem_kernel_new(dev, size, flags, aspace, bo, iova, false); -} - -void *msm_gem_kernel_new_locked(struct drm_device *dev, uint32_t size, - uint32_t flags, struct msm_gem_address_space *aspace, - struct drm_gem_object **bo, uint64_t *iova) -{ - return _msm_gem_kernel_new(dev, size, flags, aspace, bo, iova, true); -} - void msm_gem_kernel_put(struct drm_gem_object *bo, - struct msm_gem_address_space *aspace, bool locked) + struct msm_gem_address_space *aspace) { if (IS_ERR_OR_NULL(bo)) return; msm_gem_put_vaddr(bo); msm_gem_unpin_iova(bo, aspace); - - if (locked) - drm_gem_object_put_locked(bo); - else - drm_gem_object_put(bo); + drm_gem_object_put(bo); } void msm_gem_object_set_name(struct drm_gem_object *bo, const char *fmt, ...) diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 405f8411e395..f9e3ffb2309a 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -9,6 +9,7 @@ #include <linux/kref.h> #include <linux/dma-resv.h> +#include "drm/gpu_scheduler.h" #include "msm_drv.h" /* Make all GEM related WARN_ON()s ratelimited.. when things go wrong they @@ -87,13 +88,6 @@ struct msm_gem_object { */ struct list_head mm_list; - /* Transiently in the process of submit ioctl, objects associated - * with the submit are on submit->bo_list.. this only lasts for - * the duration of the ioctl, so one bo can never be on multiple - * submit lists. 
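The msm_gem.c hunks above retire the *_locked allocation variants now that BO creation no longer depends on struct_mutex. A minimal sketch of what a caller looks like after this change; the msm_gem_kernel_new()/msm_gem_kernel_put() signatures are taken from this diff, while the wrapper functions and names are hypothetical:

/* Hypothetical caller: allocate, map and pin a small kernel-owned BO */
static int example_alloc_scratch(struct msm_gpu *gpu,
                struct drm_gem_object **bo, uint64_t *iova)
{
        void *vaddr;

        /* one path for everyone - no more _new_locked() variant or bool flag */
        vaddr = msm_gem_kernel_new(gpu->dev, SZ_4K, MSM_BO_WC,
                        gpu->aspace, bo, iova);
        if (IS_ERR(vaddr))
                return PTR_ERR(vaddr);

        memset(vaddr, 0, SZ_4K);
        return 0;
}

static void example_free_scratch(struct msm_gpu *gpu, struct drm_gem_object *bo)
{
        /* unmaps the vaddr, unpins the iova and drops the ref in one call */
        msm_gem_kernel_put(bo, gpu->aspace);
}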
- */ - struct list_head submit_entry; - struct page **pages; struct sg_table *sgt; void *vaddr; @@ -143,8 +137,6 @@ void *msm_gem_get_vaddr_active(struct drm_gem_object *obj); void msm_gem_put_vaddr_locked(struct drm_gem_object *obj); void msm_gem_put_vaddr(struct drm_gem_object *obj); int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv); -int msm_gem_sync_object(struct drm_gem_object *obj, - struct msm_fence_context *fctx, bool exclusive); void msm_gem_active_get(struct drm_gem_object *obj, struct msm_gpu *gpu); void msm_gem_active_put(struct drm_gem_object *obj); int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout); @@ -154,16 +146,11 @@ int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, uint32_t size, uint32_t flags, uint32_t *handle, char *name); struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32_t flags); -struct drm_gem_object *msm_gem_new_locked(struct drm_device *dev, - uint32_t size, uint32_t flags); void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size, uint32_t flags, struct msm_gem_address_space *aspace, struct drm_gem_object **bo, uint64_t *iova); -void *msm_gem_kernel_new_locked(struct drm_device *dev, uint32_t size, - uint32_t flags, struct msm_gem_address_space *aspace, - struct drm_gem_object **bo, uint64_t *iova); void msm_gem_kernel_put(struct drm_gem_object *bo, - struct msm_gem_address_space *aspace, bool locked); + struct msm_gem_address_space *aspace); struct drm_gem_object *msm_gem_import(struct drm_device *dev, struct dma_buf *dmabuf, struct sg_table *sgt); __printf(2, 3) @@ -313,19 +300,34 @@ void msm_gem_vunmap(struct drm_gem_object *obj); /* Created per submit-ioctl, to track bo's and cmdstream bufs, etc, * associated with the cmdstream submission for synchronization (and - * make it easier to unwind when things go wrong, etc). This only - * lasts for the duration of the submit-ioctl. + * make it easier to unwind when things go wrong, etc). */ struct msm_gem_submit { + struct drm_sched_job base; struct kref ref; struct drm_device *dev; struct msm_gpu *gpu; struct msm_gem_address_space *aspace; struct list_head node; /* node in ring submit list */ - struct list_head bo_list; struct ww_acquire_ctx ticket; uint32_t seqno; /* Sequence number of the submit on the ring */ - struct dma_fence *fence; + + /* Array of struct dma_fence * to block on before submitting this job. + */ + struct xarray deps; + unsigned long last_dep; + + /* Hw fence, which is created when the scheduler executes the job, and + * is signaled when the hw finishes (via seqno write from cmdstream) + */ + struct dma_fence *hw_fence; + + /* Userspace visible fence, which is signaled by the scheduler after + * the hw_fence is signaled. 
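The struct changes above give each submit two distinct fences. How they relate, condensed: the user_fence assignment below appears verbatim in the msm_gem_submit.c hunk later in this diff, while the hw_fence allocation happens in the scheduler's run_job callback in msm_ringbuffer.c, which is outside this section, so treat that half as a sketch of the design rather than a quoted hunk (kernel-style code, assumes the driver's headers):

/* Sketch: where each fence of a submit comes from (error handling omitted) */
static void example_fence_topology(struct msm_gem_submit *submit)
{
        /* Userspace-visible fence: the scheduler's "finished" fence, which
         * signals only after the hw fence has signaled (quoted from the
         * submit-ioctl hunk in this patch):
         */
        submit->user_fence = dma_fence_get(&submit->base.s_fence->finished);

        /* Hw fence: allocated on the ring's fence context when the job
         * actually runs, and signaled from update_fences() once the GPU
         * writes the seqno back (this part lives in msm_ringbuffer.c):
         */
        submit->hw_fence = msm_fence_alloc(submit->ring->fctx);
}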
+ */ + struct dma_fence *user_fence; + + int fence_id; /* key into queue->fence_idr */ struct msm_gpu_submitqueue *queue; struct pid *pid; /* submitting process */ bool fault_dumped; /* Limit devcoredump dumping to one per submit */ @@ -355,6 +357,11 @@ struct msm_gem_submit { } bos[]; }; +static inline struct msm_gem_submit *to_msm_submit(struct drm_sched_job *job) +{ + return container_of(job, struct msm_gem_submit, base); +} + void __msm_gem_submit_destroy(struct kref *kref); static inline void msm_gem_submit_get(struct msm_gem_submit *submit) @@ -367,6 +374,8 @@ static inline void msm_gem_submit_put(struct msm_gem_submit *submit) kref_put(&submit->ref, __msm_gem_submit_destroy); } +void msm_submit_retire(struct msm_gem_submit *submit); + /* helper to determine of a buffer in submit should be dumped, used for both * devcoredump and debugfs cmdstream dumping: */ diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 44f84bfd0c0e..fdc5367aecaa 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -23,8 +23,9 @@ /* make sure these don't conflict w/ MSM_SUBMIT_BO_x */ #define BO_VALID 0x8000 /* is current addr in cmdstream correct/valid? */ -#define BO_LOCKED 0x4000 -#define BO_PINNED 0x2000 +#define BO_LOCKED 0x4000 /* obj lock is held */ +#define BO_ACTIVE 0x2000 /* active refcnt is held */ +#define BO_PINNED 0x1000 /* obj is pinned and on active list */ static struct msm_gem_submit *submit_create(struct drm_device *dev, struct msm_gpu *gpu, @@ -32,32 +33,37 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, uint32_t nr_cmds) { struct msm_gem_submit *submit; - uint64_t sz = struct_size(submit, bos, nr_bos) + - ((u64)nr_cmds * sizeof(submit->cmd[0])); + uint64_t sz; + int ret; + + sz = struct_size(submit, bos, nr_bos) + + ((u64)nr_cmds * sizeof(submit->cmd[0])); if (sz > SIZE_MAX) - return NULL; + return ERR_PTR(-ENOMEM); - submit = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + submit = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); if (!submit) - return NULL; + return ERR_PTR(-ENOMEM); + + ret = drm_sched_job_init(&submit->base, &queue->entity, queue); + if (ret) { + kfree(submit); + return ERR_PTR(ret); + } + + xa_init_flags(&submit->deps, XA_FLAGS_ALLOC); kref_init(&submit->ref); submit->dev = dev; submit->aspace = queue->ctx->aspace; submit->gpu = gpu; - submit->fence = NULL; submit->cmd = (void *)&submit->bos[nr_bos]; submit->queue = queue; - submit->ring = gpu->rb[queue->prio]; + submit->ring = gpu->rb[queue->ring_nr]; submit->fault_dumped = false; - /* initially, until copy_from_user() and bo lookup succeeds: */ - submit->nr_bos = 0; - submit->nr_cmds = 0; - INIT_LIST_HEAD(&submit->node); - INIT_LIST_HEAD(&submit->bo_list); return submit; } @@ -66,9 +72,25 @@ void __msm_gem_submit_destroy(struct kref *kref) { struct msm_gem_submit *submit = container_of(kref, struct msm_gem_submit, ref); + unsigned long index; + struct dma_fence *fence; unsigned i; - dma_fence_put(submit->fence); + if (submit->fence_id) { + mutex_lock(&submit->queue->lock); + idr_remove(&submit->queue->fence_idr, submit->fence_id); + mutex_unlock(&submit->queue->lock); + } + + xa_for_each (&submit->deps, index, fence) { + dma_fence_put(fence); + } + + xa_destroy(&submit->deps); + + dma_fence_put(submit->user_fence); + dma_fence_put(submit->hw_fence); + put_pid(submit->pid); msm_submitqueue_put(submit->queue); @@ -121,7 +143,6 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, for 
(i = 0; i < args->nr_bos; i++) { struct drm_gem_object *obj; - struct msm_gem_object *msm_obj; /* normally use drm_gem_object_lookup(), but for bulk lookup * all under single table_lock just hit object_idr directly: @@ -133,20 +154,9 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, goto out_unlock; } - msm_obj = to_msm_bo(obj); - - if (!list_empty(&msm_obj->submit_entry)) { - DRM_ERROR("handle %u at index %u already on submit list\n", - submit->bos[i].handle, i); - ret = -EINVAL; - goto out_unlock; - } - drm_gem_object_get(obj); - submit->bos[i].obj = msm_obj; - - list_add_tail(&msm_obj->submit_entry, &submit->bo_list); + submit->bos[i].obj = to_msm_bo(obj); } out_unlock: @@ -220,21 +230,34 @@ out: return ret; } -static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, - int i, bool backoff) +/* Unwind bo state, according to cleanup_flags. In the success case, only + * the lock is dropped at the end of the submit (and active/pin ref is dropped + * later when the submit is retired). + */ +static void submit_cleanup_bo(struct msm_gem_submit *submit, int i, + unsigned cleanup_flags) { - struct msm_gem_object *msm_obj = submit->bos[i].obj; + struct drm_gem_object *obj = &submit->bos[i].obj->base; + unsigned flags = submit->bos[i].flags & cleanup_flags; - if (submit->bos[i].flags & BO_PINNED) - msm_gem_unpin_iova_locked(&msm_obj->base, submit->aspace); + if (flags & BO_PINNED) + msm_gem_unpin_iova_locked(obj, submit->aspace); - if (submit->bos[i].flags & BO_LOCKED) - dma_resv_unlock(msm_obj->base.resv); + if (flags & BO_ACTIVE) + msm_gem_active_put(obj); - if (backoff && !(submit->bos[i].flags & BO_VALID)) - submit->bos[i].iova = 0; + if (flags & BO_LOCKED) + dma_resv_unlock(obj->resv); - submit->bos[i].flags &= ~(BO_LOCKED | BO_PINNED); + submit->bos[i].flags &= ~cleanup_flags; +} + +static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i) +{ + submit_cleanup_bo(submit, i, BO_PINNED | BO_ACTIVE | BO_LOCKED); + + if (!(submit->bos[i].flags & BO_VALID)) + submit->bos[i].iova = 0; } /* This is where we make sure all the bo's are reserved and pin'd: */ @@ -265,11 +288,17 @@ retry: return 0; fail: + if (ret == -EALREADY) { + DRM_ERROR("handle %u at index %u already on submit list\n", + submit->bos[i].handle, i); + ret = -EINVAL; + } + for (; i >= 0; i--) - submit_unlock_unpin_bo(submit, i, true); + submit_unlock_unpin_bo(submit, i); if (slow_locked > 0) - submit_unlock_unpin_bo(submit, slow_locked, true); + submit_unlock_unpin_bo(submit, slow_locked); if (ret == -EDEADLK) { struct msm_gem_object *msm_obj = submit->bos[contended].obj; @@ -281,6 +310,12 @@ fail: slow_locked = contended; goto retry; } + + /* Not expecting -EALREADY here, if the bo was already + * locked, we should have gotten -EALREADY already from + * the dma_resv_lock_interruptable() call. + */ + WARN_ON_ONCE(ret == -EALREADY); } return ret; @@ -291,7 +326,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) int i, ret = 0; for (i = 0; i < submit->nr_bos; i++) { - struct msm_gem_object *msm_obj = submit->bos[i].obj; + struct drm_gem_object *obj = &submit->bos[i].obj->base; bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE; if (!write) { @@ -300,8 +335,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) * strange place to call it. OTOH this is a * convenient can-fail point to hook it in. 
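The submit_cleanup_bo() rework above replaces the bool-parameter unwind helper with a cleanup_flags mask, so one function can serve the error path, the normal end-of-ioctl path, and retire. A small runnable userspace model of the idiom (the BO_* values are from this diff; everything else is illustrative):

#include <stdio.h>

#define BO_LOCKED 0x4000
#define BO_ACTIVE 0x2000
#define BO_PINNED 0x1000

/* Model of submit_cleanup_bo(): only state bits that are both currently
 * held and requested by the caller are dropped, and they are cleared from
 * the tracked state so a later cleanup pass cannot drop them twice.
 */
static unsigned cleanup_bo(unsigned held, unsigned cleanup_flags)
{
        unsigned flags = held & cleanup_flags;

        if (flags & BO_PINNED)
                printf("unpin iova\n");
        if (flags & BO_ACTIVE)
                printf("drop active ref\n");
        if (flags & BO_LOCKED)
                printf("unlock resv\n");

        return held & ~cleanup_flags;
}

int main(void)
{
        unsigned state = BO_LOCKED | BO_ACTIVE | BO_PINNED;

        /* success path at the end of the ioctl: drop only the lock... */
        state = cleanup_bo(state, BO_LOCKED);
        /* ...pin and active ref are dropped later, at retire time */
        state = cleanup_bo(state, BO_PINNED | BO_ACTIVE);
        return 0;
}

Because dropped bits are cleared from the tracked state, the error path and the retire path can both run the helper without double-unpinning.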
*/ - ret = dma_resv_reserve_shared(msm_obj->base.resv, - 1); + ret = dma_resv_reserve_shared(obj->resv, 1); if (ret) return ret; } @@ -309,7 +343,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) if (no_implicit) continue; - ret = msm_gem_sync_object(&msm_obj->base, submit->ring->fctx, + ret = drm_gem_fence_array_add_implicit(&submit->deps, obj, write); if (ret) break; @@ -324,12 +358,24 @@ static int submit_pin_objects(struct msm_gem_submit *submit) submit->valid = true; + /* + * Increment active_count first, so if under memory pressure, we + * don't inadvertently evict a bo needed by the submit in order + * to pin an earlier bo in the same submit. + */ for (i = 0; i < submit->nr_bos; i++) { - struct msm_gem_object *msm_obj = submit->bos[i].obj; + struct drm_gem_object *obj = &submit->bos[i].obj->base; + + msm_gem_active_get(obj, submit->gpu); + submit->bos[i].flags |= BO_ACTIVE; + } + + for (i = 0; i < submit->nr_bos; i++) { + struct drm_gem_object *obj = &submit->bos[i].obj->base; uint64_t iova; /* if locking succeeded, pin bo: */ - ret = msm_gem_get_and_pin_iova_locked(&msm_obj->base, + ret = msm_gem_get_and_pin_iova_locked(obj, submit->aspace, &iova); if (ret) @@ -350,6 +396,20 @@ static int submit_pin_objects(struct msm_gem_submit *submit) return ret; } +static void submit_attach_object_fences(struct msm_gem_submit *submit) +{ + int i; + + for (i = 0; i < submit->nr_bos; i++) { + struct drm_gem_object *obj = &submit->bos[i].obj->base; + + if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) + dma_resv_add_excl_fence(obj->resv, submit->user_fence); + else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ) + dma_resv_add_shared_fence(obj->resv, submit->user_fence); + } +} + static int submit_bo(struct msm_gem_submit *submit, uint32_t idx, struct msm_gem_object **obj, uint64_t *iova, bool *valid) { @@ -444,18 +504,39 @@ out: return ret; } -static void submit_cleanup(struct msm_gem_submit *submit) +/* Cleanup submit at end of ioctl. In the error case, this also drops + * references, unpins, and drops active refcnt. In the non-error case, + * this is done when the submit is retired. 
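With the hunk above, implicit fences are no longer waited on inline: drm_gem_fence_array_add_implicit() stashes them in submit->deps, and the scheduler drains that xarray before running the job. The draining side lives in msm_ringbuffer.c, which this section does not show; the callback below is a reconstruction of its expected shape (treat the body as an assumption, not a quoted hunk). It is also where the struct's otherwise-unexplained last_dep cursor comes in:

/* Sketch of the drm_sched dependency callback consuming submit->deps:
 * each call hands the scheduler one more fence to wait on; returning NULL
 * means all dependencies are satisfied and run_job may proceed.
 */
static struct dma_fence *example_job_dependency(struct drm_sched_job *job,
                struct drm_sched_entity *s_entity)
{
        struct msm_gem_submit *submit = to_msm_submit(job);

        if (!xa_empty(&submit->deps))
                return xa_erase(&submit->deps, submit->last_dep++);

        return NULL;
}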
+ */ +static void submit_cleanup(struct msm_gem_submit *submit, bool error) { + unsigned cleanup_flags = BO_LOCKED; unsigned i; + if (error) + cleanup_flags |= BO_PINNED | BO_ACTIVE; + for (i = 0; i < submit->nr_bos; i++) { struct msm_gem_object *msm_obj = submit->bos[i].obj; - submit_unlock_unpin_bo(submit, i, false); - list_del_init(&msm_obj->submit_entry); - drm_gem_object_put_locked(&msm_obj->base); + submit_cleanup_bo(submit, i, cleanup_flags); + if (error) + drm_gem_object_put(&msm_obj->base); } } +void msm_submit_retire(struct msm_gem_submit *submit) +{ + int i; + + for (i = 0; i < submit->nr_bos; i++) { + struct drm_gem_object *obj = &submit->bos[i].obj->base; + + msm_gem_lock(obj); + submit_cleanup_bo(submit, i, BO_PINNED | BO_ACTIVE); + msm_gem_unlock(obj); + drm_gem_object_put(obj); + } +} struct msm_submit_post_dep { struct drm_syncobj *syncobj; @@ -463,12 +544,12 @@ struct msm_submit_post_dep { struct dma_fence_chain *chain; }; -static struct drm_syncobj **msm_wait_deps(struct drm_device *dev, - struct drm_file *file, - uint64_t in_syncobjs_addr, - uint32_t nr_in_syncobjs, - size_t syncobj_stride, - struct msm_ringbuffer *ring) +static struct drm_syncobj **msm_parse_deps(struct msm_gem_submit *submit, + struct drm_file *file, + uint64_t in_syncobjs_addr, + uint32_t nr_in_syncobjs, + size_t syncobj_stride, + struct msm_ringbuffer *ring) { struct drm_syncobj **syncobjs = NULL; struct drm_msm_gem_submit_syncobj syncobj_desc = {0}; @@ -492,7 +573,7 @@ static struct drm_syncobj **msm_wait_deps(struct drm_device *dev, } if (syncobj_desc.point && - !drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) { + !drm_core_check_feature(submit->dev, DRIVER_SYNCOBJ_TIMELINE)) { ret = -EOPNOTSUPP; break; } @@ -507,10 +588,7 @@ static struct drm_syncobj **msm_wait_deps(struct drm_device *dev, if (ret) break; - if (!dma_fence_match_context(fence, ring->fctx->context)) - ret = dma_fence_wait(fence, true); - - dma_fence_put(fence); + ret = drm_gem_fence_array_add(&submit->deps, fence); if (ret) break; @@ -587,9 +665,7 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev, break; } - post_deps[i].chain = - kmalloc(sizeof(*post_deps[i].chain), - GFP_KERNEL); + post_deps[i].chain = dma_fence_chain_alloc(); if (!post_deps[i].chain) { ret = -ENOMEM; break; @@ -606,7 +682,7 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev, if (ret) { for (j = 0; j <= i; ++j) { - kfree(post_deps[j].chain); + dma_fence_chain_free(post_deps[j].chain); if (post_deps[j].syncobj) drm_syncobj_put(post_deps[j].syncobj); } @@ -643,9 +719,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_drm_private *priv = dev->dev_private; struct drm_msm_gem_submit *args = data; struct msm_file_private *ctx = file->driver_priv; - struct msm_gem_submit *submit; + struct msm_gem_submit *submit = NULL; struct msm_gpu *gpu = priv->gpu; - struct sync_file *sync_file = NULL; struct msm_gpu_submitqueue *queue; struct msm_ringbuffer *ring; struct msm_submit_post_dep *post_deps = NULL; @@ -655,6 +730,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, bool has_ww_ticket = false; unsigned i; int ret, submitid; + if (!gpu) return -ENXIO; @@ -683,38 +759,59 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, /* Get a unique identifier for the submission for logging purposes */ submitid = atomic_inc_return(&ident) - 1; - ring = gpu->rb[queue->prio]; + ring = gpu->rb[queue->ring_nr]; trace_msm_gpu_submit(pid_nr(pid), ring->id, submitid, 
args->nr_bos, args->nr_cmds); + ret = mutex_lock_interruptible(&queue->lock); + if (ret) + goto out_post_unlock; + + if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) { + out_fence_fd = get_unused_fd_flags(O_CLOEXEC); + if (out_fence_fd < 0) { + ret = out_fence_fd; + goto out_unlock; + } + } + + submit = submit_create(dev, gpu, queue, args->nr_bos, + args->nr_cmds); + if (IS_ERR(submit)) { + ret = PTR_ERR(submit); + goto out_unlock; + } + + submit->pid = pid; + submit->ident = submitid; + + if (args->flags & MSM_SUBMIT_SUDO) + submit->in_rb = true; + if (args->flags & MSM_SUBMIT_FENCE_FD_IN) { struct dma_fence *in_fence; in_fence = sync_file_get_fence(args->fence_fd); - if (!in_fence) - return -EINVAL; - - /* - * Wait if the fence is from a foreign context, or if the fence - * array contains any fence from a foreign context. - */ - ret = 0; - if (!dma_fence_match_context(in_fence, ring->fctx->context)) - ret = dma_fence_wait(in_fence, true); + if (!in_fence) { + ret = -EINVAL; + goto out_unlock; + } - dma_fence_put(in_fence); + ret = drm_gem_fence_array_add(&submit->deps, in_fence); if (ret) - return ret; + goto out_unlock; } if (args->flags & MSM_SUBMIT_SYNCOBJ_IN) { - syncobjs_to_reset = msm_wait_deps(dev, file, - args->in_syncobjs, - args->nr_in_syncobjs, - args->syncobj_stride, ring); - if (IS_ERR(syncobjs_to_reset)) - return PTR_ERR(syncobjs_to_reset); + syncobjs_to_reset = msm_parse_deps(submit, file, + args->in_syncobjs, + args->nr_in_syncobjs, + args->syncobj_stride, ring); + if (IS_ERR(syncobjs_to_reset)) { + ret = PTR_ERR(syncobjs_to_reset); + goto out_unlock; + } } if (args->flags & MSM_SUBMIT_SYNCOBJ_OUT) { @@ -724,51 +821,17 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, args->syncobj_stride); if (IS_ERR(post_deps)) { ret = PTR_ERR(post_deps); - goto out_post_unlock; - } - } - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - goto out_post_unlock; - - if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) { - out_fence_fd = get_unused_fd_flags(O_CLOEXEC); - if (out_fence_fd < 0) { - ret = out_fence_fd; goto out_unlock; } } - submit = submit_create(dev, gpu, queue, args->nr_bos, - args->nr_cmds); - if (!submit) { - ret = -ENOMEM; - goto out_unlock; - } - - submit->pid = pid; - submit->ident = submitid; - - if (args->flags & MSM_SUBMIT_SUDO) - submit->in_rb = true; - ret = submit_lookup_objects(submit, args, file); if (ret) - goto out_pre_pm; + goto out; ret = submit_lookup_cmds(submit, args, file); if (ret) - goto out_pre_pm; - - /* - * Thanks to dev_pm_opp opp_table_lock interactions with mm->mmap_sem - * in the resume path, we need to to rpm get before we lock objs. - * Which unfortunately might involve powering up the GPU sooner than - * is necessary. But at least in the explicit fencing case, we will - * have already done all the fence waiting. - */ - pm_runtime_get_sync(&gpu->pdev->dev); + goto out; /* copy_*_user while holding a ww ticket upsets lockdep */ ww_acquire_init(&submit->ticket, &reservation_ww_class); @@ -815,47 +878,54 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, submit->nr_cmds = i; - submit->fence = msm_fence_alloc(ring->fctx); - if (IS_ERR(submit->fence)) { - ret = PTR_ERR(submit->fence); - submit->fence = NULL; + submit->user_fence = dma_fence_get(&submit->base.s_fence->finished); + + /* + * Allocate an id which can be used by WAIT_FENCE ioctl to map back + * to the underlying fence. 
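The comment above introduces the fence-id scheme: userspace now gets back an idr key scoped to the submitqueue rather than a raw ring seqno. The lookup half was already shown in the msm_drv.c hunk earlier in this diff; restated here as a compact kernel-style helper (the wrapper function itself is illustrative):

/* Map a submitqueue-scoped fence id back to the dma_fence, as WAIT_FENCE
 * does above. NULL means the submit was already retired (the id is removed
 * in __msm_gem_submit_destroy()), i.e. there is nothing left to wait for.
 */
static struct dma_fence *example_fence_lookup(struct msm_gpu_submitqueue *queue,
                uint32_t fence_id)
{
        struct dma_fence *fence;

        mutex_lock(&queue->lock);
        fence = idr_find(&queue->fence_idr, fence_id);
        if (fence)
                fence = dma_fence_get_rcu(fence);
        mutex_unlock(&queue->lock);

        /* the caller still owns its submitqueue reference and must drop
         * it with msm_submitqueue_put() when done
         */
        return fence;
}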
+ */ + submit->fence_id = idr_alloc_cyclic(&queue->fence_idr, + submit->user_fence, 0, INT_MAX, GFP_KERNEL); + if (submit->fence_id < 0) { + ret = submit->fence_id; + submit->fence_id = 0; goto out; } if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) { - sync_file = sync_file_create(submit->fence); + struct sync_file *sync_file = sync_file_create(submit->user_fence); if (!sync_file) { ret = -ENOMEM; goto out; } + fd_install(out_fence_fd, sync_file->file); + args->fence_fd = out_fence_fd; } - msm_gpu_submit(gpu, submit); + submit_attach_object_fences(submit); - args->fence = submit->fence->seqno; + /* The scheduler owns a ref now: */ + msm_gem_submit_get(submit); - if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) { - fd_install(out_fence_fd, sync_file->file); - args->fence_fd = out_fence_fd; - } + drm_sched_entity_push_job(&submit->base, &queue->entity); + + args->fence = submit->fence_id; msm_reset_syncobjs(syncobjs_to_reset, args->nr_in_syncobjs); msm_process_post_deps(post_deps, args->nr_out_syncobjs, - submit->fence); + submit->user_fence); out: - pm_runtime_put(&gpu->pdev->dev); -out_pre_pm: - submit_cleanup(submit); + submit_cleanup(submit, !!ret); if (has_ww_ticket) ww_acquire_fini(&submit->ticket); - msm_gem_submit_put(submit); out_unlock: if (ret && (out_fence_fd >= 0)) put_unused_fd(out_fence_fd); - mutex_unlock(&dev->struct_mutex); - + mutex_unlock(&queue->lock); + if (submit) + msm_gem_submit_put(submit); out_post_unlock: if (!IS_ERR_OR_NULL(post_deps)) { for (i = 0; i < args->nr_out_syncobjs; ++i) { diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 0ebf7bc6ad09..8a3a592da3a4 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -13,8 +13,6 @@ #include <generated/utsrelease.h> #include <linux/string_helpers.h> -#include <linux/devfreq.h> -#include <linux/devfreq_cooling.h> #include <linux/devcoredump.h> #include <linux/sched/task.h> @@ -22,106 +20,6 @@ * Power Management: */ -static int msm_devfreq_target(struct device *dev, unsigned long *freq, - u32 flags) -{ - struct msm_gpu *gpu = dev_to_gpu(dev); - struct dev_pm_opp *opp; - - opp = devfreq_recommended_opp(dev, freq, flags); - - if (IS_ERR(opp)) - return PTR_ERR(opp); - - trace_msm_gpu_freq_change(dev_pm_opp_get_freq(opp)); - - if (gpu->funcs->gpu_set_freq) - gpu->funcs->gpu_set_freq(gpu, opp); - else - clk_set_rate(gpu->core_clk, *freq); - - dev_pm_opp_put(opp); - - return 0; -} - -static int msm_devfreq_get_dev_status(struct device *dev, - struct devfreq_dev_status *status) -{ - struct msm_gpu *gpu = dev_to_gpu(dev); - ktime_t time; - - if (gpu->funcs->gpu_get_freq) - status->current_frequency = gpu->funcs->gpu_get_freq(gpu); - else - status->current_frequency = clk_get_rate(gpu->core_clk); - - status->busy_time = gpu->funcs->gpu_busy(gpu); - - time = ktime_get(); - status->total_time = ktime_us_delta(time, gpu->devfreq.time); - gpu->devfreq.time = time; - - return 0; -} - -static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) -{ - struct msm_gpu *gpu = dev_to_gpu(dev); - - if (gpu->funcs->gpu_get_freq) - *freq = gpu->funcs->gpu_get_freq(gpu); - else - *freq = clk_get_rate(gpu->core_clk); - - return 0; -} - -static struct devfreq_dev_profile msm_devfreq_profile = { - .polling_ms = 10, - .target = msm_devfreq_target, - .get_dev_status = msm_devfreq_get_dev_status, - .get_cur_freq = msm_devfreq_get_cur_freq, -}; - -static void msm_devfreq_init(struct msm_gpu *gpu) -{ - /* We need target support to do devfreq */ - if (!gpu->funcs->gpu_busy) - return; - 
msm_devfreq_profile.initial_freq = gpu->fast_rate; - - /* - * Don't set the freq_table or max_state and let devfreq build the table - * from OPP - * After a deferred probe, these may have be left to non-zero values, - * so set them back to zero before creating the devfreq device - */ - msm_devfreq_profile.freq_table = NULL; - msm_devfreq_profile.max_state = 0; - - gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev, - &msm_devfreq_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND, - NULL); - - if (IS_ERR(gpu->devfreq.devfreq)) { - DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n"); - gpu->devfreq.devfreq = NULL; - return; - } - - devfreq_suspend_device(gpu->devfreq.devfreq); - - gpu->cooling = of_devfreq_cooling_register(gpu->pdev->dev.of_node, - gpu->devfreq.devfreq); - if (IS_ERR(gpu->cooling)) { - DRM_DEV_ERROR(&gpu->pdev->dev, - "Couldn't register GPU cooling device\n"); - gpu->cooling = NULL; - } -} - static int enable_pwrrail(struct msm_gpu *gpu) { struct drm_device *dev = gpu->dev; @@ -196,14 +94,6 @@ static int disable_axi(struct msm_gpu *gpu) return 0; } -void msm_gpu_resume_devfreq(struct msm_gpu *gpu) -{ - gpu->devfreq.busy_cycles = 0; - gpu->devfreq.time = ktime_get(); - - devfreq_resume_device(gpu->devfreq.devfreq); -} - int msm_gpu_pm_resume(struct msm_gpu *gpu) { int ret; @@ -223,7 +113,7 @@ int msm_gpu_pm_resume(struct msm_gpu *gpu) if (ret) return ret; - msm_gpu_resume_devfreq(gpu); + msm_devfreq_resume(gpu); gpu->needs_hw_init = true; @@ -237,7 +127,7 @@ int msm_gpu_pm_suspend(struct msm_gpu *gpu) DBG("%s", gpu->name); trace_msm_gpu_suspend(0); - devfreq_suspend_device(gpu->devfreq.devfreq); + msm_devfreq_suspend(gpu); ret = disable_axi(gpu); if (ret) @@ -278,16 +168,18 @@ static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, uint32_t fence) { struct msm_gem_submit *submit; + unsigned long flags; - spin_lock(&ring->submit_lock); + spin_lock_irqsave(&ring->submit_lock, flags); list_for_each_entry(submit, &ring->submits, node) { if (submit->seqno > fence) break; msm_update_fence(submit->ring->fctx, - submit->fence->seqno); + submit->hw_fence->seqno); + dma_fence_signal(submit->hw_fence); } - spin_unlock(&ring->submit_lock); + spin_unlock_irqrestore(&ring->submit_lock, flags); } #ifdef CONFIG_DEV_COREDUMP @@ -443,15 +335,16 @@ static struct msm_gem_submit * find_submit(struct msm_ringbuffer *ring, uint32_t fence) { struct msm_gem_submit *submit; + unsigned long flags; - spin_lock(&ring->submit_lock); + spin_lock_irqsave(&ring->submit_lock, flags); list_for_each_entry(submit, &ring->submits, node) { if (submit->seqno == fence) { - spin_unlock(&ring->submit_lock); + spin_unlock_irqrestore(&ring->submit_lock, flags); return submit; } } - spin_unlock(&ring->submit_lock); + spin_unlock_irqrestore(&ring->submit_lock, flags); return NULL; } @@ -487,10 +380,6 @@ static void recover_worker(struct kthread_work *work) put_task_struct(task); } - /* msm_rd_dump_submit() needs bo locked to dump: */ - for (i = 0; i < submit->nr_bos; i++) - msm_gem_lock(&submit->bos[i].obj->base); - if (comm && cmd) { DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n", gpu->name, comm, cmd); @@ -500,9 +389,6 @@ static void recover_worker(struct kthread_work *work) } else { msm_rd_dump_submit(priv->hangrd, submit, NULL); } - - for (i = 0; i < submit->nr_bos; i++) - msm_gem_unlock(&submit->bos[i].obj->base); } /* Record the crash state */ @@ -547,11 +433,12 @@ static void recover_worker(struct kthread_work *work) */ for (i = 0; i < gpu->nr_rings; i++) { struct 
msm_ringbuffer *ring = gpu->rb[i]; + unsigned long flags; - spin_lock(&ring->submit_lock); + spin_lock_irqsave(&ring->submit_lock, flags); list_for_each_entry(submit, &ring->submits, node) gpu->funcs->submit(gpu, submit); - spin_unlock(&ring->submit_lock); + spin_unlock_irqrestore(&ring->submit_lock, flags); } } @@ -641,7 +528,7 @@ static void hangcheck_handler(struct timer_list *t) hangcheck_timer_reset(gpu); /* workaround for missing irq: */ - kthread_queue_work(gpu->worker, &gpu->retire_work); + msm_gpu_retire(gpu); } /* @@ -752,7 +639,7 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT; volatile struct msm_gpu_submit_stats *stats; u64 elapsed, clock = 0; - int i; + unsigned long flags; stats = &ring->memptrs->stats[index]; /* Convert 19.2Mhz alwayson ticks to nanoseconds for elapsed time */ @@ -768,22 +655,22 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, trace_msm_gpu_submit_retired(submit, elapsed, clock, stats->alwayson_start, stats->alwayson_end); - for (i = 0; i < submit->nr_bos; i++) { - struct drm_gem_object *obj = &submit->bos[i].obj->base; - - msm_gem_lock(obj); - msm_gem_active_put(obj); - msm_gem_unpin_iova_locked(obj, submit->aspace); - msm_gem_unlock(obj); - drm_gem_object_put(obj); - } + msm_submit_retire(submit); pm_runtime_mark_last_busy(&gpu->pdev->dev); pm_runtime_put_autosuspend(&gpu->pdev->dev); - spin_lock(&ring->submit_lock); + spin_lock_irqsave(&ring->submit_lock, flags); list_del(&submit->node); - spin_unlock(&ring->submit_lock); + spin_unlock_irqrestore(&ring->submit_lock, flags); + + /* Update devfreq on transition from active->idle: */ + mutex_lock(&gpu->active_lock); + gpu->active_submits--; + WARN_ON(gpu->active_submits < 0); + if (!gpu->active_submits) + msm_devfreq_idle(gpu); + mutex_unlock(&gpu->active_lock); msm_gem_submit_put(submit); } @@ -798,18 +685,19 @@ static void retire_submits(struct msm_gpu *gpu) while (true) { struct msm_gem_submit *submit = NULL; + unsigned long flags; - spin_lock(&ring->submit_lock); + spin_lock_irqsave(&ring->submit_lock, flags); submit = list_first_entry_or_null(&ring->submits, struct msm_gem_submit, node); - spin_unlock(&ring->submit_lock); + spin_unlock_irqrestore(&ring->submit_lock, flags); /* * If no submit, we are done. If submit->fence hasn't * been signalled, then later submits are not signalled * either, so we are also done. 
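retire_submit() above also picks up the new devfreq bookkeeping: when the last in-flight submit retires, the GPU is clamped to its idle frequency. The mirror-image transition lives in msm_gpu_submit() further down. Both paths are quoted in this section; the two helpers below merely collect them side by side for readability (the function names are illustrative):

static void example_submit_begin(struct msm_gpu *gpu)
{
        mutex_lock(&gpu->active_lock);
        if (!gpu->active_submits)
                msm_devfreq_active(gpu);   /* first in-flight submit: unclamp */
        gpu->active_submits++;
        mutex_unlock(&gpu->active_lock);
}

static void example_submit_end(struct msm_gpu *gpu)
{
        mutex_lock(&gpu->active_lock);
        gpu->active_submits--;
        WARN_ON(gpu->active_submits < 0);
        if (!gpu->active_submits)
                msm_devfreq_idle(gpu);     /* last submit retired: clamp to minimum */
        mutex_unlock(&gpu->active_lock);
}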
*/ - if (submit && dma_fence_is_signaled(submit->fence)) { + if (submit && dma_fence_is_signaled(submit->hw_fence)) { retire_submit(gpu, ring, submit); } else { break; @@ -821,10 +709,6 @@ static void retire_submits(struct msm_gpu *gpu) static void retire_worker(struct kthread_work *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); - int i; - - for (i = 0; i < gpu->nr_rings; i++) - update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence); retire_submits(gpu); } @@ -832,6 +716,11 @@ static void retire_worker(struct kthread_work *work) /* call from irq handler to schedule work to retire bo's */ void msm_gpu_retire(struct msm_gpu *gpu) { + int i; + + for (i = 0; i < gpu->nr_rings; i++) + update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence); + kthread_queue_work(gpu->worker, &gpu->retire_work); update_sw_cntrs(gpu); } @@ -842,7 +731,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; struct msm_ringbuffer *ring = submit->ring; - int i; + unsigned long flags; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -856,32 +745,22 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) update_sw_cntrs(gpu); - for (i = 0; i < submit->nr_bos; i++) { - struct msm_gem_object *msm_obj = submit->bos[i].obj; - struct drm_gem_object *drm_obj = &msm_obj->base; - uint64_t iova; - - /* submit takes a reference to the bo and iova until retired: */ - drm_gem_object_get(&msm_obj->base); - msm_gem_get_and_pin_iova_locked(&msm_obj->base, submit->aspace, &iova); - - if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) - dma_resv_add_excl_fence(drm_obj->resv, submit->fence); - else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ) - dma_resv_add_shared_fence(drm_obj->resv, submit->fence); - - msm_gem_active_get(drm_obj, gpu); - } - /* * ring->submits holds a ref to the submit, to deal with the case * that a submit completes before msm_ioctl_gem_submit() returns. 
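The comment above describes one of three references that now keep a submit alive, since with the scheduler involved a submit can outlive the ioctl. Collected from the hunks in this section (the wrapper function is illustrative; the scheduler's ref is dropped from its free_job path in msm_ringbuffer.c, not shown here):

static void example_push(struct msm_gem_submit *submit,
                struct msm_gpu_submitqueue *queue)
{
        /* 1) submit_create() did kref_init(): the ioctl's own ref, dropped
         *    at the end of msm_ioctl_gem_submit()
         */

        /* 2) msm_gpu_submit() takes a ref while the submit sits on
         *    ring->submits; retire_submit() drops it
         */

        /* 3) the scheduler's ref, taken just before the job is pushed: */
        msm_gem_submit_get(submit);
        drm_sched_entity_push_job(&submit->base, &queue->entity);
}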
*/ msm_gem_submit_get(submit); - spin_lock(&ring->submit_lock); + spin_lock_irqsave(&ring->submit_lock, flags); list_add_tail(&submit->node, &ring->submits); - spin_unlock(&ring->submit_lock); + spin_unlock_irqrestore(&ring->submit_lock, flags); + + /* Update devfreq on transition from idle->active: */ + mutex_lock(&gpu->active_lock); + if (!gpu->active_submits) + msm_devfreq_active(gpu); + gpu->active_submits++; + mutex_unlock(&gpu->active_lock); gpu->funcs->submit(gpu, submit); priv->lastctx = submit->queue->ctx; @@ -968,6 +847,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, sched_set_fifo_low(gpu->worker->task); INIT_LIST_HEAD(&gpu->active_list); + mutex_init(&gpu->active_lock); kthread_init_work(&gpu->retire_work, retire_worker); kthread_init_work(&gpu->recover_work, recover_worker); kthread_init_work(&gpu->fault_work, fault_worker); @@ -1078,7 +958,7 @@ fail: gpu->rb[i] = NULL; } - msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false); + msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace); platform_set_drvdata(pdev, NULL); return ret; @@ -1097,7 +977,7 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) gpu->rb[i] = NULL; } - msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false); + msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace); if (!IS_ERR_OR_NULL(gpu->aspace)) { gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu); @@ -1108,5 +988,5 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) kthread_destroy_worker(gpu->worker); } - devfreq_cooling_unregister(gpu->cooling); + msm_devfreq_cleanup(gpu); } diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index ef41ec09f59c..0e4b45bff2e6 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -80,6 +80,40 @@ struct msm_gpu_fault_info { const char *block; }; +/** + * struct msm_gpu_devfreq - devfreq related state + */ +struct msm_gpu_devfreq { + /** devfreq: devfreq instance */ + struct devfreq *devfreq; + + /** + * busy_cycles: + * + * Used by implementation of gpu->gpu_busy() to track the last + * busy counter value, for calculating elapsed busy cycles since + * last sampling period. + */ + u64 busy_cycles; + + /** time: Time of last sampling period. */ + ktime_t time; + + /** idle_time: Time of last transition to idle: */ + ktime_t idle_time; + + /** + * idle_freq: + * + * Shadow frequency used while the GPU is idle. From the PoV of + * the devfreq governor, we are continuing to sample busyness and + * adjust frequency while the GPU is idle, but we use this shadow + * value as the GPU is actually clamped to minimum frequency while + * it is inactive. + */ + unsigned long idle_freq; +}; + struct msm_gpu { const char *name; struct drm_device *dev; @@ -109,6 +143,19 @@ struct msm_gpu { */ struct list_head active_list; + /** + * active_submits: + * + * The number of submitted but not yet retired submits, used to + * determine transitions between active and idle. + * + * Protected by lock + */ + int active_submits; + + /** lock: protects active_submits and idle/active transitions */ + struct mutex active_lock; + /* does gpu need hw_init? */ bool needs_hw_init; @@ -151,11 +198,7 @@ struct msm_gpu { struct drm_gem_object *memptrs_bo; - struct { - struct devfreq *devfreq; - u64 busy_cycles; - ktime_t time; - } devfreq; + struct msm_gpu_devfreq devfreq; uint32_t suspend_count; @@ -207,14 +250,90 @@ struct msm_gpu_perfcntr { const char *name; }; +/* + * The number of priority levels provided by drm gpu scheduler. 
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index ef41ec09f59c..0e4b45bff2e6 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -80,6 +80,40 @@ struct msm_gpu_fault_info {
 	const char *block;
 };
 
+/**
+ * struct msm_gpu_devfreq - devfreq related state
+ */
+struct msm_gpu_devfreq {
+	/** devfreq: devfreq instance */
+	struct devfreq *devfreq;
+
+	/**
+	 * busy_cycles:
+	 *
+	 * Used by implementation of gpu->gpu_busy() to track the last
+	 * busy counter value, for calculating elapsed busy cycles since
+	 * last sampling period.
+	 */
+	u64 busy_cycles;
+
+	/** time: Time of last sampling period. */
+	ktime_t time;
+
+	/** idle_time: Time of last transition to idle. */
+	ktime_t idle_time;
+
+	/**
+	 * idle_freq:
+	 *
+	 * Shadow frequency used while the GPU is idle.  From the PoV of
+	 * the devfreq governor, we are continuing to sample busyness and
+	 * adjust frequency while the GPU is idle, but we use this shadow
+	 * value as the GPU is actually clamped to minimum frequency while
+	 * it is inactive.
+	 */
+	unsigned long idle_freq;
+};
+
 struct msm_gpu {
 	const char *name;
 	struct drm_device *dev;
@@ -109,6 +143,19 @@ struct msm_gpu {
 	 */
 	struct list_head active_list;
 
+	/**
+	 * active_submits:
+	 *
+	 * The number of submitted but not yet retired submits, used to
+	 * determine transitions between active and idle.
+	 *
+	 * Protected by active_lock.
+	 */
+	int active_submits;
+
+	/** active_lock: protects active_submits and idle/active transitions */
+	struct mutex active_lock;
+
 	/* does gpu need hw_init? */
 	bool needs_hw_init;
 
@@ -151,11 +198,7 @@ struct msm_gpu {
 
 	struct drm_gem_object *memptrs_bo;
 
-	struct {
-		struct devfreq *devfreq;
-		u64 busy_cycles;
-		ktime_t time;
-	} devfreq;
+	struct msm_gpu_devfreq devfreq;
 
 	uint32_t suspend_count;
 
@@ -207,14 +250,90 @@ struct msm_gpu_perfcntr {
 	const char *name;
 };
 
+/*
+ * The number of priority levels provided by drm gpu scheduler.  The
+ * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some
+ * cases, so we don't use it (no need for kernel generated jobs).
+ */
+#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN)
+
+/**
+ * msm_gpu_convert_priority - Map userspace priority to ring # and sched priority
+ *
+ * @gpu:        the gpu instance
+ * @prio:       the userspace priority level
+ * @ring_nr:    [out] the ringbuffer the userspace priority maps to
+ * @sched_prio: [out] the gpu scheduler priority level which the userspace
+ *              priority maps to
+ *
+ * With drm/scheduler providing its own level of prioritization, our total
+ * number of available priority levels is (nr_rings * NR_SCHED_PRIORITIES).
+ * Each ring is associated with its own scheduler instance.  However, our
+ * UABI is that lower numerical values are higher priority.  So mapping the
+ * single userspace priority level into ring_nr and sched_prio takes some
+ * care.  The userspace provided priority (when a submitqueue is created)
+ * is mapped to ring nr and scheduler priority as such:
+ *
+ *   ring_nr    = userspace_prio / NR_SCHED_PRIORITIES
+ *   sched_prio = NR_SCHED_PRIORITIES -
+ *                (userspace_prio % NR_SCHED_PRIORITIES) - 1
+ *
+ * This allows generations without preemption (nr_rings==1) to have some
+ * amount of prioritization, and provides more priority levels for gens
+ * that do have preemption.
+ */
+static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
+		unsigned *ring_nr, enum drm_sched_priority *sched_prio)
+{
+	unsigned rn, sp;
+
+	rn = div_u64_rem(prio, NR_SCHED_PRIORITIES, &sp);
+
+	/* invert sched priority to map to higher-numeric-is-higher-
+	 * priority convention
+	 */
+	sp = NR_SCHED_PRIORITIES - sp - 1;
+
+	if (rn >= gpu->nr_rings)
+		return -EINVAL;
+
+	*ring_nr = rn;
+	*sched_prio = sp;
+
+	return 0;
+}
+
+/**
+ * A submitqueue is associated with a gl context or vk queue (or equiv)
+ * in userspace.
+ *
+ * @id:        userspace id for the submitqueue, unique within the drm_file
+ * @flags:     userspace flags for the submitqueue, specified at creation
+ *             (currently unused)
+ * @ring_nr:   the ringbuffer used by this submitqueue, which is determined
+ *             by the submitqueue's priority
+ * @faults:    the number of GPU hangs associated with this submitqueue
+ * @ctx:       the per-drm_file context associated with the submitqueue (ie.
+ *             which set of pgtables do the jobs associated with the
+ *             submitqueue use)
+ * @node:      node in the context's list of submitqueues
+ * @fence_idr: maps fence-id to dma_fence for userspace visible fence
+ *             seqno, protected by submitqueue lock
+ * @lock:      submitqueue lock
+ * @ref:       reference count
+ * @entity:    the submit job-queue
+ */
 struct msm_gpu_submitqueue {
 	int id;
 	u32 flags;
-	u32 prio;
+	u32 ring_nr;
 	int faults;
 	struct msm_file_private *ctx;
 	struct list_head node;
+	struct idr fence_idr;
+	struct mutex lock;
 	struct kref ref;
+	struct drm_sched_entity entity;
 };
 
 struct msm_gpu_state_bo {
@@ -301,7 +420,13 @@ static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
 
 int msm_gpu_pm_suspend(struct msm_gpu *gpu);
 int msm_gpu_pm_resume(struct msm_gpu *gpu);
-void msm_gpu_resume_devfreq(struct msm_gpu *gpu);
+
+void msm_devfreq_init(struct msm_gpu *gpu);
+void msm_devfreq_cleanup(struct msm_gpu *gpu);
+void msm_devfreq_resume(struct msm_gpu *gpu);
+void msm_devfreq_suspend(struct msm_gpu *gpu);
+void msm_devfreq_active(struct msm_gpu *gpu);
+void msm_devfreq_idle(struct msm_gpu *gpu);
 
 int msm_gpu_hw_init(struct msm_gpu *gpu);
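
Note: a worked example may make msm_gpu_convert_priority() above concrete. Assuming the drm_sched priority enum of this kernel (DRM_SCHED_PRIORITY_MIN == 0, NORMAL == 1, HIGH == 2), NR_SCHED_PRIORITIES evaluates to 3, so a GPU with nr_rings == 4 accepts userspace priorities 0..11:

/*
 * userspace prio 0  -> ring 0, sched_prio 2 (HIGH)    <- highest
 * userspace prio 1  -> ring 0, sched_prio 1 (NORMAL)
 * userspace prio 2  -> ring 0, sched_prio 0 (MIN)
 * userspace prio 3  -> ring 1, sched_prio 2 (HIGH)
 *   ...
 * userspace prio 11 -> ring 3, sched_prio 0 (MIN)     <- lowest
 * userspace prio 12 -> -EINVAL (ring 4 >= nr_rings)
 */
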
diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
new file mode 100644
index 000000000000..0a1ee20296a2
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ */
+
+#include "msm_gpu.h"
+#include "msm_gpu_trace.h"
+
+#include <linux/devfreq.h>
+#include <linux/devfreq_cooling.h>
+
+/*
+ * Power Management:
+ */
+
+static int msm_devfreq_target(struct device *dev, unsigned long *freq,
+		u32 flags)
+{
+	struct msm_gpu *gpu = dev_to_gpu(dev);
+	struct dev_pm_opp *opp;
+
+	opp = devfreq_recommended_opp(dev, freq, flags);
+
+	/*
+	 * If the GPU is idle, devfreq is not aware, so just ignore
+	 * its requests
+	 */
+	if (gpu->devfreq.idle_freq) {
+		gpu->devfreq.idle_freq = *freq;
+		return 0;
+	}
+
+	if (IS_ERR(opp))
+		return PTR_ERR(opp);
+
+	trace_msm_gpu_freq_change(dev_pm_opp_get_freq(opp));
+
+	if (gpu->funcs->gpu_set_freq)
+		gpu->funcs->gpu_set_freq(gpu, opp);
+	else
+		clk_set_rate(gpu->core_clk, *freq);
+
+	dev_pm_opp_put(opp);
+
+	return 0;
+}
+
+static unsigned long get_freq(struct msm_gpu *gpu)
+{
+	if (gpu->devfreq.idle_freq)
+		return gpu->devfreq.idle_freq;
+
+	if (gpu->funcs->gpu_get_freq)
+		return gpu->funcs->gpu_get_freq(gpu);
+
+	return clk_get_rate(gpu->core_clk);
+}
+
+static int msm_devfreq_get_dev_status(struct device *dev,
+		struct devfreq_dev_status *status)
+{
+	struct msm_gpu *gpu = dev_to_gpu(dev);
+	ktime_t time;
+
+	status->current_frequency = get_freq(gpu);
+	status->busy_time = gpu->funcs->gpu_busy(gpu);
+
+	time = ktime_get();
+	status->total_time = ktime_us_delta(time, gpu->devfreq.time);
+	gpu->devfreq.time = time;
+
+	return 0;
+}
+
+static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
+{
+	*freq = get_freq(dev_to_gpu(dev));
+
+	return 0;
+}
+
+static struct devfreq_dev_profile msm_devfreq_profile = {
+	.timer = DEVFREQ_TIMER_DELAYED,
+	.polling_ms = 50,
+	.target = msm_devfreq_target,
+	.get_dev_status = msm_devfreq_get_dev_status,
+	.get_cur_freq = msm_devfreq_get_cur_freq,
+};
+
+void msm_devfreq_init(struct msm_gpu *gpu)
+{
+	/* We need target support to do devfreq */
+	if (!gpu->funcs->gpu_busy)
+		return;
+
+	msm_devfreq_profile.initial_freq = gpu->fast_rate;
+
+	/*
+	 * Don't set the freq_table or max_state and let devfreq build the
+	 * table from OPP.
+	 * After a deferred probe, these may have been left at non-zero
+	 * values, so set them back to zero before creating the devfreq
+	 * device.
+	 */
+	msm_devfreq_profile.freq_table = NULL;
+	msm_devfreq_profile.max_state = 0;
+
+	gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
+			&msm_devfreq_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND,
+			NULL);
+
+	if (IS_ERR(gpu->devfreq.devfreq)) {
+		DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
+		gpu->devfreq.devfreq = NULL;
+		return;
+	}
+
+	devfreq_suspend_device(gpu->devfreq.devfreq);
+
+	gpu->cooling = of_devfreq_cooling_register(gpu->pdev->dev.of_node,
+			gpu->devfreq.devfreq);
+	if (IS_ERR(gpu->cooling)) {
+		DRM_DEV_ERROR(&gpu->pdev->dev,
+				"Couldn't register GPU cooling device\n");
+		gpu->cooling = NULL;
+	}
+}
+
+void msm_devfreq_cleanup(struct msm_gpu *gpu)
+{
+	devfreq_cooling_unregister(gpu->cooling);
+}
+
+void msm_devfreq_resume(struct msm_gpu *gpu)
+{
+	gpu->devfreq.busy_cycles = 0;
+	gpu->devfreq.time = ktime_get();
+
+	devfreq_resume_device(gpu->devfreq.devfreq);
+}
+
+void msm_devfreq_suspend(struct msm_gpu *gpu)
+{
+	devfreq_suspend_device(gpu->devfreq.devfreq);
+}
+
+void msm_devfreq_active(struct msm_gpu *gpu)
+{
+	struct msm_gpu_devfreq *df = &gpu->devfreq;
+	struct devfreq_dev_status status;
+	unsigned int idle_time;
+	unsigned long target_freq = df->idle_freq;
+
+	/*
+	 * Hold devfreq lock to synchronize with get_dev_status()/
+	 * target() callbacks
+	 */
+	mutex_lock(&df->devfreq->lock);
+
+	idle_time = ktime_to_ms(ktime_sub(ktime_get(), df->idle_time));
+
+	/*
+	 * If we've been idle for a significant fraction of a polling
+	 * interval, then we won't meet the threshold of busyness for
+	 * the governor to ramp up the freq.. so give some boost
+	 */
+	if (idle_time > msm_devfreq_profile.polling_ms/2) {
+		target_freq *= 2;
+	}
+
+	df->idle_freq = 0;
+
+	msm_devfreq_target(&gpu->pdev->dev, &target_freq, 0);
+
+	/*
+	 * Reset the polling interval so we aren't inconsistent
+	 * about freq vs busy/total cycles
+	 */
+	msm_devfreq_get_dev_status(&gpu->pdev->dev, &status);
+
+	mutex_unlock(&df->devfreq->lock);
+}
+
+void msm_devfreq_idle(struct msm_gpu *gpu)
+{
+	struct msm_gpu_devfreq *df = &gpu->devfreq;
+	unsigned long idle_freq, target_freq = 0;
+
+	/*
+	 * Hold devfreq lock to synchronize with get_dev_status()/
+	 * target() callbacks
+	 */
+	mutex_lock(&df->devfreq->lock);
+
+	idle_freq = get_freq(gpu);
+
+	msm_devfreq_target(&gpu->pdev->dev, &target_freq, 0);
+
+	df->idle_time = ktime_get();
+	df->idle_freq = idle_freq;
+
+	mutex_unlock(&df->devfreq->lock);
+}
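
Note: a worked example of the idle clamp and wake-up boost in msm_devfreq_idle()/msm_devfreq_active() above, with the profile's polling_ms = 50 and illustrative frequencies (not values from the patch):

/*
 * Example timeline:
 *
 *   t=0ms    last submit retires while running at 300 MHz
 *            -> msm_devfreq_idle(): idle_freq = 300 MHz, clock drops to
 *               the minimum; governor target() calls while idle only
 *               update the shadow idle_freq value
 *   t=10ms   new submit -> msm_devfreq_active(): idle_time = 10 <= 25,
 *            no boost, target() restores 300 MHz
 *   -- or --
 *   t=40ms   new submit -> idle_time = 40 > polling_ms/2 (25), so
 *            target_freq is doubled and target() asks for 600 MHz,
 *            compensating for the busy sample the governor missed
 */
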
diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h
index 086a2d59b8c8..9de7c42e1071 100644
--- a/drivers/gpu/drm/msm/msm_kms.h
+++ b/drivers/gpu/drm/msm/msm_kms.h
@@ -150,7 +150,7 @@ struct msm_kms {
 	const struct msm_kms_funcs *funcs;
 	struct drm_device *dev;
 
-	/* irq number to be passed on to drm_irq_install */
+	/* irq number to be passed on to msm_irq_install */
 	int irq;
 
 	/* mapper-id used to request GEM buffer mapped for scanout: */
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index 659e5cc4b40a..b55398a34fa4 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -325,15 +325,19 @@ static void snapshot_buf(struct msm_rd_state *rd,
 	if (!(submit->bos[idx].flags & MSM_SUBMIT_BO_READ))
 		return;
 
+	msm_gem_lock(&obj->base);
 	buf = msm_gem_get_vaddr_active(&obj->base);
 	if (IS_ERR(buf))
-		return;
+		goto out_unlock;
 
 	buf += offset;
 
 	rd_write_section(rd, RD_BUFFER_CONTENTS, buf, size);
 
 	msm_gem_put_vaddr_locked(&obj->base);
+
+out_unlock:
+	msm_gem_unlock(&obj->base);
 }
 
 /* called under struct_mutex */
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 4d2a2a4abef8..bd54c1412649 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -7,10 +7,61 @@
 #include "msm_ringbuffer.h"
 #include "msm_gpu.h"
 
+static uint num_hw_submissions = 8;
+MODULE_PARM_DESC(num_hw_submissions, "The max # of jobs to write into ringbuffer (default 8)");
+module_param(num_hw_submissions, uint, 0600);
+
+static struct dma_fence *msm_job_dependency(struct drm_sched_job *job,
+		struct drm_sched_entity *s_entity)
+{
+	struct msm_gem_submit *submit = to_msm_submit(job);
+
+	if (!xa_empty(&submit->deps))
+		return xa_erase(&submit->deps, submit->last_dep++);
+
+	return NULL;
+}
+
+static struct dma_fence *msm_job_run(struct drm_sched_job *job)
+{
+	struct msm_gem_submit *submit = to_msm_submit(job);
+	struct msm_gpu *gpu = submit->gpu;
+
+	submit->hw_fence = msm_fence_alloc(submit->ring->fctx);
+
+	pm_runtime_get_sync(&gpu->pdev->dev);
+
+	/* TODO move submit path over to using a per-ring lock.. */
+	mutex_lock(&gpu->dev->struct_mutex);
+
+	msm_gpu_submit(gpu, submit);
+
+	mutex_unlock(&gpu->dev->struct_mutex);
+
+	pm_runtime_put(&gpu->pdev->dev);
+
+	return dma_fence_get(submit->hw_fence);
+}
+
+static void msm_job_free(struct drm_sched_job *job)
+{
+	struct msm_gem_submit *submit = to_msm_submit(job);
+
+	drm_sched_job_cleanup(job);
+	msm_gem_submit_put(submit);
+}
+
+const struct drm_sched_backend_ops msm_sched_ops = {
+	.dependency = msm_job_dependency,
+	.run_job = msm_job_run,
+	.free_job = msm_job_free
+};
+
 struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
 		void *memptrs, uint64_t memptrs_iova)
 {
 	struct msm_ringbuffer *ring;
+	long sched_timeout;
 	char name[32];
 	int ret;
 
@@ -32,7 +83,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
 
 	if (IS_ERR(ring->start)) {
 		ret = PTR_ERR(ring->start);
-		ring->start = 0;
+		ring->start = NULL;
 		goto fail;
 	}
 
@@ -45,13 +96,23 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
 	ring->memptrs = memptrs;
 	ring->memptrs_iova = memptrs_iova;
 
+	/* currently managing hangcheck ourselves: */
+	sched_timeout = MAX_SCHEDULE_TIMEOUT;
+
+	ret = drm_sched_init(&ring->sched, &msm_sched_ops,
+			num_hw_submissions, 0, sched_timeout,
+			NULL, NULL, to_msm_bo(ring->bo)->name);
+	if (ret) {
+		goto fail;
+	}
+
 	INIT_LIST_HEAD(&ring->submits);
 	spin_lock_init(&ring->submit_lock);
 	spin_lock_init(&ring->preempt_lock);
 
 	snprintf(name, sizeof(name), "gpu-ring-%d", ring->id);
 
-	ring->fctx = msm_fence_context_alloc(gpu->dev, name);
+	ring->fctx = msm_fence_context_alloc(gpu->dev, &ring->memptrs->fence, name);
 
 	return ring;
 
@@ -65,9 +126,11 @@ void msm_ringbuffer_destroy(struct msm_ringbuffer *ring)
 	if (IS_ERR_OR_NULL(ring))
 		return;
 
+	drm_sched_fini(&ring->sched);
+
 	msm_fence_context_free(ring->fctx);
 
-	msm_gem_kernel_put(ring->bo, ring->gpu->aspace, false);
+	msm_gem_kernel_put(ring->bo, ring->gpu->aspace);
 
 	kfree(ring);
 }
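
Note: the three callbacks above follow the standard drm_sched contract of this kernel generation: dependency() is polled for unsignaled input fences until it returns NULL, run_job() is invoked once per job when one of the num_hw_submissions hardware slots is free, and free_job() runs after the fence returned by run_job() signals. Because num_hw_submissions is registered with mode 0600, it should also be adjustable at runtime via /sys/module/msm/parameters/num_hw_submissions (path inferred from standard module_param behavior, not shown in this diff). In comment form:

/*
 * Per-job flow under drm_sched, as wired up above:
 *
 *   msm_job_dependency() - drained one fence at a time until NULL;
 *                          the job stays queued while any returned
 *                          fence is unsignaled
 *   msm_job_run()        - allocates the hw_fence and writes the job
 *                          into the ringbuffer (at most
 *                          num_hw_submissions in flight per ring)
 *   msm_job_free()       - drops the scheduler's submit reference
 *                          after the hw_fence signals
 */
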
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h
index fe55d4a1aa16..d8c63df4e9ca 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.h
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
@@ -7,6 +7,7 @@
 #ifndef __MSM_RINGBUFFER_H__
 #define __MSM_RINGBUFFER_H__
 
+#include "drm/gpu_scheduler.h"
 #include "msm_drv.h"
 
 #define rbmemptr(ring, member) \
@@ -41,7 +42,18 @@ struct msm_ringbuffer {
 	uint32_t *start, *end, *cur, *next;
 
 	/*
+	 * The job scheduler for this ring.
+	 */
+	struct drm_gpu_scheduler sched;
+
+	/*
 	 * List of in-flight submits on this ring.  Protected by submit_lock.
+	 *
+	 * Currently just submits that are already written into the ring, not
+	 * submits that are still in drm_gpu_scheduler's queues.  At a later
+	 * step we could probably move to letting drm_gpu_scheduler manage
+	 * hangcheck detection and keep track of submit jobs that are in-
+	 * flight.
 	 */
 	struct list_head submits;
 	spinlock_t submit_lock;
diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
index c3d206105d28..32a55d81b58b 100644
--- a/drivers/gpu/drm/msm/msm_submitqueue.c
+++ b/drivers/gpu/drm/msm/msm_submitqueue.c
@@ -12,6 +12,10 @@ void msm_submitqueue_destroy(struct kref *kref)
 	struct msm_gpu_submitqueue *queue = container_of(kref,
 			struct msm_gpu_submitqueue, ref);
 
+	idr_destroy(&queue->fence_idr);
+
+	drm_sched_entity_destroy(&queue->entity);
+
 	msm_file_private_put(queue->ctx);
 
 	kfree(queue);
@@ -62,10 +66,22 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
 {
 	struct msm_drm_private *priv = drm->dev_private;
 	struct msm_gpu_submitqueue *queue;
+	struct msm_ringbuffer *ring;
+	struct drm_gpu_scheduler *sched;
+	enum drm_sched_priority sched_prio;
+	unsigned ring_nr;
+	int ret;
 
 	if (!ctx)
 		return -ENODEV;
 
+	if (!priv->gpu)
+		return -ENODEV;
+
+	ret = msm_gpu_convert_priority(priv->gpu, prio, &ring_nr, &sched_prio);
+	if (ret)
+		return ret;
+
 	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
 
 	if (!queue)
@@ -73,14 +89,16 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
 
 	kref_init(&queue->ref);
 	queue->flags = flags;
+	queue->ring_nr = ring_nr;
 
-	if (priv->gpu) {
-		if (prio >= priv->gpu->nr_rings) {
-			kfree(queue);
-			return -EINVAL;
-		}
+	ring = priv->gpu->rb[ring_nr];
+	sched = &ring->sched;
 
-		queue->prio = prio;
+	ret = drm_sched_entity_init(&queue->entity,
+			sched_prio, &sched, 1, NULL);
+	if (ret) {
+		kfree(queue);
+		return ret;
 	}
 
 	write_lock(&ctx->queuelock);
@@ -91,6 +109,9 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
 	if (id)
 		*id = queue->id;
 
+	idr_init(&queue->fence_idr);
+	mutex_init(&queue->lock);
+
 	list_add_tail(&queue->node, &ctx->submitqueues);
 
 	write_unlock(&ctx->queuelock);
@@ -98,20 +119,26 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
 	return 0;
 }
 
+/*
+ * Create the default submit-queue (id==0), used for backwards compatibility
+ * for userspace that pre-dates the introduction of submitqueues.
+ */
 int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx)
 {
 	struct msm_drm_private *priv = drm->dev_private;
-	int default_prio;
+	int default_prio, max_priority;
 
-	if (!ctx)
-		return 0;
+	if (!priv->gpu)
+		return -ENODEV;
+
+	max_priority = (priv->gpu->nr_rings * NR_SCHED_PRIORITIES) - 1;
 
 	/*
-	 * Select priority 2 as the "default priority" unless nr_rings is less
-	 * than 2 and then pick the lowest pirority
+	 * Pick a medium priority level as default.  Lower numeric value is
+	 * higher priority, so round-up to pick a priority that is not higher
+	 * than the middle priority level.
 	 */
-	default_prio = priv->gpu ?
-		clamp_t(uint32_t, 2, 0, priv->gpu->nr_rings - 1) : 0;
+	default_prio = DIV_ROUND_UP(max_priority, 2);
 
 	INIT_LIST_HEAD(&ctx->submitqueues);
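
Note: cross-checking the new default against msm_gpu_convert_priority() (again assuming NR_SCHED_PRIORITIES == 3):

/*
 * nr_rings == 1: max_priority = 2,  default_prio = DIV_ROUND_UP(2, 2) = 1
 *                -> ring 0, NORMAL sched priority
 * nr_rings == 4: max_priority = 11, default_prio = DIV_ROUND_UP(11, 2) = 6
 *                -> ring 2, HIGH sched priority (middle of rings 0..3)
 */
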