diff options
Diffstat (limited to 'drivers/gpu/drm/msm/adreno')
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a2xx_gpu.c | 65 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 77 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 82 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_debugfs.c | 21 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 120 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_power.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 182 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/adreno_device.c | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/adreno_gpu.c | 105 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/adreno_gpu.h | 82 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h | 1 |
16 files changed, 494 insertions, 287 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c index 48fa49f69d6d..7e82c41a85f1 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c @@ -10,6 +10,48 @@ extern bool hang_debug; static void a2xx_dump(struct msm_gpu *gpu); static bool a2xx_idle(struct msm_gpu *gpu); +static void a2xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) +{ + struct msm_drm_private *priv = gpu->dev->dev_private; + struct msm_ringbuffer *ring = submit->ring; + unsigned int i; + + for (i = 0; i < submit->nr_cmds; i++) { + switch (submit->cmd[i].type) { + case MSM_SUBMIT_CMD_IB_TARGET_BUF: + /* ignore IB-targets */ + break; + case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: + /* ignore if there has not been a ctx switch: */ + if (priv->lastctx == submit->queue->ctx) + break; + fallthrough; + case MSM_SUBMIT_CMD_BUF: + OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2); + OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); + OUT_RING(ring, submit->cmd[i].size); + OUT_PKT2(ring); + break; + } + } + + OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); + OUT_RING(ring, submit->seqno); + + /* wait for idle before cache flush/interrupt */ + OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT3(ring, CP_EVENT_WRITE, 3); + OUT_RING(ring, CACHE_FLUSH_TS); + OUT_RING(ring, rbmemptr(ring, fence)); + OUT_RING(ring, submit->seqno); + OUT_PKT3(ring, CP_INTERRUPT, 1); + OUT_RING(ring, 0x80000000); + + adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR); +} + static bool a2xx_me_init(struct msm_gpu *gpu) { struct msm_ringbuffer *ring = gpu->rb[0]; @@ -53,7 +95,7 @@ static bool a2xx_me_init(struct msm_gpu *gpu) OUT_PKT3(ring, CP_SET_PROTECTED_MODE, 1); OUT_RING(ring, 1); - gpu->funcs->flush(gpu, ring); + adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR); return a2xx_idle(gpu); } @@ -421,16 +463,11 @@ a2xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev) return aspace; } -/* Register offset defines for A2XX - copy of A3XX */ -static const unsigned int a2xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE), - REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR), - REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL), -}; +static u32 a2xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR); + return ring->memptrs->rptr; +} static const struct adreno_gpu_funcs funcs = { .base = { @@ -439,8 +476,7 @@ static const struct adreno_gpu_funcs funcs = { .pm_suspend = msm_gpu_pm_suspend, .pm_resume = msm_gpu_pm_resume, .recover = a2xx_recover, - .submit = adreno_submit, - .flush = adreno_flush, + .submit = a2xx_submit, .active_ring = adreno_active_ring, .irq = a2xx_irq, .destroy = a2xx_destroy, @@ -450,6 +486,7 @@ static const struct adreno_gpu_funcs funcs = { .gpu_state_get = a2xx_gpu_state_get, .gpu_state_put = adreno_gpu_state_put, .create_address_space = a2xx_create_address_space, + .get_rptr = a2xx_get_rptr, }, }; @@ -491,8 +528,6 @@ struct msm_gpu *a2xx_gpu_init(struct drm_device *dev) else adreno_gpu->registers = a220_registers; - adreno_gpu->reg_offsets = a2xx_register_offsets; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index f6471145a7a6..f29c77d9cd42 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -28,6 +28,61 @@ extern bool hang_debug; static void a3xx_dump(struct msm_gpu *gpu); static bool a3xx_idle(struct msm_gpu *gpu); +static void a3xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) +{ + struct msm_drm_private *priv = gpu->dev->dev_private; + struct msm_ringbuffer *ring = submit->ring; + unsigned int i; + + for (i = 0; i < submit->nr_cmds; i++) { + switch (submit->cmd[i].type) { + case MSM_SUBMIT_CMD_IB_TARGET_BUF: + /* ignore IB-targets */ + break; + case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: + /* ignore if there has not been a ctx switch: */ + if (priv->lastctx == submit->queue->ctx) + break; + fallthrough; + case MSM_SUBMIT_CMD_BUF: + OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2); + OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); + OUT_RING(ring, submit->cmd[i].size); + OUT_PKT2(ring); + break; + } + } + + OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); + OUT_RING(ring, submit->seqno); + + /* Flush HLSQ lazy updates to make sure there is nothing + * pending for indirect loads after the timestamp has + * passed: + */ + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, HLSQ_FLUSH); + + /* wait for idle before cache flush/interrupt */ + OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); + OUT_RING(ring, 0x00000000); + + /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */ + OUT_PKT3(ring, CP_EVENT_WRITE, 3); + OUT_RING(ring, CACHE_FLUSH_TS | BIT(31)); + OUT_RING(ring, rbmemptr(ring, fence)); + OUT_RING(ring, submit->seqno); + +#if 0 + /* Dummy set-constant to trigger context rollover */ + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG)); + OUT_RING(ring, 0x00000000); +#endif + + adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR); +} + static bool a3xx_me_init(struct msm_gpu *gpu) { struct msm_ringbuffer *ring = gpu->rb[0]; @@ -51,7 +106,7 @@ static bool a3xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu, ring); + adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR); return a3xx_idle(gpu); } @@ -423,16 +478,11 @@ static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu) return state; } -/* Register offset defines for A3XX */ -static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE), - REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR), - REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL), -}; +static u32 a3xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR); + return ring->memptrs->rptr; +} static const struct adreno_gpu_funcs funcs = { .base = { @@ -441,8 +491,7 @@ static const struct adreno_gpu_funcs funcs = { .pm_suspend = msm_gpu_pm_suspend, .pm_resume = msm_gpu_pm_resume, .recover = a3xx_recover, - .submit = adreno_submit, - .flush = adreno_flush, + .submit = a3xx_submit, .active_ring = adreno_active_ring, .irq = a3xx_irq, .destroy = a3xx_destroy, @@ -452,6 +501,7 @@ static const struct adreno_gpu_funcs funcs = { .gpu_state_get = a3xx_gpu_state_get, .gpu_state_put = adreno_gpu_state_put, .create_address_space = adreno_iommu_create_address_space, + .get_rptr = a3xx_get_rptr, }, }; @@ -490,7 +540,6 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs); adreno_gpu->registers = a3xx_registers; - adreno_gpu->reg_offsets = a3xx_register_offsets; ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 954753600625..2b93b33b05e4 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -22,6 +22,54 @@ extern bool hang_debug; static void a4xx_dump(struct msm_gpu *gpu); static bool a4xx_idle(struct msm_gpu *gpu); +static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) +{ + struct msm_drm_private *priv = gpu->dev->dev_private; + struct msm_ringbuffer *ring = submit->ring; + unsigned int i; + + for (i = 0; i < submit->nr_cmds; i++) { + switch (submit->cmd[i].type) { + case MSM_SUBMIT_CMD_IB_TARGET_BUF: + /* ignore IB-targets */ + break; + case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: + /* ignore if there has not been a ctx switch: */ + if (priv->lastctx == submit->queue->ctx) + break; + fallthrough; + case MSM_SUBMIT_CMD_BUF: + OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2); + OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); + OUT_RING(ring, submit->cmd[i].size); + OUT_PKT2(ring); + break; + } + } + + OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); + OUT_RING(ring, submit->seqno); + + /* Flush HLSQ lazy updates to make sure there is nothing + * pending for indirect loads after the timestamp has + * passed: + */ + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, HLSQ_FLUSH); + + /* wait for idle before cache flush/interrupt */ + OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); + OUT_RING(ring, 0x00000000); + + /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */ + OUT_PKT3(ring, CP_EVENT_WRITE, 3); + OUT_RING(ring, CACHE_FLUSH_TS | BIT(31)); + OUT_RING(ring, rbmemptr(ring, fence)); + OUT_RING(ring, submit->seqno); + + adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR); +} + /* * a4xx_enable_hwcg() - Program the clock control registers * @device: The adreno device pointer @@ -129,7 +177,7 @@ static bool a4xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu, ring); + adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR); return a4xx_idle(gpu); } @@ -515,17 +563,6 @@ static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu) return state; } -/* Register offset defines for A4XX, in order of enum adreno_regs */ -static const unsigned int a4xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A4XX_CP_RB_BASE), - REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A4XX_CP_RB_RPTR_ADDR), - REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A4XX_CP_RB_RPTR), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A4XX_CP_RB_WPTR), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A4XX_CP_RB_CNTL), -}; - static void a4xx_dump(struct msm_gpu *gpu) { printk("status: %08x\n", @@ -576,6 +613,12 @@ static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) return 0; } +static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR); + return ring->memptrs->rptr; +} + static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, @@ -583,8 +626,7 @@ static const struct adreno_gpu_funcs funcs = { .pm_suspend = a4xx_pm_suspend, .pm_resume = a4xx_pm_resume, .recover = a4xx_recover, - .submit = adreno_submit, - .flush = adreno_flush, + .submit = a4xx_submit, .active_ring = adreno_active_ring, .irq = a4xx_irq, .destroy = a4xx_destroy, @@ -594,6 +636,7 @@ static const struct adreno_gpu_funcs funcs = { .gpu_state_get = a4xx_gpu_state_get, .gpu_state_put = adreno_gpu_state_put, .create_address_space = adreno_iommu_create_address_space, + .get_rptr = a4xx_get_rptr, }, .get_timestamp = a4xx_get_timestamp, }; @@ -631,15 +674,12 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers : a4xx_registers; - adreno_gpu->reg_offsets = a4xx_register_offsets; /* if needed, allocate gmem: */ - if (adreno_is_a4xx(adreno_gpu)) { - ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu, - &a4xx_gpu->ocmem); - if (ret) - goto fail; - } + ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu, + &a4xx_gpu->ocmem); + if (ret) + goto fail; if (!gpu->aspace) { /* TODO we think it is possible to configure the GPU to diff --git a/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c b/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c index 68eddac7771c..fc2c905b6c9e 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c @@ -11,7 +11,7 @@ #include "a5xx_gpu.h" -static int pfp_print(struct msm_gpu *gpu, struct drm_printer *p) +static void pfp_print(struct msm_gpu *gpu, struct drm_printer *p) { int i; @@ -22,11 +22,9 @@ static int pfp_print(struct msm_gpu *gpu, struct drm_printer *p) drm_printf(p, " %02x: %08x\n", i, gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA)); } - - return 0; } -static int me_print(struct msm_gpu *gpu, struct drm_printer *p) +static void me_print(struct msm_gpu *gpu, struct drm_printer *p) { int i; @@ -37,11 +35,9 @@ static int me_print(struct msm_gpu *gpu, struct drm_printer *p) drm_printf(p, " %02x: %08x\n", i, gpu_read(gpu, REG_A5XX_CP_ME_STAT_DATA)); } - - return 0; } -static int meq_print(struct msm_gpu *gpu, struct drm_printer *p) +static void meq_print(struct msm_gpu *gpu, struct drm_printer *p) { int i; @@ -52,11 +48,9 @@ static int meq_print(struct msm_gpu *gpu, struct drm_printer *p) drm_printf(p, " %02x: %08x\n", i, gpu_read(gpu, REG_A5XX_CP_MEQ_DBG_DATA)); } - - return 0; } -static int roq_print(struct msm_gpu *gpu, struct drm_printer *p) +static void roq_print(struct msm_gpu *gpu, struct drm_printer *p) { int i; @@ -71,8 +65,6 @@ static int roq_print(struct msm_gpu *gpu, struct drm_printer *p) drm_printf(p, " %02x: %08x %08x %08x %08x\n", i, val[0], val[1], val[2], val[3]); } - - return 0; } static int show(struct seq_file *m, void *arg) @@ -81,10 +73,11 @@ static int show(struct seq_file *m, void *arg) struct drm_device *dev = node->minor->dev; struct msm_drm_private *priv = dev->dev_private; struct drm_printer p = drm_seq_file_printer(m); - int (*show)(struct msm_gpu *gpu, struct drm_printer *p) = + void (*show)(struct msm_gpu *gpu, struct drm_printer *p) = node->info_ent->data; - return show(priv->gpu, &p); + show(priv->gpu, &p); + return 0; } #define ENT(n) { .name = #n, .show = show, .data = n ##_print } diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 91726da82ed6..d6804a802355 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -18,13 +18,24 @@ static void a5xx_dump(struct msm_gpu *gpu); #define GPU_PAS_ID 13 -static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, + bool sync) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); uint32_t wptr; unsigned long flags; + /* + * Most flush operations need to issue a WHERE_AM_I opcode to sync up + * the rptr shadow + */ + if (a5xx_gpu->has_whereami && sync) { + OUT_PKT7(ring, CP_WHERE_AM_I, 2); + OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring))); + OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring))); + } + spin_lock_irqsave(&ring->lock, flags); /* Copy the shadow to the actual register */ @@ -43,8 +54,7 @@ static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr); } -static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit, - struct msm_file_private *ctx) +static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit) { struct msm_drm_private *priv = gpu->dev->dev_private; struct msm_ringbuffer *ring = submit->ring; @@ -57,7 +67,7 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit case MSM_SUBMIT_CMD_IB_TARGET_BUF: break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: - if (priv->lastctx == ctx) + if (priv->lastctx == submit->queue->ctx) break; fallthrough; case MSM_SUBMIT_CMD_BUF: @@ -91,7 +101,7 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit } } - a5xx_flush(gpu, ring); + a5xx_flush(gpu, ring, true); a5xx_preempt_trigger(gpu); /* we might not necessarily have a cmd from userspace to @@ -103,8 +113,7 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit msm_gpu_retire(gpu); } -static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, - struct msm_file_private *ctx) +static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); @@ -114,7 +123,7 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) { priv->lastctx = NULL; - a5xx_submit_in_rb(gpu, submit, ctx); + a5xx_submit_in_rb(gpu, submit); return; } @@ -148,7 +157,7 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, case MSM_SUBMIT_CMD_IB_TARGET_BUF: break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: - if (priv->lastctx == ctx) + if (priv->lastctx == submit->queue->ctx) break; fallthrough; case MSM_SUBMIT_CMD_BUF: @@ -206,7 +215,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, /* Set bit 0 to trigger an interrupt on preempt complete */ OUT_RING(ring, 0x01); - a5xx_flush(gpu, ring); + /* A WHERE_AM_I packet is not needed after a YIELD */ + a5xx_flush(gpu, ring, false); /* Check to see if we need to start preemption */ a5xx_preempt_trigger(gpu); @@ -365,7 +375,7 @@ static int a5xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu, ring); + a5xx_flush(gpu, ring, true); return a5xx_idle(gpu, ring) ? 0 : -EINVAL; } @@ -407,11 +417,31 @@ static int a5xx_preempt_start(struct msm_gpu *gpu) OUT_RING(ring, 0x01); OUT_RING(ring, 0x01); - gpu->funcs->flush(gpu, ring); + /* The WHERE_AMI_I packet is not needed after a YIELD is issued */ + a5xx_flush(gpu, ring, false); return a5xx_idle(gpu, ring) ? 0 : -EINVAL; } +static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu, + struct drm_gem_object *obj) +{ + u32 *buf = msm_gem_get_vaddr_active(obj); + + if (IS_ERR(buf)) + return; + + /* + * If the lowest nibble is 0xa that is an indication that this microcode + * has been patched. The actual version is in dword [3] but we only care + * about the patchlevel which is the lowest nibble of dword [3] + */ + if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) + a5xx_gpu->has_whereami = true; + + msm_gem_put_vaddr(obj); +} + static int a5xx_ucode_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -447,6 +477,7 @@ static int a5xx_ucode_init(struct msm_gpu *gpu) } msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw"); + a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo); } gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, @@ -506,6 +537,7 @@ static int a5xx_zap_shader_init(struct msm_gpu *gpu) static int a5xx_hw_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); int ret; gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); @@ -714,9 +746,36 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI, gpu->rb[0]->iova); + /* + * If the microcode supports the WHERE_AM_I opcode then we can use that + * in lieu of the RPTR shadow and enable preemption. Otherwise, we + * can't safely use the RPTR shadow or preemption. In either case, the + * RPTR shadow should be disabled in hardware. + */ gpu_write(gpu, REG_A5XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); + /* Disable preemption if WHERE_AM_I isn't available */ + if (!a5xx_gpu->has_whereami && gpu->nr_rings > 1) { + a5xx_preempt_fini(gpu); + gpu->nr_rings = 1; + } else { + /* Create a privileged buffer for the RPTR shadow */ + if (!a5xx_gpu->shadow_bo) { + a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev, + sizeof(u32) * gpu->nr_rings, + MSM_BO_UNCACHED | MSM_BO_MAP_PRIV, + gpu->aspace, &a5xx_gpu->shadow_bo, + &a5xx_gpu->shadow_iova); + + if (IS_ERR(a5xx_gpu->shadow)) + return PTR_ERR(a5xx_gpu->shadow); + } + + gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR, + REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0])); + } + a5xx_preempt_hw_init(gpu); /* Disable the interrupts through the initial bringup stage */ @@ -740,7 +799,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1); OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT)); - gpu->funcs->flush(gpu, gpu->rb[0]); + a5xx_flush(gpu, gpu->rb[0], true); if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; } @@ -758,7 +817,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1); OUT_RING(gpu->rb[0], 0x00000000); - gpu->funcs->flush(gpu, gpu->rb[0]); + a5xx_flush(gpu, gpu->rb[0], true); if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; } else if (ret == -ENODEV) { @@ -825,6 +884,11 @@ static void a5xx_destroy(struct msm_gpu *gpu) drm_gem_object_put(a5xx_gpu->gpmu_bo); } + if (a5xx_gpu->shadow_bo) { + msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace); + drm_gem_object_put(a5xx_gpu->shadow_bo); + } + adreno_gpu_cleanup(adreno_gpu); kfree(a5xx_gpu); } @@ -1057,17 +1121,6 @@ static irqreturn_t a5xx_irq(struct msm_gpu *gpu) return IRQ_HANDLED; } -static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI, - REG_A5XX_CP_RB_RPTR_ADDR_HI), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL), -}; - static const u32 a5xx_registers[] = { 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B, 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095, @@ -1432,6 +1485,17 @@ static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu) return (unsigned long)busy_time; } +static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + + if (a5xx_gpu->has_whereami) + return a5xx_gpu->shadow[ring->id]; + + return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR); +} + static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, @@ -1440,7 +1504,6 @@ static const struct adreno_gpu_funcs funcs = { .pm_resume = a5xx_pm_resume, .recover = a5xx_recover, .submit = a5xx_submit, - .flush = a5xx_flush, .active_ring = a5xx_active_ring, .irq = a5xx_irq, .destroy = a5xx_destroy, @@ -1454,6 +1517,7 @@ static const struct adreno_gpu_funcs funcs = { .gpu_state_get = a5xx_gpu_state_get, .gpu_state_put = a5xx_gpu_state_put, .create_address_space = adreno_iommu_create_address_space, + .get_rptr = a5xx_get_rptr, }, .get_timestamp = a5xx_get_timestamp, }; @@ -1512,14 +1576,12 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) gpu = &adreno_gpu->base; adreno_gpu->registers = a5xx_registers; - adreno_gpu->reg_offsets = a5xx_register_offsets; a5xx_gpu->lm_leakage = 0x4E001A; check_speed_bin(&pdev->dev); - /* Restricting nr_rings to 1 to temporarily disable preemption */ - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4); if (ret) { a5xx_destroy(&(a5xx_gpu->base.base)); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index 1e5b1a15a70f..c7187bcc5e90 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -37,6 +37,13 @@ struct a5xx_gpu { atomic_t preempt_state; struct timer_list preempt_timer; + + struct drm_gem_object *shadow_bo; + uint64_t shadow_iova; + uint32_t *shadow; + + /* True if the microcode supports the WHERE_AM_I opcode */ + bool has_whereami; }; #define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base) @@ -141,6 +148,9 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs, return -ETIMEDOUT; } +#define shadowptr(a5xx_gpu, ring) ((a5xx_gpu)->shadow_iova + \ + ((ring)->id * sizeof(uint32_t))) + bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); void a5xx_set_hwcg(struct msm_gpu *gpu, bool state); @@ -150,6 +160,8 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu); void a5xx_preempt_irq(struct msm_gpu *gpu); void a5xx_preempt_fini(struct msm_gpu *gpu); +void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, bool sync); + /* Return true if we are in a preempt state */ static inline bool a5xx_in_preempt(struct a5xx_gpu *a5xx_gpu) { diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index 321a8061fd32..f176a6f3eff6 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -240,7 +240,7 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); OUT_RING(ring, 1); - gpu->funcs->flush(gpu, ring); + a5xx_flush(gpu, ring, true); if (!a5xx_idle(gpu, ring)) { DRM_ERROR("%s: Unable to load GPMU firmware. GPMU will not be active\n", diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c index 9f3fe177b00e..7e04509c4e1f 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -259,8 +259,9 @@ static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu, ptr->magic = A5XX_PREEMPT_RECORD_MAGIC; ptr->info = 0; ptr->data = 0; - ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT; - ptr->rptr_addr = rbmemptr(ring, rptr); + ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE; + + ptr->rptr_addr = shadowptr(a5xx_gpu, ring); ptr->counter = counters_iova; return 0; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index e1c7bcd1b1eb..491fee410daf 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -11,6 +11,7 @@ #include "a6xx_gpu.h" #include "a6xx_gmu.xml.h" #include "msm_gem.h" +#include "msm_gpu_trace.h" #include "msm_mmu.h" static void a6xx_gmu_fault(struct a6xx_gmu *gmu) @@ -124,6 +125,8 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) gmu->current_perf_index = perf_index; gmu->freq = gmu->gpu_freqs[perf_index]; + trace_msm_gmu_freq_change(gmu->freq, perf_index); + /* * This can get called from devfreq while the hardware is idle. Don't * bring up the power if it isn't already active diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 66a95e22b7b3..948f3656c20c 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -51,9 +51,20 @@ bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); uint32_t wptr; unsigned long flags; + /* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */ + if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) { + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + OUT_PKT7(ring, CP_WHERE_AM_I, 2); + OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring))); + OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring))); + } + spin_lock_irqsave(&ring->lock, flags); /* Copy the shadow to the actual register */ @@ -81,8 +92,50 @@ static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter, OUT_RING(ring, upper_32_bits(iova)); } -static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, - struct msm_file_private *ctx) +static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, + struct msm_ringbuffer *ring, struct msm_file_private *ctx) +{ + phys_addr_t ttbr; + u32 asid; + u64 memptr = rbmemptr(ring, ttbr0); + + if (ctx == a6xx_gpu->cur_ctx) + return; + + if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid)) + return; + + /* Execute the table update */ + OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4); + OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr))); + + OUT_RING(ring, + CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) | + CP_SMMU_TABLE_UPDATE_1_ASID(asid)); + OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0)); + OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0)); + + /* + * Write the new TTBR0 to the memstore. This is good for debugging. + */ + OUT_PKT7(ring, CP_MEM_WRITE, 4); + OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr))); + OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr))); + OUT_RING(ring, lower_32_bits(ttbr)); + OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr)); + + /* + * And finally, trigger a uche flush to be sure there isn't anything + * lingering in that part of the GPU + */ + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, 0x31); + + a6xx_gpu->cur_ctx = ctx; +} + +static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) { unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT; struct msm_drm_private *priv = gpu->dev->dev_private; @@ -91,6 +144,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_ringbuffer *ring = submit->ring; unsigned int i; + a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx); + get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO, rbmemptr_stats(ring, index, cpcycles_start)); @@ -115,7 +170,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, case MSM_SUBMIT_CMD_IB_TARGET_BUF: break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: - if (priv->lastctx == ctx) + if (priv->lastctx == submit->queue->ctx) break; fallthrough; case MSM_SUBMIT_CMD_BUF: @@ -464,6 +519,30 @@ static int a6xx_cp_init(struct msm_gpu *gpu) return a6xx_idle(gpu, ring) ? 0 : -EINVAL; } +static void a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, + struct drm_gem_object *obj) +{ + u32 *buf = msm_gem_get_vaddr_active(obj); + + if (IS_ERR(buf)) + return; + + /* + * If the lowest nibble is 0xa that is an indication that this microcode + * has been patched. The actual version is in dword [3] but we only care + * about the patchlevel which is the lowest nibble of dword [3] + * + * Otherwise check that the firmware is greater than or equal to 1.90 + * which was the first version that had this fix built in + */ + if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) + a6xx_gpu->has_whereami = true; + else if ((buf[0] & 0xfff) > 0x190) + a6xx_gpu->has_whereami = true; + + msm_gem_put_vaddr(obj); +} + static int a6xx_ucode_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -484,6 +563,7 @@ static int a6xx_ucode_init(struct msm_gpu *gpu) } msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw"); + a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo); } gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE_LO, @@ -699,12 +779,43 @@ static int a6xx_hw_init(struct msm_gpu *gpu) gpu_write64(gpu, REG_A6XX_CP_RB_BASE, REG_A6XX_CP_RB_BASE_HI, gpu->rb[0]->iova); - gpu_write(gpu, REG_A6XX_CP_RB_CNTL, - MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); + /* Targets that support extended APRIV can use the RPTR shadow from + * hardware but all the other ones need to disable the feature. Targets + * that support the WHERE_AM_I opcode can use that instead + */ + if (adreno_gpu->base.hw_apriv) + gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT); + else + gpu_write(gpu, REG_A6XX_CP_RB_CNTL, + MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); + + /* + * Expanded APRIV and targets that support WHERE_AM_I both need a + * privileged buffer to store the RPTR shadow + */ + + if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) { + if (!a6xx_gpu->shadow_bo) { + a6xx_gpu->shadow = msm_gem_kernel_new_locked(gpu->dev, + sizeof(u32) * gpu->nr_rings, + MSM_BO_UNCACHED | MSM_BO_MAP_PRIV, + gpu->aspace, &a6xx_gpu->shadow_bo, + &a6xx_gpu->shadow_iova); + + if (IS_ERR(a6xx_gpu->shadow)) + return PTR_ERR(a6xx_gpu->shadow); + } + + gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO, + REG_A6XX_CP_RB_RPTR_ADDR_HI, + shadowptr(a6xx_gpu, gpu->rb[0])); + } /* Always come up on rb 0 */ a6xx_gpu->cur_ring = gpu->rb[0]; + a6xx_gpu->cur_ctx = NULL; + /* Enable the SQE_to start the CP engine */ gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1); @@ -911,18 +1022,6 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) return IRQ_HANDLED; } -static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A6XX_CP_RB_BASE), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A6XX_CP_RB_BASE_HI), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, - REG_A6XX_CP_RB_RPTR_ADDR_LO), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI, - REG_A6XX_CP_RB_RPTR_ADDR_HI), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A6XX_CP_RB_RPTR), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A6XX_CP_RB_WPTR), - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL), -}; - static int a6xx_pm_resume(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -931,6 +1030,8 @@ static int a6xx_pm_resume(struct msm_gpu *gpu) gpu->needs_hw_init = true; + trace_msm_gpu_resume(0); + ret = a6xx_gmu_resume(a6xx_gpu); if (ret) return ret; @@ -945,6 +1046,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + trace_msm_gpu_suspend(0); + devfreq_suspend_device(gpu->devfreq.devfreq); return a6xx_gmu_stop(a6xx_gpu); @@ -983,6 +1086,11 @@ static void a6xx_destroy(struct msm_gpu *gpu) drm_gem_object_put(a6xx_gpu->sqe_bo); } + if (a6xx_gpu->shadow_bo) { + msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->aspace); + drm_gem_object_put(a6xx_gpu->shadow_bo); + } + a6xx_gmu_remove(a6xx_gpu); adreno_gpu_cleanup(adreno_gpu); @@ -1017,6 +1125,31 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu) return (unsigned long)busy_time; } +static struct msm_gem_address_space * +a6xx_create_private_address_space(struct msm_gpu *gpu) +{ + struct msm_mmu *mmu; + + mmu = msm_iommu_pagetable_create(gpu->aspace->mmu); + + if (IS_ERR(mmu)) + return ERR_CAST(mmu); + + return msm_gem_address_space_create(mmu, + "gpu", 0x100000000ULL, 0x1ffffffffULL); +} + +static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) + return a6xx_gpu->shadow[ring->id]; + + return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR); +} + static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, @@ -1025,7 +1158,6 @@ static const struct adreno_gpu_funcs funcs = { .pm_resume = a6xx_pm_resume, .recover = a6xx_recover, .submit = a6xx_submit, - .flush = a6xx_flush, .active_ring = a6xx_active_ring, .irq = a6xx_irq, .destroy = a6xx_destroy, @@ -1040,6 +1172,8 @@ static const struct adreno_gpu_funcs funcs = { .gpu_state_put = a6xx_gpu_state_put, #endif .create_address_space = adreno_iommu_create_address_space, + .create_private_address_space = a6xx_create_private_address_space, + .get_rptr = a6xx_get_rptr, }, .get_timestamp = a6xx_get_timestamp, }; @@ -1048,6 +1182,8 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) { struct msm_drm_private *priv = dev->dev_private; struct platform_device *pdev = priv->gpu_pdev; + struct adreno_platform_config *config = pdev->dev.platform_data; + const struct adreno_info *info; struct device_node *node; struct a6xx_gpu *a6xx_gpu; struct adreno_gpu *adreno_gpu; @@ -1062,9 +1198,15 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) gpu = &adreno_gpu->base; adreno_gpu->registers = NULL; - adreno_gpu->reg_offsets = a6xx_register_offsets; - if (adreno_is_a650(adreno_gpu)) + /* + * We need to know the platform type before calling into adreno_gpu_init + * so that the hw_apriv flag can be correctly set. Snoop into the info + * and grab the revision number + */ + info = adreno_info(config->rev); + + if (info && info->revn == 650) adreno_gpu->base.hw_apriv = true; ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 03ba60d5b07f..3eeebf6a754b 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -19,8 +19,15 @@ struct a6xx_gpu { uint64_t sqe_iova; struct msm_ringbuffer *cur_ring; + struct msm_file_private *cur_ctx; struct a6xx_gmu gmu; + + struct drm_gem_object *shadow_bo; + uint64_t shadow_iova; + uint32_t *shadow; + + bool has_whereami; }; #define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base) @@ -50,6 +57,9 @@ static inline bool a6xx_has_gbif(struct adreno_gpu *gpu) return true; } +#define shadowptr(_a6xx_gpu, _ring) ((_a6xx_gpu)->shadow_iova + \ + ((_ring)->id * sizeof(uint32_t))) + int a6xx_gmu_resume(struct a6xx_gpu *gpu); int a6xx_gmu_stop(struct a6xx_gpu *gpu); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index b12f5b4a1bea..e9ede19193b0 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -875,7 +875,7 @@ static void a6xx_get_indexed_registers(struct msm_gpu *gpu, int i; a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count, - sizeof(a6xx_state->indexed_regs)); + sizeof(*a6xx_state->indexed_regs)); if (!a6xx_state->indexed_regs) return; diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 9eeb46bf2a5d..58e03b20e1c7 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -282,7 +282,7 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev) int ret; if (pdev) - gpu = platform_get_drvdata(pdev); + gpu = dev_to_gpu(&pdev->dev); if (!gpu) { dev_err_once(dev->dev, "no GPU device was found\n"); @@ -417,15 +417,13 @@ static int adreno_bind(struct device *dev, struct device *master, void *data) return PTR_ERR(gpu); } - dev_set_drvdata(dev, gpu); - return 0; } static void adreno_unbind(struct device *dev, struct device *master, void *data) { - struct msm_gpu *gpu = dev_get_drvdata(dev); + struct msm_gpu *gpu = dev_to_gpu(dev); pm_runtime_force_suspend(dev); gpu->funcs->destroy(gpu); @@ -490,16 +488,14 @@ static const struct of_device_id dt_match[] = { #ifdef CONFIG_PM static int adreno_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct msm_gpu *gpu = platform_get_drvdata(pdev); + struct msm_gpu *gpu = dev_to_gpu(dev); return gpu->funcs->pm_resume(gpu); } static int adreno_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct msm_gpu *gpu = platform_get_drvdata(pdev); + struct msm_gpu *gpu = dev_to_gpu(dev); return gpu->funcs->pm_suspend(gpu); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 862dd35b27d3..458b5b26d3c2 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -189,12 +189,27 @@ struct msm_gem_address_space * adreno_iommu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev) { - struct iommu_domain *iommu = iommu_domain_alloc(&platform_bus_type); - struct msm_mmu *mmu = msm_iommu_new(&pdev->dev, iommu); + struct iommu_domain *iommu; + struct msm_mmu *mmu; struct msm_gem_address_space *aspace; + u64 start, size; - aspace = msm_gem_address_space_create(mmu, "gpu", SZ_16M, - 0xffffffff - SZ_16M); + iommu = iommu_domain_alloc(&platform_bus_type); + if (!iommu) + return NULL; + + mmu = msm_iommu_new(&pdev->dev, iommu); + + /* + * Use the aperture start or SZ_16M, whichever is greater. This will + * ensure that we align with the allocated pagetable range while still + * allowing room in the lower 32 bits for GMEM and whatnot + */ + start = max_t(u64, SZ_16M, iommu->geometry.aperture_start); + size = iommu->geometry.aperture_end - start + 1; + + aspace = msm_gem_address_space_create(mmu, "gpu", + start & GENMASK_ULL(48, 0), size); if (IS_ERR(aspace) && !IS_ERR(mmu)) mmu->funcs->destroy(mmu); @@ -407,8 +422,9 @@ int adreno_hw_init(struct msm_gpu *gpu) static uint32_t get_rptr(struct adreno_gpu *adreno_gpu, struct msm_ringbuffer *ring) { - return ring->memptrs->rptr = adreno_gpu_read( - adreno_gpu, REG_ADRENO_CP_RB_RPTR); + struct msm_gpu *gpu = &adreno_gpu->base; + + return gpu->funcs->get_rptr(gpu, ring); } struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu) @@ -434,81 +450,8 @@ void adreno_recover(struct msm_gpu *gpu) } } -void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, - struct msm_file_private *ctx) -{ - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct msm_drm_private *priv = gpu->dev->dev_private; - struct msm_ringbuffer *ring = submit->ring; - unsigned i; - - for (i = 0; i < submit->nr_cmds; i++) { - switch (submit->cmd[i].type) { - case MSM_SUBMIT_CMD_IB_TARGET_BUF: - /* ignore IB-targets */ - break; - case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: - /* ignore if there has not been a ctx switch: */ - if (priv->lastctx == ctx) - break; - fallthrough; - case MSM_SUBMIT_CMD_BUF: - OUT_PKT3(ring, adreno_is_a4xx(adreno_gpu) ? - CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2); - OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); - OUT_PKT2(ring); - break; - } - } - - OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); - OUT_RING(ring, submit->seqno); - - if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) { - /* Flush HLSQ lazy updates to make sure there is nothing - * pending for indirect loads after the timestamp has - * passed: - */ - OUT_PKT3(ring, CP_EVENT_WRITE, 1); - OUT_RING(ring, HLSQ_FLUSH); - } - - /* wait for idle before cache flush/interrupt */ - OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); - OUT_RING(ring, 0x00000000); - - if (!adreno_is_a2xx(adreno_gpu)) { - /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */ - OUT_PKT3(ring, CP_EVENT_WRITE, 3); - OUT_RING(ring, CACHE_FLUSH_TS | BIT(31)); - OUT_RING(ring, rbmemptr(ring, fence)); - OUT_RING(ring, submit->seqno); - } else { - /* BIT(31) means something else on a2xx */ - OUT_PKT3(ring, CP_EVENT_WRITE, 3); - OUT_RING(ring, CACHE_FLUSH_TS); - OUT_RING(ring, rbmemptr(ring, fence)); - OUT_RING(ring, submit->seqno); - OUT_PKT3(ring, CP_INTERRUPT, 1); - OUT_RING(ring, 0x80000000); - } - -#if 0 - if (adreno_is_a3xx(adreno_gpu)) { - /* Dummy set-constant to trigger context rollover */ - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG)); - OUT_RING(ring, 0x00000000); - } -#endif - - gpu->funcs->flush(gpu, ring); -} - -void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, u32 reg) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); uint32_t wptr; /* Copy the shadow to the actual register */ @@ -524,7 +467,7 @@ void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) /* ensure writes to ringbuffer have hit system memory: */ mb(); - adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr); + gpu_write(gpu, reg, wptr); } bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index e55abae365b5..c3775f79525a 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -17,29 +17,8 @@ #include "adreno_common.xml.h" #include "adreno_pm4.xml.h" -#define REG_ADRENO_DEFINE(_offset, _reg) [_offset] = (_reg) + 1 -#define REG_SKIP ~0 -#define REG_ADRENO_SKIP(_offset) [_offset] = REG_SKIP - extern bool snapshot_debugbus; -/** - * adreno_regs: List of registers that are used in across all - * 3D devices. Each device type has different offset value for the same - * register, so an array of register offsets are declared for every device - * and are indexed by the enumeration values defined in this enum - */ -enum adreno_regs { - REG_ADRENO_CP_RB_BASE, - REG_ADRENO_CP_RB_BASE_HI, - REG_ADRENO_CP_RB_RPTR_ADDR, - REG_ADRENO_CP_RB_RPTR_ADDR_HI, - REG_ADRENO_CP_RB_RPTR, - REG_ADRENO_CP_RB_WPTR, - REG_ADRENO_CP_RB_CNTL, - REG_ADRENO_REGISTER_MAX, -}; - enum { ADRENO_FW_PM4 = 0, ADRENO_FW_SQE = 0, /* a6xx */ @@ -176,11 +155,6 @@ static inline bool adreno_is_a225(struct adreno_gpu *gpu) return gpu->revn == 225; } -static inline bool adreno_is_a3xx(struct adreno_gpu *gpu) -{ - return (gpu->revn >= 300) && (gpu->revn < 400); -} - static inline bool adreno_is_a305(struct adreno_gpu *gpu) { return gpu->revn == 305; @@ -207,11 +181,6 @@ static inline bool adreno_is_a330v2(struct adreno_gpu *gpu) return adreno_is_a330(gpu) && (gpu->rev.patchid > 0); } -static inline bool adreno_is_a4xx(struct adreno_gpu *gpu) -{ - return (gpu->revn >= 400) && (gpu->revn < 500); -} - static inline int adreno_is_a405(struct adreno_gpu *gpu) { return gpu->revn == 405; @@ -269,9 +238,7 @@ struct drm_gem_object *adreno_fw_create_bo(struct msm_gpu *gpu, const struct firmware *fw, u64 *iova); int adreno_hw_init(struct msm_gpu *gpu); void adreno_recover(struct msm_gpu *gpu); -void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, - struct msm_file_private *ctx); -void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring); +void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, u32 reg); bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state, @@ -365,59 +332,12 @@ OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23)); } -/* - * adreno_reg_check() - Checks the validity of a register enum - * @gpu: Pointer to struct adreno_gpu - * @offset_name: The register enum that is checked - */ -static inline bool adreno_reg_check(struct adreno_gpu *gpu, - enum adreno_regs offset_name) -{ - BUG_ON(offset_name >= REG_ADRENO_REGISTER_MAX || !gpu->reg_offsets[offset_name]); - - /* - * REG_SKIP is a special value that tell us that the register in - * question isn't implemented on target but don't trigger a BUG(). This - * is used to cleanly implement adreno_gpu_write64() and - * adreno_gpu_read64() in a generic fashion - */ - if (gpu->reg_offsets[offset_name] == REG_SKIP) - return false; - - return true; -} - -static inline u32 adreno_gpu_read(struct adreno_gpu *gpu, - enum adreno_regs offset_name) -{ - u32 reg = gpu->reg_offsets[offset_name]; - u32 val = 0; - if(adreno_reg_check(gpu,offset_name)) - val = gpu_read(&gpu->base, reg - 1); - return val; -} - -static inline void adreno_gpu_write(struct adreno_gpu *gpu, - enum adreno_regs offset_name, u32 data) -{ - u32 reg = gpu->reg_offsets[offset_name]; - if(adreno_reg_check(gpu, offset_name)) - gpu_write(&gpu->base, reg - 1, data); -} - struct msm_gpu *a2xx_gpu_init(struct drm_device *dev); struct msm_gpu *a3xx_gpu_init(struct drm_device *dev); struct msm_gpu *a4xx_gpu_init(struct drm_device *dev); struct msm_gpu *a5xx_gpu_init(struct drm_device *dev); struct msm_gpu *a6xx_gpu_init(struct drm_device *dev); -static inline void adreno_gpu_write64(struct adreno_gpu *gpu, - enum adreno_regs lo, enum adreno_regs hi, u64 data) -{ - adreno_gpu_write(gpu, lo, lower_32_bits(data)); - adreno_gpu_write(gpu, hi, upper_32_bits(data)); -} - static inline uint32_t get_wptr(struct msm_ringbuffer *ring) { return (ring->cur - ring->start) % (MSM_GPU_RINGBUFFER_SZ >> 2); diff --git a/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h b/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h index 3931eecadaff..59bb8c1ffce6 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h +++ b/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h @@ -298,6 +298,7 @@ enum adreno_pm4_type3_packets { CP_SET_BIN_DATA5_OFFSET = 46, CP_SET_CTXSWITCH_IB = 85, CP_REG_WRITE = 109, + CP_WHERE_AM_I = 98, }; enum adreno_state_block { |