diff options
author | Dave Airlie <airlied@redhat.com> | 2022-11-30 10:19:17 +0300 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2022-11-30 10:19:18 +0300 |
commit | 077bd80083abb4d67af8c2d30ea7f7eb2dee1f0d (patch) | |
tree | 106e9f4af37fed0a12c25f8c0ed3be3515da355a /drivers/gpu/drm/msm | |
parent | 92e11ddb2c3d931241ddc7b3d82f0fe34ef757a7 (diff) | |
parent | d73b1d02de0858b96f743e1e8b767fb092ae4c1b (diff) | |
download | linux-077bd80083abb4d67af8c2d30ea7f7eb2dee1f0d.tar.xz |
Merge tag 'drm-msm-next-2022-11-28' of https://gitlab.freedesktop.org/drm/msm into drm-next
msm-next for v6.2 (the gpu/gem bits)
- Remove exclusive-fence hack that caused over-synchronization
- Fix speed-bin detection vs. probe-defer
- Enable clamp_to_idle on 7c3
- Improved hangcheck detection
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Rob Clark <robdclark@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/CAF6AEGvT1h_S4d=YRgphgR8i7aMaxQaNW8mru7QaoUo9uiUk2A@mail.gmail.com
Diffstat (limited to 'drivers/gpu/drm/msm')
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 27 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 84 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_drv.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_drv.h | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gem_shrinker.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gem_submit.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gpu.c | 31 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gpu.h | 22 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_ringbuffer.h | 28 |
12 files changed, 150 insertions, 66 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 2fb32d5552c4..3e09d3a7a0ac 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -606,8 +606,7 @@ static int a4xx_pm_suspend(struct msm_gpu *gpu) { static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) { - *value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO, - REG_A4XX_RBBM_PERFCTR_CP_0_HI); + *value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO); return 0; } diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 3c537c0016fa..660ba0db8900 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -605,11 +605,9 @@ static int a5xx_ucode_init(struct msm_gpu *gpu) a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo); } - gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, - REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova); + gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, a5xx_gpu->pm4_iova); - gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO, - REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova); + gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO, a5xx_gpu->pfp_iova); return 0; } @@ -868,8 +866,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) * memory rendering at this point in time and we don't want to block off * part of the virtual memory space. */ - gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, - REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000); + gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000); gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); /* Put the GPU into 64 bit by default */ @@ -908,8 +905,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) return ret; /* Set the ringbuffer address */ - gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI, - gpu->rb[0]->iova); + gpu_write64(gpu, REG_A5XX_CP_RB_BASE, gpu->rb[0]->iova); /* * If the microcode supports the WHERE_AM_I opcode then we can use that @@ -936,7 +932,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) } gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR, - REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0])); + shadowptr(a5xx_gpu, gpu->rb[0])); } else if (gpu->nr_rings > 1) { /* Disable preemption if WHERE_AM_I isn't available */ a5xx_preempt_fini(gpu); @@ -1239,9 +1235,9 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu) gpu_read(gpu, REG_A5XX_RBBM_STATUS), gpu_read(gpu, REG_A5XX_CP_RB_RPTR), gpu_read(gpu, REG_A5XX_CP_RB_WPTR), - gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI), + gpu_read64(gpu, REG_A5XX_CP_IB1_BASE), gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ), - gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI), + gpu_read64(gpu, REG_A5XX_CP_IB2_BASE), gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ)); /* Turn off the hangcheck timer to keep it from bothering us */ @@ -1427,8 +1423,7 @@ static int a5xx_pm_suspend(struct msm_gpu *gpu) static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) { - *value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO, - REG_A5XX_RBBM_ALWAYSON_COUNTER_HI); + *value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO); return 0; } @@ -1465,8 +1460,7 @@ static int a5xx_crashdumper_run(struct msm_gpu *gpu, if (IS_ERR_OR_NULL(dumper->ptr)) return -EINVAL; - gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, - REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova); + gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova); gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1); @@ -1666,8 +1660,7 @@ static u64 a5xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate) { u64 busy_cycles; - busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO, - REG_A5XX_RBBM_PERFCTR_RBBM_0_HI); + busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO); *out_sample_rate = clk_get_rate(gpu->core_clk); return busy_cycles; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c index 8abc9a2b114a..7658e89844b4 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -137,7 +137,6 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu) /* Set the address of the incoming preemption record */ gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO, - REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI, a5xx_gpu->preempt_iova[ring->id]); a5xx_gpu->next_ring = ring; @@ -211,8 +210,7 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu) } /* Write a 0 to signal that we aren't switching pagetables */ - gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, - REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0); + gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, 0); /* Reset the preemption state */ set_preempt_state(a5xx_gpu, PREEMPT_NONE); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index e87196457b9a..36c8fb699b56 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -247,8 +247,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_RING(ring, submit->seqno); trace_msm_gpu_submit_flush(submit, - gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO, - REG_A6XX_CP_ALWAYS_ON_COUNTER_HI)); + gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO)); a6xx_flush(gpu, ring); } @@ -947,8 +946,7 @@ static int a6xx_ucode_init(struct msm_gpu *gpu) } } - gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, - REG_A6XX_CP_SQE_INSTR_BASE+1, a6xx_gpu->sqe_iova); + gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova); return 0; } @@ -999,8 +997,7 @@ static int hw_init(struct msm_gpu *gpu) * memory rendering at this point in time and we don't want to block off * part of the virtual memory space. */ - gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, - REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000); + gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000); gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); /* Turn on 64 bit addressing for all blocks */ @@ -1049,11 +1046,9 @@ static int hw_init(struct msm_gpu *gpu) if (!adreno_is_a650_family(adreno_gpu)) { /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */ - gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, - REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000); + gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000); gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO, - REG_A6XX_UCHE_GMEM_RANGE_MAX_HI, 0x00100000 + adreno_gpu->gmem - 1); } @@ -1145,8 +1140,7 @@ static int hw_init(struct msm_gpu *gpu) goto out; /* Set the ringbuffer address */ - gpu_write64(gpu, REG_A6XX_CP_RB_BASE, REG_A6XX_CP_RB_BASE_HI, - gpu->rb[0]->iova); + gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova); /* Targets that support extended APRIV can use the RPTR shadow from * hardware but all the other ones need to disable the feature. Targets @@ -1178,7 +1172,6 @@ static int hw_init(struct msm_gpu *gpu) } gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO, - REG_A6XX_CP_RB_RPTR_ADDR_HI, shadowptr(a6xx_gpu, gpu->rb[0])); } @@ -1499,9 +1492,9 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) gpu_read(gpu, REG_A6XX_RBBM_STATUS), gpu_read(gpu, REG_A6XX_CP_RB_RPTR), gpu_read(gpu, REG_A6XX_CP_RB_WPTR), - gpu_read64(gpu, REG_A6XX_CP_IB1_BASE, REG_A6XX_CP_IB1_BASE_HI), + gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), - gpu_read64(gpu, REG_A6XX_CP_IB2_BASE, REG_A6XX_CP_IB2_BASE_HI), + gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE)); /* Turn off the hangcheck timer to keep it from bothering us */ @@ -1712,8 +1705,7 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) /* Force the GPU power on so we can read this register */ a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); - *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO, - REG_A6XX_CP_ALWAYS_ON_COUNTER_HI); + *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO); a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); @@ -1824,6 +1816,39 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR); } +static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + struct msm_cp_state cp_state = { + .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), + .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), + .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), + .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE), + }; + bool progress; + + /* + * Adjust the remaining data to account for what has already been + * fetched from memory, but not yet consumed by the SQE. + * + * This is not *technically* correct, the amount buffered could + * exceed the IB size due to hw prefetching ahead, but: + * + * (1) We aren't trying to find the exact position, just whether + * progress has been made + * (2) The CP_REG_TO_MEM at the end of a submit should be enough + * to prevent prefetching into an unrelated submit. (And + * either way, at some point the ROQ will be full.) + */ + cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16; + cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB2_STAT) >> 16; + + progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state)); + + ring->last_cp_state = cp_state; + + return progress; +} + static u32 a618_get_speed_bin(u32 fuse) { if (fuse == 0) @@ -1879,7 +1904,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse) if (val == UINT_MAX) { DRM_DEV_ERROR(dev, - "missing support for speed-bin: %u. Some OPPs may not be supported by hardware", + "missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n", fuse); return UINT_MAX; } @@ -1889,7 +1914,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse) static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev) { - u32 supp_hw = UINT_MAX; + u32 supp_hw; u32 speedbin; int ret; @@ -1901,15 +1926,13 @@ static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev) if (ret == -ENOENT) { return 0; } else if (ret) { - DRM_DEV_ERROR(dev, - "failed to read speed-bin (%d). Some OPPs may not be supported by hardware", - ret); - goto done; + dev_err_probe(dev, ret, + "failed to read speed-bin. Some OPPs may not be supported by hardware\n"); + return ret; } supp_hw = fuse_to_supp_hw(dev, rev, speedbin); -done: ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1); if (ret) return ret; @@ -1942,6 +1965,7 @@ static const struct adreno_gpu_funcs funcs = { .create_address_space = a6xx_create_address_space, .create_private_address_space = a6xx_create_private_address_space, .get_rptr = a6xx_get_rptr, + .progress = a6xx_progress, }, .get_timestamp = a6xx_get_timestamp, }; @@ -1978,13 +2002,6 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev))) adreno_gpu->base.hw_apriv = true; - /* - * For now only clamp to idle freq for devices where this is known not - * to cause power supply issues: - */ - if (info && (info->revn == 618)) - gpu->clamp_to_idle = true; - a6xx_llc_slices_init(pdev, a6xx_gpu); ret = a6xx_set_supported_hw(&pdev->dev, config->rev); @@ -1999,6 +2016,13 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) return ERR_PTR(ret); } + /* + * For now only clamp to idle freq for devices where this is known not + * to cause power supply issues: + */ + if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu)) + gpu->clamp_to_idle = true; + /* Check if there is a GMU phandle and set it up */ node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index a5c3d1ed255a..a023d5f962dc 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -147,8 +147,7 @@ static int a6xx_crashdumper_run(struct msm_gpu *gpu, /* Make sure all pending memory writes are posted */ wmb(); - gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, - REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova); + gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova); gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1); diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index cffba9ca7ec2..8b0b0ac74a6f 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -418,7 +418,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) priv->dev = ddev; priv->wq = alloc_ordered_workqueue("msm", 0); - priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD; INIT_LIST_HEAD(&priv->objects); mutex_init(&priv->obj_lock); diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index b2ea262296a4..d4e0ef608950 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -224,7 +224,13 @@ struct msm_drm_private { struct drm_atomic_state *pm_state; - /* For hang detection, in ms */ + /** + * hangcheck_period: For hang detection, in ms + * + * Note that in practice, a submit/job will get at least two hangcheck + * periods, due to checking for progress being implemented as simply + * "have the CP position registers changed since last time?" + */ unsigned int hangcheck_period; /** diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 1de14e67f96b..051bdbc093cf 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -15,7 +15,7 @@ /* Default disabled for now until it has some more testing on the different * iommu combinations that can be paired with the driver: */ -static bool enable_eviction = false; +static bool enable_eviction = true; MODULE_PARM_DESC(enable_eviction, "Enable swappable GEM buffers"); module_param(enable_eviction, bool, 0600); diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 45a3e5cadc7d..73a2ca122c57 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -334,8 +334,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) if (ret) return ret; - /* exclusive fences must be ordered */ - if (no_implicit && !write) + if (no_implicit) continue; ret = drm_sched_job_add_implicit_dependencies(&submit->base, diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 021f4e29b613..30ed45af76ad 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -492,6 +492,21 @@ static void hangcheck_timer_reset(struct msm_gpu *gpu) round_jiffies_up(jiffies + msecs_to_jiffies(priv->hangcheck_period))); } +static bool made_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + if (ring->hangcheck_progress_retries >= DRM_MSM_HANGCHECK_PROGRESS_RETRIES) + return false; + + if (!gpu->funcs->progress) + return false; + + if (!gpu->funcs->progress(gpu, ring)) + return false; + + ring->hangcheck_progress_retries++; + return true; +} + static void hangcheck_handler(struct timer_list *t) { struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer); @@ -502,9 +517,12 @@ static void hangcheck_handler(struct timer_list *t) if (fence != ring->hangcheck_fence) { /* some progress has been made.. ya! */ ring->hangcheck_fence = fence; - } else if (fence_before(fence, ring->fctx->last_fence)) { + ring->hangcheck_progress_retries = 0; + } else if (fence_before(fence, ring->fctx->last_fence) && + !made_progress(gpu, ring)) { /* no progress and not done.. hung! */ ring->hangcheck_fence = fence; + ring->hangcheck_progress_retries = 0; DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n", gpu->name, ring->id); DRM_DEV_ERROR(dev->dev, "%s: completed fence: %u\n", @@ -830,6 +848,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, const char *name, struct msm_gpu_config *config) { + struct msm_drm_private *priv = drm->dev_private; int i, ret, nr_rings = config->nr_rings; void *memptrs; uint64_t memptrs_iova; @@ -857,6 +876,16 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, kthread_init_work(&gpu->recover_work, recover_worker); kthread_init_work(&gpu->fault_work, fault_worker); + priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD; + + /* + * If progress detection is supported, halve the hangcheck timer + * duration, as it takes two iterations of the hangcheck handler + * to detect a hang. + */ + if (funcs->progress) + priv->hangcheck_period /= 2; + timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0); spin_lock_init(&gpu->perf_lock); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 58a72e6b1400..651786bc55e5 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -78,6 +78,15 @@ struct msm_gpu_funcs { struct msm_gem_address_space *(*create_private_address_space) (struct msm_gpu *gpu); uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); + + /** + * progress: Has the GPU made progress? + * + * Return true if GPU position in cmdstream has advanced (or changed) + * since the last call. To avoid false negatives, this should account + * for cmdstream that is buffered in this FIFO upstream of the CP fw. + */ + bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); }; /* Additional state for iommu faults: */ @@ -237,6 +246,7 @@ struct msm_gpu { #define DRM_MSM_INACTIVE_PERIOD 66 /* in ms (roughly four frames) */ #define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */ +#define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3 struct timer_list hangcheck_timer; /* Fault info for most recent iova fault: */ @@ -540,7 +550,7 @@ static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or) msm_rmw(gpu->mmio + (reg << 2), mask, or); } -static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi) +static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg) { u64 val; @@ -558,17 +568,17 @@ static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi) * when the lo is read, so make sure to read the lo first to trigger * that */ - val = (u64) msm_readl(gpu->mmio + (lo << 2)); - val |= ((u64) msm_readl(gpu->mmio + (hi << 2)) << 32); + val = (u64) msm_readl(gpu->mmio + (reg << 2)); + val |= ((u64) msm_readl(gpu->mmio + ((reg + 1) << 2)) << 32); return val; } -static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val) +static inline void gpu_write64(struct msm_gpu *gpu, u32 reg, u64 val) { /* Why not a writeq here? Read the screed above */ - msm_writel(lower_32_bits(val), gpu->mmio + (lo << 2)); - msm_writel(upper_32_bits(val), gpu->mmio + (hi << 2)); + msm_writel(lower_32_bits(val), gpu->mmio + (reg << 2)); + msm_writel(upper_32_bits(val), gpu->mmio + ((reg + 1) << 2)); } int msm_gpu_pm_suspend(struct msm_gpu *gpu); diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index 2a5045abe46e..698b333abccd 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -35,6 +35,11 @@ struct msm_rbmemptrs { volatile u64 ttbr0; }; +struct msm_cp_state { + uint64_t ib1_base, ib2_base; + uint32_t ib1_rem, ib2_rem; +}; + struct msm_ringbuffer { struct msm_gpu *gpu; int id; @@ -64,6 +69,29 @@ struct msm_ringbuffer { uint64_t memptrs_iova; struct msm_fence_context *fctx; + /** + * hangcheck_progress_retries: + * + * The number of extra hangcheck duration cycles that we have given + * due to it appearing that the GPU is making forward progress. + * + * For GPU generations which support progress detection (see. + * msm_gpu_funcs::progress()), if the GPU appears to be making progress + * (ie. the CP has advanced in the command stream, we'll allow up to + * DRM_MSM_HANGCHECK_PROGRESS_RETRIES expirations of the hangcheck timer + * before killing the job. But to detect progress we need two sample + * points, so the duration of the hangcheck timer is halved. In other + * words we'll let the submit run for up to: + * + * (DRM_MSM_HANGCHECK_DEFAULT_PERIOD / 2) * (DRM_MSM_HANGCHECK_PROGRESS_RETRIES + 1) + */ + int hangcheck_progress_retries; + + /** + * last_cp_state: The state of the CP at the last call to gpu->progress() + */ + struct msm_cp_state last_cp_state; + /* * preempt_lock protects preemption and serializes wptr updates against * preemption. Can be aquired from irq context. |