diff options
Diffstat (limited to 'drivers/gpu/drm/msm/adreno')
24 files changed, 1782 insertions, 241 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a2xx_catalog.c b/drivers/gpu/drm/msm/adreno/a2xx_catalog.c index 9ddb7b31fd98..5ddd015f930d 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx_catalog.c +++ b/drivers/gpu/drm/msm/adreno/a2xx_catalog.c @@ -45,8 +45,3 @@ static const struct adreno_info a2xx_gpus[] = { } }; DECLARE_ADRENO_GPULIST(a2xx); - -MODULE_FIRMWARE("qcom/leia_pfp_470.fw"); -MODULE_FIRMWARE("qcom/leia_pm4_470.fw"); -MODULE_FIRMWARE("qcom/yamato_pfp.fw"); -MODULE_FIRMWARE("qcom/yamato_pm4.fw"); diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c index 0dc255ddf5ce..379a3d346c30 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c @@ -22,7 +22,7 @@ static void a2xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: /* ignore if there has not been a ctx switch: */ - if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno) + if (ring->cur_ctx_seqno == submit->queue->ctx->seqno) break; fallthrough; case MSM_SUBMIT_CMD_BUF: diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c index 39641551eeb6..4280f71e472a 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c @@ -71,10 +71,6 @@ static int a2xx_gpummu_unmap(struct msm_mmu *mmu, uint64_t iova, size_t len) return 0; } -static void a2xx_gpummu_resume_translation(struct msm_mmu *mmu) -{ -} - static void a2xx_gpummu_destroy(struct msm_mmu *mmu) { struct a2xx_gpummu *gpummu = to_a2xx_gpummu(mmu); @@ -90,7 +86,6 @@ static const struct msm_mmu_funcs funcs = { .map = a2xx_gpummu_map, .unmap = a2xx_gpummu_unmap, .destroy = a2xx_gpummu_destroy, - .resume_translation = a2xx_gpummu_resume_translation, }; struct msm_mmu *a2xx_gpummu_new(struct device *dev, struct msm_gpu *gpu) diff --git a/drivers/gpu/drm/msm/adreno/a3xx_catalog.c b/drivers/gpu/drm/msm/adreno/a3xx_catalog.c index 2eb6c3e93748..1498e6532f62 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_catalog.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_catalog.c @@ -85,8 +85,3 @@ static const struct adreno_info a3xx_gpus[] = { } }; DECLARE_ADRENO_GPULIST(a3xx); - -MODULE_FIRMWARE("qcom/a300_pm4.fw"); -MODULE_FIRMWARE("qcom/a300_pfp.fw"); -MODULE_FIRMWARE("qcom/a330_pm4.fw"); -MODULE_FIRMWARE("qcom/a330_pfp.fw"); diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index b46ff49f47cf..b6df115bb567 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -40,7 +40,7 @@ static void a3xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: /* ignore if there has not been a ctx switch: */ - if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno) + if (ring->cur_ctx_seqno == submit->queue->ctx->seqno) break; fallthrough; case MSM_SUBMIT_CMD_BUF: diff --git a/drivers/gpu/drm/msm/adreno/a4xx_catalog.c b/drivers/gpu/drm/msm/adreno/a4xx_catalog.c index 93519f807f87..09f9f228b75e 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_catalog.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_catalog.c @@ -45,6 +45,3 @@ static const struct adreno_info a4xx_gpus[] = { } }; DECLARE_ADRENO_GPULIST(a4xx); - -MODULE_FIRMWARE("qcom/a420_pm4.fw"); -MODULE_FIRMWARE("qcom/a420_pfp.fw"); diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 8b4cdf95f445..f1b18a6663f7 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -34,7 +34,7 @@ static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: /* ignore if there has not been a ctx switch: */ - if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno) + if (ring->cur_ctx_seqno == submit->queue->ctx->seqno) break; fallthrough; case MSM_SUBMIT_CMD_BUF: @@ -251,8 +251,8 @@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07); /* Disable L2 bypass to avoid UCHE out of bounds errors */ - gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000); - gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000); + gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base)); + gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base)); gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) | (adreno_is_a420(adreno_gpu) ? (1 << 29) : 0)); @@ -693,6 +693,8 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) if (ret) goto fail; + adreno_gpu->uche_trap_base = 0xffff0000ffff0000ull; + if (!gpu->aspace) { /* TODO we think it is possible to configure the GPU to * restrict access to VRAM carveout. But the required diff --git a/drivers/gpu/drm/msm/adreno/a5xx_catalog.c b/drivers/gpu/drm/msm/adreno/a5xx_catalog.c index 633f31539162..b48a636d8237 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_catalog.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_catalog.c @@ -150,12 +150,3 @@ static const struct adreno_info a5xx_gpus[] = { } }; DECLARE_ADRENO_GPULIST(a5xx); - -MODULE_FIRMWARE("qcom/a530_pm4.fw"); -MODULE_FIRMWARE("qcom/a530_pfp.fw"); -MODULE_FIRMWARE("qcom/a530v3_gpmu.fw2"); -MODULE_FIRMWARE("qcom/a530_zap.mdt"); -MODULE_FIRMWARE("qcom/a530_zap.b00"); -MODULE_FIRMWARE("qcom/a530_zap.b01"); -MODULE_FIRMWARE("qcom/a530_zap.b02"); -MODULE_FIRMWARE("qcom/a540_gpmu.fw2"); diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index e09044930547..60aef0796236 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -77,7 +77,7 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit case MSM_SUBMIT_CMD_IB_TARGET_BUF: break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: - if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno) + if (ring->cur_ctx_seqno == submit->queue->ctx->seqno) break; fallthrough; case MSM_SUBMIT_CMD_BUF: @@ -131,8 +131,10 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + adreno_check_and_reenable_stall(adreno_gpu); + if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) { - gpu->cur_ctx_seqno = 0; + ring->cur_ctx_seqno = 0; a5xx_submit_in_rb(gpu, submit); return; } @@ -171,7 +173,7 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) case MSM_SUBMIT_CMD_IB_TARGET_BUF: break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: - if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno) + if (ring->cur_ctx_seqno == submit->queue->ctx->seqno) break; fallthrough; case MSM_SUBMIT_CMD_BUF: @@ -750,10 +752,10 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02); /* Disable L2 bypass in the UCHE */ - gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000); - gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF); - gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000); - gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF); + gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base)); + gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base)); + gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base)); + gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base)); /* Set the GMEM VA range (0 to gpu->gmem) */ gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000); @@ -1253,7 +1255,7 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu) gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ)); /* Turn off the hangcheck timer to keep it from bothering us */ - del_timer(&gpu->hangcheck_timer); + timer_delete(&gpu->hangcheck_timer); kthread_queue_work(gpu->worker, &gpu->recover_work); } @@ -1760,11 +1762,6 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) unsigned int nr_rings; int ret; - if (!pdev) { - DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n"); - return ERR_PTR(-ENXIO); - } - a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL); if (!a5xx_gpu) return ERR_PTR(-ENOMEM); @@ -1805,5 +1802,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) adreno_gpu->ubwc_config.macrotile_mode = 0; adreno_gpu->ubwc_config.ubwc_swizzle = 0x7; + adreno_gpu->uche_trap_base = 0x0001ffffffff0000ull; + return gpu; } diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index 7705f8010484..6b91e0bd1514 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -307,7 +307,7 @@ int a5xx_power_init(struct msm_gpu *gpu) else if (adreno_is_a540(adreno_gpu)) a540_lm_setup(gpu); - /* Set up SP/TP power collpase */ + /* Set up SP/TP power collapse */ a5xx_pc_init(gpu); /* Start the GPMU */ diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c index 0469fea55010..b5f9d40687d5 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -79,7 +79,8 @@ static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu) static void a5xx_preempt_timer(struct timer_list *t) { - struct a5xx_gpu *a5xx_gpu = from_timer(a5xx_gpu, t, preempt_timer); + struct a5xx_gpu *a5xx_gpu = timer_container_of(a5xx_gpu, t, + preempt_timer); struct msm_gpu *gpu = &a5xx_gpu->base.base; struct drm_device *dev = gpu->dev; @@ -182,7 +183,7 @@ void a5xx_preempt_irq(struct msm_gpu *gpu) return; /* Delete the preemption watchdog timer */ - del_timer(&a5xx_gpu->preempt_timer); + timer_delete(&a5xx_gpu->preempt_timer); /* * The hardware should be setting CP_CONTEXT_SWITCH_CNTL to zero before diff --git a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c index 0312b6ee0356..70f7ad806c34 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c @@ -681,6 +681,7 @@ static const struct adreno_info a6xx_gpus[] = { [ADRENO_FW_SQE] = "a630_sqe.fw", }, .gmem = (SZ_128K + SZ_4K), + .quirks = ADRENO_QUIRK_4GB_VA, .inactive_period = DRM_MSM_INACTIVE_PERIOD, .init = a6xx_gpu_init, .zapfw = "a610_zap.mdt", @@ -713,6 +714,7 @@ static const struct adreno_info a6xx_gpus[] = { [ADRENO_FW_GMU] = "a630_gmu.bin", }, .gmem = SZ_512K, + .quirks = ADRENO_QUIRK_4GB_VA, .inactive_period = DRM_MSM_INACTIVE_PERIOD, .init = a6xx_gpu_init, .zapfw = "a615_zap.mdt", @@ -743,7 +745,8 @@ static const struct adreno_info a6xx_gpus[] = { }, .gmem = SZ_512K, .inactive_period = DRM_MSM_INACTIVE_PERIOD, - .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT, + .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | + ADRENO_QUIRK_4GB_VA, .init = a6xx_gpu_init, .zapfw = "a615_zap.mbn", .a6xx = &(const struct a6xx_info) { @@ -769,7 +772,8 @@ static const struct adreno_info a6xx_gpus[] = { }, .gmem = SZ_512K, .inactive_period = DRM_MSM_INACTIVE_PERIOD, - .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT, + .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | + ADRENO_QUIRK_4GB_VA, .init = a6xx_gpu_init, .a6xx = &(const struct a6xx_info) { .protect = &a630_protect, @@ -791,6 +795,7 @@ static const struct adreno_info a6xx_gpus[] = { [ADRENO_FW_GMU] = "a619_gmu.bin", }, .gmem = SZ_512K, + .quirks = ADRENO_QUIRK_4GB_VA, .inactive_period = DRM_MSM_INACTIVE_PERIOD, .init = a6xx_gpu_init, .zapfw = "a615_zap.mdt", @@ -815,6 +820,7 @@ static const struct adreno_info a6xx_gpus[] = { [ADRENO_FW_GMU] = "a619_gmu.bin", }, .gmem = SZ_512K, + .quirks = ADRENO_QUIRK_4GB_VA, .inactive_period = DRM_MSM_INACTIVE_PERIOD, .init = a6xx_gpu_init, .zapfw = "a615_zap.mdt", @@ -838,8 +844,9 @@ static const struct adreno_info a6xx_gpus[] = { [ADRENO_FW_GMU] = "a619_gmu.bin", }, .gmem = SZ_512K, + .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | + ADRENO_QUIRK_4GB_VA, .inactive_period = DRM_MSM_INACTIVE_PERIOD, - .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT, .init = a6xx_gpu_init, .zapfw = "a615_zap.mdt", .a6xx = &(const struct a6xx_info) { @@ -874,12 +881,39 @@ static const struct adreno_info a6xx_gpus[] = { .gmu_cgc_mode = 0x00020200, .prim_fifo_threshold = 0x00010000, }, - .address_space_size = SZ_16G, .speedbins = ADRENO_SPEEDBINS( { 0, 0 }, { 137, 1 }, ), }, { + .chip_ids = ADRENO_CHIP_IDS(0x06020300), + .family = ADRENO_6XX_GEN3, + .fw = { + [ADRENO_FW_SQE] = "a650_sqe.fw", + [ADRENO_FW_GMU] = "a623_gmu.bin", + }, + .gmem = SZ_512K, + .inactive_period = DRM_MSM_INACTIVE_PERIOD, + .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | + ADRENO_QUIRK_HAS_HW_APRIV, + .init = a6xx_gpu_init, + .a6xx = &(const struct a6xx_info) { + .hwcg = a690_hwcg, + .protect = &a650_protect, + .gmu_cgc_mode = 0x00020200, + .prim_fifo_threshold = 0x00010000, + .bcms = (const struct a6xx_bcm[]) { + { .name = "SH0", .buswidth = 16 }, + { .name = "MC0", .buswidth = 4 }, + { + .name = "ACV", + .fixed = true, + .perfmode = BIT(3), + }, + { /* sentinel */ }, + }, + }, + }, { .chip_ids = ADRENO_CHIP_IDS( 0x06030001, 0x06030002 @@ -891,8 +925,9 @@ static const struct adreno_info a6xx_gpus[] = { [ADRENO_FW_GMU] = "a630_gmu.bin", }, .gmem = SZ_1M, + .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | + ADRENO_QUIRK_4GB_VA, .inactive_period = DRM_MSM_INACTIVE_PERIOD, - .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT, .init = a6xx_gpu_init, .zapfw = "a630_zap.mdt", .a6xx = &(const struct a6xx_info) { @@ -910,8 +945,9 @@ static const struct adreno_info a6xx_gpus[] = { [ADRENO_FW_GMU] = "a640_gmu.bin", }, .gmem = SZ_1M, + .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | + ADRENO_QUIRK_4GB_VA, .inactive_period = DRM_MSM_INACTIVE_PERIOD, - .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT, .init = a6xx_gpu_init, .zapfw = "a640_zap.mdt", .a6xx = &(const struct a6xx_info) { @@ -944,7 +980,6 @@ static const struct adreno_info a6xx_gpus[] = { .gmu_cgc_mode = 0x00020202, .prim_fifo_threshold = 0x00300200, }, - .address_space_size = SZ_16G, .speedbins = ADRENO_SPEEDBINS( { 0, 0 }, { 1, 1 }, @@ -971,7 +1006,24 @@ static const struct adreno_info a6xx_gpus[] = { .gmu_cgc_mode = 0x00020000, .prim_fifo_threshold = 0x00300200, }, - .address_space_size = SZ_16G, + }, { + .chip_ids = ADRENO_CHIP_IDS(0x06060300), + .family = ADRENO_6XX_GEN4, + .fw = { + [ADRENO_FW_SQE] = "a660_sqe.fw", + [ADRENO_FW_GMU] = "a663_gmu.bin", + }, + .gmem = SZ_1M + SZ_512K, + .inactive_period = DRM_MSM_INACTIVE_PERIOD, + .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | + ADRENO_QUIRK_HAS_HW_APRIV, + .init = a6xx_gpu_init, + .a6xx = &(const struct a6xx_info) { + .hwcg = a690_hwcg, + .protect = &a660_protect, + .gmu_cgc_mode = 0x00020200, + .prim_fifo_threshold = 0x00300200, + }, }, { .chip_ids = ADRENO_CHIP_IDS(0x06030500), .family = ADRENO_6XX_GEN4, @@ -991,7 +1043,6 @@ static const struct adreno_info a6xx_gpus[] = { .gmu_cgc_mode = 0x00020202, .prim_fifo_threshold = 0x00200200, }, - .address_space_size = SZ_16G, .speedbins = ADRENO_SPEEDBINS( { 0, 0 }, { 117, 0 }, @@ -1008,8 +1059,9 @@ static const struct adreno_info a6xx_gpus[] = { [ADRENO_FW_GMU] = "a640_gmu.bin", }, .gmem = SZ_2M, + .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | + ADRENO_QUIRK_4GB_VA, .inactive_period = DRM_MSM_INACTIVE_PERIOD, - .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT, .init = a6xx_gpu_init, .zapfw = "a640_zap.mdt", .a6xx = &(const struct a6xx_info) { @@ -1037,22 +1089,10 @@ static const struct adreno_info a6xx_gpus[] = { .gmu_cgc_mode = 0x00020200, .prim_fifo_threshold = 0x00800200, }, - .address_space_size = SZ_16G, } }; DECLARE_ADRENO_GPULIST(a6xx); -MODULE_FIRMWARE("qcom/a615_zap.mbn"); -MODULE_FIRMWARE("qcom/a619_gmu.bin"); -MODULE_FIRMWARE("qcom/a630_sqe.fw"); -MODULE_FIRMWARE("qcom/a630_gmu.bin"); -MODULE_FIRMWARE("qcom/a630_zap.mbn"); -MODULE_FIRMWARE("qcom/a640_gmu.bin"); -MODULE_FIRMWARE("qcom/a650_gmu.bin"); -MODULE_FIRMWARE("qcom/a650_sqe.fw"); -MODULE_FIRMWARE("qcom/a660_gmu.bin"); -MODULE_FIRMWARE("qcom/a660_sqe.fw"); - static const struct adreno_reglist a702_hwcg[] = { { REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222 }, { REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220 }, @@ -1281,6 +1321,28 @@ static const u32 a730_protect_regs[] = { }; DECLARE_ADRENO_PROTECT(a730_protect, 48); +static const uint32_t a7xx_pwrup_reglist_regs[] = { + REG_A6XX_UCHE_TRAP_BASE, + REG_A6XX_UCHE_TRAP_BASE + 1, + REG_A6XX_UCHE_WRITE_THRU_BASE, + REG_A6XX_UCHE_WRITE_THRU_BASE + 1, + REG_A6XX_UCHE_GMEM_RANGE_MIN, + REG_A6XX_UCHE_GMEM_RANGE_MIN + 1, + REG_A6XX_UCHE_GMEM_RANGE_MAX, + REG_A6XX_UCHE_GMEM_RANGE_MAX + 1, + REG_A6XX_UCHE_CACHE_WAYS, + REG_A6XX_UCHE_MODE_CNTL, + REG_A6XX_RB_NC_MODE_CNTL, + REG_A6XX_RB_CMP_DBG_ECO_CNTL, + REG_A7XX_GRAS_NC_MODE_CNTL, + REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, + REG_A6XX_UCHE_GBIF_GX_CONFIG, + REG_A6XX_UCHE_CLIENT_PF, + REG_A6XX_TPL1_DBG_ECO_CNTL1, +}; + +DECLARE_ADRENO_REGLIST_LIST(a7xx_pwrup_reglist); + static const struct adreno_info a7xx_gpus[] = { { .chip_ids = ADRENO_CHIP_IDS(0x07000200), @@ -1315,15 +1377,17 @@ static const struct adreno_info a7xx_gpus[] = { .gmem = SZ_2M, .inactive_period = DRM_MSM_INACTIVE_PERIOD, .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | - ADRENO_QUIRK_HAS_HW_APRIV, + ADRENO_QUIRK_HAS_HW_APRIV | + ADRENO_QUIRK_PREEMPTION, .init = a6xx_gpu_init, .zapfw = "a730_zap.mdt", .a6xx = &(const struct a6xx_info) { .hwcg = a730_hwcg, .protect = &a730_protect, + .pwrup_reglist = &a7xx_pwrup_reglist, .gmu_cgc_mode = 0x00020000, }, - .address_space_size = SZ_16G, + .preempt_record_size = 2860 * SZ_1K, }, { .chip_ids = ADRENO_CHIP_IDS(0x43050a01), /* "C510v2" */ .family = ADRENO_7XX_GEN2, @@ -1334,16 +1398,29 @@ static const struct adreno_info a7xx_gpus[] = { .gmem = 3 * SZ_1M, .inactive_period = DRM_MSM_INACTIVE_PERIOD, .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | - ADRENO_QUIRK_HAS_HW_APRIV, + ADRENO_QUIRK_HAS_HW_APRIV | + ADRENO_QUIRK_PREEMPTION, .init = a6xx_gpu_init, .zapfw = "a740_zap.mdt", .a6xx = &(const struct a6xx_info) { .hwcg = a740_hwcg, .protect = &a730_protect, + .pwrup_reglist = &a7xx_pwrup_reglist, .gmu_chipid = 0x7020100, .gmu_cgc_mode = 0x00020202, + .bcms = (const struct a6xx_bcm[]) { + { .name = "SH0", .buswidth = 16 }, + { .name = "MC0", .buswidth = 4 }, + { + .name = "ACV", + .fixed = true, + .perfmode = BIT(3), + .perfmode_bw = 16500000, + }, + { /* sentinel */ }, + }, }, - .address_space_size = SZ_16G, + .preempt_record_size = 4192 * SZ_1K, }, { .chip_ids = ADRENO_CHIP_IDS(0x43050c01), /* "C512v2" */ .family = ADRENO_7XX_GEN2, @@ -1354,15 +1431,17 @@ static const struct adreno_info a7xx_gpus[] = { .gmem = 3 * SZ_1M, .inactive_period = DRM_MSM_INACTIVE_PERIOD, .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | - ADRENO_QUIRK_HAS_HW_APRIV, + ADRENO_QUIRK_HAS_HW_APRIV | + ADRENO_QUIRK_PREEMPTION, .init = a6xx_gpu_init, .a6xx = &(const struct a6xx_info) { .hwcg = a740_hwcg, .protect = &a730_protect, + .pwrup_reglist = &a7xx_pwrup_reglist, .gmu_chipid = 0x7050001, .gmu_cgc_mode = 0x00020202, }, - .address_space_size = SZ_256G, + .preempt_record_size = 4192 * SZ_1K, }, { .chip_ids = ADRENO_CHIP_IDS(0x43051401), /* "C520v2" */ .family = ADRENO_7XX_GEN3, @@ -1373,15 +1452,28 @@ static const struct adreno_info a7xx_gpus[] = { .gmem = 3 * SZ_1M, .inactive_period = DRM_MSM_INACTIVE_PERIOD, .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | - ADRENO_QUIRK_HAS_HW_APRIV, + ADRENO_QUIRK_HAS_HW_APRIV | + ADRENO_QUIRK_PREEMPTION, .init = a6xx_gpu_init, .zapfw = "gen70900_zap.mbn", .a6xx = &(const struct a6xx_info) { .protect = &a730_protect, + .pwrup_reglist = &a7xx_pwrup_reglist, .gmu_chipid = 0x7090100, .gmu_cgc_mode = 0x00020202, + .bcms = (const struct a6xx_bcm[]) { + { .name = "SH0", .buswidth = 16 }, + { .name = "MC0", .buswidth = 4 }, + { + .name = "ACV", + .fixed = true, + .perfmode = BIT(2), + .perfmode_bw = 10687500, + }, + { /* sentinel */ }, + }, }, - .address_space_size = SZ_16G, + .preempt_record_size = 3572 * SZ_1K, } }; DECLARE_ADRENO_GPULIST(a7xx); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 37927bdd6fbe..38c0f8ef85c3 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -9,6 +9,7 @@ #include <linux/pm_domain.h> #include <linux/pm_opp.h> #include <soc/qcom/cmd-db.h> +#include <soc/qcom/tcs.h> #include <drm/drm_gem.h> #include "a6xx_gpu.h" @@ -27,7 +28,7 @@ static void a6xx_gmu_fault(struct a6xx_gmu *gmu) gmu->hung = true; /* Turn off the hangcheck timer while we are resetting */ - del_timer(&gpu->hangcheck_timer); + timer_delete(&gpu->hangcheck_timer); /* Queue the GPU handler because we need to treat this as a recovery */ kthread_queue_work(gpu->worker, &gpu->recover_work); @@ -109,9 +110,11 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, bool suspended) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + const struct a6xx_info *info = adreno_gpu->info->a6xx; struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct a6xx_gmu *gmu = &a6xx_gpu->gmu; u32 perf_index; + u32 bw_index = 0; unsigned long gpu_freq; int ret = 0; @@ -124,6 +127,37 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, if (gpu_freq == gmu->gpu_freqs[perf_index]) break; + /* If enabled, find the corresponding DDR bandwidth index */ + if (info->bcms && gmu->nr_gpu_bws > 1) { + unsigned int bw = dev_pm_opp_get_bw(opp, true, 0); + + for (bw_index = 0; bw_index < gmu->nr_gpu_bws - 1; bw_index++) { + if (bw == gmu->gpu_bw_table[bw_index]) + break; + } + + /* Vote AB as a fraction of the max bandwidth, starting from A750 */ + if (bw && adreno_is_a750_family(adreno_gpu)) { + u64 tmp; + + /* For now, vote for 25% of the bandwidth */ + tmp = bw * 25; + do_div(tmp, 100); + + /* + * The AB vote consists of a 16 bit wide quantized level + * against the maximum supported bandwidth. + * Quantization can be calculated as below: + * vote = (bandwidth * 2^16) / max bandwidth + */ + tmp *= MAX_AB_VOTE; + do_div(tmp, gmu->gpu_bw_table[gmu->nr_gpu_bws - 1]); + + bw_index |= AB_VOTE(clamp(tmp, 1, MAX_AB_VOTE)); + bw_index |= AB_VOTE_ENABLE; + } + } + gmu->current_perf_index = perf_index; gmu->freq = gmu->gpu_freqs[perf_index]; @@ -139,8 +173,10 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, return; if (!gmu->legacy) { - a6xx_hfi_set_freq(gmu, perf_index); - dev_pm_opp_set_opp(&gpu->pdev->dev, opp); + a6xx_hfi_set_freq(gmu, perf_index, bw_index); + /* With Bandwidth voting, we now vote for all resources, so skip OPP set */ + if (!bw_index) + dev_pm_opp_set_opp(&gpu->pdev->dev, opp); return; } @@ -729,6 +765,7 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu) const struct firmware *fw_image = adreno_gpu->fw[ADRENO_FW_GMU]; const struct block_header *blk; u32 reg_offset; + u32 ver; u32 itcm_base = 0x00000000; u32 dtcm_base = 0x00040000; @@ -775,6 +812,12 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu) } } + ver = gmu_read(gmu, REG_A6XX_GMU_CORE_FW_VERSION); + DRM_INFO_ONCE("Loaded GMU firmware v%u.%u.%u\n", + FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MAJOR__MASK, ver), + FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MINOR__MASK, ver), + FIELD_GET(A6XX_GMU_CORE_FW_VERSION_STEP__MASK, ver)); + return 0; } @@ -1021,14 +1064,6 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) gmu->hung = false; - /* Notify AOSS about the ACD state (unimplemented for now => disable it) */ - if (!IS_ERR(gmu->qmp)) { - ret = qmp_send(gmu->qmp, "{class: gpu, res: acd, val: %d}", - 0 /* Hardcode ACD to be disabled for now */); - if (ret) - dev_err(gmu->dev, "failed to send GPU ACD state\n"); - } - /* Turn on the resources */ pm_runtime_get_sync(gmu->dev); @@ -1126,49 +1161,50 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu) struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; u32 val; + int ret; /* - * The GMU may still be in slumber unless the GPU started so check and - * skip putting it back into slumber if so + * GMU firmware's internal power state gets messed up if we send "prepare_slumber" hfi when + * oob_gpu handshake wasn't done after the last wake up. So do a dummy handshake here when + * required */ - val = gmu_read(gmu, REG_A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE); + if (adreno_gpu->base.needs_hw_init) { + if (a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET)) + goto force_off; - if (val != 0xf) { - int ret = a6xx_gmu_wait_for_idle(gmu); + a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); + } - /* If the GMU isn't responding assume it is hung */ - if (ret) { - a6xx_gmu_force_off(gmu); - return; - } + ret = a6xx_gmu_wait_for_idle(gmu); - a6xx_bus_clear_pending_transactions(adreno_gpu, a6xx_gpu->hung); + /* If the GMU isn't responding assume it is hung */ + if (ret) + goto force_off; - /* tell the GMU we want to slumber */ - ret = a6xx_gmu_notify_slumber(gmu); - if (ret) { - a6xx_gmu_force_off(gmu); - return; - } + a6xx_bus_clear_pending_transactions(adreno_gpu, a6xx_gpu->hung); - ret = gmu_poll_timeout(gmu, - REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, val, - !(val & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB), - 100, 10000); + /* tell the GMU we want to slumber */ + ret = a6xx_gmu_notify_slumber(gmu); + if (ret) + goto force_off; - /* - * Let the user know we failed to slumber but don't worry too - * much because we are powering down anyway - */ + ret = gmu_poll_timeout(gmu, + REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, val, + !(val & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB), + 100, 10000); - if (ret) - DRM_DEV_ERROR(gmu->dev, - "Unable to slumber GMU: status = 0%x/0%x\n", - gmu_read(gmu, - REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS), - gmu_read(gmu, - REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2)); - } + /* + * Let the user know we failed to slumber but don't worry too + * much because we are powering down anyway + */ + + if (ret) + DRM_DEV_ERROR(gmu->dev, + "Unable to slumber GMU: status = 0%x/0%x\n", + gmu_read(gmu, + REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS), + gmu_read(gmu, + REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2)); /* Turn off HFI */ a6xx_hfi_stop(gmu); @@ -1178,6 +1214,11 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu) /* Tell RPMh to power off the GPU */ a6xx_rpmh_stop(gmu); + + return; + +force_off: + a6xx_gmu_force_off(gmu); } @@ -1265,7 +1306,7 @@ static int a6xx_gmu_memory_alloc(struct a6xx_gmu *gmu, struct a6xx_gmu_bo *bo, bo->virt = msm_gem_get_vaddr(bo->obj); bo->size = size; - msm_gem_object_set_name(bo->obj, name); + msm_gem_object_set_name(bo->obj, "%s", name); return 0; } @@ -1287,6 +1328,104 @@ static int a6xx_gmu_memory_probe(struct a6xx_gmu *gmu) return 0; } +/** + * struct bcm_db - Auxiliary data pertaining to each Bus Clock Manager (BCM) + * @unit: divisor used to convert bytes/sec bw value to an RPMh msg + * @width: multiplier used to convert bytes/sec bw value to an RPMh msg + * @vcd: virtual clock domain that this bcm belongs to + * @reserved: reserved field + */ +struct bcm_db { + __le32 unit; + __le16 width; + u8 vcd; + u8 reserved; +}; + +static int a6xx_gmu_rpmh_bw_votes_init(struct adreno_gpu *adreno_gpu, + const struct a6xx_info *info, + struct a6xx_gmu *gmu) +{ + const struct bcm_db *bcm_data[GMU_MAX_BCMS] = { 0 }; + unsigned int bcm_index, bw_index, bcm_count = 0; + + /* Retrieve BCM data from cmd-db */ + for (bcm_index = 0; bcm_index < GMU_MAX_BCMS; bcm_index++) { + const struct a6xx_bcm *bcm = &info->bcms[bcm_index]; + size_t count; + + /* Stop at NULL terminated bcm entry */ + if (!bcm->name) + break; + + bcm_data[bcm_index] = cmd_db_read_aux_data(bcm->name, &count); + if (IS_ERR(bcm_data[bcm_index])) + return PTR_ERR(bcm_data[bcm_index]); + + if (!count) { + dev_err(gmu->dev, "invalid BCM '%s' aux data size\n", + bcm->name); + return -EINVAL; + } + + bcm_count++; + } + + /* Generate BCM votes values for each bandwidth & BCM */ + for (bw_index = 0; bw_index < gmu->nr_gpu_bws; bw_index++) { + u32 *data = gmu->gpu_ib_votes[bw_index]; + u32 bw = gmu->gpu_bw_table[bw_index]; + + /* Calculations loosely copied from bcm_aggregate() & tcs_cmd_gen() */ + for (bcm_index = 0; bcm_index < bcm_count; bcm_index++) { + const struct a6xx_bcm *bcm = &info->bcms[bcm_index]; + bool commit = false; + u64 peak; + u32 vote; + + if (bcm_index == bcm_count - 1 || + (bcm_data[bcm_index + 1] && + bcm_data[bcm_index]->vcd != bcm_data[bcm_index + 1]->vcd)) + commit = true; + + if (!bw) { + data[bcm_index] = BCM_TCS_CMD(commit, false, 0, 0); + continue; + } + + if (bcm->fixed) { + u32 perfmode = 0; + + /* GMU on A6xx votes perfmode on all valid bandwidth */ + if (!adreno_is_a7xx(adreno_gpu) || + (bcm->perfmode_bw && bw >= bcm->perfmode_bw)) + perfmode = bcm->perfmode; + + data[bcm_index] = BCM_TCS_CMD(commit, true, 0, perfmode); + continue; + } + + /* Multiply the bandwidth by the width of the connection */ + peak = (u64)bw * le16_to_cpu(bcm_data[bcm_index]->width); + do_div(peak, bcm->buswidth); + + /* Input bandwidth value is in KBps, scale the value to BCM unit */ + peak *= 1000; + do_div(peak, le32_to_cpu(bcm_data[bcm_index]->unit)); + + vote = clamp(peak, 1, BCM_TCS_CMD_VOTE_MASK); + + /* GMUs on A7xx votes on both x & y */ + if (adreno_is_a7xx(adreno_gpu)) + data[bcm_index] = BCM_TCS_CMD(commit, true, vote, vote); + else + data[bcm_index] = BCM_TCS_CMD(commit, true, 0, vote); + } + } + + return 0; +} + /* Return the 'arc-level' for the given frequency */ static unsigned int a6xx_gmu_get_arc_level(struct device *dev, unsigned long freq) @@ -1390,12 +1529,15 @@ static int a6xx_gmu_rpmh_arc_votes_init(struct device *dev, u32 *votes, * The GMU votes with the RPMh for itself and on behalf of the GPU but we need * to construct the list of votes on the CPU and send it over. Query the RPMh * voltage levels and build the votes + * The GMU can also vote for DDR interconnects, use the OPP bandwidth entries + * and BCM parameters to build the votes. */ static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu) { struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + const struct a6xx_info *info = adreno_gpu->info->a6xx; struct msm_gpu *gpu = &adreno_gpu->base; int ret; @@ -1407,6 +1549,10 @@ static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu) ret |= a6xx_gmu_rpmh_arc_votes_init(gmu->dev, gmu->cx_arc_votes, gmu->gmu_freqs, gmu->nr_gmu_freqs, "cx.lvl"); + /* Build the interconnect votes */ + if (info->bcms && gmu->nr_gpu_bws > 1) + ret |= a6xx_gmu_rpmh_bw_votes_init(adreno_gpu, info, gmu); + return ret; } @@ -1442,10 +1588,43 @@ static int a6xx_gmu_build_freq_table(struct device *dev, unsigned long *freqs, return index; } +static int a6xx_gmu_build_bw_table(struct device *dev, unsigned long *bandwidths, + u32 size) +{ + int count = dev_pm_opp_get_opp_count(dev); + struct dev_pm_opp *opp; + int i, index = 0; + unsigned int bandwidth = 1; + + /* + * The OPP table doesn't contain the "off" bandwidth level so we need to + * add 1 to the table size to account for it + */ + + if (WARN(count + 1 > size, + "The GMU bandwidth table is being truncated\n")) + count = size - 1; + + /* Set the "off" bandwidth */ + bandwidths[index++] = 0; + + for (i = 0; i < count; i++) { + opp = dev_pm_opp_find_bw_ceil(dev, &bandwidth, 0); + if (IS_ERR(opp)) + break; + + dev_pm_opp_put(opp); + bandwidths[index++] = bandwidth++; + } + + return index; +} + static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu) { struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + const struct a6xx_info *info = adreno_gpu->info->a6xx; struct msm_gpu *gpu = &adreno_gpu->base; int ret = 0; @@ -1472,10 +1651,87 @@ static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu) gmu->current_perf_index = gmu->nr_gpu_freqs - 1; + /* + * The GMU also handles GPU Interconnect Votes so build a list + * of DDR bandwidths from the GPU OPP table + */ + if (info->bcms) + gmu->nr_gpu_bws = a6xx_gmu_build_bw_table(&gpu->pdev->dev, + gmu->gpu_bw_table, ARRAY_SIZE(gmu->gpu_bw_table)); + /* Build the list of RPMh votes that we'll send to the GMU */ return a6xx_gmu_rpmh_votes_init(gmu); } +static int a6xx_gmu_acd_probe(struct a6xx_gmu *gmu) +{ + struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); + struct a6xx_hfi_acd_table *cmd = &gmu->acd_table; + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + int ret, i, cmd_idx = 0; + extern bool disable_acd; + + /* Skip ACD probe if requested via module param */ + if (disable_acd) { + DRM_DEV_ERROR(gmu->dev, "Skipping GPU ACD probe\n"); + return 0; + } + + cmd->version = 1; + cmd->stride = 1; + cmd->enable_by_level = 0; + + /* Skip freq = 0 and parse acd-level for rest of the OPPs */ + for (i = 1; i < gmu->nr_gpu_freqs; i++) { + struct dev_pm_opp *opp; + struct device_node *np; + unsigned long freq; + u32 val; + + freq = gmu->gpu_freqs[i]; + opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, freq, true); + np = dev_pm_opp_get_of_node(opp); + + ret = of_property_read_u32(np, "qcom,opp-acd-level", &val); + of_node_put(np); + dev_pm_opp_put(opp); + if (ret == -EINVAL) + continue; + else if (ret) { + DRM_DEV_ERROR(gmu->dev, "Unable to read acd level for freq %lu\n", freq); + return ret; + } + + cmd->enable_by_level |= BIT(i); + cmd->data[cmd_idx++] = val; + } + + cmd->num_levels = cmd_idx; + + /* It is a problem if qmp node is unavailable when ACD is required */ + if (cmd->enable_by_level && IS_ERR_OR_NULL(gmu->qmp)) { + DRM_DEV_ERROR(gmu->dev, "Unable to send ACD state to AOSS\n"); + return -EINVAL; + } + + /* Otherwise, nothing to do if qmp is unavailable */ + if (IS_ERR_OR_NULL(gmu->qmp)) + return 0; + + /* + * Notify AOSS about the ACD state. AOSS is supposed to assume that ACD is disabled on + * system reset. So it is harmless if we couldn't notify 'OFF' state + */ + ret = qmp_send(gmu->qmp, "{class: gpu, res: acd, val: %d}", !!cmd->enable_by_level); + if (ret && cmd->enable_by_level) { + DRM_DEV_ERROR(gmu->dev, "Failed to send ACD state to AOSS\n"); + return ret; + } + + return 0; +} + static int a6xx_gmu_clocks_probe(struct a6xx_gmu *gmu) { int ret = devm_clk_bulk_get_all(gmu->dev, &gmu->clocks); @@ -1522,15 +1778,13 @@ static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, struct platform_device *pdev, irq = platform_get_irq_byname(pdev, name); - ret = request_irq(irq, handler, IRQF_TRIGGER_HIGH, name, gmu); + ret = request_irq(irq, handler, IRQF_TRIGGER_HIGH | IRQF_NO_AUTOEN, name, gmu); if (ret) { DRM_DEV_ERROR(&pdev->dev, "Unable to get interrupt %s %d\n", name, ret); return ret; } - disable_irq(irq); - return irq; } @@ -1605,7 +1859,9 @@ int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) gmu->dev = &pdev->dev; - of_dma_configure(gmu->dev, node, true); + ret = of_dma_configure(gmu->dev, node, true); + if (ret) + return ret; pm_runtime_enable(gmu->dev); @@ -1670,7 +1926,9 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) gmu->dev = &pdev->dev; - of_dma_configure(gmu->dev, node, true); + ret = of_dma_configure(gmu->dev, node, true); + if (ret) + return ret; /* Fow now, don't do anything fancy until we get our feet under us */ gmu->idle_level = GMU_IDLE_STATE_ACTIVE; @@ -1792,10 +2050,11 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) goto detach_cxpd; } + /* Other errors are handled during GPU ACD probe */ gmu->qmp = qmp_get(gmu->dev); - if (IS_ERR(gmu->qmp) && adreno_is_a7xx(adreno_gpu)) { - ret = PTR_ERR(gmu->qmp); - goto remove_device_link; + if (PTR_ERR_OR_ZERO(gmu->qmp) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto detach_gxpd; } init_completion(&gmu->pd_gate); @@ -1811,6 +2070,10 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) /* Get the power levels for the GMU and GPU */ a6xx_gmu_pwrlevels_probe(gmu); + ret = a6xx_gmu_acd_probe(gmu); + if (ret) + goto detach_gxpd; + /* Set up the HFI queues */ a6xx_hfi_init(gmu); @@ -1821,7 +2084,13 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) return 0; -remove_device_link: +detach_gxpd: + if (!IS_ERR_OR_NULL(gmu->gxpd)) + dev_pm_domain_detach(gmu->gxpd, false); + + if (!IS_ERR_OR_NULL(gmu->qmp)) + qmp_put(gmu->qmp); + device_link_del(link); detach_cxpd: diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h index 94b6c5cab6f4..b2d4489b4024 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h @@ -19,6 +19,18 @@ struct a6xx_gmu_bo { u64 iova; }; +#define GMU_MAX_GX_FREQS 16 +#define GMU_MAX_CX_FREQS 4 +#define GMU_MAX_BCMS 3 + +struct a6xx_bcm { + char *name; + unsigned int buswidth; + bool fixed; + unsigned int perfmode; + unsigned int perfmode_bw; +}; + /* * These define the different GMU wake up options - these define how both the * CPU and the GMU bring up the hardware @@ -79,12 +91,17 @@ struct a6xx_gmu { int current_perf_index; int nr_gpu_freqs; - unsigned long gpu_freqs[16]; - u32 gx_arc_votes[16]; + unsigned long gpu_freqs[GMU_MAX_GX_FREQS]; + u32 gx_arc_votes[GMU_MAX_GX_FREQS]; + struct a6xx_hfi_acd_table acd_table; + + int nr_gpu_bws; + unsigned long gpu_bw_table[GMU_MAX_GX_FREQS]; + u32 gpu_ib_votes[GMU_MAX_GX_FREQS][GMU_MAX_BCMS]; int nr_gmu_freqs; - unsigned long gmu_freqs[4]; - u32 cx_arc_votes[4]; + unsigned long gmu_freqs[GMU_MAX_CX_FREQS]; + u32 cx_arc_votes[GMU_MAX_CX_FREQS]; unsigned long freq; @@ -99,6 +116,7 @@ struct a6xx_gmu { struct completion pd_gate; struct qmp *qmp; + struct a6xx_hfi_msg_bw_table *bw_table; }; static inline u32 gmu_read(struct a6xx_gmu *gmu, u32 offset) @@ -192,7 +210,7 @@ void a6xx_hfi_init(struct a6xx_gmu *gmu); int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state); void a6xx_hfi_stop(struct a6xx_gmu *gmu); int a6xx_hfi_send_prep_slumber(struct a6xx_gmu *gmu); -int a6xx_hfi_set_freq(struct a6xx_gmu *gmu, int index); +int a6xx_hfi_set_freq(struct a6xx_gmu *gmu, u32 perf_index, u32 bw_index); bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu); bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 702b8d4b3497..491fde0083a2 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -68,6 +68,8 @@ static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); uint32_t wptr; unsigned long flags; @@ -81,12 +83,17 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) /* Make sure to wrap wptr if we need to */ wptr = get_wptr(ring); - spin_unlock_irqrestore(&ring->preempt_lock, flags); - - /* Make sure everything is posted before making a decision */ - mb(); + /* Update HW if this is the current ring and we are not in preempt*/ + if (!a6xx_in_preempt(a6xx_gpu)) { + if (a6xx_gpu->cur_ring == ring) + gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); + else + ring->restore_wptr = true; + } else { + ring->restore_wptr = true; + } - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); + spin_unlock_irqrestore(&ring->preempt_lock, flags); } static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter, @@ -110,7 +117,7 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, u32 asid; u64 memptr = rbmemptr(ring, ttbr0); - if (ctx->seqno == a6xx_gpu->base.base.cur_ctx_seqno) + if (ctx->seqno == ring->cur_ctx_seqno) return; if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid)) @@ -123,6 +130,20 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, submit->seqno - 1); + + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + OUT_RING(ring, CP_SET_THREAD_BOTH); + + /* Reset state used to synchronize BR and BV */ + OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1); + OUT_RING(ring, + CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS | + CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE | + CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER | + CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS); + + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + OUT_RING(ring, CP_SET_THREAD_BR); } if (!sysprof) { @@ -148,12 +169,14 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, /* * Write the new TTBR0 to the memstore. This is good for debugging. + * Needed for preemption */ - OUT_PKT7(ring, CP_MEM_WRITE, 4); + OUT_PKT7(ring, CP_MEM_WRITE, 5); OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr))); OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr))); OUT_RING(ring, lower_32_bits(ttbr)); - OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr)); + OUT_RING(ring, upper_32_bits(ttbr)); + OUT_RING(ring, ctx->seqno); /* * Sync both threads after switching pagetables and enable BR only @@ -203,6 +226,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + adreno_check_and_reenable_stall(adreno_gpu); + a6xx_set_pagetable(a6xx_gpu, ring, submit); get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0), @@ -229,14 +254,14 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) case MSM_SUBMIT_CMD_IB_TARGET_BUF: break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: - if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno) + if (ring->cur_ctx_seqno == submit->queue->ctx->seqno) break; fallthrough; case MSM_SUBMIT_CMD_BUF: - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); ibs++; break; } @@ -278,6 +303,46 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) a6xx_flush(gpu, ring); } +static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring, + struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue) +{ + u64 preempt_postamble; + + OUT_PKT7(ring, CP_SET_PSEUDO_REG, 12); + + OUT_RING(ring, SMMU_INFO); + /* don't save SMMU, we write the record from the kernel instead */ + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + /* privileged and non secure buffer save */ + OUT_RING(ring, NON_SECURE_SAVE_ADDR); + OUT_RING(ring, lower_32_bits( + a6xx_gpu->preempt_iova[ring->id])); + OUT_RING(ring, upper_32_bits( + a6xx_gpu->preempt_iova[ring->id])); + + /* user context buffer save, seems to be unnused by fw */ + OUT_RING(ring, NON_PRIV_SAVE_ADDR); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + OUT_RING(ring, COUNTER); + /* seems OK to set to 0 to disable it */ + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + /* Emit postamble to clear perfcounters */ + preempt_postamble = a6xx_gpu->preempt_postamble_iova; + + OUT_PKT7(ring, CP_SET_AMBLE, 3); + OUT_RING(ring, lower_32_bits(preempt_postamble)); + OUT_RING(ring, upper_32_bits(preempt_postamble)); + OUT_RING(ring, CP_SET_AMBLE_2_DWORDS( + a6xx_gpu->preempt_postamble_len) | + CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE)); +} + static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) { unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT; @@ -286,6 +351,8 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + adreno_check_and_reenable_stall(adreno_gpu); + /* * Toggle concurrent binning for pagetable switch and set the thread to * BR since only it can execute the pagetable switch packets. @@ -295,6 +362,13 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) a6xx_set_pagetable(a6xx_gpu, ring, submit); + /* + * If preemption is enabled, then set the pseudo register for the save + * sequence + */ + if (gpu->nr_rings > 1) + a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue); + get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0), rbmemptr_stats(ring, index, cpcycles_start)); get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER, @@ -306,8 +380,10 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, 0x101); /* IFPC disable */ - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, 0x00d); /* IB1LIST start */ + if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) { + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, 0x00d); /* IB1LIST start */ + } /* Submit the commands */ for (i = 0; i < submit->nr_cmds; i++) { @@ -315,14 +391,14 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) case MSM_SUBMIT_CMD_IB_TARGET_BUF: break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: - if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno) + if (ring->cur_ctx_seqno == submit->queue->ctx->seqno) break; fallthrough; case MSM_SUBMIT_CMD_BUF: - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); ibs++; break; } @@ -338,8 +414,10 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) update_shadow_rptr(gpu, ring); } - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, 0x00e); /* IB1LIST end */ + if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) { + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, 0x00e); /* IB1LIST end */ + } get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0), rbmemptr_stats(ring, index, cpcycles_end)); @@ -386,6 +464,8 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence))); OUT_RING(ring, submit->seqno); + a6xx_gpu->last_seqno[ring->id] = submit->seqno; + /* write the ringbuffer timestamp */ OUT_PKT7(ring, CP_EVENT_WRITE, 4); OUT_RING(ring, CACHE_CLEAN | CP_EVENT_WRITE_0_IRQ | BIT(27)); @@ -399,10 +479,32 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, 0x100); /* IFPC enable */ + /* If preemption is enabled */ + if (gpu->nr_rings > 1) { + /* Yield the floor on command completion */ + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); + + /* + * If dword[2:1] are non zero, they specify an address for + * the CP to write the value of dword[3] to on preemption + * complete. Write 0 to skip the write + */ + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + /* Data value - not used if the address above is 0 */ + OUT_RING(ring, 0x01); + /* generate interrupt on preemption completion */ + OUT_RING(ring, 0x00); + } + + trace_msm_gpu_submit_flush(submit, gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER)); a6xx_flush(gpu, ring); + + /* Check to see if we need to start preemption */ + a6xx_preempt_trigger(gpu); } static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state) @@ -532,6 +634,14 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu) gpu->ubwc_config.uavflagprd_inv = 2; } + if (adreno_is_a623(gpu)) { + gpu->ubwc_config.highest_bank_bit = 16; + gpu->ubwc_config.amsbc = 1; + gpu->ubwc_config.rgb565_predicator = 1; + gpu->ubwc_config.uavflagprd_inv = 2; + gpu->ubwc_config.macrotile_mode = 1; + } + if (adreno_is_a640_family(gpu)) gpu->ubwc_config.amsbc = 1; @@ -551,10 +661,18 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu) gpu->ubwc_config.macrotile_mode = 1; } + if (adreno_is_a663(gpu)) { + gpu->ubwc_config.highest_bank_bit = 13; + gpu->ubwc_config.amsbc = 1; + gpu->ubwc_config.rgb565_predicator = 1; + gpu->ubwc_config.uavflagprd_inv = 2; + gpu->ubwc_config.macrotile_mode = 1; + gpu->ubwc_config.ubwc_swizzle = 0x4; + } + if (adreno_is_7c3(gpu)) { gpu->ubwc_config.highest_bank_bit = 14; gpu->ubwc_config.amsbc = 1; - gpu->ubwc_config.rgb565_predicator = 1; gpu->ubwc_config.uavflagprd_inv = 2; gpu->ubwc_config.macrotile_mode = 1; } @@ -609,6 +727,77 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) adreno_gpu->ubwc_config.macrotile_mode); } +static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + const struct adreno_reglist_list *reglist; + void *ptr = a6xx_gpu->pwrup_reglist_ptr; + struct cpu_gpu_lock *lock = ptr; + u32 *dest = (u32 *)&lock->regs[0]; + int i; + + reglist = adreno_gpu->info->a6xx->pwrup_reglist; + + lock->gpu_req = lock->cpu_req = lock->turn = 0; + lock->ifpc_list_len = 0; + lock->preemption_list_len = reglist->count; + + /* + * For each entry in each of the lists, write the offset and the current + * register value into the GPU buffer + */ + for (i = 0; i < reglist->count; i++) { + *dest++ = reglist->regs[i]; + *dest++ = gpu_read(gpu, reglist->regs[i]); + } + + /* + * The overall register list is composed of + * 1. Static IFPC-only registers + * 2. Static IFPC + preemption registers + * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects) + * + * The first two lists are static. Size of these lists are stored as + * number of pairs in ifpc_list_len and preemption_list_len + * respectively. With concurrent binning, Some of the perfcounter + * registers being virtualized, CP needs to know the pipe id to program + * the aperture inorder to restore the same. Thus, third list is a + * dynamic list with triplets as + * (<aperture, shifted 12 bits> <address> <data>), and the length is + * stored as number for triplets in dynamic_list_len. + */ + lock->dynamic_list_len = 0; +} + +static int a7xx_preempt_start(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct msm_ringbuffer *ring = gpu->rb[0]; + + if (gpu->nr_rings <= 1) + return 0; + + /* Turn CP protection off */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 0); + + a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL); + + /* Yield the floor on command completion */ + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + /* Generate interrupt on preemption completion */ + OUT_RING(ring, 0x00); + + a6xx_flush(gpu, ring); + + return a6xx_idle(gpu, ring) ? 0 : -EINVAL; +} + static int a6xx_cp_init(struct msm_gpu *gpu) { struct msm_ringbuffer *ring = gpu->rb[0]; @@ -640,6 +829,8 @@ static int a6xx_cp_init(struct msm_gpu *gpu) static int a7xx_cp_init(struct msm_gpu *gpu) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct msm_ringbuffer *ring = gpu->rb[0]; u32 mask; @@ -677,11 +868,11 @@ static int a7xx_cp_init(struct msm_gpu *gpu) /* *Don't* send a power up reg list for concurrent binning (TODO) */ /* Lo address */ - OUT_RING(ring, 0x00000000); + OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova)); /* Hi address */ - OUT_RING(ring, 0x00000000); + OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova)); /* BIT(31) set => read the regs from the list */ - OUT_RING(ring, 0x00000000); + OUT_RING(ring, BIT(31)); a6xx_flush(gpu, ring); return a6xx_idle(gpu, ring) ? 0 : -EINVAL; @@ -805,6 +996,16 @@ static int a6xx_ucode_load(struct msm_gpu *gpu) msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow"); } + a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PAGE_SIZE, + MSM_BO_WC | MSM_BO_MAP_PRIV, + gpu->aspace, &a6xx_gpu->pwrup_reglist_bo, + &a6xx_gpu->pwrup_reglist_iova); + + if (IS_ERR(a6xx_gpu->pwrup_reglist_ptr)) + return PTR_ERR(a6xx_gpu->pwrup_reglist_ptr); + + msm_gem_object_set_name(a6xx_gpu->pwrup_reglist_bo, "pwrup_reglist"); + return 0; } @@ -864,6 +1065,7 @@ static int hw_init(struct msm_gpu *gpu) struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct a6xx_gmu *gmu = &a6xx_gpu->gmu; u64 gmem_range_min; + unsigned int i; int ret; if (!adreno_has_gmu_wrapper(adreno_gpu)) { @@ -946,12 +1148,12 @@ static int hw_init(struct msm_gpu *gpu) /* Disable L2 bypass in the UCHE */ if (adreno_is_a7xx(adreno_gpu)) { - gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu); - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu); + gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); } else { - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, 0x0001ffffffffffc0llu); - gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu); - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0); + gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); } if (!(adreno_is_a650_family(adreno_gpu) || @@ -1072,7 +1274,7 @@ static int hw_init(struct msm_gpu *gpu) if (adreno_is_a690(adreno_gpu)) gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90); /* Set dualQ + disable afull for A660 GPU */ - else if (adreno_is_a660(adreno_gpu)) + else if (adreno_is_a660(adreno_gpu) || adreno_is_a663(adreno_gpu)) gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906); else if (adreno_is_a7xx(adreno_gpu)) gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, @@ -1134,22 +1336,32 @@ static int hw_init(struct msm_gpu *gpu) if (a6xx_gpu->shadow_bo) { gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, shadowptr(a6xx_gpu, gpu->rb[0])); + for (unsigned int i = 0; i < gpu->nr_rings; i++) + a6xx_gpu->shadow[i] = 0; } /* ..which means "always" on A7xx, also for BV shadow */ if (adreno_is_a7xx(adreno_gpu)) { gpu_write64(gpu, REG_A7XX_CP_BV_RB_RPTR_ADDR, - rbmemptr(gpu->rb[0], bv_fence)); + rbmemptr(gpu->rb[0], bv_rptr)); } + a6xx_preempt_hw_init(gpu); + /* Always come up on rb 0 */ a6xx_gpu->cur_ring = gpu->rb[0]; - gpu->cur_ctx_seqno = 0; + for (i = 0; i < gpu->nr_rings; i++) + gpu->rb[i]->cur_ctx_seqno = 0; /* Enable the SQE_to start the CP engine */ gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1); + if (adreno_is_a7xx(adreno_gpu) && !a6xx_gpu->pwrup_reglist_emitted) { + a7xx_patch_pwrup_reglist(gpu); + a6xx_gpu->pwrup_reglist_emitted = true; + } + ret = adreno_is_a7xx(adreno_gpu) ? a7xx_cp_init(gpu) : a6xx_cp_init(gpu); if (ret) goto out; @@ -1187,6 +1399,10 @@ static int hw_init(struct msm_gpu *gpu) out: if (adreno_has_gmu_wrapper(adreno_gpu)) return ret; + + /* Last step - yield the ringbuffer */ + a7xx_preempt_start(gpu); + /* * Tell the GMU that we are done touching the GPU and it can start power * management @@ -1507,7 +1723,7 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE)); /* Turn off the hangcheck timer to keep it from bothering us */ - del_timer(&gpu->hangcheck_timer); + timer_delete(&gpu->hangcheck_timer); kthread_queue_work(gpu->worker, &gpu->recover_work); } @@ -1527,7 +1743,7 @@ static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) */ if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { - del_timer(&gpu->hangcheck_timer); + timer_delete(&gpu->hangcheck_timer); kthread_queue_work(gpu->worker, &gpu->recover_work); } @@ -1564,8 +1780,13 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) a7xx_sw_fuse_violation_irq(gpu); - if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) + if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { msm_gpu_retire(gpu); + a6xx_preempt_trigger(gpu); + } + + if (status & A6XX_RBBM_INT_0_MASK_CP_SW) + a6xx_preempt_irq(gpu); return IRQ_HANDLED; } @@ -2064,7 +2285,7 @@ a6xx_create_private_address_space(struct msm_gpu *gpu) return ERR_CAST(mmu); return msm_gem_address_space_create(mmu, - "gpu", 0x100000000ULL, + "gpu", ADRENO_VM_START, adreno_private_address_space_size(gpu)); } @@ -2259,6 +2480,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) struct a6xx_gpu *a6xx_gpu; struct adreno_gpu *adreno_gpu; struct msm_gpu *gpu; + extern int enable_preemption; bool is_a7xx; int ret; @@ -2297,7 +2519,10 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) return ERR_PTR(ret); } - if (is_a7xx) + if ((enable_preemption == 1) || (enable_preemption == -1 && + (config->info->quirks & ADRENO_QUIRK_PREEMPTION))) + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 4); + else if (is_a7xx) ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 1); else if (adreno_has_gmu_wrapper(adreno_gpu)) ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_gmuwrapper, 1); @@ -2333,11 +2558,15 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) } } + adreno_gpu->uche_trap_base = 0x1fffffffff000ull; + if (gpu->aspace) msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a6xx_fault_handler); a6xx_calc_ubwc_config(adreno_gpu); + /* Set up the preemption specific bits and pieces for each ringbuffer */ + a6xx_preempt_init(gpu); return gpu; } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 0fb7febf70e7..9201a53dd341 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -12,18 +12,39 @@ extern bool hang_debug; +struct cpu_gpu_lock { + uint32_t gpu_req; + uint32_t cpu_req; + uint32_t turn; + union { + struct { + uint16_t list_length; + uint16_t list_offset; + }; + struct { + uint8_t ifpc_list_len; + uint8_t preemption_list_len; + uint16_t dynamic_list_len; + }; + }; + uint64_t regs[62]; +}; + /** * struct a6xx_info - a6xx specific information from device table * * @hwcg: hw clock gating register sequence * @protect: CP_PROTECT settings + * @pwrup_reglist pwrup reglist for preemption */ struct a6xx_info { const struct adreno_reglist *hwcg; const struct adreno_protect *protect; + const struct adreno_reglist_list *pwrup_reglist; u32 gmu_chipid; u32 gmu_cgc_mode; u32 prim_fifo_threshold; + const struct a6xx_bcm *bcms; }; struct a6xx_gpu { @@ -33,6 +54,29 @@ struct a6xx_gpu { uint64_t sqe_iova; struct msm_ringbuffer *cur_ring; + struct msm_ringbuffer *next_ring; + + struct drm_gem_object *preempt_bo[MSM_GPU_MAX_RINGS]; + void *preempt[MSM_GPU_MAX_RINGS]; + uint64_t preempt_iova[MSM_GPU_MAX_RINGS]; + struct drm_gem_object *preempt_smmu_bo[MSM_GPU_MAX_RINGS]; + void *preempt_smmu[MSM_GPU_MAX_RINGS]; + uint64_t preempt_smmu_iova[MSM_GPU_MAX_RINGS]; + uint32_t last_seqno[MSM_GPU_MAX_RINGS]; + + atomic_t preempt_state; + spinlock_t eval_lock; + struct timer_list preempt_timer; + + unsigned int preempt_level; + bool uses_gmem; + bool skip_save_restore; + + struct drm_gem_object *preempt_postamble_bo; + void *preempt_postamble_ptr; + uint64_t preempt_postamble_iova; + uint64_t preempt_postamble_len; + bool postamble_enabled; struct a6xx_gmu gmu; @@ -40,6 +84,11 @@ struct a6xx_gpu { uint64_t shadow_iova; uint32_t *shadow; + struct drm_gem_object *pwrup_reglist_bo; + void *pwrup_reglist_ptr; + uint64_t pwrup_reglist_iova; + bool pwrup_reglist_emitted; + bool has_whereami; void __iomem *llc_mmio; @@ -52,6 +101,100 @@ struct a6xx_gpu { #define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base) /* + * In order to do lockless preemption we use a simple state machine to progress + * through the process. + * + * PREEMPT_NONE - no preemption in progress. Next state START. + * PREEMPT_START - The trigger is evaluating if preemption is possible. Next + * states: TRIGGERED, NONE + * PREEMPT_FINISH - An intermediate state before moving back to NONE. Next + * state: NONE. + * PREEMPT_TRIGGERED: A preemption has been executed on the hardware. Next + * states: FAULTED, PENDING + * PREEMPT_FAULTED: A preemption timed out (never completed). This will trigger + * recovery. Next state: N/A + * PREEMPT_PENDING: Preemption complete interrupt fired - the callback is + * checking the success of the operation. Next state: FAULTED, NONE. + */ + +enum a6xx_preempt_state { + PREEMPT_NONE = 0, + PREEMPT_START, + PREEMPT_FINISH, + PREEMPT_TRIGGERED, + PREEMPT_FAULTED, + PREEMPT_PENDING, +}; + +/* + * struct a6xx_preempt_record is a shared buffer between the microcode and the + * CPU to store the state for preemption. The record itself is much larger + * (2112k) but most of that is used by the CP for storage. + * + * There is a preemption record assigned per ringbuffer. When the CPU triggers a + * preemption, it fills out the record with the useful information (wptr, ring + * base, etc) and the microcode uses that information to set up the CP following + * the preemption. When a ring is switched out, the CP will save the ringbuffer + * state back to the record. In this way, once the records are properly set up + * the CPU can quickly switch back and forth between ringbuffers by only + * updating a few registers (often only the wptr). + * + * These are the CPU aware registers in the record: + * @magic: Must always be 0xAE399D6EUL + * @info: Type of the record - written 0 by the CPU, updated by the CP + * @errno: preemption error record + * @data: Data field in YIELD and SET_MARKER packets, Written and used by CP + * @cntl: Value of RB_CNTL written by CPU, save/restored by CP + * @rptr: Value of RB_RPTR written by CPU, save/restored by CP + * @wptr: Value of RB_WPTR written by CPU, save/restored by CP + * @_pad: Reserved/padding + * @rptr_addr: Value of RB_RPTR_ADDR_LO|HI written by CPU, save/restored by CP + * @rbase: Value of RB_BASE written by CPU, save/restored by CP + * @counter: GPU address of the storage area for the preemption counters + * @bv_rptr_addr: Value of BV_RB_RPTR_ADDR_LO|HI written by CPU, save/restored by CP + */ +struct a6xx_preempt_record { + u32 magic; + u32 info; + u32 errno; + u32 data; + u32 cntl; + u32 rptr; + u32 wptr; + u32 _pad; + u64 rptr_addr; + u64 rbase; + u64 counter; + u64 bv_rptr_addr; +}; + +#define A6XX_PREEMPT_RECORD_MAGIC 0xAE399D6EUL + +#define PREEMPT_SMMU_INFO_SIZE 4096 + +#define PREEMPT_RECORD_SIZE(adreno_gpu) \ + ((adreno_gpu->info->preempt_record_size) == 0 ? \ + 4192 * SZ_1K : (adreno_gpu->info->preempt_record_size)) + +/* + * The preemption counter block is a storage area for the value of the + * preemption counters that are saved immediately before context switch. We + * append it on to the end of the allocation for the preemption record. + */ +#define A6XX_PREEMPT_COUNTER_SIZE (16 * 4) + +struct a7xx_cp_smmu_info { + u32 magic; + u32 _pad4; + u64 ttbr0; + u32 asid; + u32 context_idr; + u32 context_bank; +}; + +#define GEN7_CP_SMMU_INFO_MAGIC 0x241350d5UL + +/* * Given a register and a count, return a value to program into * REG_CP_PROTECT_REG(n) - this will block both reads and writes for * _len + 1 registers starting at _reg. @@ -108,6 +251,34 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node); int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node); void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu); +void a6xx_preempt_init(struct msm_gpu *gpu); +void a6xx_preempt_hw_init(struct msm_gpu *gpu); +void a6xx_preempt_trigger(struct msm_gpu *gpu); +void a6xx_preempt_irq(struct msm_gpu *gpu); +void a6xx_preempt_fini(struct msm_gpu *gpu); +int a6xx_preempt_submitqueue_setup(struct msm_gpu *gpu, + struct msm_gpu_submitqueue *queue); +void a6xx_preempt_submitqueue_close(struct msm_gpu *gpu, + struct msm_gpu_submitqueue *queue); + +/* Return true if we are in a preempt state */ +static inline bool a6xx_in_preempt(struct a6xx_gpu *a6xx_gpu) +{ + /* + * Make sure the read to preempt_state is ordered with respect to reads + * of other variables before ... + */ + smp_rmb(); + + int preempt_state = atomic_read(&a6xx_gpu->preempt_state); + + /* ... and after. */ + smp_rmb(); + + return !(preempt_state == PREEMPT_NONE || + preempt_state == PREEMPT_FINISH); +} + void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, bool suspended); unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index 0fcae53c0b14..341a72a67401 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -1214,12 +1214,12 @@ static void a6xx_get_gmu_registers(struct msm_gpu *gpu, struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); a6xx_state->gmu_registers = state_kcalloc(a6xx_state, - 3, sizeof(*a6xx_state->gmu_registers)); + 4, sizeof(*a6xx_state->gmu_registers)); if (!a6xx_state->gmu_registers) return; - a6xx_state->nr_gmu_registers = 3; + a6xx_state->nr_gmu_registers = 4; /* Get the CX GMU registers from AHB */ _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0], @@ -1227,6 +1227,13 @@ static void a6xx_get_gmu_registers(struct msm_gpu *gpu, _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1], &a6xx_state->gmu_registers[1], true); + if (adreno_is_a621(adreno_gpu) || adreno_is_a623(adreno_gpu)) + _a6xx_get_gmu_registers(gpu, a6xx_state, &a621_gpucc_reg, + &a6xx_state->gmu_registers[2], false); + else + _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gpucc_reg, + &a6xx_state->gmu_registers[2], false); + if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) return; @@ -1234,7 +1241,7 @@ static void a6xx_get_gmu_registers(struct msm_gpu *gpu, gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0); _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2], - &a6xx_state->gmu_registers[2], false); + &a6xx_state->gmu_registers[3], false); } static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo( @@ -1507,6 +1514,8 @@ static void a6xx_get_indexed_registers(struct msm_gpu *gpu, /* Restore the size in the hardware */ gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size); + + a6xx_state->nr_indexed_regs = count; } static void a7xx_get_indexed_registers(struct msm_gpu *gpu, diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h index dd4c28a8d923..e545106c70be 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h @@ -363,6 +363,9 @@ static const u32 a6xx_gmu_cx_registers[] = { 0x51e0, 0x51e2, 0x51f0, 0x51f0, 0x5200, 0x5201, /* GMU AO */ 0x9300, 0x9316, 0x9400, 0x9400, +}; + +static const u32 a6xx_gmu_gpucc_registers[] = { /* GPU CC */ 0x9800, 0x9812, 0x9840, 0x9852, 0x9c00, 0x9c04, 0x9c07, 0x9c0b, 0x9c15, 0x9c1c, 0x9c1e, 0x9c2d, 0x9c3c, 0x9c3d, 0x9c3f, 0x9c40, @@ -373,6 +376,17 @@ static const u32 a6xx_gmu_cx_registers[] = { 0xbc00, 0xbc16, 0xbc20, 0xbc27, }; +static const u32 a621_gmu_gpucc_registers[] = { + /* GPU CC */ + 0x9800, 0x980e, 0x9c00, 0x9c0e, 0xb000, 0xb004, 0xb400, 0xb404, + 0xb800, 0xb804, 0xbc00, 0xbc05, 0xbc14, 0xbc1d, 0xbc2a, 0xbc30, + 0xbc32, 0xbc32, 0xbc41, 0xbc55, 0xbc66, 0xbc68, 0xbc78, 0xbc7a, + 0xbc89, 0xbc8a, 0xbc9c, 0xbc9e, 0xbca0, 0xbca3, 0xbcb3, 0xbcb5, + 0xbcc5, 0xbcc7, 0xbcd6, 0xbcd8, 0xbce8, 0xbce9, 0xbcf9, 0xbcfc, + 0xbd0b, 0xbd0c, 0xbd1c, 0xbd1e, 0xbd40, 0xbd70, 0xbe00, 0xbe16, + 0xbe20, 0xbe2d, +}; + static const u32 a6xx_gmu_cx_rscc_registers[] = { /* GPU RSCC */ 0x008c, 0x008c, 0x0101, 0x0102, 0x0340, 0x0342, 0x0344, 0x0347, @@ -386,6 +400,9 @@ static const struct a6xx_registers a6xx_gmu_reglist[] = { REGS(a6xx_gmu_gx_registers, 0, 0), }; +static const struct a6xx_registers a6xx_gpucc_reg = REGS(a6xx_gmu_gpucc_registers, 0, 0); +static const struct a6xx_registers a621_gpucc_reg = REGS(a621_gmu_gpucc_registers, 0, 0); + static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu); static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c index cdb3f6e74d3e..8e69b1e84657 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c @@ -6,6 +6,7 @@ #include <linux/list.h> #include <soc/qcom/cmd-db.h> +#include <soc/qcom/tcs.h> #include "a6xx_gmu.h" #include "a6xx_gmu.xml.h" @@ -99,16 +100,14 @@ static int a6xx_hfi_queue_write(struct a6xx_gmu *gmu, return 0; } -static int a6xx_hfi_wait_for_ack(struct a6xx_gmu *gmu, u32 id, u32 seqnum, - u32 *payload, u32 payload_size) +static int a6xx_hfi_wait_for_msg_interrupt(struct a6xx_gmu *gmu, u32 id, u32 seqnum) { - struct a6xx_hfi_queue *queue = &gmu->queues[HFI_RESPONSE_QUEUE]; - u32 val; int ret; + u32 val; /* Wait for a response */ ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_GMU2HOST_INTR_INFO, val, - val & A6XX_GMU_GMU2HOST_INTR_INFO_MSGQ, 100, 5000); + val & A6XX_GMU_GMU2HOST_INTR_INFO_MSGQ, 100, 1000000); if (ret) { DRM_DEV_ERROR(gmu->dev, @@ -121,6 +120,19 @@ static int a6xx_hfi_wait_for_ack(struct a6xx_gmu *gmu, u32 id, u32 seqnum, gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_CLR, A6XX_GMU_GMU2HOST_INTR_INFO_MSGQ); + return 0; +} + +static int a6xx_hfi_wait_for_ack(struct a6xx_gmu *gmu, u32 id, u32 seqnum, + u32 *payload, u32 payload_size) +{ + struct a6xx_hfi_queue *queue = &gmu->queues[HFI_RESPONSE_QUEUE]; + int ret; + + ret = a6xx_hfi_wait_for_msg_interrupt(gmu, id, seqnum); + if (ret) + return ret; + for (;;) { struct a6xx_hfi_msg_response resp; @@ -128,12 +140,18 @@ static int a6xx_hfi_wait_for_ack(struct a6xx_gmu *gmu, u32 id, u32 seqnum, ret = a6xx_hfi_queue_read(gmu, queue, (u32 *) &resp, sizeof(resp) >> 2); - /* If the queue is empty our response never made it */ + /* If the queue is empty, there may have been previous missed + * responses that preceded the response to our packet. Wait + * further before we give up. + */ if (!ret) { - DRM_DEV_ERROR(gmu->dev, - "The HFI response queue is unexpectedly empty\n"); - - return -ENOENT; + ret = a6xx_hfi_wait_for_msg_interrupt(gmu, id, seqnum); + if (ret) { + DRM_DEV_ERROR(gmu->dev, + "The HFI response queue is unexpectedly empty\n"); + return ret; + } + continue; } if (HFI_HEADER_ID(resp.header) == HFI_F2H_MSG_ERROR) { @@ -259,6 +277,48 @@ static int a6xx_hfi_send_perf_table(struct a6xx_gmu *gmu) NULL, 0); } +static void a6xx_generate_bw_table(const struct a6xx_info *info, struct a6xx_gmu *gmu, + struct a6xx_hfi_msg_bw_table *msg) +{ + unsigned int i, j; + + for (i = 0; i < GMU_MAX_BCMS; i++) { + if (!info->bcms[i].name) + break; + msg->ddr_cmds_addrs[i] = cmd_db_read_addr(info->bcms[i].name); + } + msg->ddr_cmds_num = i; + + for (i = 0; i < gmu->nr_gpu_bws; ++i) + for (j = 0; j < msg->ddr_cmds_num; j++) + msg->ddr_cmds_data[i][j] = gmu->gpu_ib_votes[i][j]; + msg->bw_level_num = gmu->nr_gpu_bws; + + /* Compute the wait bitmask with each BCM having the commit bit */ + msg->ddr_wait_bitmask = 0; + for (j = 0; j < msg->ddr_cmds_num; j++) + if (msg->ddr_cmds_data[0][j] & BCM_TCS_CMD_COMMIT_MASK) + msg->ddr_wait_bitmask |= BIT(j); + + /* + * These are the CX (CNOC) votes - these are used by the GMU + * The 'CN0' BCM is used on all targets, and votes are basically + * 'off' and 'on' states with first bit to enable the path. + */ + + msg->cnoc_cmds_addrs[0] = cmd_db_read_addr("CN0"); + msg->cnoc_cmds_num = 1; + + msg->cnoc_cmds_data[0][0] = BCM_TCS_CMD(true, false, 0, 0); + msg->cnoc_cmds_data[1][0] = BCM_TCS_CMD(true, true, 0, BIT(0)); + + /* Compute the wait bitmask with each BCM having the commit bit */ + msg->cnoc_wait_bitmask = 0; + for (j = 0; j < msg->cnoc_cmds_num; j++) + if (msg->cnoc_cmds_data[0][j] & BCM_TCS_CMD_COMMIT_MASK) + msg->cnoc_wait_bitmask |= BIT(j); +} + static void a618_build_bw_table(struct a6xx_hfi_msg_bw_table *msg) { /* Send a single "off" entry since the 618 GMU doesn't do bus scaling */ @@ -478,6 +538,37 @@ static void a660_build_bw_table(struct a6xx_hfi_msg_bw_table *msg) msg->cnoc_cmds_data[1][0] = 0x60000001; } +static void a663_build_bw_table(struct a6xx_hfi_msg_bw_table *msg) +{ + /* + * Send a single "off" entry just to get things running + * TODO: bus scaling + */ + msg->bw_level_num = 1; + + msg->ddr_cmds_num = 3; + msg->ddr_wait_bitmask = 0x07; + + msg->ddr_cmds_addrs[0] = 0x50004; + msg->ddr_cmds_addrs[1] = 0x50000; + msg->ddr_cmds_addrs[2] = 0x500b4; + + msg->ddr_cmds_data[0][0] = 0x40000000; + msg->ddr_cmds_data[0][1] = 0x40000000; + msg->ddr_cmds_data[0][2] = 0x40000000; + + /* + * These are the CX (CNOC) votes - these are used by the GMU but the + * votes are known and fixed for the target + */ + msg->cnoc_cmds_num = 1; + msg->cnoc_wait_bitmask = 0x01; + + msg->cnoc_cmds_addrs[0] = 0x50058; + msg->cnoc_cmds_data[0][0] = 0x40000000; + msg->cnoc_cmds_data[1][0] = 0x60000001; +} + static void adreno_7c3_build_bw_table(struct a6xx_hfi_msg_bw_table *msg) { /* @@ -630,35 +721,82 @@ static void a6xx_build_bw_table(struct a6xx_hfi_msg_bw_table *msg) static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu) { - struct a6xx_hfi_msg_bw_table msg = { 0 }; + struct a6xx_hfi_msg_bw_table *msg; struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + const struct a6xx_info *info = adreno_gpu->info->a6xx; + + if (gmu->bw_table) + goto send; + + msg = devm_kzalloc(gmu->dev, sizeof(*msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; - if (adreno_is_a618(adreno_gpu)) - a618_build_bw_table(&msg); + if (info->bcms && gmu->nr_gpu_bws > 1) + a6xx_generate_bw_table(info, gmu, msg); + else if (adreno_is_a618(adreno_gpu)) + a618_build_bw_table(msg); else if (adreno_is_a619(adreno_gpu)) - a619_build_bw_table(&msg); + a619_build_bw_table(msg); else if (adreno_is_a640_family(adreno_gpu)) - a640_build_bw_table(&msg); + a640_build_bw_table(msg); else if (adreno_is_a650(adreno_gpu)) - a650_build_bw_table(&msg); + a650_build_bw_table(msg); else if (adreno_is_7c3(adreno_gpu)) - adreno_7c3_build_bw_table(&msg); + adreno_7c3_build_bw_table(msg); else if (adreno_is_a660(adreno_gpu)) - a660_build_bw_table(&msg); + a660_build_bw_table(msg); + else if (adreno_is_a663(adreno_gpu)) + a663_build_bw_table(msg); else if (adreno_is_a690(adreno_gpu)) - a690_build_bw_table(&msg); + a690_build_bw_table(msg); else if (adreno_is_a730(adreno_gpu)) - a730_build_bw_table(&msg); + a730_build_bw_table(msg); else if (adreno_is_a740_family(adreno_gpu)) - a740_build_bw_table(&msg); + a740_build_bw_table(msg); else - a6xx_build_bw_table(&msg); + a6xx_build_bw_table(msg); - return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_BW_TABLE, &msg, sizeof(msg), + gmu->bw_table = msg; + +send: + return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_BW_TABLE, gmu->bw_table, sizeof(*(gmu->bw_table)), NULL, 0); } +#define HFI_FEATURE_ACD 12 + +static int a6xx_hfi_enable_acd(struct a6xx_gmu *gmu) +{ + struct a6xx_hfi_acd_table *acd_table = &gmu->acd_table; + struct a6xx_hfi_msg_feature_ctrl msg = { + .feature = HFI_FEATURE_ACD, + .enable = 1, + .data = 0, + }; + int ret; + + if (!acd_table->enable_by_level) + return 0; + + /* Enable ACD feature at GMU */ + ret = a6xx_hfi_send_msg(gmu, HFI_H2F_FEATURE_CTRL, &msg, sizeof(msg), NULL, 0); + if (ret) { + DRM_DEV_ERROR(gmu->dev, "Unable to enable ACD (%d)\n", ret); + return ret; + } + + /* Send ACD table to GMU */ + ret = a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_ACD, acd_table, sizeof(*acd_table), NULL, 0); + if (ret) { + DRM_DEV_ERROR(gmu->dev, "Unable to ACD table (%d)\n", ret); + return ret; + } + + return 0; +} + static int a6xx_hfi_send_test(struct a6xx_gmu *gmu) { struct a6xx_hfi_msg_test msg = { 0 }; @@ -683,13 +821,13 @@ static int a6xx_hfi_send_core_fw_start(struct a6xx_gmu *gmu) sizeof(msg), NULL, 0); } -int a6xx_hfi_set_freq(struct a6xx_gmu *gmu, int index) +int a6xx_hfi_set_freq(struct a6xx_gmu *gmu, u32 freq_index, u32 bw_index) { struct a6xx_hfi_gx_bw_perf_vote_cmd msg = { 0 }; msg.ack_type = 1; /* blocking */ - msg.freq = index; - msg.bw = 0; /* TODO: bus scaling */ + msg.freq = freq_index; + msg.bw = bw_index; return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_GX_BW_PERF_VOTE, &msg, sizeof(msg), NULL, 0); @@ -756,6 +894,10 @@ int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state) if (ret) return ret; + ret = a6xx_hfi_enable_acd(gmu); + if (ret) + return ret; + ret = a6xx_hfi_send_core_fw_start(gmu); if (ret) return ret; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.h b/drivers/gpu/drm/msm/adreno/a6xx_hfi.h index 528110169398..653ef720e2da 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.h @@ -151,12 +151,33 @@ struct a6xx_hfi_msg_test { u32 header; }; +#define HFI_H2F_MSG_ACD 7 +#define MAX_ACD_STRIDE 2 + +struct a6xx_hfi_acd_table { + u32 header; + u32 version; + u32 enable_by_level; + u32 stride; + u32 num_levels; + u32 data[16 * MAX_ACD_STRIDE]; +}; + #define HFI_H2F_MSG_START 10 struct a6xx_hfi_msg_start { u32 header; }; +#define HFI_H2F_FEATURE_CTRL 11 + +struct a6xx_hfi_msg_feature_ctrl { + u32 header; + u32 feature; + u32 enable; + u32 data; +}; + #define HFI_H2F_MSG_CORE_FW_START 14 struct a6xx_hfi_msg_core_fw_start { @@ -173,6 +194,11 @@ struct a6xx_hfi_gx_bw_perf_vote_cmd { u32 bw; }; +#define AB_VOTE_MASK GENMASK(31, 16) +#define MAX_AB_VOTE (FIELD_MAX(AB_VOTE_MASK) - 1) +#define AB_VOTE(vote) FIELD_PREP(AB_VOTE_MASK, (vote)) +#define AB_VOTE_ENABLE BIT(8) + #define HFI_H2F_MSG_PREPARE_SLUMBER 33 struct a6xx_hfi_prep_slumber_cmd { diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c new file mode 100644 index 000000000000..3b17fd2dba89 --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2023 Collabora, Ltd. */ +/* Copyright (c) 2024 Valve Corporation */ + +#include "msm_gem.h" +#include "a6xx_gpu.h" +#include "a6xx_gmu.xml.h" +#include "msm_mmu.h" +#include "msm_gpu_trace.h" + +/* + * Try to transition the preemption state from old to new. Return + * true on success or false if the original state wasn't 'old' + */ +static inline bool try_preempt_state(struct a6xx_gpu *a6xx_gpu, + enum a6xx_preempt_state old, enum a6xx_preempt_state new) +{ + enum a6xx_preempt_state cur = atomic_cmpxchg(&a6xx_gpu->preempt_state, + old, new); + + return (cur == old); +} + +/* + * Force the preemption state to the specified state. This is used in cases + * where the current state is known and won't change + */ +static inline void set_preempt_state(struct a6xx_gpu *gpu, + enum a6xx_preempt_state new) +{ + /* + * preempt_state may be read by other cores trying to trigger a + * preemption or in the interrupt handler so barriers are needed + * before... + */ + smp_mb__before_atomic(); + atomic_set(&gpu->preempt_state, new); + /* ... and after*/ + smp_mb__after_atomic(); +} + +/* Write the most recent wptr for the given ring into the hardware */ +static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + unsigned long flags; + uint32_t wptr; + + spin_lock_irqsave(&ring->preempt_lock, flags); + + if (ring->restore_wptr) { + wptr = get_wptr(ring); + + gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); + + ring->restore_wptr = false; + } + + spin_unlock_irqrestore(&ring->preempt_lock, flags); +} + +/* Return the highest priority ringbuffer with something in it */ +static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + unsigned long flags; + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + bool empty; + struct msm_ringbuffer *ring = gpu->rb[i]; + + spin_lock_irqsave(&ring->preempt_lock, flags); + empty = (get_wptr(ring) == gpu->funcs->get_rptr(gpu, ring)); + if (!empty && ring == a6xx_gpu->cur_ring) + empty = ring->memptrs->fence == a6xx_gpu->last_seqno[i]; + spin_unlock_irqrestore(&ring->preempt_lock, flags); + + if (!empty) + return ring; + } + + return NULL; +} + +static void a6xx_preempt_timer(struct timer_list *t) +{ + struct a6xx_gpu *a6xx_gpu = timer_container_of(a6xx_gpu, t, + preempt_timer); + struct msm_gpu *gpu = &a6xx_gpu->base.base; + struct drm_device *dev = gpu->dev; + + if (!try_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED, PREEMPT_FAULTED)) + return; + + dev_err(dev->dev, "%s: preemption timed out\n", gpu->name); + kthread_queue_work(gpu->worker, &gpu->recover_work); +} + +static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu) +{ + u32 *postamble = a6xx_gpu->preempt_postamble_ptr; + u32 count = 0; + + postamble[count++] = PKT7(CP_REG_RMW, 3); + postamble[count++] = REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD; + postamble[count++] = 0; + postamble[count++] = 1; + + postamble[count++] = PKT7(CP_WAIT_REG_MEM, 6); + postamble[count++] = CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ); + postamble[count++] = CP_WAIT_REG_MEM_1_POLL_ADDR_LO( + REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS); + postamble[count++] = CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0); + postamble[count++] = CP_WAIT_REG_MEM_3_REF(0x1); + postamble[count++] = CP_WAIT_REG_MEM_4_MASK(0x1); + postamble[count++] = CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0); + + a6xx_gpu->preempt_postamble_len = count; + + a6xx_gpu->postamble_enabled = true; +} + +static void preempt_disable_postamble(struct a6xx_gpu *a6xx_gpu) +{ + u32 *postamble = a6xx_gpu->preempt_postamble_ptr; + + /* + * Disable the postamble by replacing the first packet header with a NOP + * that covers the whole buffer. + */ + *postamble = PKT7(CP_NOP, (a6xx_gpu->preempt_postamble_len - 1)); + + a6xx_gpu->postamble_enabled = false; +} + +void a6xx_preempt_irq(struct msm_gpu *gpu) +{ + uint32_t status; + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct drm_device *dev = gpu->dev; + + if (!try_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED, PREEMPT_PENDING)) + return; + + /* Delete the preemption watchdog timer */ + timer_delete(&a6xx_gpu->preempt_timer); + + /* + * The hardware should be setting the stop bit of CP_CONTEXT_SWITCH_CNTL + * to zero before firing the interrupt, but there is a non zero chance + * of a hardware condition or a software race that could set it again + * before we have a chance to finish. If that happens, log and go for + * recovery + */ + status = gpu_read(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL); + if (unlikely(status & A6XX_CP_CONTEXT_SWITCH_CNTL_STOP)) { + DRM_DEV_ERROR(&gpu->pdev->dev, + "!!!!!!!!!!!!!!!! preemption faulted !!!!!!!!!!!!!! irq\n"); + set_preempt_state(a6xx_gpu, PREEMPT_FAULTED); + dev_err(dev->dev, "%s: Preemption failed to complete\n", + gpu->name); + kthread_queue_work(gpu->worker, &gpu->recover_work); + return; + } + + a6xx_gpu->cur_ring = a6xx_gpu->next_ring; + a6xx_gpu->next_ring = NULL; + + set_preempt_state(a6xx_gpu, PREEMPT_FINISH); + + update_wptr(gpu, a6xx_gpu->cur_ring); + + set_preempt_state(a6xx_gpu, PREEMPT_NONE); + + trace_msm_gpu_preemption_irq(a6xx_gpu->cur_ring->id); + + /* + * Retrigger preemption to avoid a deadlock that might occur when preemption + * is skipped due to it being already in flight when requested. + */ + a6xx_preempt_trigger(gpu); +} + +void a6xx_preempt_hw_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + int i; + + /* No preemption if we only have one ring */ + if (gpu->nr_rings == 1) + return; + + for (i = 0; i < gpu->nr_rings; i++) { + struct a6xx_preempt_record *record_ptr = a6xx_gpu->preempt[i]; + + record_ptr->wptr = 0; + record_ptr->rptr = 0; + record_ptr->rptr_addr = shadowptr(a6xx_gpu, gpu->rb[i]); + record_ptr->info = 0; + record_ptr->data = 0; + record_ptr->rbase = gpu->rb[i]->iova; + } + + /* Write a 0 to signal that we aren't switching pagetables */ + gpu_write64(gpu, REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, 0); + + /* Enable the GMEM save/restore feature for preemption */ + gpu_write(gpu, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, 0x1); + + /* Reset the preemption state */ + set_preempt_state(a6xx_gpu, PREEMPT_NONE); + + spin_lock_init(&a6xx_gpu->eval_lock); + + /* Always come up on rb 0 */ + a6xx_gpu->cur_ring = gpu->rb[0]; +} + +void a6xx_preempt_trigger(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + unsigned long flags; + struct msm_ringbuffer *ring; + unsigned int cntl; + bool sysprof; + + if (gpu->nr_rings == 1) + return; + + /* + * Lock to make sure another thread attempting preemption doesn't skip it + * while we are still evaluating the next ring. This makes sure the other + * thread does start preemption if we abort it and avoids a soft lock. + */ + spin_lock_irqsave(&a6xx_gpu->eval_lock, flags); + + /* + * Try to start preemption by moving from NONE to START. If + * unsuccessful, a preemption is already in flight + */ + if (!try_preempt_state(a6xx_gpu, PREEMPT_NONE, PREEMPT_START)) { + spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags); + return; + } + + cntl = A6XX_CP_CONTEXT_SWITCH_CNTL_LEVEL(a6xx_gpu->preempt_level); + + if (a6xx_gpu->skip_save_restore) + cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_SKIP_SAVE_RESTORE; + + if (a6xx_gpu->uses_gmem) + cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_USES_GMEM; + + cntl |= A6XX_CP_CONTEXT_SWITCH_CNTL_STOP; + + /* Get the next ring to preempt to */ + ring = get_next_ring(gpu); + + /* + * If no ring is populated or the highest priority ring is the current + * one do nothing except to update the wptr to the latest and greatest + */ + if (!ring || (a6xx_gpu->cur_ring == ring)) { + set_preempt_state(a6xx_gpu, PREEMPT_FINISH); + update_wptr(gpu, a6xx_gpu->cur_ring); + set_preempt_state(a6xx_gpu, PREEMPT_NONE); + spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags); + return; + } + + spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags); + + spin_lock_irqsave(&ring->preempt_lock, flags); + + struct a7xx_cp_smmu_info *smmu_info_ptr = + a6xx_gpu->preempt_smmu[ring->id]; + struct a6xx_preempt_record *record_ptr = a6xx_gpu->preempt[ring->id]; + u64 ttbr0 = ring->memptrs->ttbr0; + u32 context_idr = ring->memptrs->context_idr; + + smmu_info_ptr->ttbr0 = ttbr0; + smmu_info_ptr->context_idr = context_idr; + record_ptr->wptr = get_wptr(ring); + + /* + * The GPU will write the wptr we set above when we preempt. Reset + * restore_wptr to make sure that we don't write WPTR to the same + * thing twice. It's still possible subsequent submissions will update + * wptr again, in which case they will set the flag to true. This has + * to be protected by the lock for setting the flag and updating wptr + * to be atomic. + */ + ring->restore_wptr = false; + + trace_msm_gpu_preemption_trigger(a6xx_gpu->cur_ring->id, ring->id); + + spin_unlock_irqrestore(&ring->preempt_lock, flags); + + gpu_write64(gpu, + REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, + a6xx_gpu->preempt_smmu_iova[ring->id]); + + gpu_write64(gpu, + REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR, + a6xx_gpu->preempt_iova[ring->id]); + + a6xx_gpu->next_ring = ring; + + /* Start a timer to catch a stuck preemption */ + mod_timer(&a6xx_gpu->preempt_timer, jiffies + msecs_to_jiffies(10000)); + + /* Enable or disable postamble as needed */ + sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1; + + if (!sysprof && !a6xx_gpu->postamble_enabled) + preempt_prepare_postamble(a6xx_gpu); + + if (sysprof && a6xx_gpu->postamble_enabled) + preempt_disable_postamble(a6xx_gpu); + + /* Set the preemption state to triggered */ + set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED); + + /* Trigger the preemption */ + gpu_write(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl); +} + +static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu, + struct msm_ringbuffer *ring) +{ + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + struct drm_gem_object *bo = NULL; + phys_addr_t ttbr; + u64 iova = 0; + void *ptr; + int asid; + + ptr = msm_gem_kernel_new(gpu->dev, + PREEMPT_RECORD_SIZE(adreno_gpu), + MSM_BO_WC | MSM_BO_MAP_PRIV, gpu->aspace, &bo, &iova); + + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + memset(ptr, 0, PREEMPT_RECORD_SIZE(adreno_gpu)); + + msm_gem_object_set_name(bo, "preempt_record ring%d", ring->id); + + a6xx_gpu->preempt_bo[ring->id] = bo; + a6xx_gpu->preempt_iova[ring->id] = iova; + a6xx_gpu->preempt[ring->id] = ptr; + + struct a6xx_preempt_record *record_ptr = ptr; + + ptr = msm_gem_kernel_new(gpu->dev, + PREEMPT_SMMU_INFO_SIZE, + MSM_BO_WC | MSM_BO_MAP_PRIV | MSM_BO_GPU_READONLY, + gpu->aspace, &bo, &iova); + + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + memset(ptr, 0, PREEMPT_SMMU_INFO_SIZE); + + msm_gem_object_set_name(bo, "preempt_smmu_info ring%d", ring->id); + + a6xx_gpu->preempt_smmu_bo[ring->id] = bo; + a6xx_gpu->preempt_smmu_iova[ring->id] = iova; + a6xx_gpu->preempt_smmu[ring->id] = ptr; + + struct a7xx_cp_smmu_info *smmu_info_ptr = ptr; + + msm_iommu_pagetable_params(gpu->aspace->mmu, &ttbr, &asid); + + smmu_info_ptr->magic = GEN7_CP_SMMU_INFO_MAGIC; + smmu_info_ptr->ttbr0 = ttbr; + smmu_info_ptr->asid = 0xdecafbad; + smmu_info_ptr->context_idr = 0; + + /* Set up the defaults on the preemption record */ + record_ptr->magic = A6XX_PREEMPT_RECORD_MAGIC; + record_ptr->info = 0; + record_ptr->data = 0; + record_ptr->rptr = 0; + record_ptr->wptr = 0; + record_ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT; + record_ptr->rbase = ring->iova; + record_ptr->counter = 0; + record_ptr->bv_rptr_addr = rbmemptr(ring, bv_rptr); + + return 0; +} + +void a6xx_preempt_fini(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + int i; + + for (i = 0; i < gpu->nr_rings; i++) + msm_gem_kernel_put(a6xx_gpu->preempt_bo[i], gpu->aspace); +} + +void a6xx_preempt_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + int i; + + /* No preemption if we only have one ring */ + if (gpu->nr_rings <= 1) + return; + + for (i = 0; i < gpu->nr_rings; i++) { + if (preempt_init_ring(a6xx_gpu, gpu->rb[i])) + goto fail; + } + + /* TODO: make this configurable? */ + a6xx_gpu->preempt_level = 1; + a6xx_gpu->uses_gmem = 1; + a6xx_gpu->skip_save_restore = 1; + + a6xx_gpu->preempt_postamble_ptr = msm_gem_kernel_new(gpu->dev, + PAGE_SIZE, + MSM_BO_WC | MSM_BO_MAP_PRIV | MSM_BO_GPU_READONLY, + gpu->aspace, &a6xx_gpu->preempt_postamble_bo, + &a6xx_gpu->preempt_postamble_iova); + + preempt_prepare_postamble(a6xx_gpu); + + if (IS_ERR(a6xx_gpu->preempt_postamble_ptr)) + goto fail; + + timer_setup(&a6xx_gpu->preempt_timer, a6xx_preempt_timer, 0); + + return; +fail: + /* + * On any failure our adventure is over. Clean up and + * set nr_rings to 1 to force preemption off + */ + a6xx_preempt_fini(gpu); + gpu->nr_rings = 1; + + DRM_DEV_ERROR(&gpu->pdev->dev, + "preemption init failed, disabling preemption\n"); + + return; +} diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index cfc74a9e2646..16e7ac444efd 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -20,6 +20,14 @@ bool allow_vram_carveout = false; MODULE_PARM_DESC(allow_vram_carveout, "Allow using VRAM Carveout, in place of IOMMU"); module_param_named(allow_vram_carveout, allow_vram_carveout, bool, 0600); +int enable_preemption = -1; +MODULE_PARM_DESC(enable_preemption, "Enable preemption (A7xx only) (1=on , 0=disable, -1=auto (default))"); +module_param(enable_preemption, int, 0600); + +bool disable_acd; +MODULE_PARM_DESC(disable_acd, "Forcefully disable GPU ACD"); +module_param_unsafe(disable_acd, bool, 0400); + extern const struct adreno_gpulist a2xx_gpulist; extern const struct adreno_gpulist a3xx_gpulist; extern const struct adreno_gpulist a4xx_gpulist; @@ -129,9 +137,8 @@ err_disable_rpm: return NULL; } -static int find_chipid(struct device *dev, uint32_t *chipid) +static int find_chipid(struct device_node *node, uint32_t *chipid) { - struct device_node *node = dev->of_node; const char *compat; int ret; @@ -165,15 +172,36 @@ static int find_chipid(struct device *dev, uint32_t *chipid) /* and if that fails, fall back to legacy "qcom,chipid" property: */ ret = of_property_read_u32(node, "qcom,chipid", chipid); if (ret) { - DRM_DEV_ERROR(dev, "could not parse qcom,chipid: %d\n", ret); + DRM_ERROR("%pOF: could not parse qcom,chipid: %d\n", + node, ret); return ret; } - dev_warn(dev, "Using legacy qcom,chipid binding!\n"); + pr_warn("%pOF: Using legacy qcom,chipid binding!\n", node); return 0; } +bool adreno_has_gpu(struct device_node *node) +{ + const struct adreno_info *info; + uint32_t chip_id; + int ret; + + ret = find_chipid(node, &chip_id); + if (ret) + return false; + + info = adreno_info(chip_id); + if (!info) { + pr_warn("%pOF: Unknown GPU revision: %"ADRENO_CHIPID_FMT"\n", + node, ADRENO_CHIPID_ARGS(chip_id)); + return false; + } + + return true; +} + static int adreno_bind(struct device *dev, struct device *master, void *data) { static struct adreno_platform_config config = {}; @@ -183,19 +211,18 @@ static int adreno_bind(struct device *dev, struct device *master, void *data) struct msm_gpu *gpu; int ret; - ret = find_chipid(dev, &config.chip_id); - if (ret) + ret = find_chipid(dev->of_node, &config.chip_id); + /* We shouldn't have gotten this far if we can't parse the chip_id */ + if (WARN_ON(ret)) return ret; dev->platform_data = &config; priv->gpu_pdev = to_platform_device(dev); info = adreno_info(config.chip_id); - if (!info) { - dev_warn(drm->dev, "Unknown GPU revision: %"ADRENO_CHIPID_FMT"\n", - ADRENO_CHIPID_ARGS(config.chip_id)); + /* We shouldn't have gotten this far if we don't recognize the GPU: */ + if (WARN_ON(!info)) return -ENXIO; - } config.info = info; @@ -389,7 +416,7 @@ static const struct dev_pm_ops adreno_pm_ops = { static struct platform_driver adreno_driver = { .probe = adreno_probe, - .remove_new = adreno_remove, + .remove = adreno_remove, .shutdown = adreno_shutdown, .driver = { .name = "adreno", diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 465a4cd14a43..86bff915c3e7 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -236,34 +236,77 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu, u64 adreno_private_address_space_size(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&gpu->pdev->dev); + const struct io_pgtable_cfg *ttbr1_cfg; if (address_space_size) return address_space_size; - if (adreno_gpu->info->address_space_size) - return adreno_gpu->info->address_space_size; + if (adreno_gpu->info->quirks & ADRENO_QUIRK_4GB_VA) + return SZ_4G; - return SZ_4G; + if (!adreno_smmu || !adreno_smmu->get_ttbr1_cfg) + return SZ_4G; + + ttbr1_cfg = adreno_smmu->get_ttbr1_cfg(adreno_smmu->cookie); + + /* + * Userspace VM is actually using TTBR0, but both are the same size, + * with b48 (sign bit) selecting which TTBRn to use. So if IAS is + * 48, the total (kernel+user) address space size is effectively + * 49 bits. But what userspace is control of is the lower 48. + */ + return BIT(ttbr1_cfg->ias) - ADRENO_VM_START; +} + +void adreno_check_and_reenable_stall(struct adreno_gpu *adreno_gpu) +{ + struct msm_gpu *gpu = &adreno_gpu->base; + struct msm_drm_private *priv = gpu->dev->dev_private; + unsigned long flags; + + /* + * Wait until the cooldown period has passed and we would actually + * collect a crashdump to re-enable stall-on-fault. + */ + spin_lock_irqsave(&priv->fault_stall_lock, flags); + if (!priv->stall_enabled && + ktime_after(ktime_get(), priv->stall_reenable_time) && + !READ_ONCE(gpu->crashstate)) { + priv->stall_enabled = true; + + gpu->aspace->mmu->funcs->set_stall(gpu->aspace->mmu, true); + } + spin_unlock_irqrestore(&priv->fault_stall_lock, flags); } #define ARM_SMMU_FSR_TF BIT(1) #define ARM_SMMU_FSR_PF BIT(3) #define ARM_SMMU_FSR_EF BIT(4) +#define ARM_SMMU_FSR_SS BIT(30) int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, struct adreno_smmu_fault_info *info, const char *block, u32 scratch[4]) { + struct msm_drm_private *priv = gpu->dev->dev_private; const char *type = "UNKNOWN"; - bool do_devcoredump = info && !READ_ONCE(gpu->crashstate); + bool do_devcoredump = info && (info->fsr & ARM_SMMU_FSR_SS) && + !READ_ONCE(gpu->crashstate); + unsigned long irq_flags; /* - * If we aren't going to be resuming later from fault_worker, then do - * it now. + * In case there is a subsequent storm of pagefaults, disable + * stall-on-fault for at least half a second. */ - if (!do_devcoredump) { - gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu); + spin_lock_irqsave(&priv->fault_stall_lock, irq_flags); + if (priv->stall_enabled) { + priv->stall_enabled = false; + + gpu->aspace->mmu->funcs->set_stall(gpu->aspace->mmu, false); } + priv->stall_reenable_time = ktime_add_ms(ktime_get(), 500); + spin_unlock_irqrestore(&priv->fault_stall_lock, irq_flags); /* * Print a default message if we couldn't get the data from the @@ -291,16 +334,18 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, scratch[0], scratch[1], scratch[2], scratch[3]); if (do_devcoredump) { + struct msm_gpu_fault_info fault_info = {}; + /* Turn off the hangcheck timer to keep it from bothering us */ - del_timer(&gpu->hangcheck_timer); + timer_delete(&gpu->hangcheck_timer); - gpu->fault_info.ttbr0 = info->ttbr0; - gpu->fault_info.iova = iova; - gpu->fault_info.flags = flags; - gpu->fault_info.type = type; - gpu->fault_info.block = block; + fault_info.ttbr0 = info->ttbr0; + fault_info.iova = iova; + fault_info.flags = flags; + fault_info.type = type; + fault_info.block = block; - kthread_queue_work(gpu->worker, &gpu->fault_work); + msm_gpu_fault_crashstate_capture(gpu, &fault_info); } return 0; @@ -310,10 +355,11 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx, uint32_t param, uint64_t *value, uint32_t *len) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct drm_device *drm = gpu->dev; /* No pointer params yet */ if (*len != 0) - return -EINVAL; + return UERR(EINVAL, drm, "invalid len"); switch (param) { case MSM_PARAM_GPU_ID: @@ -365,12 +411,12 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx, return 0; case MSM_PARAM_VA_START: if (ctx->aspace == gpu->aspace) - return -EINVAL; + return UERR(EINVAL, drm, "requires per-process pgtables"); *value = ctx->aspace->va_start; return 0; case MSM_PARAM_VA_SIZE: if (ctx->aspace == gpu->aspace) - return -EINVAL; + return UERR(EINVAL, drm, "requires per-process pgtables"); *value = ctx->aspace->va_size; return 0; case MSM_PARAM_HIGHEST_BANK_BIT: @@ -385,15 +431,19 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx, case MSM_PARAM_MACROTILE_MODE: *value = adreno_gpu->ubwc_config.macrotile_mode; return 0; + case MSM_PARAM_UCHE_TRAP_BASE: + *value = adreno_gpu->uche_trap_base; + return 0; default: - DBG("%s: invalid param: %u", gpu->name, param); - return -EINVAL; + return UERR(EINVAL, drm, "%s: invalid param: %u", gpu->name, param); } } int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx, uint32_t param, uint64_t value, uint32_t len) { + struct drm_device *drm = gpu->dev; + switch (param) { case MSM_PARAM_COMM: case MSM_PARAM_CMDLINE: @@ -401,11 +451,11 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx, * that should be a reasonable upper bound */ if (len > PAGE_SIZE) - return -EINVAL; + return UERR(EINVAL, drm, "invalid len"); break; default: if (len != 0) - return -EINVAL; + return UERR(EINVAL, drm, "invalid len"); } switch (param) { @@ -434,11 +484,10 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx, } case MSM_PARAM_SYSPROF: if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + return UERR(EPERM, drm, "invalid permissions"); return msm_file_private_set_sysprof(ctx, gpu, value); default: - DBG("%s: invalid param: %u", gpu->name, param); - return -EINVAL; + return UERR(EINVAL, drm, "%s: invalid param: %u", gpu->name, param); } } @@ -533,7 +582,7 @@ int adreno_load_fw(struct adreno_gpu *adreno_gpu) if (!adreno_gpu->info->fw[i]) continue; - /* Skip loading GMU firwmare with GMU Wrapper */ + /* Skip loading GMU firmware with GMU Wrapper */ if (adreno_has_gmu_wrapper(adreno_gpu) && i == ADRENO_FW_GMU) continue; @@ -572,8 +621,19 @@ struct drm_gem_object *adreno_fw_create_bo(struct msm_gpu *gpu, int adreno_hw_init(struct msm_gpu *gpu) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + int ret; + VERB("%s", gpu->name); + if (adreno_gpu->info->family >= ADRENO_6XX_GEN1 && + qcom_scm_set_gpu_smmu_aperture_is_available()) { + /* We currently always use context bank 0, so hard code this */ + ret = qcom_scm_set_gpu_smmu_aperture(0); + if (ret) + DRM_DEV_ERROR(gpu->dev->dev, "unable to set SMMU aperture: %d\n", ret); + } + for (int i = 0; i < gpu->nr_rings; i++) { struct msm_ringbuffer *ring = gpu->rb[i]; @@ -868,6 +928,16 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state, drm_printf(p, " - dir=%s\n", info->flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ"); drm_printf(p, " - type=%s\n", info->type); drm_printf(p, " - source=%s\n", info->block); + + /* Information extracted from what we think are the current + * pgtables. Hopefully the TTBR0 matches what we've extracted + * from the SMMU registers in smmu_info! + */ + drm_puts(p, "pgtable-fault-info:\n"); + drm_printf(p, " - ttbr0: %.16llx\n", (u64)info->pgtbl_ttbr0); + drm_printf(p, " - asid: %d\n", info->asid); + drm_printf(p, " - ptes: %.16llx %.16llx %.16llx %.16llx\n", + info->ptes[0], info->ptes[1], info->ptes[2], info->ptes[3]); } drm_printf(p, "rbbm-status: 0x%08x\n", state->rbbm_status); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 58d7e7915c57..bc063594a359 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -56,6 +56,8 @@ enum adreno_family { #define ADRENO_QUIRK_LMLOADKILL_DISABLE BIT(2) #define ADRENO_QUIRK_HAS_HW_APRIV BIT(3) #define ADRENO_QUIRK_HAS_CACHED_COHERENT BIT(4) +#define ADRENO_QUIRK_PREEMPTION BIT(5) +#define ADRENO_QUIRK_4GB_VA BIT(6) /* Helper for formating the chip_id in the way that userspace tools like * crashdec expect. @@ -103,7 +105,6 @@ struct adreno_info { union { const struct a6xx_info *a6xx; }; - u64 address_space_size; /** * @speedbins: Optional table of fuse to speedbin mappings * @@ -111,6 +112,7 @@ struct adreno_info { * {SHRT_MAX, 0} sentinal. */ struct adreno_speedbin *speedbins; + u64 preempt_record_size; }; #define ADRENO_CHIP_IDS(tbl...) (uint32_t[]) { tbl, 0 } @@ -156,6 +158,19 @@ static const struct adreno_protect name = { \ .count_max = __count_max, \ }; +struct adreno_reglist_list { + /** @reg: List of register **/ + const u32 *regs; + /** @count: Number of registers in the list **/ + u32 count; +}; + +#define DECLARE_ADRENO_REGLIST_LIST(name) \ +static const struct adreno_reglist_list name = { \ + .regs = name ## _regs, \ + .count = ARRAY_SIZE(name ## _regs), \ +}; + struct adreno_gpu { struct msm_gpu base; const struct adreno_info *info; @@ -238,6 +253,8 @@ struct adreno_gpu { bool gmu_is_wrapper; bool has_ray_tracing; + + u64 uche_trap_base; }; #define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base) @@ -425,6 +442,11 @@ static inline int adreno_is_a621(const struct adreno_gpu *gpu) return gpu->info->chip_ids[0] == 0x06020100; } +static inline int adreno_is_a623(const struct adreno_gpu *gpu) +{ + return gpu->info->chip_ids[0] == 0x06020300; +} + static inline int adreno_is_a630(const struct adreno_gpu *gpu) { return adreno_is_revn(gpu, 630); @@ -455,6 +477,11 @@ static inline int adreno_is_a680(const struct adreno_gpu *gpu) return adreno_is_revn(gpu, 680); } +static inline int adreno_is_a663(const struct adreno_gpu *gpu) +{ + return gpu->info->chip_ids[0] == 0x06060300; +} + static inline int adreno_is_a690(const struct adreno_gpu *gpu) { return gpu->info->chip_ids[0] == 0x06090000; @@ -539,6 +566,11 @@ static inline int adreno_is_a740_family(struct adreno_gpu *gpu) gpu->info->family == ADRENO_7XX_GEN3; } +static inline int adreno_is_a750_family(struct adreno_gpu *gpu) +{ + return gpu->info->family == ADRENO_7XX_GEN3; +} + static inline int adreno_is_a7xx(struct adreno_gpu *gpu) { /* Update with non-fake (i.e. non-A702) Gen 7 GPUs */ @@ -546,6 +578,8 @@ static inline int adreno_is_a7xx(struct adreno_gpu *gpu) adreno_is_a740_family(gpu); } +/* Put vm_start above 32b to catch issues with not setting xyz_BASE_HI */ +#define ADRENO_VM_START 0x100000000ULL u64 adreno_private_address_space_size(struct msm_gpu *gpu); int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx, uint32_t param, uint64_t *value, uint32_t *len); @@ -602,6 +636,8 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, struct adreno_smmu_fault_info *info, const char *block, u32 scratch[4]); +void adreno_check_and_reenable_stall(struct adreno_gpu *gpu); + int adreno_read_speedbin(struct device *dev, u32 *speedbin); /* @@ -656,12 +692,15 @@ OUT_PKT4(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) OUT_RING(ring, PKT4(regindx, cnt)); } +#define PKT7(opcode, cnt) \ + (CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) | \ + ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23)) + static inline void OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { adreno_wait_ring(ring, cnt + 1); - OUT_RING(ring, CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) | - ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23)); + OUT_RING(ring, PKT7(opcode, cnt)); } struct msm_gpu *a2xx_gpu_init(struct drm_device *dev); |