diff options
Diffstat (limited to 'drivers/gpu/drm/msm/adreno/a6xx_gpu.c')
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 137 |
1 files changed, 86 insertions, 51 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 9c5e4618aa0a..40c9fef457a4 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -52,21 +52,25 @@ static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) return true; } -static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - uint32_t wptr; - unsigned long flags; /* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */ if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) { - struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - OUT_PKT7(ring, CP_WHERE_AM_I, 2); OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring))); OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring))); } +} + +static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + uint32_t wptr; + unsigned long flags; + + update_shadow_rptr(gpu, ring); spin_lock_irqsave(&ring->preempt_lock, flags); @@ -145,7 +149,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct msm_ringbuffer *ring = submit->ring; - unsigned int i; + unsigned int i, ibs = 0; a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx); @@ -181,8 +185,19 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); OUT_RING(ring, submit->cmd[i].size); + ibs++; break; } + + /* + * Periodically update shadow-wptr if needed, so that we + * can see partial progress of submits with large # of + * cmds.. otherwise we could needlessly stall waiting for + * ringbuffer state, simply due to looking at a shadow + * rptr value that has not been updated + */ + if ((ibs % 32) == 0) + update_shadow_rptr(gpu, ring); } get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0), @@ -652,7 +667,7 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu) regs = a650_protect; count = ARRAY_SIZE(a650_protect); count_max = 48; - } else if (adreno_is_a660(adreno_gpu)) { + } else if (adreno_is_a660_family(adreno_gpu)) { regs = a660_protect; count = ARRAY_SIZE(a660_protect); count_max = 48; @@ -683,7 +698,7 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) if (adreno_is_a618(adreno_gpu)) return; - if (adreno_is_a640(adreno_gpu)) + if (adreno_is_a640_family(adreno_gpu)) amsbc = 1; if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) { @@ -694,6 +709,13 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) uavflagprd_inv = 2; } + if (adreno_is_7c3(adreno_gpu)) { + lower_bit = 1; + amsbc = 1; + rgb565_predicator = 1; + uavflagprd_inv = 2; + } + gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, rgb565_predicator << 11 | amsbc << 4 | lower_bit << 1); gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, lower_bit << 1); @@ -740,6 +762,7 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, { struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; struct msm_gpu *gpu = &adreno_gpu->base; + const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE]; u32 *buf = msm_gem_get_vaddr(obj); bool ret = false; @@ -756,8 +779,7 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, * * a660 targets have all the critical security fixes from the start */ - if (adreno_is_a618(adreno_gpu) || adreno_is_a630(adreno_gpu) || - adreno_is_a640(adreno_gpu)) { + if (!strcmp(sqe_name, "a630_sqe.fw")) { /* * If the lowest nibble is 0xa that is an indication that this * microcode has been patched. The actual version is in dword @@ -778,7 +800,7 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, DRM_DEV_ERROR(&gpu->pdev->dev, "a630 SQE ucode is too old. Have version %x need at least %x\n", buf[0] & 0xfff, 0x190); - } else if (adreno_is_a650(adreno_gpu)) { + } else if (!strcmp(sqe_name, "a650_sqe.fw")) { if ((buf[0] & 0xfff) >= 0x095) { ret = true; goto out; @@ -787,7 +809,7 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, DRM_DEV_ERROR(&gpu->pdev->dev, "a650 SQE ucode is too old. Have version %x need at least %x\n", buf[0] & 0xfff, 0x095); - } else if (adreno_is_a660(adreno_gpu)) { + } else if (!strcmp(sqe_name, "a660_sqe.fw")) { ret = true; } else { DRM_DEV_ERROR(&gpu->pdev->dev, @@ -897,7 +919,8 @@ static int a6xx_hw_init(struct msm_gpu *gpu) a6xx_set_hwcg(gpu, true); /* VBIF/GBIF start*/ - if (adreno_is_a640(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) { + if (adreno_is_a640_family(adreno_gpu) || + adreno_is_a650_family(adreno_gpu)) { gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620); gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620); gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620); @@ -935,13 +958,14 @@ static int a6xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804); gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4); - if (adreno_is_a640(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) + if (adreno_is_a640_family(adreno_gpu) || + adreno_is_a650_family(adreno_gpu)) gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140); else gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0); gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c); - if (adreno_is_a660(adreno_gpu)) + if (adreno_is_a660_family(adreno_gpu)) gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020); /* Setting the mem pool size */ @@ -952,8 +976,10 @@ static int a6xx_hw_init(struct msm_gpu *gpu) */ if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200); - else if (adreno_is_a640(adreno_gpu)) + else if (adreno_is_a640_family(adreno_gpu) || adreno_is_7c3(adreno_gpu)) gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200200); + else if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) + gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200); else gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00180000); @@ -990,13 +1016,15 @@ static int a6xx_hw_init(struct msm_gpu *gpu) /* Protect registers from the CP */ a6xx_set_cp_protect(gpu); - if (adreno_is_a660(adreno_gpu)) { + if (adreno_is_a660_family(adreno_gpu)) { gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1); gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0); - /* Set dualQ + disable afull for A660 GPU but not for A635 */ - gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906); } + /* Set dualQ + disable afull for A660 GPU */ + if (adreno_is_a660(adreno_gpu)) + gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906); + /* Enable expanded apriv for targets that support it */ if (gpu->hw_apriv) { gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL, @@ -1035,7 +1063,7 @@ static int a6xx_hw_init(struct msm_gpu *gpu) if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) { if (!a6xx_gpu->shadow_bo) { - a6xx_gpu->shadow = msm_gem_kernel_new_locked(gpu->dev, + a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev, sizeof(u32) * gpu->nr_rings, MSM_BO_WC | MSM_BO_MAP_PRIV, gpu->aspace, &a6xx_gpu->shadow_bo, @@ -1383,13 +1411,13 @@ static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) { struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; struct msm_gpu *gpu = &adreno_gpu->base; - u32 cntl1_regval = 0; + u32 gpu_scid, cntl1_regval = 0; if (IS_ERR(a6xx_gpu->llc_mmio)) return; if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { - u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); + gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); gpu_scid &= 0x1f; cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) | @@ -1409,26 +1437,34 @@ static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) } } - if (cntl1_regval) { + if (!cntl1_regval) + return; + + /* + * Program the slice IDs for the various GPU blocks and GPU MMU + * pagetables + */ + if (!a6xx_gpu->have_mmu500) { + a6xx_llc_write(a6xx_gpu, + REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval); + /* - * Program the slice IDs for the various GPU blocks and GPU MMU - * pagetables + * Program cacheability overrides to not allocate cache + * lines on a write miss */ - if (a6xx_gpu->have_mmu500) - gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), - cntl1_regval); - else { - a6xx_llc_write(a6xx_gpu, - REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval); - - /* - * Program cacheability overrides to not allocate cache - * lines on a write miss - */ - a6xx_llc_rmw(a6xx_gpu, - REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); - } + a6xx_llc_rmw(a6xx_gpu, + REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); + return; } + + gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval); + + /* On A660, the SCID programming for UCHE traffic is done in + * A6XX_GBIF_SCACHE_CNTL0[14:10] + */ + if (adreno_is_a660_family(adreno_gpu)) + gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) | + (1 << 8), (gpu_scid << 10) | (1 << 8)); } static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu) @@ -1477,7 +1513,7 @@ static int a6xx_pm_resume(struct msm_gpu *gpu) if (ret) return ret; - msm_gpu_resume_devfreq(gpu); + msm_devfreq_resume(gpu); a6xx_llc_activate(a6xx_gpu); @@ -1494,7 +1530,7 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) a6xx_llc_deactivate(a6xx_gpu); - devfreq_suspend_device(gpu->devfreq.devfreq); + msm_devfreq_suspend(gpu); ret = a6xx_gmu_stop(a6xx_gpu); if (ret) @@ -1667,11 +1703,11 @@ static u32 a618_get_speed_bin(u32 fuse) return UINT_MAX; } -static u32 fuse_to_supp_hw(struct device *dev, u32 revn, u32 fuse) +static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse) { u32 val = UINT_MAX; - if (revn == 618) + if (adreno_cmp_rev(ADRENO_REV(6, 1, 8, ANY_ID), rev)) val = a618_get_speed_bin(fuse); if (val == UINT_MAX) { @@ -1684,14 +1720,13 @@ static u32 fuse_to_supp_hw(struct device *dev, u32 revn, u32 fuse) return (1 << val); } -static int a6xx_set_supported_hw(struct device *dev, struct a6xx_gpu *a6xx_gpu, - u32 revn) +static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev) { u32 supp_hw = UINT_MAX; - u16 speedbin; + u32 speedbin; int ret; - ret = nvmem_cell_read_u16(dev, "speed_bin", &speedbin); + ret = nvmem_cell_read_variable_le_u32(dev, "speed_bin", &speedbin); /* * -ENOENT means that the platform doesn't support speedbin which is * fine @@ -1704,9 +1739,8 @@ static int a6xx_set_supported_hw(struct device *dev, struct a6xx_gpu *a6xx_gpu, ret); goto done; } - speedbin = le16_to_cpu(speedbin); - supp_hw = fuse_to_supp_hw(dev, revn, speedbin); + supp_hw = fuse_to_supp_hw(dev, rev, speedbin); done: ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1); @@ -1772,12 +1806,13 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) */ info = adreno_info(config->rev); - if (info && (info->revn == 650 || info->revn == 660)) + if (info && (info->revn == 650 || info->revn == 660 || + adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev))) adreno_gpu->base.hw_apriv = true; a6xx_llc_slices_init(pdev, a6xx_gpu); - ret = a6xx_set_supported_hw(&pdev->dev, a6xx_gpu, info->revn); + ret = a6xx_set_supported_hw(&pdev->dev, config->rev); if (ret) { a6xx_destroy(&(a6xx_gpu->base.base)); return ERR_PTR(ret); |