Diffstat (limited to 'drivers/gpu/drm/msm/adreno/a6xx_gmu.c')
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 377
1 file changed, 323 insertions, 54 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 37927bdd6fbe..38c0f8ef85c3 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -9,6 +9,7 @@
 #include <linux/pm_domain.h>
 #include <linux/pm_opp.h>
 #include <soc/qcom/cmd-db.h>
+#include <soc/qcom/tcs.h>
 #include <drm/drm_gem.h>

 #include "a6xx_gpu.h"
@@ -27,7 +28,7 @@ static void a6xx_gmu_fault(struct a6xx_gmu *gmu)
         gmu->hung = true;

         /* Turn off the hangcheck timer while we are resetting */
-        del_timer(&gpu->hangcheck_timer);
+        timer_delete(&gpu->hangcheck_timer);

         /* Queue the GPU handler because we need to treat this as a recovery */
         kthread_queue_work(gpu->worker, &gpu->recover_work);
@@ -109,9 +110,11 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
                        bool suspended)
 {
         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+        const struct a6xx_info *info = adreno_gpu->info->a6xx;
         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
         struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
         u32 perf_index;
+        u32 bw_index = 0;
         unsigned long gpu_freq;
         int ret = 0;

@@ -124,6 +127,37 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
                 if (gpu_freq == gmu->gpu_freqs[perf_index])
                         break;

+        /* If enabled, find the corresponding DDR bandwidth index */
+        if (info->bcms && gmu->nr_gpu_bws > 1) {
+                unsigned int bw = dev_pm_opp_get_bw(opp, true, 0);
+
+                for (bw_index = 0; bw_index < gmu->nr_gpu_bws - 1; bw_index++) {
+                        if (bw == gmu->gpu_bw_table[bw_index])
+                                break;
+                }
+
+                /* Vote AB as a fraction of the max bandwidth, starting from A750 */
+                if (bw && adreno_is_a750_family(adreno_gpu)) {
+                        u64 tmp;
+
+                        /* For now, vote for 25% of the bandwidth */
+                        tmp = bw * 25;
+                        do_div(tmp, 100);
+
+                        /*
+                         * The AB vote consists of a 16 bit wide quantized level
+                         * against the maximum supported bandwidth.
+                         * Quantization can be calculated as below:
+                         * vote = (bandwidth * 2^16) / max bandwidth
+                         */
+                        tmp *= MAX_AB_VOTE;
+                        do_div(tmp, gmu->gpu_bw_table[gmu->nr_gpu_bws - 1]);
+
+                        bw_index |= AB_VOTE(clamp(tmp, 1, MAX_AB_VOTE));
+                        bw_index |= AB_VOTE_ENABLE;
+                }
+        }
+
         gmu->current_perf_index = perf_index;
         gmu->freq = gmu->gpu_freqs[perf_index];

@@ -139,8 +173,10 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
                 return;

         if (!gmu->legacy) {
-                a6xx_hfi_set_freq(gmu, perf_index);
-                dev_pm_opp_set_opp(&gpu->pdev->dev, opp);
+                a6xx_hfi_set_freq(gmu, perf_index, bw_index);
+                /* With Bandwidth voting, we now vote for all resources, so skip OPP set */
+                if (!bw_index)
+                        dev_pm_opp_set_opp(&gpu->pdev->dev, opp);
                 return;
         }

@@ -729,6 +765,7 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu)
         const struct firmware *fw_image = adreno_gpu->fw[ADRENO_FW_GMU];
         const struct block_header *blk;
         u32 reg_offset;
+        u32 ver;

         u32 itcm_base = 0x00000000;
         u32 dtcm_base = 0x00040000;
@@ -775,6 +812,12 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu)
                 }
         }

+        ver = gmu_read(gmu, REG_A6XX_GMU_CORE_FW_VERSION);
+        DRM_INFO_ONCE("Loaded GMU firmware v%u.%u.%u\n",
+                      FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MAJOR__MASK, ver),
+                      FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MINOR__MASK, ver),
+                      FIELD_GET(A6XX_GMU_CORE_FW_VERSION_STEP__MASK, ver));
+
         return 0;
 }

@@ -1021,14 +1064,6 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)

         gmu->hung = false;

-        /* Notify AOSS about the ACD state (unimplemented for now => disable it) */
-        if (!IS_ERR(gmu->qmp)) {
-                ret = qmp_send(gmu->qmp, "{class: gpu, res: acd, val: %d}",
-                               0 /* Hardcode ACD to be disabled for now */);
-                if (ret)
-                        dev_err(gmu->dev, "failed to send GPU ACD state\n");
-        }
-
         /* Turn on the resources */
         pm_runtime_get_sync(gmu->dev);

@@ -1126,49 +1161,50 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
         struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
         struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
         u32 val;
+        int ret;

         /*
-         * The GMU may still be in slumber unless the GPU started so check and
-         * skip putting it back into slumber if so
+         * GMU firmware's internal power state gets messed up if we send "prepare_slumber" hfi when
+         * oob_gpu handshake wasn't done after the last wake up. So do a dummy handshake here when
+         * required
          */
-        val = gmu_read(gmu, REG_A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE);
+        if (adreno_gpu->base.needs_hw_init) {
+                if (a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET))
+                        goto force_off;

-        if (val != 0xf) {
-                int ret = a6xx_gmu_wait_for_idle(gmu);
+                a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
+        }

-                /* If the GMU isn't responding assume it is hung */
-                if (ret) {
-                        a6xx_gmu_force_off(gmu);
-                        return;
-                }
+        ret = a6xx_gmu_wait_for_idle(gmu);

-                a6xx_bus_clear_pending_transactions(adreno_gpu, a6xx_gpu->hung);
+        /* If the GMU isn't responding assume it is hung */
+        if (ret)
+                goto force_off;

-                /* tell the GMU we want to slumber */
-                ret = a6xx_gmu_notify_slumber(gmu);
-                if (ret) {
-                        a6xx_gmu_force_off(gmu);
-                        return;
-                }
+        a6xx_bus_clear_pending_transactions(adreno_gpu, a6xx_gpu->hung);

-                ret = gmu_poll_timeout(gmu,
-                        REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, val,
-                        !(val & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB),
-                        100, 10000);
+        /* tell the GMU we want to slumber */
+        ret = a6xx_gmu_notify_slumber(gmu);
+        if (ret)
+                goto force_off;

-                /*
-                 * Let the user know we failed to slumber but don't worry too
-                 * much because we are powering down anyway
-                 */
+        ret = gmu_poll_timeout(gmu,
+                REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, val,
+                !(val & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB),
+                100, 10000);

-                if (ret)
-                        DRM_DEV_ERROR(gmu->dev,
-                                "Unable to slumber GMU: status = 0%x/0%x\n",
-                                gmu_read(gmu,
-                                        REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS),
-                                gmu_read(gmu,
-                                        REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2));
-        }
+        /*
+         * Let the user know we failed to slumber but don't worry too
+         * much because we are powering down anyway
+         */
+
+        if (ret)
+                DRM_DEV_ERROR(gmu->dev,
+                        "Unable to slumber GMU: status = 0%x/0%x\n",
+                        gmu_read(gmu,
+                                REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS),
+                        gmu_read(gmu,
+                                REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2));

         /* Turn off HFI */
         a6xx_hfi_stop(gmu);
@@ -1178,6 +1214,11 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)

         /* Tell RPMh to power off the GPU */
         a6xx_rpmh_stop(gmu);
+
+        return;
+
+force_off:
+        a6xx_gmu_force_off(gmu);
 }

@@ -1265,7 +1306,7 @@ static int a6xx_gmu_memory_alloc(struct a6xx_gmu *gmu, struct a6xx_gmu_bo *bo,
         bo->virt = msm_gem_get_vaddr(bo->obj);
         bo->size = size;

-        msm_gem_object_set_name(bo->obj, name);
+        msm_gem_object_set_name(bo->obj, "%s", name);

         return 0;
 }
@@ -1287,6 +1328,104 @@ static int a6xx_gmu_memory_probe(struct a6xx_gmu *gmu)
         return 0;
 }

+/**
+ * struct bcm_db - Auxiliary data pertaining to each Bus Clock Manager (BCM)
+ * @unit: divisor used to convert bytes/sec bw value to an RPMh msg
+ * @width: multiplier used to convert bytes/sec bw value to an RPMh msg
+ * @vcd: virtual clock domain that this bcm belongs to
+ * @reserved: reserved field
+ */
+struct bcm_db {
+        __le32 unit;
+        __le16 width;
+        u8 vcd;
+        u8 reserved;
+};
+
+static int a6xx_gmu_rpmh_bw_votes_init(struct adreno_gpu *adreno_gpu,
+                                       const struct a6xx_info *info,
+                                       struct a6xx_gmu *gmu)
+{
+        const struct bcm_db *bcm_data[GMU_MAX_BCMS] = { 0 };
+        unsigned int bcm_index, bw_index, bcm_count = 0;
+
+        /* Retrieve BCM data from cmd-db */
+        for (bcm_index = 0; bcm_index < GMU_MAX_BCMS; bcm_index++) {
+                const struct a6xx_bcm *bcm = &info->bcms[bcm_index];
+                size_t count;
+
+                /* Stop at NULL terminated bcm entry */
+                if (!bcm->name)
+                        break;
+
+                bcm_data[bcm_index] = cmd_db_read_aux_data(bcm->name, &count);
+                if (IS_ERR(bcm_data[bcm_index]))
+                        return PTR_ERR(bcm_data[bcm_index]);
+
+                if (!count) {
+                        dev_err(gmu->dev, "invalid BCM '%s' aux data size\n",
+                                bcm->name);
+                        return -EINVAL;
+                }
+
+                bcm_count++;
+        }
+
+        /* Generate BCM votes values for each bandwidth & BCM */
+        for (bw_index = 0; bw_index < gmu->nr_gpu_bws; bw_index++) {
+                u32 *data = gmu->gpu_ib_votes[bw_index];
+                u32 bw = gmu->gpu_bw_table[bw_index];
+
+                /* Calculations loosely copied from bcm_aggregate() & tcs_cmd_gen() */
+                for (bcm_index = 0; bcm_index < bcm_count; bcm_index++) {
+                        const struct a6xx_bcm *bcm = &info->bcms[bcm_index];
+                        bool commit = false;
+                        u64 peak;
+                        u32 vote;
+
+                        if (bcm_index == bcm_count - 1 ||
+                            (bcm_data[bcm_index + 1] &&
+                             bcm_data[bcm_index]->vcd != bcm_data[bcm_index + 1]->vcd))
+                                commit = true;
+
+                        if (!bw) {
+                                data[bcm_index] = BCM_TCS_CMD(commit, false, 0, 0);
+                                continue;
+                        }
+
+                        if (bcm->fixed) {
+                                u32 perfmode = 0;
+
+                                /* GMU on A6xx votes perfmode on all valid bandwidth */
+                                if (!adreno_is_a7xx(adreno_gpu) ||
+                                    (bcm->perfmode_bw && bw >= bcm->perfmode_bw))
+                                        perfmode = bcm->perfmode;
+
+                                data[bcm_index] = BCM_TCS_CMD(commit, true, 0, perfmode);
+                                continue;
+                        }
+
+                        /* Multiply the bandwidth by the width of the connection */
+                        peak = (u64)bw * le16_to_cpu(bcm_data[bcm_index]->width);
+                        do_div(peak, bcm->buswidth);
+
+                        /* Input bandwidth value is in KBps, scale the value to BCM unit */
+                        peak *= 1000;
+                        do_div(peak, le32_to_cpu(bcm_data[bcm_index]->unit));
+
+                        vote = clamp(peak, 1, BCM_TCS_CMD_VOTE_MASK);
+
+                        /* GMUs on A7xx votes on both x & y */
+                        if (adreno_is_a7xx(adreno_gpu))
+                                data[bcm_index] = BCM_TCS_CMD(commit, true, vote, vote);
+                        else
+                                data[bcm_index] = BCM_TCS_CMD(commit, true, 0, vote);
+                }
+        }
+
+        return 0;
+}
+
 /* Return the 'arc-level' for the given frequency */
 static unsigned int a6xx_gmu_get_arc_level(struct device *dev,
                                            unsigned long freq)
@@ -1390,12 +1529,15 @@ static int a6xx_gmu_rpmh_arc_votes_init(struct device *dev, u32 *votes,
  * The GMU votes with the RPMh for itself and on behalf of the GPU but we need
  * to construct the list of votes on the CPU and send it over. Query the RPMh
  * voltage levels and build the votes
+ * The GMU can also vote for DDR interconnects, use the OPP bandwidth entries
+ * and BCM parameters to build the votes.
  */
 static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu)
 {
         struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
         struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+        const struct a6xx_info *info = adreno_gpu->info->a6xx;
         struct msm_gpu *gpu = &adreno_gpu->base;
         int ret;

@@ -1407,6 +1549,10 @@ static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu)
         ret |= a6xx_gmu_rpmh_arc_votes_init(gmu->dev, gmu->cx_arc_votes,
                 gmu->gmu_freqs, gmu->nr_gmu_freqs, "cx.lvl");

+        /* Build the interconnect votes */
+        if (info->bcms && gmu->nr_gpu_bws > 1)
+                ret |= a6xx_gmu_rpmh_bw_votes_init(adreno_gpu, info, gmu);
+
         return ret;
 }

@@ -1442,10 +1588,43 @@ static int a6xx_gmu_build_freq_table(struct device *dev, unsigned long *freqs,
         return index;
 }

+static int a6xx_gmu_build_bw_table(struct device *dev, unsigned long *bandwidths,
+                                   u32 size)
+{
+        int count = dev_pm_opp_get_opp_count(dev);
+        struct dev_pm_opp *opp;
+        int i, index = 0;
+        unsigned int bandwidth = 1;
+
+        /*
+         * The OPP table doesn't contain the "off" bandwidth level so we need to
+         * add 1 to the table size to account for it
+         */
+
+        if (WARN(count + 1 > size,
+                 "The GMU bandwidth table is being truncated\n"))
+                count = size - 1;
+
+        /* Set the "off" bandwidth */
+        bandwidths[index++] = 0;
+
+        for (i = 0; i < count; i++) {
+                opp = dev_pm_opp_find_bw_ceil(dev, &bandwidth, 0);
+                if (IS_ERR(opp))
+                        break;
+
+                dev_pm_opp_put(opp);
+                bandwidths[index++] = bandwidth++;
+        }
+
+        return index;
+}
+
 static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu)
 {
         struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
         struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+        const struct a6xx_info *info = adreno_gpu->info->a6xx;
         struct msm_gpu *gpu = &adreno_gpu->base;
         int ret = 0;

@@ -1472,10 +1651,87 @@ static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu)

         gmu->current_perf_index = gmu->nr_gpu_freqs - 1;

+        /*
+         * The GMU also handles GPU Interconnect Votes so build a list
+         * of DDR bandwidths from the GPU OPP table
+         */
+        if (info->bcms)
+                gmu->nr_gpu_bws = a6xx_gmu_build_bw_table(&gpu->pdev->dev,
+                        gmu->gpu_bw_table, ARRAY_SIZE(gmu->gpu_bw_table));
+
         /* Build the list of RPMh votes that we'll send to the GMU */
         return a6xx_gmu_rpmh_votes_init(gmu);
 }

+static int a6xx_gmu_acd_probe(struct a6xx_gmu *gmu)
+{
+        struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
+        struct a6xx_hfi_acd_table *cmd = &gmu->acd_table;
+        struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+        struct msm_gpu *gpu = &adreno_gpu->base;
+        int ret, i, cmd_idx = 0;
+        extern bool disable_acd;
+
+        /* Skip ACD probe if requested via module param */
+        if (disable_acd) {
+                DRM_DEV_ERROR(gmu->dev, "Skipping GPU ACD probe\n");
+                return 0;
+        }
+
+        cmd->version = 1;
+        cmd->stride = 1;
+        cmd->enable_by_level = 0;
+
+        /* Skip freq = 0 and parse acd-level for rest of the OPPs */
+        for (i = 1; i < gmu->nr_gpu_freqs; i++) {
+                struct dev_pm_opp *opp;
+                struct device_node *np;
+                unsigned long freq;
+                u32 val;
+
+                freq = gmu->gpu_freqs[i];
+                opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, freq, true);
+                np = dev_pm_opp_get_of_node(opp);
+
+                ret = of_property_read_u32(np, "qcom,opp-acd-level", &val);
+                of_node_put(np);
+                dev_pm_opp_put(opp);
+                if (ret == -EINVAL)
+                        continue;
+                else if (ret) {
+                        DRM_DEV_ERROR(gmu->dev, "Unable to read acd level for freq %lu\n", freq);
+                        return ret;
+                }
+
+                cmd->enable_by_level |= BIT(i);
+                cmd->data[cmd_idx++] = val;
+        }
+
+        cmd->num_levels = cmd_idx;
+
+        /* It is a problem if qmp node is unavailable when ACD is required */
+        if (cmd->enable_by_level && IS_ERR_OR_NULL(gmu->qmp)) {
+                DRM_DEV_ERROR(gmu->dev, "Unable to send ACD state to AOSS\n");
+                return -EINVAL;
+        }
+
+        /* Otherwise, nothing to do if qmp is unavailable */
+        if (IS_ERR_OR_NULL(gmu->qmp))
+                return 0;
+
+        /*
+         * Notify AOSS about the ACD state. AOSS is supposed to assume that ACD is disabled on
+         * system reset. So it is harmless if we couldn't notify 'OFF' state
+         */
+        ret = qmp_send(gmu->qmp, "{class: gpu, res: acd, val: %d}", !!cmd->enable_by_level);
+        if (ret && cmd->enable_by_level) {
+                DRM_DEV_ERROR(gmu->dev, "Failed to send ACD state to AOSS\n");
+                return ret;
+        }
+
+        return 0;
+}
+
 static int a6xx_gmu_clocks_probe(struct a6xx_gmu *gmu)
 {
         int ret = devm_clk_bulk_get_all(gmu->dev, &gmu->clocks);
@@ -1522,15 +1778,13 @@ static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, struct platform_device *pdev,

         irq = platform_get_irq_byname(pdev, name);

-        ret = request_irq(irq, handler, IRQF_TRIGGER_HIGH, name, gmu);
+        ret = request_irq(irq, handler, IRQF_TRIGGER_HIGH | IRQF_NO_AUTOEN, name, gmu);
         if (ret) {
                 DRM_DEV_ERROR(&pdev->dev, "Unable to get interrupt %s %d\n",
                               name, ret);
                 return ret;
         }

-        disable_irq(irq);
-
         return irq;
 }

@@ -1605,7 +1859,9 @@ int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)

         gmu->dev = &pdev->dev;

-        of_dma_configure(gmu->dev, node, true);
+        ret = of_dma_configure(gmu->dev, node, true);
+        if (ret)
+                return ret;

         pm_runtime_enable(gmu->dev);

@@ -1670,7 +1926,9 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)

         gmu->dev = &pdev->dev;

-        of_dma_configure(gmu->dev, node, true);
+        ret = of_dma_configure(gmu->dev, node, true);
+        if (ret)
+                return ret;

         /* Fow now, don't do anything fancy until we get our feet under us */
         gmu->idle_level = GMU_IDLE_STATE_ACTIVE;

@@ -1792,10 +2050,11 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
                 goto detach_cxpd;
         }

+        /* Other errors are handled during GPU ACD probe */
         gmu->qmp = qmp_get(gmu->dev);
-        if (IS_ERR(gmu->qmp) && adreno_is_a7xx(adreno_gpu)) {
-                ret = PTR_ERR(gmu->qmp);
-                goto remove_device_link;
+        if (PTR_ERR_OR_ZERO(gmu->qmp) == -EPROBE_DEFER) {
+                ret = -EPROBE_DEFER;
+                goto detach_gxpd;
         }

         init_completion(&gmu->pd_gate);

@@ -1811,6 +2070,10 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
         /* Get the power levels for the GMU and GPU */
         a6xx_gmu_pwrlevels_probe(gmu);

+        ret = a6xx_gmu_acd_probe(gmu);
+        if (ret)
+                goto detach_gxpd;
+
         /* Set up the HFI queues */
         a6xx_hfi_init(gmu);

@@ -1821,7 +2084,13 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)

         return 0;

-remove_device_link:
+detach_gxpd:
+        if (!IS_ERR_OR_NULL(gmu->gxpd))
+                dev_pm_domain_detach(gmu->gxpd, false);
+
+        if (!IS_ERR_OR_NULL(gmu->qmp))
+                qmp_put(gmu->qmp);
+
         device_link_del(link);

 detach_cxpd:
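
The AB vote arithmetic in the a6xx_gmu_set_freq() hunk above is easier to follow outside the diff. The standalone sketch below reproduces it with made-up numbers: 25% of the selected OPP bandwidth is quantized to a 16-bit level against the largest entry in the bandwidth table. The EXAMPLE_MAX_AB_VOTE constant and the bandwidth figures are illustrative assumptions, not the driver's actual definitions.

/*
 * Illustrative sketch only: mirrors the quantization described in the
 * driver comment, vote = (bandwidth * 2^16) / max bandwidth, where the
 * requested bandwidth is 25% of the selected OPP bandwidth. The constant
 * and figures are assumptions, not taken from the driver headers.
 */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_MAX_AB_VOTE 0xffffULL   /* assumed 16-bit vote range */

static uint32_t example_ab_vote(uint64_t opp_bw_kbps, uint64_t max_bw_kbps)
{
        /* Vote for 25% of the OPP bandwidth, as the driver comment says */
        uint64_t tmp = opp_bw_kbps * 25 / 100;

        /* Quantize against the maximum supported bandwidth */
        tmp = tmp * EXAMPLE_MAX_AB_VOTE / max_bw_kbps;

        /* Clamp to a non-zero value that fits the vote field */
        if (tmp < 1)
                tmp = 1;
        if (tmp > EXAMPLE_MAX_AB_VOTE)
                tmp = EXAMPLE_MAX_AB_VOTE;

        return (uint32_t)tmp;
}

int main(void)
{
        /* Made-up figures: 8,171,875 KBps OPP, 16,500,000 KBps table maximum */
        printf("AB vote level: %u\n", (unsigned)example_ab_vote(8171875, 16500000));
        return 0;
}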
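
The per-BCM scaling in a6xx_gmu_rpmh_bw_votes_init() follows a similar pattern: the KBps bandwidth is multiplied by the BCM's connection width, divided by the bus width, scaled to the BCM unit, and clamped into the vote field. A minimal sketch of just that arithmetic, assuming example width/unit/buswidth values and a 14-bit vote field, and omitting the BCM_TCS_CMD packing:

/*
 * Illustrative sketch only: the width, unit and buswidth values below are
 * assumptions, and the 14-bit clamp stands in for BCM_TCS_CMD_VOTE_MASK.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t example_bcm_vote(uint64_t bw_kbps, uint32_t width,
                                 uint32_t unit, uint32_t buswidth)
{
        /* Multiply the bandwidth by the width of the connection */
        uint64_t peak = bw_kbps * width / buswidth;

        /* Input bandwidth is in KBps, scale it to the BCM unit */
        peak = peak * 1000 / unit;

        /* Clamp to a non-zero value that fits the assumed 14-bit vote field */
        if (peak < 1)
                peak = 1;
        if (peak > 0x3fff)
                peak = 0x3fff;

        return (uint32_t)peak;
}

int main(void)
{
        /* Made-up BCM parameters: 4-byte port width, unit of 1,000,000 */
        printf("BCM vote: %u\n", (unsigned)example_bcm_vote(8171875, 4, 1000000, 4));
        return 0;
}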