summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/msm/adreno/a6xx_gmu.c')
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gmu.c377
1 files changed, 323 insertions, 54 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 37927bdd6fbe..38c0f8ef85c3 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -9,6 +9,7 @@
#include <linux/pm_domain.h>
#include <linux/pm_opp.h>
#include <soc/qcom/cmd-db.h>
+#include <soc/qcom/tcs.h>
#include <drm/drm_gem.h>
#include "a6xx_gpu.h"
@@ -27,7 +28,7 @@ static void a6xx_gmu_fault(struct a6xx_gmu *gmu)
gmu->hung = true;
/* Turn off the hangcheck timer while we are resetting */
- del_timer(&gpu->hangcheck_timer);
+ timer_delete(&gpu->hangcheck_timer);
/* Queue the GPU handler because we need to treat this as a recovery */
kthread_queue_work(gpu->worker, &gpu->recover_work);
@@ -109,9 +110,11 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
bool suspended)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ const struct a6xx_info *info = adreno_gpu->info->a6xx;
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
u32 perf_index;
+ u32 bw_index = 0;
unsigned long gpu_freq;
int ret = 0;
@@ -124,6 +127,37 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
if (gpu_freq == gmu->gpu_freqs[perf_index])
break;
+ /* If enabled, find the corresponding DDR bandwidth index */
+ if (info->bcms && gmu->nr_gpu_bws > 1) {
+ unsigned int bw = dev_pm_opp_get_bw(opp, true, 0);
+
+ for (bw_index = 0; bw_index < gmu->nr_gpu_bws - 1; bw_index++) {
+ if (bw == gmu->gpu_bw_table[bw_index])
+ break;
+ }
+
+ /* Vote AB as a fraction of the max bandwidth, starting from A750 */
+ if (bw && adreno_is_a750_family(adreno_gpu)) {
+ u64 tmp;
+
+ /* For now, vote for 25% of the bandwidth */
+ tmp = bw * 25;
+ do_div(tmp, 100);
+
+ /*
+ * The AB vote consists of a 16 bit wide quantized level
+ * against the maximum supported bandwidth.
+ * Quantization can be calculated as below:
+ * vote = (bandwidth * 2^16) / max bandwidth
+ */
+ tmp *= MAX_AB_VOTE;
+ do_div(tmp, gmu->gpu_bw_table[gmu->nr_gpu_bws - 1]);
+
+ bw_index |= AB_VOTE(clamp(tmp, 1, MAX_AB_VOTE));
+ bw_index |= AB_VOTE_ENABLE;
+ }
+ }
+
gmu->current_perf_index = perf_index;
gmu->freq = gmu->gpu_freqs[perf_index];
@@ -139,8 +173,10 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
return;
if (!gmu->legacy) {
- a6xx_hfi_set_freq(gmu, perf_index);
- dev_pm_opp_set_opp(&gpu->pdev->dev, opp);
+ a6xx_hfi_set_freq(gmu, perf_index, bw_index);
+ /* With Bandwidth voting, we now vote for all resources, so skip OPP set */
+ if (!bw_index)
+ dev_pm_opp_set_opp(&gpu->pdev->dev, opp);
return;
}
@@ -729,6 +765,7 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu)
const struct firmware *fw_image = adreno_gpu->fw[ADRENO_FW_GMU];
const struct block_header *blk;
u32 reg_offset;
+ u32 ver;
u32 itcm_base = 0x00000000;
u32 dtcm_base = 0x00040000;
@@ -775,6 +812,12 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu)
}
}
+ ver = gmu_read(gmu, REG_A6XX_GMU_CORE_FW_VERSION);
+ DRM_INFO_ONCE("Loaded GMU firmware v%u.%u.%u\n",
+ FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MAJOR__MASK, ver),
+ FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MINOR__MASK, ver),
+ FIELD_GET(A6XX_GMU_CORE_FW_VERSION_STEP__MASK, ver));
+
return 0;
}
@@ -1021,14 +1064,6 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
gmu->hung = false;
- /* Notify AOSS about the ACD state (unimplemented for now => disable it) */
- if (!IS_ERR(gmu->qmp)) {
- ret = qmp_send(gmu->qmp, "{class: gpu, res: acd, val: %d}",
- 0 /* Hardcode ACD to be disabled for now */);
- if (ret)
- dev_err(gmu->dev, "failed to send GPU ACD state\n");
- }
-
/* Turn on the resources */
pm_runtime_get_sync(gmu->dev);
@@ -1126,49 +1161,50 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
u32 val;
+ int ret;
/*
- * The GMU may still be in slumber unless the GPU started so check and
- * skip putting it back into slumber if so
+ * GMU firmware's internal power state gets messed up if we send "prepare_slumber" hfi when
+ * oob_gpu handshake wasn't done after the last wake up. So do a dummy handshake here when
+ * required
*/
- val = gmu_read(gmu, REG_A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE);
+ if (adreno_gpu->base.needs_hw_init) {
+ if (a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET))
+ goto force_off;
- if (val != 0xf) {
- int ret = a6xx_gmu_wait_for_idle(gmu);
+ a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
+ }
- /* If the GMU isn't responding assume it is hung */
- if (ret) {
- a6xx_gmu_force_off(gmu);
- return;
- }
+ ret = a6xx_gmu_wait_for_idle(gmu);
- a6xx_bus_clear_pending_transactions(adreno_gpu, a6xx_gpu->hung);
+ /* If the GMU isn't responding assume it is hung */
+ if (ret)
+ goto force_off;
- /* tell the GMU we want to slumber */
- ret = a6xx_gmu_notify_slumber(gmu);
- if (ret) {
- a6xx_gmu_force_off(gmu);
- return;
- }
+ a6xx_bus_clear_pending_transactions(adreno_gpu, a6xx_gpu->hung);
- ret = gmu_poll_timeout(gmu,
- REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, val,
- !(val & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB),
- 100, 10000);
+ /* tell the GMU we want to slumber */
+ ret = a6xx_gmu_notify_slumber(gmu);
+ if (ret)
+ goto force_off;
- /*
- * Let the user know we failed to slumber but don't worry too
- * much because we are powering down anyway
- */
+ ret = gmu_poll_timeout(gmu,
+ REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, val,
+ !(val & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB),
+ 100, 10000);
- if (ret)
- DRM_DEV_ERROR(gmu->dev,
- "Unable to slumber GMU: status = 0%x/0%x\n",
- gmu_read(gmu,
- REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS),
- gmu_read(gmu,
- REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2));
- }
+ /*
+ * Let the user know we failed to slumber but don't worry too
+ * much because we are powering down anyway
+ */
+
+ if (ret)
+ DRM_DEV_ERROR(gmu->dev,
+ "Unable to slumber GMU: status = 0%x/0%x\n",
+ gmu_read(gmu,
+ REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS),
+ gmu_read(gmu,
+ REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2));
/* Turn off HFI */
a6xx_hfi_stop(gmu);
@@ -1178,6 +1214,11 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
/* Tell RPMh to power off the GPU */
a6xx_rpmh_stop(gmu);
+
+ return;
+
+force_off:
+ a6xx_gmu_force_off(gmu);
}
@@ -1265,7 +1306,7 @@ static int a6xx_gmu_memory_alloc(struct a6xx_gmu *gmu, struct a6xx_gmu_bo *bo,
bo->virt = msm_gem_get_vaddr(bo->obj);
bo->size = size;
- msm_gem_object_set_name(bo->obj, name);
+ msm_gem_object_set_name(bo->obj, "%s", name);
return 0;
}
@@ -1287,6 +1328,104 @@ static int a6xx_gmu_memory_probe(struct a6xx_gmu *gmu)
return 0;
}
+/**
+ * struct bcm_db - Auxiliary data pertaining to each Bus Clock Manager (BCM)
+ * @unit: divisor used to convert bytes/sec bw value to an RPMh msg
+ * @width: multiplier used to convert bytes/sec bw value to an RPMh msg
+ * @vcd: virtual clock domain that this bcm belongs to
+ * @reserved: reserved field
+ */
+struct bcm_db {
+ __le32 unit;
+ __le16 width;
+ u8 vcd;
+ u8 reserved;
+};
+
+static int a6xx_gmu_rpmh_bw_votes_init(struct adreno_gpu *adreno_gpu,
+ const struct a6xx_info *info,
+ struct a6xx_gmu *gmu)
+{
+ const struct bcm_db *bcm_data[GMU_MAX_BCMS] = { 0 };
+ unsigned int bcm_index, bw_index, bcm_count = 0;
+
+ /* Retrieve BCM data from cmd-db */
+ for (bcm_index = 0; bcm_index < GMU_MAX_BCMS; bcm_index++) {
+ const struct a6xx_bcm *bcm = &info->bcms[bcm_index];
+ size_t count;
+
+ /* Stop at NULL terminated bcm entry */
+ if (!bcm->name)
+ break;
+
+ bcm_data[bcm_index] = cmd_db_read_aux_data(bcm->name, &count);
+ if (IS_ERR(bcm_data[bcm_index]))
+ return PTR_ERR(bcm_data[bcm_index]);
+
+ if (!count) {
+ dev_err(gmu->dev, "invalid BCM '%s' aux data size\n",
+ bcm->name);
+ return -EINVAL;
+ }
+
+ bcm_count++;
+ }
+
+ /* Generate BCM votes values for each bandwidth & BCM */
+ for (bw_index = 0; bw_index < gmu->nr_gpu_bws; bw_index++) {
+ u32 *data = gmu->gpu_ib_votes[bw_index];
+ u32 bw = gmu->gpu_bw_table[bw_index];
+
+ /* Calculations loosely copied from bcm_aggregate() & tcs_cmd_gen() */
+ for (bcm_index = 0; bcm_index < bcm_count; bcm_index++) {
+ const struct a6xx_bcm *bcm = &info->bcms[bcm_index];
+ bool commit = false;
+ u64 peak;
+ u32 vote;
+
+ if (bcm_index == bcm_count - 1 ||
+ (bcm_data[bcm_index + 1] &&
+ bcm_data[bcm_index]->vcd != bcm_data[bcm_index + 1]->vcd))
+ commit = true;
+
+ if (!bw) {
+ data[bcm_index] = BCM_TCS_CMD(commit, false, 0, 0);
+ continue;
+ }
+
+ if (bcm->fixed) {
+ u32 perfmode = 0;
+
+ /* GMU on A6xx votes perfmode on all valid bandwidth */
+ if (!adreno_is_a7xx(adreno_gpu) ||
+ (bcm->perfmode_bw && bw >= bcm->perfmode_bw))
+ perfmode = bcm->perfmode;
+
+ data[bcm_index] = BCM_TCS_CMD(commit, true, 0, perfmode);
+ continue;
+ }
+
+ /* Multiply the bandwidth by the width of the connection */
+ peak = (u64)bw * le16_to_cpu(bcm_data[bcm_index]->width);
+ do_div(peak, bcm->buswidth);
+
+ /* Input bandwidth value is in KBps, scale the value to BCM unit */
+ peak *= 1000;
+ do_div(peak, le32_to_cpu(bcm_data[bcm_index]->unit));
+
+ vote = clamp(peak, 1, BCM_TCS_CMD_VOTE_MASK);
+
+ /* GMUs on A7xx votes on both x & y */
+ if (adreno_is_a7xx(adreno_gpu))
+ data[bcm_index] = BCM_TCS_CMD(commit, true, vote, vote);
+ else
+ data[bcm_index] = BCM_TCS_CMD(commit, true, 0, vote);
+ }
+ }
+
+ return 0;
+}
+
/* Return the 'arc-level' for the given frequency */
static unsigned int a6xx_gmu_get_arc_level(struct device *dev,
unsigned long freq)
@@ -1390,12 +1529,15 @@ static int a6xx_gmu_rpmh_arc_votes_init(struct device *dev, u32 *votes,
* The GMU votes with the RPMh for itself and on behalf of the GPU but we need
* to construct the list of votes on the CPU and send it over. Query the RPMh
* voltage levels and build the votes
+ * The GMU can also vote for DDR interconnects, use the OPP bandwidth entries
+ * and BCM parameters to build the votes.
*/
static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu)
{
struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+ const struct a6xx_info *info = adreno_gpu->info->a6xx;
struct msm_gpu *gpu = &adreno_gpu->base;
int ret;
@@ -1407,6 +1549,10 @@ static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu)
ret |= a6xx_gmu_rpmh_arc_votes_init(gmu->dev, gmu->cx_arc_votes,
gmu->gmu_freqs, gmu->nr_gmu_freqs, "cx.lvl");
+ /* Build the interconnect votes */
+ if (info->bcms && gmu->nr_gpu_bws > 1)
+ ret |= a6xx_gmu_rpmh_bw_votes_init(adreno_gpu, info, gmu);
+
return ret;
}
@@ -1442,10 +1588,43 @@ static int a6xx_gmu_build_freq_table(struct device *dev, unsigned long *freqs,
return index;
}
+static int a6xx_gmu_build_bw_table(struct device *dev, unsigned long *bandwidths,
+ u32 size)
+{
+ int count = dev_pm_opp_get_opp_count(dev);
+ struct dev_pm_opp *opp;
+ int i, index = 0;
+ unsigned int bandwidth = 1;
+
+ /*
+ * The OPP table doesn't contain the "off" bandwidth level so we need to
+ * add 1 to the table size to account for it
+ */
+
+ if (WARN(count + 1 > size,
+ "The GMU bandwidth table is being truncated\n"))
+ count = size - 1;
+
+ /* Set the "off" bandwidth */
+ bandwidths[index++] = 0;
+
+ for (i = 0; i < count; i++) {
+ opp = dev_pm_opp_find_bw_ceil(dev, &bandwidth, 0);
+ if (IS_ERR(opp))
+ break;
+
+ dev_pm_opp_put(opp);
+ bandwidths[index++] = bandwidth++;
+ }
+
+ return index;
+}
+
static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu)
{
struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+ const struct a6xx_info *info = adreno_gpu->info->a6xx;
struct msm_gpu *gpu = &adreno_gpu->base;
int ret = 0;
@@ -1472,10 +1651,87 @@ static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu)
gmu->current_perf_index = gmu->nr_gpu_freqs - 1;
+ /*
+ * The GMU also handles GPU Interconnect Votes so build a list
+ * of DDR bandwidths from the GPU OPP table
+ */
+ if (info->bcms)
+ gmu->nr_gpu_bws = a6xx_gmu_build_bw_table(&gpu->pdev->dev,
+ gmu->gpu_bw_table, ARRAY_SIZE(gmu->gpu_bw_table));
+
/* Build the list of RPMh votes that we'll send to the GMU */
return a6xx_gmu_rpmh_votes_init(gmu);
}
+static int a6xx_gmu_acd_probe(struct a6xx_gmu *gmu)
+{
+ struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
+ struct a6xx_hfi_acd_table *cmd = &gmu->acd_table;
+ struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+ struct msm_gpu *gpu = &adreno_gpu->base;
+ int ret, i, cmd_idx = 0;
+ extern bool disable_acd;
+
+ /* Skip ACD probe if requested via module param */
+ if (disable_acd) {
+ DRM_DEV_ERROR(gmu->dev, "Skipping GPU ACD probe\n");
+ return 0;
+ }
+
+ cmd->version = 1;
+ cmd->stride = 1;
+ cmd->enable_by_level = 0;
+
+ /* Skip freq = 0 and parse acd-level for rest of the OPPs */
+ for (i = 1; i < gmu->nr_gpu_freqs; i++) {
+ struct dev_pm_opp *opp;
+ struct device_node *np;
+ unsigned long freq;
+ u32 val;
+
+ freq = gmu->gpu_freqs[i];
+ opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, freq, true);
+ np = dev_pm_opp_get_of_node(opp);
+
+ ret = of_property_read_u32(np, "qcom,opp-acd-level", &val);
+ of_node_put(np);
+ dev_pm_opp_put(opp);
+ if (ret == -EINVAL)
+ continue;
+ else if (ret) {
+ DRM_DEV_ERROR(gmu->dev, "Unable to read acd level for freq %lu\n", freq);
+ return ret;
+ }
+
+ cmd->enable_by_level |= BIT(i);
+ cmd->data[cmd_idx++] = val;
+ }
+
+ cmd->num_levels = cmd_idx;
+
+ /* It is a problem if qmp node is unavailable when ACD is required */
+ if (cmd->enable_by_level && IS_ERR_OR_NULL(gmu->qmp)) {
+ DRM_DEV_ERROR(gmu->dev, "Unable to send ACD state to AOSS\n");
+ return -EINVAL;
+ }
+
+ /* Otherwise, nothing to do if qmp is unavailable */
+ if (IS_ERR_OR_NULL(gmu->qmp))
+ return 0;
+
+ /*
+ * Notify AOSS about the ACD state. AOSS is supposed to assume that ACD is disabled on
+ * system reset. So it is harmless if we couldn't notify 'OFF' state
+ */
+ ret = qmp_send(gmu->qmp, "{class: gpu, res: acd, val: %d}", !!cmd->enable_by_level);
+ if (ret && cmd->enable_by_level) {
+ DRM_DEV_ERROR(gmu->dev, "Failed to send ACD state to AOSS\n");
+ return ret;
+ }
+
+ return 0;
+}
+
static int a6xx_gmu_clocks_probe(struct a6xx_gmu *gmu)
{
int ret = devm_clk_bulk_get_all(gmu->dev, &gmu->clocks);
@@ -1522,15 +1778,13 @@ static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, struct platform_device *pdev,
irq = platform_get_irq_byname(pdev, name);
- ret = request_irq(irq, handler, IRQF_TRIGGER_HIGH, name, gmu);
+ ret = request_irq(irq, handler, IRQF_TRIGGER_HIGH | IRQF_NO_AUTOEN, name, gmu);
if (ret) {
DRM_DEV_ERROR(&pdev->dev, "Unable to get interrupt %s %d\n",
name, ret);
return ret;
}
- disable_irq(irq);
-
return irq;
}
@@ -1605,7 +1859,9 @@ int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
gmu->dev = &pdev->dev;
- of_dma_configure(gmu->dev, node, true);
+ ret = of_dma_configure(gmu->dev, node, true);
+ if (ret)
+ return ret;
pm_runtime_enable(gmu->dev);
@@ -1670,7 +1926,9 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
gmu->dev = &pdev->dev;
- of_dma_configure(gmu->dev, node, true);
+ ret = of_dma_configure(gmu->dev, node, true);
+ if (ret)
+ return ret;
/* Fow now, don't do anything fancy until we get our feet under us */
gmu->idle_level = GMU_IDLE_STATE_ACTIVE;
@@ -1792,10 +2050,11 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
goto detach_cxpd;
}
+ /* Other errors are handled during GPU ACD probe */
gmu->qmp = qmp_get(gmu->dev);
- if (IS_ERR(gmu->qmp) && adreno_is_a7xx(adreno_gpu)) {
- ret = PTR_ERR(gmu->qmp);
- goto remove_device_link;
+ if (PTR_ERR_OR_ZERO(gmu->qmp) == -EPROBE_DEFER) {
+ ret = -EPROBE_DEFER;
+ goto detach_gxpd;
}
init_completion(&gmu->pd_gate);
@@ -1811,6 +2070,10 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
/* Get the power levels for the GMU and GPU */
a6xx_gmu_pwrlevels_probe(gmu);
+ ret = a6xx_gmu_acd_probe(gmu);
+ if (ret)
+ goto detach_gxpd;
+
/* Set up the HFI queues */
a6xx_hfi_init(gmu);
@@ -1821,7 +2084,13 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
return 0;
-remove_device_link:
+detach_gxpd:
+ if (!IS_ERR_OR_NULL(gmu->gxpd))
+ dev_pm_domain_detach(gmu->gxpd, false);
+
+ if (!IS_ERR_OR_NULL(gmu->qmp))
+ qmp_put(gmu->qmp);
+
device_link_del(link);
detach_cxpd: