summaryrefslogtreecommitdiff
path: root/drivers/misc/habanalabs/goya/goya.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/goya/goya.c')
-rw-r--r--drivers/misc/habanalabs/goya/goya.c165
1 files changed, 129 insertions, 36 deletions
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 5536e8c27bd5..fbcc7bbf44b3 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*/
@@ -410,25 +410,26 @@ int goya_set_fixed_properties(struct hl_device *hdev)
else
prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
prop->mmu_pte_size = HL_PTE_SIZE;
- prop->mmu_hop_table_size = HOP_TABLE_SIZE;
- prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
+ prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
+ prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
prop->dram_supports_virtual_memory = true;
- prop->dmmu.hop0_shift = HOP0_SHIFT;
- prop->dmmu.hop1_shift = HOP1_SHIFT;
- prop->dmmu.hop2_shift = HOP2_SHIFT;
- prop->dmmu.hop3_shift = HOP3_SHIFT;
- prop->dmmu.hop4_shift = HOP4_SHIFT;
- prop->dmmu.hop0_mask = HOP0_MASK;
- prop->dmmu.hop1_mask = HOP1_MASK;
- prop->dmmu.hop2_mask = HOP2_MASK;
- prop->dmmu.hop3_mask = HOP3_MASK;
- prop->dmmu.hop4_mask = HOP4_MASK;
+ prop->dmmu.hop0_shift = MMU_V1_0_HOP0_SHIFT;
+ prop->dmmu.hop1_shift = MMU_V1_0_HOP1_SHIFT;
+ prop->dmmu.hop2_shift = MMU_V1_0_HOP2_SHIFT;
+ prop->dmmu.hop3_shift = MMU_V1_0_HOP3_SHIFT;
+ prop->dmmu.hop4_shift = MMU_V1_0_HOP4_SHIFT;
+ prop->dmmu.hop0_mask = MMU_V1_0_HOP0_MASK;
+ prop->dmmu.hop1_mask = MMU_V1_0_HOP1_MASK;
+ prop->dmmu.hop2_mask = MMU_V1_0_HOP2_MASK;
+ prop->dmmu.hop3_mask = MMU_V1_0_HOP3_MASK;
+ prop->dmmu.hop4_mask = MMU_V1_0_HOP4_MASK;
prop->dmmu.start_addr = VA_DDR_SPACE_START;
prop->dmmu.end_addr = VA_DDR_SPACE_END;
prop->dmmu.page_size = PAGE_SIZE_2MB;
prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
+ prop->dmmu.last_mask = LAST_MASK;
/* shifts and masks are the same in PMMU and DMMU */
memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
@@ -436,6 +437,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->pmmu.end_addr = VA_HOST_SPACE_END;
prop->pmmu.page_size = PAGE_SIZE_4KB;
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
+ prop->pmmu.last_mask = LAST_MASK;
/* PMMU and HPMMU are the same except of page size */
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
@@ -473,6 +475,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->clk_pll_index = HL_GOYA_MME_PLL;
+ prop->use_get_power_for_reset_history = true;
+
return 0;
}
@@ -735,6 +739,11 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev)
int rc;
if (hdev->asic_prop.fw_security_enabled) {
+ struct goya_device *goya = hdev->asic_specific;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
+ return;
+
rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
pll_freq_arr);
@@ -778,9 +787,59 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev)
prop->psoc_pci_pll_div_factor = div_fctr;
}
+/*
+ * goya_set_frequency - set the frequency of the device
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @freq: the new frequency value
+ *
+ * Change the frequency if needed. This function has no protection against
+ * concurrency, therefore it is assumed that the calling function has protected
+ * itself against the case of calling this function from multiple threads with
+ * different values
+ *
+ * Returns 0 if no change was done, otherwise returns 1
+ */
+int goya_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
+{
+ struct goya_device *goya = hdev->asic_specific;
+
+ if ((goya->pm_mng_profile == PM_MANUAL) ||
+ (goya->curr_pll_profile == freq))
+ return 0;
+
+ dev_dbg(hdev->dev, "Changing device frequency to %s\n",
+ freq == PLL_HIGH ? "high" : "low");
+
+ goya_set_pll_profile(hdev, freq);
+
+ goya->curr_pll_profile = freq;
+
+ return 1;
+}
+
+static void goya_set_freq_to_low_job(struct work_struct *work)
+{
+ struct goya_work_freq *goya_work = container_of(work,
+ struct goya_work_freq,
+ work_freq.work);
+ struct hl_device *hdev = goya_work->hdev;
+
+ mutex_lock(&hdev->fpriv_list_lock);
+
+ if (!hdev->is_compute_ctx_active)
+ goya_set_frequency(hdev, PLL_LOW);
+
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ schedule_delayed_work(&goya_work->work_freq,
+ usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
+}
+
int goya_late_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct goya_device *goya = hdev->asic_specific;
int rc;
goya_fetch_psoc_frequency(hdev);
@@ -829,6 +888,16 @@ int goya_late_init(struct hl_device *hdev)
return rc;
}
+ /* force setting to low frequency */
+ goya->curr_pll_profile = PLL_LOW;
+
+ goya->pm_mng_profile = PM_AUTO;
+
+ hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
+
+ schedule_delayed_work(&goya->goya_work->work_freq,
+ usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
+
return 0;
}
@@ -842,8 +911,11 @@ int goya_late_init(struct hl_device *hdev)
void goya_late_fini(struct hl_device *hdev)
{
const struct hwmon_channel_info **channel_info_arr;
+ struct goya_device *goya = hdev->asic_specific;
int i = 0;
+ cancel_delayed_work_sync(&goya->goya_work->work_freq);
+
if (!hdev->hl_chip_info->info)
return;
@@ -961,12 +1033,21 @@ static int goya_sw_init(struct hl_device *hdev)
spin_lock_init(&goya->hw_queues_lock);
hdev->supports_coresight = true;
- hdev->supports_soft_reset = true;
- hdev->allow_inference_soft_reset = true;
+ hdev->asic_prop.supports_soft_reset = true;
+ hdev->asic_prop.allow_inference_soft_reset = true;
hdev->supports_wait_for_multi_cs = false;
hdev->asic_funcs->set_pci_memory_regions(hdev);
+ goya->goya_work = kmalloc(sizeof(struct goya_work_freq), GFP_KERNEL);
+ if (!goya->goya_work) {
+ rc = -ENOMEM;
+ goto free_cpu_accessible_dma_pool;
+ }
+
+ goya->goya_work->hdev = hdev;
+ INIT_DELAYED_WORK(&goya->goya_work->work_freq, goya_set_freq_to_low_job);
+
return 0;
free_cpu_accessible_dma_pool:
@@ -1003,6 +1084,7 @@ static int goya_sw_fini(struct hl_device *hdev)
dma_pool_destroy(hdev->dma_pool);
+ kfree(goya->goya_work);
kfree(goya);
return 0;
@@ -2502,7 +2584,7 @@ static void goya_init_firmware_loader(struct hl_device *hdev)
struct fw_load_mgr *fw_loader = &hdev->fw_loader;
/* fill common fields */
- fw_loader->linux_loaded = false;
+ fw_loader->fw_comp_loaded = FW_TYPE_NONE;
fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE;
fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE;
fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC;
@@ -2619,7 +2701,7 @@ int goya_mmu_init(struct hl_device *hdev)
(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
- VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
+ MMU_OP_USERPTR | MMU_OP_PHYS_PACK);
WREG32(mmMMU_MMU_ENABLE, 1);
WREG32(mmMMU_SPI_MASK, 0xF);
@@ -4395,7 +4477,7 @@ static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
{
struct goya_device *goya = hdev->asic_specific;
- if (hdev->hard_reset_pending)
+ if (hdev->reset_info.hard_reset_pending)
return U64_MAX;
return readq(hdev->pcie_bar[DDR_BAR_ID] +
@@ -4406,7 +4488,7 @@ static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
struct goya_device *goya = hdev->asic_specific;
- if (hdev->hard_reset_pending)
+ if (hdev->reset_info.hard_reset_pending)
return;
writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
@@ -4731,7 +4813,7 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
return rc;
}
-static int goya_soft_reset_late_init(struct hl_device *hdev)
+static int goya_non_hard_reset_late_init(struct hl_device *hdev)
{
/*
* Unmask all IRQs since some could have been received
@@ -4764,24 +4846,39 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
{
+ ktime_t zero_time = ktime_set(0, 0);
+
+ mutex_lock(&hdev->clk_throttling.lock);
+
switch (event_type) {
case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
- hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
+ hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
+ hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
dev_info_ratelimited(hdev->dev,
"Clock throttling due to power consumption\n");
break;
+
case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
- hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
+ hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
dev_info_ratelimited(hdev->dev,
"Power envelop is safe, back to optimal clock\n");
break;
+
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
- hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
+ hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
+ hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
dev_info_ratelimited(hdev->dev,
"Clock throttling due to overheating\n");
break;
+
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
- hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
+ hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
+ hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
dev_info_ratelimited(hdev->dev,
"Thermal envelop is safe, back to optimal clock\n");
break;
@@ -4791,6 +4888,8 @@ static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
event_type);
break;
}
+
+ mutex_unlock(&hdev->clk_throttling.lock);
}
void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
@@ -4834,14 +4933,14 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
goya_print_irq_info(hdev, event_type, false);
if (hdev->hard_reset_on_fw_events)
- hl_device_reset(hdev, (HL_RESET_HARD |
- HL_RESET_FW_FATAL_ERR));
+ hl_device_reset(hdev, (HL_DRV_RESET_HARD |
+ HL_DRV_RESET_FW_FATAL_ERR));
break;
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
goya_print_irq_info(hdev, event_type, false);
if (hdev->hard_reset_on_fw_events)
- hl_device_reset(hdev, HL_RESET_HARD);
+ hl_device_reset(hdev, HL_DRV_RESET_HARD);
break;
case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
@@ -4901,7 +5000,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
goya_print_irq_info(hdev, event_type, false);
goya_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
if (hdev->hard_reset_on_fw_events)
- hl_device_reset(hdev, HL_RESET_HARD);
+ hl_device_reset(hdev, HL_DRV_RESET_HARD);
else
hl_fw_unmask_irq(hdev, event_type);
break;
@@ -5209,7 +5308,7 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
int rc;
if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
- hdev->hard_reset_pending)
+ hdev->reset_info.hard_reset_pending)
return 0;
/* no need in L1 only invalidation in Goya */
@@ -5232,12 +5331,6 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
1000,
timeout_usec);
- if (rc) {
- dev_err_ratelimited(hdev->dev,
- "MMU cache invalidation timeout\n");
- hl_device_reset(hdev, HL_RESET_HARD);
- }
-
return rc;
}
@@ -5645,7 +5738,7 @@ static const struct hl_asic_funcs goya_funcs = {
.disable_clock_gating = goya_disable_clock_gating,
.debug_coresight = goya_debug_coresight,
.is_device_idle = goya_is_device_idle,
- .soft_reset_late_init = goya_soft_reset_late_init,
+ .non_hard_reset_late_init = goya_non_hard_reset_late_init,
.hw_queues_lock = goya_hw_queues_lock,
.hw_queues_unlock = goya_hw_queues_unlock,
.get_pci_id = goya_get_pci_id,