diff options
Diffstat (limited to 'drivers/misc/habanalabs/goya/goya.c')
-rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 251 |
1 files changed, 138 insertions, 113 deletions
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index e0ad2a269779..755e08cf2ecc 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -87,6 +87,7 @@ #define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) #define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ #define GOYA_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ +#define GOYA_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */ #define GOYA_QMAN0_FENCE_VAL 0xD169B243 @@ -354,7 +355,7 @@ static int goya_mmu_set_dram_default_page(struct hl_device *hdev); static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev); static void goya_mmu_prepare(struct hl_device *hdev, u32 asid); -int goya_get_fixed_properties(struct hl_device *hdev) +int goya_set_fixed_properties(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; int i; @@ -460,8 +461,10 @@ int goya_get_fixed_properties(struct hl_device *hdev) for (i = 0 ; i < HL_MAX_DCORES ; i++) prop->first_available_cq[i] = USHRT_MAX; - prop->fw_security_status_valid = false; + prop->fw_cpu_boot_dev_sts0_valid = false; + prop->fw_cpu_boot_dev_sts1_valid = false; prop->hard_reset_done_by_fw = false; + prop->gic_interrupts_enable = true; return 0; } @@ -531,10 +534,8 @@ static int goya_init_iatu(struct hl_device *hdev) struct hl_outbound_pci_region outbound_region; int rc; - if (hdev->asic_prop.iatu_done_by_fw) { - hdev->asic_funcs->set_dma_mask_from_fw(hdev); + if (hdev->asic_prop.iatu_done_by_fw) return 0; - } /* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */ inbound_region.mode = PCI_BAR_MATCH_MODE; @@ -586,7 +587,7 @@ static int goya_early_init(struct hl_device *hdev) u32 fw_boot_status, val; int rc; - rc = goya_get_fixed_properties(hdev); + rc = goya_set_fixed_properties(hdev); if (rc) { dev_err(hdev->dev, "Failed to get fixed properties\n"); return rc; @@ -618,7 +619,7 @@ static int goya_early_init(struct hl_device *hdev) prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID); /* If FW security is enabled at this point it means no access to ELBI */ - if (!hdev->asic_prop.fw_security_disabled) { + if (hdev->asic_prop.fw_security_enabled) { hdev->asic_prop.iatu_done_by_fw = true; goto pci_init; } @@ -642,8 +643,10 @@ pci_init: * version to determine whether we run with a security-enabled firmware */ rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS, - mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0, - GOYA_BOOT_FIT_REQ_TIMEOUT_USEC); + mmCPU_BOOT_DEV_STS0, + mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, + mmCPU_BOOT_ERR1, + GOYA_BOOT_FIT_REQ_TIMEOUT_USEC); if (rc) { if (hdev->reset_on_preboot_fail) hdev->asic_funcs->hw_fini(hdev, true); @@ -723,7 +726,15 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev) u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq; int rc; - if (hdev->asic_prop.fw_security_disabled) { + if (hdev->asic_prop.fw_security_enabled) { + rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL, + pll_freq_arr); + + if (rc) + return; + + freq = pll_freq_arr[1]; + } else { div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1); div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1); nr = RREG32(mmPSOC_PCI_PLL_NR); @@ -750,14 +761,6 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev) div_sel); freq = 0; } - } else { - rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL, - pll_freq_arr); - - if (rc) - return; - - freq = pll_freq_arr[1]; } prop->psoc_timestamp_frequency = freq; @@ -849,6 +852,39 @@ void goya_late_fini(struct hl_device *hdev) hdev->hl_chip_info->info = NULL; } +static void goya_set_pci_memory_regions(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct pci_mem_region *region; + + /* CFG */ + region = &hdev->pci_mem_region[PCI_REGION_CFG]; + region->region_base = CFG_BASE; + region->region_size = CFG_SIZE; + region->offset_in_bar = CFG_BASE - SRAM_BASE_ADDR; + region->bar_size = CFG_BAR_SIZE; + region->bar_id = SRAM_CFG_BAR_ID; + region->used = 1; + + /* SRAM */ + region = &hdev->pci_mem_region[PCI_REGION_SRAM]; + region->region_base = SRAM_BASE_ADDR; + region->region_size = SRAM_SIZE; + region->offset_in_bar = 0; + region->bar_size = CFG_BAR_SIZE; + region->bar_id = SRAM_CFG_BAR_ID; + region->used = 1; + + /* DRAM */ + region = &hdev->pci_mem_region[PCI_REGION_DRAM]; + region->region_base = DRAM_PHYS_BASE; + region->region_size = hdev->asic_prop.dram_size; + region->offset_in_bar = 0; + region->bar_size = prop->dram_pci_bar_size; + region->bar_id = DDR_BAR_ID; + region->used = 1; +} + /* * goya_sw_init - Goya software initialization code * @@ -918,6 +954,9 @@ static int goya_sw_init(struct hl_device *hdev) spin_lock_init(&goya->hw_queues_lock); hdev->supports_coresight = true; hdev->supports_soft_reset = true; + hdev->allow_external_soft_reset = true; + + goya_set_pci_memory_regions(hdev); return 0; @@ -1263,8 +1302,11 @@ int goya_init_cpu_queues(struct hl_device *hdev) } /* update FW application security bits */ - if (prop->fw_security_status_valid) - prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0); + if (prop->fw_cpu_boot_dev_sts0_valid) + prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); + + if (prop->fw_cpu_boot_dev_sts1_valid) + prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); goya->hw_cap_initialized |= HW_CAP_CPU_Q; return 0; @@ -2402,47 +2444,67 @@ static int goya_load_boot_fit_to_device(struct hl_device *hdev) return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst, 0, 0); } -/* - * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx. - * The version string should be located by that offset. - */ -static int goya_read_device_fw_version(struct hl_device *hdev, - enum hl_fw_component fwc) -{ - const char *name; - u32 ver_off; - char *dest; - - switch (fwc) { - case FW_COMP_UBOOT: - ver_off = RREG32(mmUBOOT_VER_OFFSET); - dest = hdev->asic_prop.uboot_ver; - name = "U-Boot"; - break; - case FW_COMP_PREBOOT: - ver_off = RREG32(mmPREBOOT_VER_OFFSET); - dest = hdev->asic_prop.preboot_ver; - name = "Preboot"; - break; - default: - dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc); - return -EIO; - } +static void goya_init_dynamic_firmware_loader(struct hl_device *hdev) +{ + struct dynamic_fw_load_mgr *dynamic_loader; + struct cpu_dyn_regs *dyn_regs; - ver_off &= ~((u32)SRAM_BASE_ADDR); + dynamic_loader = &hdev->fw_loader.dynamic_loader; - if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) { - memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off, - VERSION_MAX_LEN); - } else { - dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n", - name, ver_off); - strcpy(dest, "unavailable"); + /* + * here we update initial values for few specific dynamic regs (as + * before reading the first descriptor from FW those value has to be + * hard-coded) in later stages of the protocol those values will be + * updated automatically by reading the FW descriptor so data there + * will always be up-to-date + */ + dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs; + dyn_regs->kmd_msg_to_cpu = + cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU); + dyn_regs->cpu_cmd_status_to_host = + cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST); - return -EIO; - } + dynamic_loader->wait_for_bl_timeout = GOYA_WAIT_FOR_BL_TIMEOUT_USEC; +} - return 0; +static void goya_init_static_firmware_loader(struct hl_device *hdev) +{ + struct static_fw_load_mgr *static_loader; + + static_loader = &hdev->fw_loader.static_loader; + + static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; + static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; + static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU; + static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST; + static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; + static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0; + static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1; + static_loader->boot_err0_reg = mmCPU_BOOT_ERR0; + static_loader->boot_err1_reg = mmCPU_BOOT_ERR1; + static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET; + static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET; + static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR)); +} + +static void goya_init_firmware_loader(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct fw_load_mgr *fw_loader = &hdev->fw_loader; + + /* fill common fields */ + fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE; + fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE; + fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC; + fw_loader->boot_fit_timeout = GOYA_BOOT_FIT_REQ_TIMEOUT_USEC; + fw_loader->skip_bmc = false; + fw_loader->sram_bar_id = SRAM_CFG_BAR_ID; + fw_loader->dram_bar_id = DDR_BAR_ID; + + if (prop->dynamic_fw_load) + goya_init_dynamic_firmware_loader(hdev); + else + goya_init_static_firmware_loader(hdev); } static int goya_init_cpu(struct hl_device *hdev) @@ -2466,12 +2528,7 @@ static int goya_init_cpu(struct hl_device *hdev) return -EIO; } - rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS, - mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, - mmCPU_CMD_STATUS_TO_HOST, - mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0, - false, GOYA_CPU_TIMEOUT_USEC, - GOYA_BOOT_FIT_REQ_TIMEOUT_USEC); + rc = hl_fw_init_cpu(hdev); if (rc) return rc; @@ -2881,7 +2938,7 @@ void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id, *dma_handle = hdev->asic_prop.sram_base_address; - base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID]; + base = (__force void *) hdev->pcie_bar[SRAM_CFG_BAR_ID]; switch (queue_id) { case GOYA_QUEUE_ID_MME: @@ -3270,6 +3327,7 @@ already_pinned: return 0; unpin_memory: + list_del(&userptr->job_node); hl_unpin_host_memory(hdev, userptr); free_userptr: kfree(userptr); @@ -5169,54 +5227,13 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, } static int goya_mmu_invalidate_cache_range(struct hl_device *hdev, - bool is_hard, u32 asid, u64 va, u64 size) + bool is_hard, u32 flags, + u32 asid, u64 va, u64 size) { - struct goya_device *goya = hdev->asic_specific; - u32 status, timeout_usec, inv_data, pi; - int rc; - - if (!(goya->hw_cap_initialized & HW_CAP_MMU) || - hdev->hard_reset_pending) - return 0; - - /* no need in L1 only invalidation in Goya */ - if (!is_hard) - return 0; - - if (hdev->pldm) - timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC; - else - timeout_usec = MMU_CONFIG_TIMEOUT_USEC; - - /* - * TODO: currently invalidate entire L0 & L1 as in regular hard - * invalidation. Need to apply invalidation of specific cache lines with - * mask of ASID & VA & size. - * Note that L1 with be flushed entirely in any case. + /* Treat as invalidate all because there is no range invalidation + * in Goya */ - - /* L0 & L1 invalidation */ - inv_data = RREG32(mmSTLB_CACHE_INV); - /* PI is 8 bit */ - pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF; - WREG32(mmSTLB_CACHE_INV, - (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi); - - rc = hl_poll_timeout( - hdev, - mmSTLB_INV_CONSUMER_INDEX, - status, - status == pi, - 1000, - timeout_usec); - - if (rc) { - dev_err_ratelimited(hdev->dev, - "MMU cache invalidation timeout\n"); - hl_device_reset(hdev, HL_RESET_HARD); - } - - return rc; + return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); } int goya_send_heartbeat(struct hl_device *hdev) @@ -5239,7 +5256,9 @@ int goya_cpucp_info_get(struct hl_device *hdev) if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) return 0; - rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0); + rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, + mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, + mmCPU_BOOT_ERR1); if (rc) return rc; @@ -5385,6 +5404,11 @@ static int goya_get_eeprom_data(struct hl_device *hdev, void *data, return hl_fw_get_eeprom_data(hdev, data, max_size); } +static void goya_cpu_init_scrambler_dram(struct hl_device *hdev) +{ + +} + static int goya_ctx_init(struct hl_ctx *ctx) { if (ctx->asid != HL_KERNEL_ASID_ID) @@ -5565,7 +5589,6 @@ static const struct hl_asic_funcs goya_funcs = { .ctx_fini = goya_ctx_fini, .get_clk_rate = goya_get_clk_rate, .get_queue_id_for_cq = goya_get_queue_id_for_cq, - .read_device_fw_version = goya_read_device_fw_version, .load_firmware_to_device = goya_load_firmware_to_device, .load_boot_fit_to_device = goya_load_boot_fit_to_device, .get_signal_cb_size = goya_get_signal_cb_size, @@ -5584,7 +5607,9 @@ static const struct hl_asic_funcs goya_funcs = { .get_hw_block_id = goya_get_hw_block_id, .hw_block_mmap = goya_block_mmap, .enable_events_from_fw = goya_enable_events_from_fw, - .map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx + .map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx, + .init_firmware_loader = goya_init_firmware_loader, + .init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram }; /* |