diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-28 22:27:35 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-28 22:27:35 +0300 |
commit | 02e2af20f4f9f2aa0c84e9a30a35c02f0fbb7daa (patch) | |
tree | d126449a7d2ea2270627183f7cebd726fbe56a9d /drivers/misc/habanalabs/gaudi/gaudi.c | |
parent | ff61bc81b3feebcef4d0431a92e2e40e8d4fe8b3 (diff) | |
parent | 37fd83916da2e4cae03d350015c82a67b1b334c4 (diff) | |
download | linux-02e2af20f4f9f2aa0c84e9a30a35c02f0fbb7daa.tar.xz |
Merge tag 'char-misc-5.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
Pull char/misc and other driver updates from Greg KH:
"Here is the big set of char/misc and other small driver subsystem
updates for 5.18-rc1.
Included in here are merges from driver subsystems which contain:
- iio driver updates and new drivers
- fsi driver updates
- fpga driver updates
- habanalabs driver updates and support for new hardware
- soundwire driver updates and new drivers
- phy driver updates and new drivers
- coresight driver updates
- icc driver updates
Individual changes include:
- mei driver updates
- interconnect driver updates
- new PECI driver subsystem added
- vmci driver updates
- lots of tiny misc/char driver updates
All of these have been in linux-next for a while with no reported
problems"
* tag 'char-misc-5.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (556 commits)
firmware: google: Properly state IOMEM dependency
kgdbts: fix return value of __setup handler
firmware: sysfb: fix platform-device leak in error path
firmware: stratix10-svc: add missing callback parameter on RSU
arm64: dts: qcom: add non-secure domain property to fastrpc nodes
misc: fastrpc: Add dma handle implementation
misc: fastrpc: Add fdlist implementation
misc: fastrpc: Add helper function to get list and page
misc: fastrpc: Add support to secure memory map
dt-bindings: misc: add fastrpc domain vmid property
misc: fastrpc: check before loading process to the DSP
misc: fastrpc: add secure domain support
dt-bindings: misc: add property to support non-secure DSP
misc: fastrpc: Add support to get DSP capabilities
misc: fastrpc: add support for FASTRPC_IOCTL_MEM_MAP/UNMAP
misc: fastrpc: separate fastrpc device from channel context
dt-bindings: nvmem: brcm,nvram: add basic NVMEM cells
dt-bindings: nvmem: make "reg" property optional
nvmem: brcm_nvram: parse NVRAM content into NVMEM cells
nvmem: dt-bindings: Fix the error of dt-bindings check
...
Diffstat (limited to 'drivers/misc/habanalabs/gaudi/gaudi.c')
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 462 |
1 files changed, 161 insertions, 301 deletions
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 013c6da2e3ca..21c2b678ff72 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2021 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -458,7 +458,6 @@ struct ecc_info_extract_params { u64 block_address; u32 num_memories; bool derr; - bool disable_clock_gating; }; static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, @@ -614,6 +613,9 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->pmmu.page_size = PAGE_SIZE_4KB; prop->pmmu.num_hops = MMU_ARCH_5_HOPS; prop->pmmu.last_mask = LAST_MASK; + /* TODO: will be duplicated until implementing per-MMU props */ + prop->pmmu.hop_table_size = prop->mmu_hop_table_size; + prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; /* PMMU and HPMMU are the same except of page size */ memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); @@ -667,6 +669,10 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->use_get_power_for_reset_history = true; + prop->configurable_stop_on_err = true; + + prop->set_max_power_on_device_init = true; + return 0; } @@ -1636,7 +1642,7 @@ static int gaudi_late_init(struct hl_device *hdev) */ gaudi_mmu_prepare(hdev, 1); - hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST); + hl_fw_set_pll_profile(hdev); return 0; @@ -1896,7 +1902,6 @@ static int gaudi_sw_init(struct hl_device *hdev) goto free_cpu_accessible_dma_pool; spin_lock_init(&gaudi->hw_queues_lock); - mutex_init(&gaudi->clk_gate_mutex); hdev->supports_sync_stream = true; hdev->supports_coresight = true; @@ -1946,8 +1951,6 @@ static int gaudi_sw_fini(struct hl_device *hdev) dma_pool_destroy(hdev->dma_pool); - mutex_destroy(&gaudi->clk_gate_mutex); - kfree(gaudi); return 0; @@ -3738,76 +3741,8 @@ static void gaudi_tpc_stall(struct hl_device *hdev) WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); } -static void gaudi_set_clock_gating(struct hl_device *hdev) -{ - struct gaudi_device *gaudi = hdev->asic_specific; - u32 qman_offset; - bool enable; - int i; - - /* In case we are during debug session, don't enable the clock gate - * as it may interfere - */ - if (hdev->in_debug) - return; - - if (hdev->asic_prop.fw_security_enabled) - return; - - for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) { - enable = !!(hdev->clock_gating_mask & - (BIT_ULL(gaudi_dma_assignment[i]))); - - qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; - WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, - enable ? QMAN_CGM1_PWR_GATE_EN : 0); - WREG32(mmDMA0_QM_CGM_CFG + qman_offset, - enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0); - } - - for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) { - enable = !!(hdev->clock_gating_mask & - (BIT_ULL(gaudi_dma_assignment[i]))); - - /* GC sends work to DMA engine through Upper CP in DMA5 so - * we need to not enable clock gating in that DMA - */ - if (i == GAUDI_HBM_DMA_4) - enable = 0; - - qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; - WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, - enable ? QMAN_CGM1_PWR_GATE_EN : 0); - WREG32(mmDMA0_QM_CGM_CFG + qman_offset, - enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0); - } - - enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0))); - WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0); - WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0); - - enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2))); - WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0); - WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0); - - for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { - enable = !!(hdev->clock_gating_mask & - (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i))); - - WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, - enable ? QMAN_CGM1_PWR_GATE_EN : 0); - WREG32(mmTPC0_QM_CGM_CFG + qman_offset, - enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0); - - qman_offset += TPC_QMAN_OFFSET; - } - - gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE; -} - static void gaudi_disable_clock_gating(struct hl_device *hdev) { - struct gaudi_device *gaudi = hdev->asic_specific; u32 qman_offset; int i; @@ -3832,8 +3767,6 @@ static void gaudi_disable_clock_gating(struct hl_device *hdev) qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG); } - - gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE); } static void gaudi_enable_timestamp(struct hl_device *hdev) @@ -3876,8 +3809,6 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_ gaudi_stop_hbm_dma_qmans(hdev); gaudi_stop_pci_dma_qmans(hdev); - hdev->asic_funcs->disable_clock_gating(hdev); - msleep(wait_timeout_ms); gaudi_pci_dma_stall(hdev); @@ -3931,7 +3862,7 @@ static int gaudi_mmu_init(struct hl_device *hdev) /* mem cache invalidation */ WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0); + hl_mmu_invalidate_cache(hdev, true, 0); WREG32(mmMMU_UP_MMU_ENABLE, 1); WREG32(mmMMU_UP_SPI_MASK, 0xF); @@ -4203,10 +4134,8 @@ static int gaudi_hw_init(struct hl_device *hdev) /* In case the clock gating was enabled in preboot we need to disable * it here before touching the MME/TPC registers. - * There is no need to take clk gating mutex because when this function - * runs, no other relevant code can run */ - hdev->asic_funcs->disable_clock_gating(hdev); + gaudi_disable_clock_gating(hdev); /* SRAM scrambler must be initialized after CPU is running from HBM */ gaudi_init_scrambler_sram(hdev); @@ -4232,8 +4161,6 @@ static int gaudi_hw_init(struct hl_device *hdev) gaudi_init_nic_qmans(hdev); - hdev->asic_funcs->set_clock_gating(hdev); - gaudi_enable_timestamp(hdev); /* MSI must be enabled before CPU queues and NIC are initialized */ @@ -4400,14 +4327,11 @@ skip_reset: status); if (gaudi) { - gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | - HW_CAP_HBM | HW_CAP_PCI_DMA | - HW_CAP_MME | HW_CAP_TPC_MASK | - HW_CAP_HBM_DMA | HW_CAP_PLL | - HW_CAP_NIC_MASK | HW_CAP_MMU | - HW_CAP_SRAM_SCRAMBLER | - HW_CAP_HBM_SCRAMBLER | - HW_CAP_CLK_GATE); + gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM | + HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK | + HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK | + HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | + HW_CAP_HBM_SCRAMBLER); memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); @@ -4884,7 +4808,6 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev) static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size) { struct asic_fixed_properties *prop = &hdev->asic_prop; - struct gaudi_device *gaudi = hdev->asic_specific; int rc = 0; u64 val = 0; @@ -4919,17 +4842,11 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size) return rc; } - mutex_lock(&gaudi->clk_gate_mutex); - hdev->asic_funcs->disable_clock_gating(hdev); - /* Scrub HBM using all DMA channels in parallel */ rc = gaudi_hbm_scrubbing(hdev); if (rc) dev_err(hdev->dev, "Failed to clear HBM in mem scrub all\n"); - - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); } return rc; @@ -6188,7 +6105,6 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, bool user_address, u32 *val) { struct asic_fixed_properties *prop = &hdev->asic_prop; - struct gaudi_device *gaudi = hdev->asic_specific; u64 hbm_bar_addr, host_phys_end; int rc = 0; @@ -6196,38 +6112,31 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && - (hdev->clock_gating_mask & - GAUDI_CLK_GATE_DEBUGFS_MASK)) { + *val = RREG32(addr - CFG_BASE); - dev_err_ratelimited(hdev->dev, - "Can't read register - clock gating is enabled!\n"); - rc = -EFAULT; - } else { - *val = RREG32(addr - CFG_BASE); - } + } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { + + *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR)); - } else if ((addr >= SRAM_BASE_ADDR) && - (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { - *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + - (addr - SRAM_BASE_ADDR)); } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { - u64 bar_base_addr = DRAM_PHYS_BASE + - (addr & ~(prop->dram_pci_bar_size - 0x1ull)); + + u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); - if (hbm_bar_addr != U64_MAX) { - *val = readl(hdev->pcie_bar[HBM_BAR_ID] + - (addr - bar_base_addr)); - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, - hbm_bar_addr); + if (hbm_bar_addr != U64_MAX) { + *val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr)); + hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr); } + if (hbm_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && user_address && !iommu_present(&pci_bus_type)) { + *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE); + } else { rc = -EFAULT; } @@ -6239,7 +6148,6 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, bool user_address, u32 val) { struct asic_fixed_properties *prop = &hdev->asic_prop; - struct gaudi_device *gaudi = hdev->asic_specific; u64 hbm_bar_addr, host_phys_end; int rc = 0; @@ -6247,38 +6155,31 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && - (hdev->clock_gating_mask & - GAUDI_CLK_GATE_DEBUGFS_MASK)) { + WREG32(addr - CFG_BASE, val); - dev_err_ratelimited(hdev->dev, - "Can't write register - clock gating is enabled!\n"); - rc = -EFAULT; - } else { - WREG32(addr - CFG_BASE, val); - } + } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { + + writel(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR)); - } else if ((addr >= SRAM_BASE_ADDR) && - (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { - writel(val, hdev->pcie_bar[SRAM_BAR_ID] + - (addr - SRAM_BASE_ADDR)); } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { - u64 bar_base_addr = DRAM_PHYS_BASE + - (addr & ~(prop->dram_pci_bar_size - 0x1ull)); + + u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); - if (hbm_bar_addr != U64_MAX) { - writel(val, hdev->pcie_bar[HBM_BAR_ID] + - (addr - bar_base_addr)); - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, - hbm_bar_addr); + if (hbm_bar_addr != U64_MAX) { + writel(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr)); + hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr); } + if (hbm_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && user_address && !iommu_present(&pci_bus_type)) { + *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; + } else { rc = -EFAULT; } @@ -6290,7 +6191,6 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, bool user_address, u64 *val) { struct asic_fixed_properties *prop = &hdev->asic_prop; - struct gaudi_device *gaudi = hdev->asic_specific; u64 hbm_bar_addr, host_phys_end; int rc = 0; @@ -6298,42 +6198,35 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && - (hdev->clock_gating_mask & - GAUDI_CLK_GATE_DEBUGFS_MASK)) { - - dev_err_ratelimited(hdev->dev, - "Can't read register - clock gating is enabled!\n"); - rc = -EFAULT; - } else { - u32 val_l = RREG32(addr - CFG_BASE); - u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE); + u32 val_l = RREG32(addr - CFG_BASE); + u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE); - *val = (((u64) val_h) << 32) | val_l; - } + *val = (((u64) val_h) << 32) | val_l; } else if ((addr >= SRAM_BASE_ADDR) && - (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { - *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + - (addr - SRAM_BASE_ADDR)); - } else if (addr <= - DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { - u64 bar_base_addr = DRAM_PHYS_BASE + - (addr & ~(prop->dram_pci_bar_size - 0x1ull)); + (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { + + *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR)); + + } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { + + u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); - if (hbm_bar_addr != U64_MAX) { - *val = readq(hdev->pcie_bar[HBM_BAR_ID] + - (addr - bar_base_addr)); - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, - hbm_bar_addr); + if (hbm_bar_addr != U64_MAX) { + *val = readq(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr)); + hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr); } + if (hbm_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && user_address && !iommu_present(&pci_bus_type)) { + *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE); + } else { rc = -EFAULT; } @@ -6345,7 +6238,6 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, bool user_address, u64 val) { struct asic_fixed_properties *prop = &hdev->asic_prop; - struct gaudi_device *gaudi = hdev->asic_specific; u64 hbm_bar_addr, host_phys_end; int rc = 0; @@ -6353,41 +6245,33 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && - (hdev->clock_gating_mask & - GAUDI_CLK_GATE_DEBUGFS_MASK)) { - - dev_err_ratelimited(hdev->dev, - "Can't write register - clock gating is enabled!\n"); - rc = -EFAULT; - } else { - WREG32(addr - CFG_BASE, lower_32_bits(val)); - WREG32(addr + sizeof(u32) - CFG_BASE, - upper_32_bits(val)); - } + WREG32(addr - CFG_BASE, lower_32_bits(val)); + WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val)); } else if ((addr >= SRAM_BASE_ADDR) && - (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { - writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + - (addr - SRAM_BASE_ADDR)); - } else if (addr <= - DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { - u64 bar_base_addr = DRAM_PHYS_BASE + - (addr & ~(prop->dram_pci_bar_size - 0x1ull)); + (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { + + writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR)); + + } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { + + u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); - if (hbm_bar_addr != U64_MAX) { - writeq(val, hdev->pcie_bar[HBM_BAR_ID] + - (addr - bar_base_addr)); - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, - hbm_bar_addr); + if (hbm_bar_addr != U64_MAX) { + writeq(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr)); + hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr); } + if (hbm_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && user_address && !iommu_present(&pci_bus_type)) { + *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; + } else { rc = -EFAULT; } @@ -6446,7 +6330,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr) { u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; - struct gaudi_device *gaudi = hdev->asic_specific; u32 qm_glbl_sts0, qm_cgm_sts; u64 dma_offset, qm_offset; dma_addr_t dma_addr; @@ -6462,10 +6345,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, if (!kernel_addr) return -ENOMEM; - mutex_lock(&gaudi->clk_gate_mutex); - - hdev->asic_funcs->disable_clock_gating(hdev); - hdev->asic_funcs->hw_queues_lock(hdev); dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; @@ -6550,10 +6429,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, out: hdev->asic_funcs->hw_queues_unlock(hdev); - hdev->asic_funcs->set_clock_gating(hdev); - - mutex_unlock(&gaudi->clk_gate_mutex); - hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr); @@ -6601,10 +6476,6 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) return; } - mutex_lock(&gaudi->clk_gate_mutex); - - hdev->asic_funcs->disable_clock_gating(hdev); - gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid); gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid); gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid); @@ -6882,10 +6753,6 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); - - hdev->asic_funcs->set_clock_gating(hdev); - - mutex_unlock(&gaudi->clk_gate_mutex); } static int gaudi_send_job_on_qman0(struct hl_device *hdev, @@ -7266,10 +7133,8 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev, struct ecc_info_extract_params *params, u64 *ecc_address, u64 *ecc_syndrom, u8 *memory_wrapper_idx) { - struct gaudi_device *gaudi = hdev->asic_specific; u32 i, num_mem_regs, reg, err_bit; u64 err_addr, err_word = 0; - int rc = 0; num_mem_regs = params->num_memories / 32 + ((params->num_memories % 32) ? 1 : 0); @@ -7282,11 +7147,6 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev, else err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET; - if (params->disable_clock_gating) { - mutex_lock(&gaudi->clk_gate_mutex); - hdev->asic_funcs->disable_clock_gating(hdev); - } - /* Set invalid wrapper index */ *memory_wrapper_idx = 0xFF; @@ -7303,8 +7163,7 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev, if (*memory_wrapper_idx == 0xFF) { dev_err(hdev->dev, "ECC error information cannot be found\n"); - rc = -EINVAL; - goto enable_clk_gate; + return -EINVAL; } WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET, @@ -7324,14 +7183,7 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev, WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg); -enable_clk_gate: - if (params->disable_clock_gating) { - hdev->asic_funcs->set_clock_gating(hdev); - - mutex_unlock(&gaudi->clk_gate_mutex); - } - - return rc; + return 0; } /* @@ -7589,7 +7441,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; params.num_memories = 90; params.derr = false; - params.disable_clock_gating = true; extract_info_from_fw = false; break; case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR: @@ -7598,7 +7449,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; params.num_memories = 90; params.derr = true; - params.disable_clock_gating = true; extract_info_from_fw = false; break; case GAUDI_EVENT_MME0_ACC_SERR: @@ -7609,7 +7459,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; params.num_memories = 128; params.derr = false; - params.disable_clock_gating = true; extract_info_from_fw = false; break; case GAUDI_EVENT_MME0_ACC_DERR: @@ -7620,7 +7469,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; params.num_memories = 128; params.derr = true; - params.disable_clock_gating = true; extract_info_from_fw = false; break; case GAUDI_EVENT_MME0_SBAB_SERR: @@ -7632,7 +7480,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; params.num_memories = 33; params.derr = false; - params.disable_clock_gating = true; extract_info_from_fw = false; break; case GAUDI_EVENT_MME0_SBAB_DERR: @@ -7644,7 +7491,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; params.num_memories = 33; params.derr = true; - params.disable_clock_gating = true; extract_info_from_fw = false; break; default: @@ -7819,6 +7665,48 @@ static void gaudi_print_fw_alive_info(struct hl_device *hdev, fw_alive->thread_id, fw_alive->uptime_seconds); } +static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, + void *data) +{ + char desc[64] = "", *type; + struct eq_nic_sei_event *eq_nic_sei = data; + u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0; + + switch (eq_nic_sei->axi_error_cause) { + case RXB: + type = "RXB"; + break; + case RXE: + type = "RXE"; + break; + case TXS: + type = "TXS"; + break; + case TXE: + type = "TXE"; + break; + case QPC_RESP: + type = "QPC_RESP"; + break; + case NON_AXI_ERR: + type = "NON_AXI_ERR"; + break; + case TMR: + type = "TMR"; + break; + default: + dev_err(hdev->dev, "unknown NIC AXI cause %d\n", + eq_nic_sei->axi_error_cause); + type = "N/A"; + break; + } + + snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type, + eq_nic_sei->id); + dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", + event_type, desc); +} + static int gaudi_non_hard_reset_late_init(struct hl_device *hdev) { /* GAUDI doesn't support any reset except hard-reset */ @@ -7966,19 +7854,9 @@ static int gaudi_hbm_event_to_dev(u16 hbm_event_type) static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, char *interrupt_name) { - struct gaudi_device *gaudi = hdev->asic_specific; u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i; bool soft_reset_required = false; - /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock - * gating, and thus cannot be done in CPU-CP and should be done instead - * by the driver. - */ - - mutex_lock(&gaudi->clk_gate_mutex); - - hdev->asic_funcs->disable_clock_gating(hdev); - tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) & TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK; @@ -7996,10 +7874,6 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, /* Clear interrupts */ WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); - hdev->asic_funcs->set_clock_gating(hdev); - - mutex_unlock(&gaudi->clk_gate_mutex); - return soft_reset_required; } @@ -8066,6 +7940,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) { struct gaudi_device *gaudi = hdev->asic_specific; + u64 data = le64_to_cpu(eq_entry->data[0]); u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); u32 fw_fatal_err_flag = 0; u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) @@ -8102,6 +7977,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case GAUDI_EVENT_PSOC_MEM_DERR: case GAUDI_EVENT_PSOC_CORESIGHT_DERR: case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: + case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR: case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: case GAUDI_EVENT_MMU_DERR: @@ -8202,6 +8078,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case GAUDI_EVENT_PSOC_MEM_SERR: case GAUDI_EVENT_PSOC_CORESIGHT_SERR: case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: + case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR: case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: fallthrough; @@ -8263,6 +8140,11 @@ static void gaudi_handle_eqe(struct hl_device *hdev, hl_fw_unmask_irq(hdev, event_type); break; + case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4: + gaudi_print_nic_axi_irq_info(hdev, event_type, &data); + hl_fw_unmask_irq(hdev, event_type); + break; + case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: gaudi_print_irq_info(hdev, event_type, false); gaudi_print_sm_sei_info(hdev, event_type, @@ -8274,6 +8156,9 @@ static void gaudi_handle_eqe(struct hl_device *hdev, hl_fw_unmask_irq(hdev, event_type); break; + case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1: + break; + case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: gaudi_print_clk_change_info(hdev, event_type); hl_fw_unmask_irq(hdev, event_type); @@ -8314,7 +8199,7 @@ reset_device: | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag); else if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, HL_DRV_RESET_HARD | fw_fatal_err_flag); + hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag); else hl_fw_unmask_irq(hdev, event_type); } @@ -8461,10 +8346,6 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u64 offset; int i, dma_id, port; - mutex_lock(&gaudi->clk_gate_mutex); - - hdev->asic_funcs->disable_clock_gating(hdev); - if (s) seq_puts(s, "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" @@ -8585,10 +8466,6 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (s) seq_puts(s, "\n"); - hdev->asic_funcs->set_clock_gating(hdev); - - mutex_unlock(&gaudi->clk_gate_mutex); - return is_idle; } @@ -8628,10 +8505,8 @@ static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data, * this function should be used only during initialization and/or after reset, * when there are no active users. */ -static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, - u32 tpc_id) +static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id) { - struct gaudi_device *gaudi = hdev->asic_specific; u64 kernel_timeout; u32 status, offset; int rc; @@ -8643,10 +8518,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, else kernel_timeout = HL_DEVICE_TIMEOUT_USEC; - mutex_lock(&gaudi->clk_gate_mutex); - - hdev->asic_funcs->disable_clock_gating(hdev); - WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset, lower_32_bits(tpc_kernel)); WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset, @@ -8686,8 +8557,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, dev_err(hdev->dev, "Timeout while waiting for TPC%d icache prefetch\n", tpc_id); - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); return -EIO; } @@ -8711,8 +8580,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, dev_err(hdev->dev, "Timeout while waiting for TPC%d vector pipe\n", tpc_id); - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); return -EIO; } @@ -8724,9 +8591,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, 1000, kernel_timeout); - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); - if (rc) { dev_err(hdev->dev, "Timeout while waiting for TPC%d kernel to execute\n", @@ -8791,7 +8655,7 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev, hdev->internal_cb_pool_dma_addr, HOST_SPACE_INTERNAL_CB_SZ); - hdev->asic_funcs->mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); + hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); mutex_unlock(&ctx->mmu_lock); if (rc) @@ -8826,7 +8690,7 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, HOST_SPACE_INTERNAL_CB_SZ); hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); + hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); mutex_unlock(&ctx->mmu_lock); gen_pool_destroy(hdev->internal_cb_pool); @@ -9204,14 +9068,7 @@ static void gaudi_reset_sob(struct hl_device *hdev, void *data) static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev) { - if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) == - HL_POWER9_HOST_MAGIC) { - hdev->power9_64bit_dma_enable = 1; - hdev->dma_mask = 64; - } else { - hdev->power9_64bit_dma_enable = 0; - hdev->dma_mask = 48; - } + hdev->dma_mask = 48; } static u64 gaudi_get_device_time(struct hl_device *hdev) @@ -9293,23 +9150,15 @@ static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map) { struct hl_state_dump_specs *sds = &hdev->state_dump_specs; - struct gaudi_device *gaudi = hdev->asic_specific; int i, j, rc; u32 reg_value; /* Iterate over TPC engines */ for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) { - /* TPC registered must be accessed with clock gating disabled */ - mutex_lock(&gaudi->clk_gate_mutex); - hdev->asic_funcs->disable_clock_gating(hdev); reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] + sds->props[SP_NEXT_TPC] * i); - /* We can reenable clock_gating */ - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); - rc = gaudi_add_sync_to_engine_map_entry(map, reg_value, ENGINE_TPC, i); if (rc) @@ -9319,20 +9168,11 @@ static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev, /* Iterate over MME engines */ for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) { for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) { - /* MME registered must be accessed with clock gating - * disabled - */ - mutex_lock(&gaudi->clk_gate_mutex); - hdev->asic_funcs->disable_clock_gating(hdev); reg_value = RREG32(sds->props[SP_MME_CFG_SO] + sds->props[SP_NEXT_MME] * i + j * sizeof(u32)); - /* We can reenable clock_gating */ - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); - rc = gaudi_add_sync_to_engine_map_entry( map, reg_value, ENGINE_MME, i * sds->props[SP_SUB_MME_ENG_NUM] + j); @@ -9537,6 +9377,29 @@ static u32 *gaudi_get_stream_master_qid_arr(void) return gaudi_stream_master; } +static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + struct cpucp_info *cpucp_info; + + cpucp_info = &hdev->asic_prop.cpucp_info; + + return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version)); +} + +static DEVICE_ATTR_RO(infineon_ver); + +static struct attribute *gaudi_vrm_dev_attrs[] = { + &dev_attr_infineon_ver.attr, +}; + +static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp, + struct attribute_group *dev_vrm_attr_grp) +{ + hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp); + dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs; +} + static const struct hl_asic_funcs gaudi_funcs = { .early_init = gaudi_early_init, .early_fini = gaudi_early_fini, @@ -9574,17 +9437,14 @@ static const struct hl_asic_funcs gaudi_funcs = { .debugfs_read64 = gaudi_debugfs_read64, .debugfs_write64 = gaudi_debugfs_write64, .debugfs_read_dma = gaudi_debugfs_read_dma, - .add_device_attr = hl_add_device_attr, + .add_device_attr = gaudi_add_device_attr, .handle_eqe = gaudi_handle_eqe, - .set_pll_profile = hl_set_pll_profile, .get_events_stat = gaudi_get_events_stat, .read_pte = gaudi_read_pte, .write_pte = gaudi_write_pte, .mmu_invalidate_cache = gaudi_mmu_invalidate_cache, .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range, .send_heartbeat = gaudi_send_heartbeat, - .set_clock_gating = gaudi_set_clock_gating, - .disable_clock_gating = gaudi_disable_clock_gating, .debug_coresight = gaudi_debug_coresight, .is_device_idle = gaudi_is_device_idle, .non_hard_reset_late_init = gaudi_non_hard_reset_late_init, @@ -9600,7 +9460,6 @@ static const struct hl_asic_funcs gaudi_funcs = { .halt_coresight = gaudi_halt_coresight, .ctx_init = gaudi_ctx_init, .ctx_fini = gaudi_ctx_fini, - .get_clk_rate = hl_get_clk_rate, .get_queue_id_for_cq = gaudi_get_queue_id_for_cq, .load_firmware_to_device = gaudi_load_firmware_to_device, .load_boot_fit_to_device = gaudi_load_boot_fit_to_device, @@ -9626,7 +9485,8 @@ static const struct hl_asic_funcs gaudi_funcs = { .state_dump_init = gaudi_state_dump_init, .get_sob_addr = gaudi_get_sob_addr, .set_pci_memory_regions = gaudi_set_pci_memory_regions, - .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr + .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr, + .is_valid_dram_page_size = NULL }; /** |