From b19dc67aa8c008661b10b1c2372a29f403bf5dac Mon Sep 17 00:00:00 2001 From: Moti Haimovski Date: Wed, 18 Nov 2020 20:15:29 +0200 Subject: habanalabs: support non power-of-2 DRAM phys page sizes DRAM physical page sizes depend of the amount of HBMs available in the device. this number is device-dependent and may also be subject to binning when one or more of the DRAM controllers are found to to be faulty. Such a configuration may lead to partitioning the DRAM to non-power-of-2 pages. To support this feature we also need to add infrastructure of address scarmbling. Signed-off-by: Moti Haimovski Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/debugfs.c | 6 +- drivers/misc/habanalabs/common/habanalabs.h | 8 ++ drivers/misc/habanalabs/common/memory.c | 176 +++++++++++++++++++++++----- drivers/misc/habanalabs/common/mmu.c | 61 ++++++++-- drivers/misc/habanalabs/gaudi/gaudi.c | 3 +- drivers/misc/habanalabs/goya/goya.c | 3 +- 6 files changed, 213 insertions(+), 44 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index cef716643979..50ca8eea6648 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -333,8 +333,10 @@ static int mmu_show(struct seq_file *s, void *data) return 0; } - seq_printf(s, "asid: %u, virt_addr: 0x%llx\n", - dev_entry->mmu_asid, dev_entry->mmu_addr); + seq_printf(s, + "asid: %u, virt_addr: 0x%llx, scrambled virt_addr: 0x%llx\n", + dev_entry->mmu_asid, dev_entry->mmu_addr, + hops_info.scrambled_vaddr); for (i = 0 ; i < hops_info.used_hops ; i++) { seq_printf(s, "hop%d_addr: 0x%llx\n", diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index f083487ca563..5da26e81a251 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -848,6 +848,8 @@ enum div_select_defs { * @collective_wait_init_cs: Generate collective master/slave packets * and place them in the relevant cs jobs * @collective_wait_create_jobs: allocate collective wait cs jobs + * @scramble_vaddr: Routine to scramble the virtual address prior of mapping it + * in the MMU. */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -957,6 +959,7 @@ struct hl_asic_funcs { int (*collective_wait_create_jobs)(struct hl_device *hdev, struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id, u32 collective_engine_id); + u64 (*scramble_vaddr)(struct hl_device *hdev, u64 virt_addr); }; @@ -1690,10 +1693,14 @@ struct hl_mmu_per_hop_info { * struct hl_mmu_hop_info - A structure describing the TLB hops and their * hop-entries that were created in order to translate a virtual address to a * physical one. + * @scrambled_vaddr: The value of the virtual address after scrambling. This + * address replaces the original virtual-address when mapped + * in the MMU tables. * @hop_info: Array holding the per-hop information used for the translation. * @used_hops: The number of hops used for the translation. */ struct hl_mmu_hop_info { + u64 scrambled_vaddr; struct hl_mmu_per_hop_info hop_info[MMU_ARCH_5_HOPS]; u32 used_hops; }; @@ -2184,6 +2191,7 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu); int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr); int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops); +u64 hl_mmu_scramble_vaddr(struct hl_device *hdev, u64 virt_addr); bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr); int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 207fda36cac9..ada977eb136c 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -14,6 +14,9 @@ #define HL_MMU_DEBUG 0 +/* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */ +#define DRAM_POOL_PAGE_SIZE SZ_8M + /* * The va ranges in context object contain a list with the available chunks of * device virtual memory. @@ -54,15 +57,14 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, struct hl_vm *vm = &hdev->vm; struct hl_vm_phys_pg_pack *phys_pg_pack; u64 paddr = 0, total_size, num_pgs, i; - u32 num_curr_pgs, page_size, page_shift; + u32 num_curr_pgs, page_size; int handle, rc; bool contiguous; num_curr_pgs = 0; page_size = hdev->asic_prop.dram_page_size; - page_shift = __ffs(page_size); - num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift; - total_size = num_pgs << page_shift; + num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size); + total_size = num_pgs * page_size; if (!total_size) { dev_err(hdev->dev, "Cannot allocate 0 bytes\n"); @@ -518,7 +520,8 @@ static inline int add_va_block(struct hl_device *hdev, } /** - * get_va_block() - get a virtual block for the given size and alignment. + * get_va_block_pow2() - get a virtual block for the given size and alignment + * where alignment is a power of 2. * @hdev: pointer to the habanalabs device structure. * @va_range: pointer to the virtual addresses range. * @size: requested block size. @@ -531,12 +534,13 @@ static inline int add_va_block(struct hl_device *hdev, * - Reserve the requested block and update the list. * - Return the start address of the virtual block. */ -static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range, - u64 size, u64 hint_addr, u32 va_block_align) +static u64 get_va_block_pow2(struct hl_device *hdev, + struct hl_va_range *va_range, + u64 size, u64 hint_addr, u32 va_block_align) { struct hl_vm_va_block *va_block, *new_va_block = NULL; u64 valid_start, valid_size, prev_start, prev_end, align_mask, - res_valid_start = 0, res_valid_size = 0; + reserved_valid_start = 0, reserved_valid_size = 0; bool add_prev = false; align_mask = ~((u64)va_block_align - 1); @@ -562,34 +566,34 @@ static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range, valid_size = va_block->end - valid_start; - if (valid_size >= size && - (!new_va_block || valid_size < res_valid_size)) { + if (valid_size >= size && (!new_va_block || + valid_size < reserved_valid_size)) { new_va_block = va_block; - res_valid_start = valid_start; - res_valid_size = valid_size; + reserved_valid_start = valid_start; + reserved_valid_size = valid_size; } if (hint_addr && hint_addr >= valid_start && - ((hint_addr + size) <= va_block->end)) { + (hint_addr + size) <= va_block->end) { new_va_block = va_block; - res_valid_start = hint_addr; - res_valid_size = valid_size; + reserved_valid_start = hint_addr; + reserved_valid_size = valid_size; break; } } if (!new_va_block) { dev_err(hdev->dev, "no available va block for size %llu\n", - size); + size); goto out; } - if (res_valid_start > new_va_block->start) { + if (reserved_valid_start > new_va_block->start) { prev_start = new_va_block->start; - prev_end = res_valid_start - 1; + prev_end = reserved_valid_start - 1; - new_va_block->start = res_valid_start; - new_va_block->size = res_valid_size; + new_va_block->start = reserved_valid_start; + new_va_block->size = reserved_valid_size; add_prev = true; } @@ -610,10 +614,98 @@ static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range, out: mutex_unlock(&va_range->lock); - return res_valid_start; + return reserved_valid_start; } /** + * get_va_block_non_pow2() - get a virtual block for the given size and + * alignment where alignment is not a power of 2. + * @hdev: pointer to the habanalabs device structure. + * @va_range: pointer to the virtual addresses range. + * @size: requested block size. + * @hint_addr: hint for requested address by the user. + * @va_block_align: required alignment of the virtual block start address. + * + * This function does the following: + * - Iterate on the virtual block list to find a suitable virtual block for the + * given size and alignment. + * - Reserve the requested block and update the list. + * - Return the start address of the virtual block. + */ +static u64 get_va_block_non_pow2(struct hl_device *hdev, + struct hl_va_range *va_range, + u64 size, u64 hint_addr, u32 va_block_align) +{ + struct hl_vm_va_block *va_block, *new_va_block = NULL; + u64 reserved_valid_start = 0; + + /* + * with non-power-of-2 range we work only with page granularity and the + * start address is page aligned, so no need for alignment checking. + */ + size = DIV_ROUND_UP_ULL(size, va_range->page_size) * + va_range->page_size; + + mutex_lock(&va_range->lock); + + print_va_list_locked(hdev, &va_range->list); + + list_for_each_entry(va_block, &va_range->list, node) { + if ((va_block->start + size) > va_block->end) + continue; + + new_va_block = va_block; + reserved_valid_start = va_block->start; + break; + } + + if (!new_va_block) { + dev_err(hdev->dev, "no available va block for size %llu\n", + size); + goto out; + } + + if (new_va_block->size > size) { + new_va_block->start += size; + new_va_block->size = new_va_block->end - new_va_block->start; + } else { + list_del(&new_va_block->node); + kfree(new_va_block); + } + + print_va_list_locked(hdev, &va_range->list); +out: + mutex_unlock(&va_range->lock); + + return reserved_valid_start; +} + +/* + * get_va_block() - get a virtual block for the given size and alignment. + * @hdev: pointer to the habanalabs device structure. + * @va_range: pointer to the virtual addresses range. + * @size: requested block size. + * @hint_addr: hint for requested address by the user. + * @va_block_align: required alignment of the virtual block start address. + * + * This function does the following: + * - Iterate on the virtual block list to find a suitable virtual block for the + * given size and alignment. + * - Reserve the requested block and update the list. + * - Return the start address of the virtual block. + */ +static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range, + u64 size, u64 hint_addr, u32 va_block_align) +{ + if (is_power_of_2(va_range->page_size)) + return get_va_block_pow2(hdev, va_range, + size, hint_addr, va_block_align); + else + return get_va_block_non_pow2(hdev, va_range, + size, hint_addr, va_block_align); +} + +/* * hl_reserve_va_block() - reserve a virtual block of a given size. * @hdev: pointer to the habanalabs device structure. * @ctx: current context @@ -1024,7 +1116,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, hint_addr = args->map_device.hint_addr; - /* DRAM VA alignment is the same as the DRAM page size */ + /* DRAM VA alignment is the same as the MMU page size */ va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM]; va_block_align = hdev->asic_prop.dmmu.page_size; } @@ -1129,6 +1221,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, bool ctx_free) { struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; struct hl_vm_hash_node *hnode = NULL; struct hl_userptr *userptr = NULL; @@ -1192,7 +1285,13 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, goto mapping_cnt_err; } - vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); + if (!is_userptr && !is_power_of_2(phys_pg_pack->page_size)) + vaddr = prop->dram_base_address + + DIV_ROUND_DOWN_ULL(vaddr - prop->dram_base_address, + phys_pg_pack->page_size) * + phys_pg_pack->page_size; + else + vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); mutex_lock(&ctx->mmu_lock); @@ -1637,16 +1736,22 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range, INIT_LIST_HEAD(&va_range->list); - /* PAGE_SIZE alignment */ + /* + * PAGE_SIZE alignment + * it is the callers responsibility to align the addresses if the + * page size is not a power of 2 + */ + + if (is_power_of_2(page_size)) { + if (start & (PAGE_SIZE - 1)) { + start &= PAGE_MASK; + start += PAGE_SIZE; + } - if (start & (PAGE_SIZE - 1)) { - start &= PAGE_MASK; - start += PAGE_SIZE; + if (end & (PAGE_SIZE - 1)) + end &= PAGE_MASK; } - if (end & (PAGE_SIZE - 1)) - end &= PAGE_MASK; - if (start >= end) { dev_err(hdev->dev, "too small vm range for va list\n"); return -EFAULT; @@ -1820,7 +1925,8 @@ int hl_vm_ctx_init(struct hl_ctx *ctx) dram_range_start = prop->dmmu.start_addr; dram_range_end = prop->dmmu.end_addr; - dram_page_size = prop->dmmu.page_size; + dram_page_size = prop->dram_page_size ? + prop->dram_page_size : prop->dmmu.page_size; host_range_start = prop->pmmu.start_addr; host_range_end = prop->pmmu.end_addr; host_page_size = prop->pmmu.page_size; @@ -1938,7 +2044,13 @@ int hl_vm_init(struct hl_device *hdev) struct hl_vm *vm = &hdev->vm; int rc; - vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1); + if (is_power_of_2(prop->dram_page_size)) + vm->dram_pg_pool = + gen_pool_create(__ffs(prop->dram_page_size), -1); + else + vm->dram_pg_pool = + gen_pool_create(__ffs(DRAM_POOL_PAGE_SIZE), -1); + if (!vm->dram_pg_pool) { dev_err(hdev->dev, "Failed to create dram page pool\n"); return -ENOMEM; diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c index 62cfa4190fe4..38234c243b21 100644 --- a/drivers/misc/habanalabs/common/mmu.c +++ b/drivers/misc/habanalabs/common/mmu.c @@ -166,7 +166,6 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, mmu_prop = &prop->pmmu; pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT; - /* * The H/W handles mapping of specific page sizes. Hence if the page * size is bigger, we break it to sub-pages and unmap them separately. @@ -174,11 +173,21 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, if ((page_size % mmu_prop->page_size) == 0) { real_page_size = mmu_prop->page_size; } else { - dev_err(hdev->dev, - "page size of %u is not %uKB aligned, can't unmap\n", - page_size, mmu_prop->page_size >> 10); + /* + * MMU page size may differ from DRAM page size. + * In such case work with the DRAM page size and let the MMU + * scrambling routine to handle this mismatch when + * calculating the address to remove from the MMU page table + */ + if (is_dram_addr && ((page_size % prop->dram_page_size) == 0)) { + real_page_size = prop->dram_page_size; + } else { + dev_err(hdev->dev, + "page size of %u is not %uKB aligned, can't unmap\n", + page_size, mmu_prop->page_size >> 10); - return -EFAULT; + return -EFAULT; + } } npages = page_size / real_page_size; @@ -253,6 +262,17 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, */ if ((page_size % mmu_prop->page_size) == 0) { real_page_size = mmu_prop->page_size; + } else if (is_dram_addr && ((page_size % prop->dram_page_size) == 0) && + (prop->dram_page_size < mmu_prop->page_size)) { + /* + * MMU page size may differ from DRAM page size. + * In such case work with the DRAM page size and let the MMU + * scrambling routine handle this mismatch when calculating + * the address to place in the MMU page table. (in that case + * also make sure that the dram_page_size smaller than the + * mmu page size) + */ + real_page_size = prop->dram_page_size; } else { dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n", @@ -261,10 +281,21 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, return -EFAULT; } - if (phys_addr & (real_page_size - 1)) + /* + * Verify that the phys and virt addresses are aligned with the + * MMU page size (in dram this means checking the address and MMU + * after scrambling) + */ + if ((is_dram_addr && + ((hdev->asic_funcs->scramble_vaddr(hdev, phys_addr) & + (mmu_prop->page_size - 1)) || + (hdev->asic_funcs->scramble_vaddr(hdev, virt_addr) & + (mmu_prop->page_size - 1)))) || + (!is_dram_addr && ((phys_addr & (real_page_size - 1)) || + (virt_addr & (real_page_size - 1))))) dev_crit(hdev->dev, - "Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size", - phys_addr, real_page_size); + "Mapping address 0x%llx with virtual address 0x%llx and page size of 0x%x is erroneous! Addresses must be divisible by page size", + phys_addr, virt_addr, real_page_size); npages = page_size / real_page_size; real_virt_addr = virt_addr; @@ -474,6 +505,8 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, if (!hdev->mmu_enable) return -EOPNOTSUPP; + hops->scrambled_vaddr = virt_addr; /* assume no scrambling */ + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, prop->dmmu.start_addr, prop->dmmu.end_addr); @@ -513,3 +546,15 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev) return 0; } + +/** + * hl_mmu_scramble_vaddr() - The generic mmu virtual address scrambling routine. + * @hdev: pointer to device data. + * @virt_addr: The virtual address to scramble. + * + * Return: The scrambled virtual address. + */ +u64 hl_mmu_scramble_vaddr(struct hl_device *hdev, u64 virt_addr) +{ + return virt_addr; +} diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 3d8414d3afa8..66194b7d3806 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -8308,7 +8308,8 @@ static const struct hl_asic_funcs gaudi_funcs = { .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw, .get_device_time = gaudi_get_device_time, .collective_wait_init_cs = gaudi_collective_wait_init_cs, - .collective_wait_create_jobs = gaudi_collective_wait_create_jobs + .collective_wait_create_jobs = gaudi_collective_wait_create_jobs, + .scramble_vaddr = hl_mmu_scramble_vaddr }; /** diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index eeade9a45bc0..80198c39985a 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5456,7 +5456,8 @@ static const struct hl_asic_funcs goya_funcs = { .set_dma_mask_from_fw = goya_set_dma_mask_from_fw, .get_device_time = goya_get_device_time, .collective_wait_init_cs = goya_collective_wait_init_cs, - .collective_wait_create_jobs = goya_collective_wait_create_jobs + .collective_wait_create_jobs = goya_collective_wait_create_jobs, + .scramble_vaddr = hl_mmu_scramble_vaddr }; /* -- cgit v1.2.3