summaryrefslogtreecommitdiff
path: root/drivers/misc/habanalabs/common/mmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/common/mmu.c')
-rw-r--r--drivers/misc/habanalabs/common/mmu.c273
1 files changed, 239 insertions, 34 deletions
diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c
index b5058798aeb9..33ae953d3a36 100644
--- a/drivers/misc/habanalabs/common/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu.c
@@ -22,18 +22,25 @@ static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
* hl_mmu_init() - initialize the MMU module.
* @hdev: habanalabs device structure.
*
- * This function does the following:
- * - Create a pool of pages for pgt_infos.
- * - Create a shadow table for pgt
- *
* Return: 0 for success, non-zero for failure.
*/
int hl_mmu_init(struct hl_device *hdev)
{
- if (hdev->mmu_enable)
- return hdev->mmu_func.init(hdev);
+ int rc = -EOPNOTSUPP;
- return 0;
+ if (!hdev->mmu_enable)
+ return 0;
+
+ if (hdev->mmu_func[MMU_DR_PGT].init != NULL) {
+ rc = hdev->mmu_func[MMU_DR_PGT].init(hdev);
+ if (rc)
+ return rc;
+ }
+
+ if (hdev->mmu_func[MMU_HR_PGT].init != NULL)
+ rc = hdev->mmu_func[MMU_HR_PGT].init(hdev);
+
+ return rc;
}
/**
@@ -48,8 +55,14 @@ int hl_mmu_init(struct hl_device *hdev)
*/
void hl_mmu_fini(struct hl_device *hdev)
{
- if (hdev->mmu_enable)
- hdev->mmu_func.fini(hdev);
+ if (!hdev->mmu_enable)
+ return;
+
+ if (hdev->mmu_func[MMU_DR_PGT].fini != NULL)
+ hdev->mmu_func[MMU_DR_PGT].fini(hdev);
+
+ if (hdev->mmu_func[MMU_HR_PGT].fini != NULL)
+ hdev->mmu_func[MMU_HR_PGT].fini(hdev);
}
/**
@@ -63,11 +76,23 @@ void hl_mmu_fini(struct hl_device *hdev)
int hl_mmu_ctx_init(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
+ int rc = -EOPNOTSUPP;
- if (hdev->mmu_enable)
- return hdev->mmu_func.ctx_init(ctx);
+ if (!hdev->mmu_enable)
+ return 0;
- return 0;
+ mutex_init(&ctx->mmu_lock);
+
+ if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) {
+ rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx);
+ if (rc)
+ return rc;
+ }
+
+ if (hdev->mmu_func[MMU_HR_PGT].ctx_init != NULL)
+ rc = hdev->mmu_func[MMU_HR_PGT].ctx_init(ctx);
+
+ return rc;
}
/*
@@ -84,12 +109,20 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
- if (hdev->mmu_enable)
- hdev->mmu_func.ctx_fini(ctx);
+ if (!hdev->mmu_enable)
+ return;
+
+ if (hdev->mmu_func[MMU_DR_PGT].ctx_fini != NULL)
+ hdev->mmu_func[MMU_DR_PGT].ctx_fini(ctx);
+
+ if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL)
+ hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx);
+
+ mutex_destroy(&ctx->mmu_lock);
}
/*
- * hl_mmu_unmap - unmaps a virtual addr
+ * hl_mmu_unmap_page - unmaps a virtual addr
*
* @ctx: pointer to the context structure
* @virt_addr: virt addr to map from
@@ -109,7 +142,7 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
* For optimization reasons PCI flush may be requested once after unmapping of
* large area.
*/
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
bool flush_pte)
{
struct hl_device *hdev = ctx->hdev;
@@ -117,7 +150,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
struct hl_mmu_properties *mmu_prop;
u64 real_virt_addr;
u32 real_page_size, npages;
- int i, rc = 0;
+ int i, rc = 0, pgt_residency;
bool is_dram_addr;
if (!hdev->mmu_enable)
@@ -132,6 +165,8 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
else
mmu_prop = &prop->pmmu;
+ pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
+
/*
* The H/W handles mapping of specific page sizes. Hence if the page
* size is bigger, we break it to sub-pages and unmap them separately.
@@ -150,7 +185,8 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
real_virt_addr = virt_addr;
for (i = 0 ; i < npages ; i++) {
- rc = hdev->mmu_func.unmap(ctx, real_virt_addr, is_dram_addr);
+ rc = hdev->mmu_func[pgt_residency].unmap(ctx,
+ real_virt_addr, is_dram_addr);
if (rc)
break;
@@ -158,13 +194,13 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
}
if (flush_pte)
- hdev->mmu_func.flush(ctx);
+ hdev->mmu_func[pgt_residency].flush(ctx);
return rc;
}
/*
- * hl_mmu_map - maps a virtual addr to physical addr
+ * hl_mmu_map_page - maps a virtual addr to physical addr
*
* @ctx: pointer to the context structure
* @virt_addr: virt addr to map from
@@ -185,17 +221,18 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
* For optimization reasons PCI flush may be requested once after mapping of
* large area.
*/
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
- bool flush_pte)
+int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+ u32 page_size, bool flush_pte)
{
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop;
u64 real_virt_addr, real_phys_addr;
u32 real_page_size, npages;
- int i, rc, mapped_cnt = 0;
+ int i, rc, pgt_residency, mapped_cnt = 0;
bool is_dram_addr;
+
if (!hdev->mmu_enable)
return 0;
@@ -208,6 +245,8 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
else
mmu_prop = &prop->pmmu;
+ pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
+
/*
* The H/W handles mapping of specific page sizes. Hence if the page
* size is bigger, we break it to sub-pages and map them separately.
@@ -216,7 +255,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
real_page_size = mmu_prop->page_size;
} else {
dev_err(hdev->dev,
- "page size of %u is not %uKB aligned, can't unmap\n",
+ "page size of %u is not %uKB aligned, can't map\n",
page_size, mmu_prop->page_size >> 10);
return -EFAULT;
@@ -231,8 +270,9 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
real_phys_addr = phys_addr;
for (i = 0 ; i < npages ; i++) {
- rc = hdev->mmu_func.map(ctx, real_virt_addr, real_phys_addr,
- real_page_size, is_dram_addr);
+ rc = hdev->mmu_func[pgt_residency].map(ctx,
+ real_virt_addr, real_phys_addr,
+ real_page_size, is_dram_addr);
if (rc)
goto err;
@@ -242,21 +282,124 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
}
if (flush_pte)
- hdev->mmu_func.flush(ctx);
+ hdev->mmu_func[pgt_residency].flush(ctx);
return 0;
err:
real_virt_addr = virt_addr;
for (i = 0 ; i < mapped_cnt ; i++) {
- if (hdev->mmu_func.unmap(ctx, real_virt_addr, is_dram_addr))
+ if (hdev->mmu_func[pgt_residency].unmap(ctx,
+ real_virt_addr, is_dram_addr))
dev_warn_ratelimited(hdev->dev,
"failed to unmap va: 0x%llx\n", real_virt_addr);
real_virt_addr += real_page_size;
}
- hdev->mmu_func.flush(ctx);
+ hdev->mmu_func[pgt_residency].flush(ctx);
+
+ return rc;
+}
+
+/*
+ * hl_mmu_map_contiguous - implements a wrapper for hl_mmu_map_page
+ * for mapping contiguous physical memory
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to map from
+ * @phys_addr: phys addr to map to
+ * @size: size to map
+ *
+ */
+int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
+ u64 phys_addr, u32 size)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 curr_va, curr_pa;
+ u32 page_size;
+ bool flush_pte;
+ int rc = 0, off;
+
+ if (hl_mem_area_inside_range(virt_addr, size,
+ prop->dmmu.start_addr, prop->dmmu.end_addr))
+ page_size = prop->dmmu.page_size;
+ else if (hl_mem_area_inside_range(virt_addr, size,
+ prop->pmmu.start_addr, prop->pmmu.end_addr))
+ page_size = prop->pmmu.page_size;
+ else if (hl_mem_area_inside_range(virt_addr, size,
+ prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr))
+ page_size = prop->pmmu_huge.page_size;
+ else
+ return -EINVAL;
+
+ for (off = 0 ; off < size ; off += page_size) {
+ curr_va = virt_addr + off;
+ curr_pa = phys_addr + off;
+ flush_pte = (off + page_size) >= size;
+ rc = hl_mmu_map_page(ctx, curr_va, curr_pa, page_size,
+ flush_pte);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Map failed for va 0x%llx to pa 0x%llx\n",
+ curr_va, curr_pa);
+ goto unmap;
+ }
+ }
+
+ return rc;
+
+unmap:
+ for (; off >= 0 ; off -= page_size) {
+ curr_va = virt_addr + off;
+ flush_pte = (off - (s32) page_size) < 0;
+ if (hl_mmu_unmap_page(ctx, curr_va, page_size, flush_pte))
+ dev_warn_ratelimited(hdev->dev,
+ "failed to unmap va 0x%llx\n", curr_va);
+ }
+
+ return rc;
+}
+
+/*
+ * hl_mmu_unmap_contiguous - implements a wrapper for hl_mmu_unmap_page
+ * for unmapping contiguous physical memory
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to unmap
+ * @size: size to unmap
+ *
+ */
+int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 curr_va;
+ u32 page_size;
+ bool flush_pte;
+ int rc = 0, off;
+
+ if (hl_mem_area_inside_range(virt_addr, size,
+ prop->dmmu.start_addr, prop->dmmu.end_addr))
+ page_size = prop->dmmu.page_size;
+ else if (hl_mem_area_inside_range(virt_addr, size,
+ prop->pmmu.start_addr, prop->pmmu.end_addr))
+ page_size = prop->pmmu.page_size;
+ else if (hl_mem_area_inside_range(virt_addr, size,
+ prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr))
+ page_size = prop->pmmu_huge.page_size;
+ else
+ return -EINVAL;
+
+ for (off = 0 ; off < size ; off += page_size) {
+ curr_va = virt_addr + off;
+ flush_pte = (off + page_size) >= size;
+ rc = hl_mmu_unmap_page(ctx, curr_va, page_size, flush_pte);
+ if (rc)
+ dev_warn_ratelimited(hdev->dev,
+ "Unmap failed for va 0x%llx\n", curr_va);
+ }
return rc;
}
@@ -271,8 +414,14 @@ void hl_mmu_swap_out(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
- if (hdev->mmu_enable)
- hdev->mmu_func.swap_out(ctx);
+ if (!hdev->mmu_enable)
+ return;
+
+ if (hdev->mmu_func[MMU_DR_PGT].swap_out != NULL)
+ hdev->mmu_func[MMU_DR_PGT].swap_out(ctx);
+
+ if (hdev->mmu_func[MMU_HR_PGT].swap_out != NULL)
+ hdev->mmu_func[MMU_HR_PGT].swap_out(ctx);
}
/*
@@ -285,8 +434,64 @@ void hl_mmu_swap_in(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
- if (hdev->mmu_enable)
- hdev->mmu_func.swap_in(ctx);
+ if (!hdev->mmu_enable)
+ return;
+
+ if (hdev->mmu_func[MMU_DR_PGT].swap_in != NULL)
+ hdev->mmu_func[MMU_DR_PGT].swap_in(ctx);
+
+ if (hdev->mmu_func[MMU_HR_PGT].swap_in != NULL)
+ hdev->mmu_func[MMU_HR_PGT].swap_in(ctx);
+}
+
+int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr)
+{
+ struct hl_mmu_hop_info hops;
+ u64 tmp_addr;
+ int rc;
+
+ rc = hl_mmu_get_tlb_info(ctx, virt_addr, &hops);
+ if (rc)
+ return rc;
+
+ /* last hop holds the phys address and flags */
+ tmp_addr = hops.hop_info[hops.used_hops - 1].hop_pte_val;
+ *phys_addr = (tmp_addr & HOP_PHYS_ADDR_MASK) | (virt_addr & FLAGS_MASK);
+
+ return 0;
+}
+
+int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
+ struct hl_mmu_hop_info *hops)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_mmu_properties *mmu_prop;
+ int rc;
+ bool is_dram_addr;
+
+ if (!hdev->mmu_enable)
+ return -EOPNOTSUPP;
+
+ is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+ prop->dmmu.start_addr,
+ prop->dmmu.end_addr);
+
+ /* host-residency is the same in PMMU and HPMMU, use one of them */
+ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
+ mutex_lock(&ctx->mmu_lock);
+
+ if (mmu_prop->host_resident)
+ rc = hdev->mmu_func[MMU_HR_PGT].get_tlb_info(ctx,
+ virt_addr, hops);
+ else
+ rc = hdev->mmu_func[MMU_DR_PGT].get_tlb_info(ctx,
+ virt_addr, hops);
+
+ mutex_unlock(&ctx->mmu_lock);
+
+ return rc;
}
int hl_mmu_if_set_funcs(struct hl_device *hdev)
@@ -297,7 +502,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
switch (hdev->asic_type) {
case ASIC_GOYA:
case ASIC_GAUDI:
- hl_mmu_v1_set_funcs(hdev);
+ hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
break;
default:
dev_err(hdev->dev, "Unrecognized ASIC type %d\n",