Diffstat (limited to 'drivers/gpu/drm/xe/xe_pt.c')
-rw-r--r--  drivers/gpu/drm/xe/xe_pt.c  707
1 file changed, 529 insertions(+), 178 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index f27f579f4d85..b04756a97cdc 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -20,6 +20,7 @@
#include "xe_res_cursor.h"
#include "xe_sched_job.h"
#include "xe_sync.h"
+#include "xe_svm.h"
#include "xe_trace.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_vm.h"
@@ -28,6 +29,8 @@ struct xe_pt_dir {
struct xe_pt pt;
/** @children: Array of page-table child nodes */
struct xe_ptw *children[XE_PDES];
+ /** @staging: Array of page-table staging nodes */
+ struct xe_ptw *staging[XE_PDES];
};
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
@@ -48,9 +51,10 @@ static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt)
return container_of(pt, struct xe_pt_dir, pt);
}
-static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
+static struct xe_pt *
+xe_pt_entry_staging(struct xe_pt_dir *pt_dir, unsigned int index)
{
- return container_of(pt_dir->children[index], struct xe_pt, base);
+ return container_of(pt_dir->staging[index], struct xe_pt, base);
}
static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
@@ -99,6 +103,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
{
struct xe_pt *pt;
struct xe_bo *bo;
+ u32 bo_flags;
int err;
if (level) {
@@ -111,20 +116,23 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
if (!pt)
return ERR_PTR(-ENOMEM);
+ bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE |
+ XE_BO_FLAG_NO_RESV_EVICT | XE_BO_FLAG_PAGETABLE;
+ if (vm->xef) /* userspace */
+ bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
+
pt->level = level;
bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE |
- XE_BO_FLAG_PINNED |
- XE_BO_FLAG_NO_RESV_EVICT |
- XE_BO_FLAG_PAGETABLE);
+ bo_flags);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto err_kfree;
}
pt->bo = bo;
pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL;
+ pt->base.staging = level ? as_xe_pt_dir(pt)->staging : NULL;
if (vm->xef)
xe_drm_client_add_bo(vm->xef->client, pt->bo);
@@ -136,6 +144,7 @@ err_kfree:
xe_pt_free(pt);
return ERR_PTR(err);
}
+ALLOW_ERROR_INJECTION(xe_pt_create, ERRNO);
/**
* xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero
@@ -205,8 +214,8 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
for (i = 0; i < XE_PDES; i++) {
- if (xe_pt_entry(pt_dir, i))
- xe_pt_destroy(xe_pt_entry(pt_dir, i), flags,
+ if (xe_pt_entry_staging(pt_dir, i))
+ xe_pt_destroy(xe_pt_entry_staging(pt_dir, i), flags,
deferred);
}
}
@@ -214,6 +223,20 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
}
/**
+ * xe_pt_clear() - Clear a page-table.
+ * @xe: xe device.
+ * @pt: The page-table.
+ *
+ * Clears the page-table by setting all of its entries to zero.
+ */
+void xe_pt_clear(struct xe_device *xe, struct xe_pt *pt)
+{
+ struct iosys_map *map = &pt->bo->vmap;
+
+ xe_map_memset(xe, map, 0, 0, SZ_4K);
+}
+
+/**
* DOC: Pagetable building
*
* Below we use the term "page-table" for both page-directories, containing
@@ -249,8 +272,11 @@ struct xe_pt_update {
bool preexisting;
};
+/**
+ * struct xe_pt_stage_bind_walk - Walk state for the stage_bind walk.
+ */
struct xe_pt_stage_bind_walk {
- /** base: The base class. */
+ /** @base: The base class. */
struct xe_pt_walk base;
/* Input parameters for the walk */
@@ -258,15 +284,19 @@ struct xe_pt_stage_bind_walk {
struct xe_vm *vm;
/** @tile: The tile we're building for. */
struct xe_tile *tile;
- /** @default_pte: PTE flag only template. No address is associated */
- u64 default_pte;
+ /** @default_vram_pte: PTE flag only template for VRAM. No address is associated */
+ u64 default_vram_pte;
+ /** @default_system_pte: PTE flag only template for System. No address is associated */
+ u64 default_system_pte;
/** @dma_offset: DMA offset to add to the PTE. */
u64 dma_offset;
/**
- * @needs_64k: This address range enforces 64K alignment and
- * granularity.
+ * @needs_64K: This address range enforces 64K alignment and
+ * granularity on VRAM.
*/
bool needs_64K;
+ /** @clear_pt: clear page table entries during the bind walk */
+ bool clear_pt;
/**
* @vma: VMA being mapped
*/
@@ -275,10 +305,11 @@ struct xe_pt_stage_bind_walk {
/* Also input, but is updated during the walk*/
/** @curs: The DMA address cursor. */
struct xe_res_cursor *curs;
- /** @va_curs_start: The Virtual address coresponding to @curs->start */
+ /** @va_curs_start: The Virtual address corresponding to @curs->start */
u64 va_curs_start;
/* Output */
+ /** @wupd: Walk output data for page-table updates. */
struct xe_walk_update {
/** @wupd.entries: Caller provided storage. */
struct xe_vm_pgtable_update *entries;
@@ -296,7 +327,7 @@ struct xe_pt_stage_bind_walk {
u64 l0_end_addr;
/** @addr_64K: The start address of the current 64K chunk. */
u64 addr_64K;
- /** @found_64: Whether @add_64K actually points to a 64K chunk. */
+ /** @found_64K: Whether @addr_64K actually points to a 64K chunk. */
bool found_64K;
};
@@ -375,8 +406,10 @@ xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent,
/* Continue building a non-connected subtree. */
struct iosys_map *map = &parent->bo->vmap;
- if (unlikely(xe_child))
+ if (unlikely(xe_child)) {
parent->base.children[offset] = &xe_child->base;
+ parent->base.staging[offset] = &xe_child->base;
+ }
xe_pt_write(xe_walk->vm->xe, map, offset, pte);
parent->num_live++;
@@ -414,6 +447,10 @@ static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
if (xe_vma_is_null(xe_walk->vma))
return true;
+ /* If we are clearing the page table, there are no DMA addresses */
+ if (xe_walk->clear_pt)
+ return true;
+
/* Is the DMA address huge PTE size aligned? */
size = next - addr;
dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs);
@@ -493,24 +530,35 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
struct xe_res_cursor *curs = xe_walk->curs;
bool is_null = xe_vma_is_null(xe_walk->vma);
+ bool is_vram = is_null ? false : xe_res_is_vram(curs);
XE_WARN_ON(xe_walk->va_curs_start != addr);
- pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
- xe_res_dma(curs) + xe_walk->dma_offset,
- xe_walk->vma, pat_index, level);
- pte |= xe_walk->default_pte;
+ if (xe_walk->clear_pt) {
+ pte = 0;
+ } else {
+ pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
+ xe_res_dma(curs) +
+ xe_walk->dma_offset,
+ xe_walk->vma,
+ pat_index, level);
+ if (!is_null)
+ pte |= is_vram ? xe_walk->default_vram_pte :
+ xe_walk->default_system_pte;
- /*
- * Set the XE_PTE_PS64 hint if possible, otherwise if
- * this device *requires* 64K PTE size for VRAM, fail.
- */
- if (level == 0 && !xe_parent->is_compact) {
- if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) {
- xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K;
- pte |= XE_PTE_PS64;
- } else if (XE_WARN_ON(xe_walk->needs_64K)) {
- return -EINVAL;
+ /*
+ * Set the XE_PTE_PS64 hint if possible, otherwise if
+ * this device *requires* 64K PTE size for VRAM, fail.
+ */
+ if (level == 0 && !xe_parent->is_compact) {
+ if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) {
+ xe_walk->vma->gpuva.flags |=
+ XE_VMA_PTE_64K;
+ pte |= XE_PTE_PS64;
+ } else if (XE_WARN_ON(xe_walk->needs_64K &&
+ is_vram)) {
+ return -EINVAL;
+ }
}
}
@@ -518,7 +566,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
if (unlikely(ret))
return ret;
- if (!is_null)
+ if (!is_null && !xe_walk->clear_pt)
xe_res_next(curs, next - addr);
xe_walk->va_curs_start = next;
xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level);
@@ -581,14 +629,54 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
.pt_entry = xe_pt_stage_bind_entry,
};
+/*
+ * Default atomic expectations for different allocation scenarios are as follows:
+ *
+ * 1. Traditional API: When the VM is not in LR mode:
+ * - Device atomics are expected to function with all allocations.
+ *
+ * 2. Compute/SVM API: When the VM is in LR mode:
+ * - Device atomics are the default behavior when the bo is placed in a single region.
+ * - In all other cases device atomics will be disabled with AE=0 until an application
+ * requests differently using an ioctl like madvise.
+ */
+static bool xe_atomic_for_vram(struct xe_vm *vm)
+{
+ return true;
+}
+
+static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_bo *bo)
+{
+ struct xe_device *xe = vm->xe;
+
+ if (!xe->info.has_device_atomics_on_smem)
+ return false;
+
+ /*
+ * If a SMEM+LMEM allocation is backed by SMEM, a device
+ * atomic will cause a GPU page fault and the allocation then
+ * gets migrated to LMEM, so bind such allocations with
+ * device atomics enabled.
+ *
+ * TODO: Revisit this. Perhaps add something like a
+ * fault_on_atomics_in_system UAPI flag.
+ * Note that this also prohibits GPU atomics in LR mode for
+ * userptr and system memory on DGFX.
+ */
+ return (!IS_DGFX(xe) || (!xe_vm_in_lr_mode(vm) ||
+ (bo && xe_bo_has_single_placement(bo))));
+}
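For reference, the bind path later in this patch consumes the two helpers above to pick the AE (atomics-enable) defaults for the stage-bind walk. Condensed, the selection reads roughly as below; this is a restatement of the hunk further down, not additional behavior:

	/* Sketch: only set AE defaults when the VMA requests atomic PTEs. */
	if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) {
		xe_walk.default_vram_pte = xe_atomic_for_vram(vm) ? XE_USM_PPGTT_PTE_AE : 0;
		xe_walk.default_system_pte = xe_atomic_for_system(vm, bo) ?
			XE_USM_PPGTT_PTE_AE : 0;
	}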
+
/**
* xe_pt_stage_bind() - Build a disconnected page-table tree for a given address
* range.
* @tile: The tile we're building for.
* @vma: The vma indicating the address range.
+ * @range: The SVM range indicating the address range, or NULL when binding @vma directly.
* @entries: Storage for the update entries used for connecting the tree to
* the main tree at commit time.
* @num_entries: On output contains the number of @entries used.
+ * @clear_pt: Clear the page table entries.
*
* This function builds a disconnected page-table tree for a given address
* range. The tree is connected to the main vm tree for the gpu using
@@ -601,76 +689,72 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
*/
static int
xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
- struct xe_vm_pgtable_update *entries, u32 *num_entries)
+ struct xe_svm_range *range,
+ struct xe_vm_pgtable_update *entries,
+ u32 *num_entries, bool clear_pt)
{
struct xe_device *xe = tile_to_xe(tile);
struct xe_bo *bo = xe_vma_bo(vma);
- bool is_devmem = !xe_vma_is_userptr(vma) && bo &&
- (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo));
struct xe_res_cursor curs;
+ struct xe_vm *vm = xe_vma_vm(vma);
struct xe_pt_stage_bind_walk xe_walk = {
.base = {
.ops = &xe_pt_stage_bind_ops,
.shifts = xe_normal_pt_shifts,
.max_level = XE_PT_HIGHEST_LEVEL,
+ .staging = true,
},
- .vm = xe_vma_vm(vma),
+ .vm = vm,
.tile = tile,
.curs = &curs,
- .va_curs_start = xe_vma_start(vma),
+ .va_curs_start = range ? range->base.itree.start :
+ xe_vma_start(vma),
.vma = vma,
.wupd.entries = entries,
- .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem,
+ .clear_pt = clear_pt,
};
- struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
+ struct xe_pt *pt = vm->pt_root[tile->id];
int ret;
- /**
- * Default atomic expectations for different allocation scenarios are as follows:
- *
- * 1. Traditional API: When the VM is not in LR mode:
- * - Device atomics are expected to function with all allocations.
- *
- * 2. Compute/SVM API: When the VM is in LR mode:
- * - Device atomics are the default behavior when the bo is placed in a single region.
- * - In all other cases device atomics will be disabled with AE=0 until an application
- * request differently using a ioctl like madvise.
- */
- if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) {
- if (xe_vm_in_lr_mode(xe_vma_vm(vma))) {
- if (bo && xe_bo_has_single_placement(bo))
- xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
- /**
- * If a SMEM+LMEM allocation is backed by SMEM, a device
- * atomics will cause a gpu page fault and which then
- * gets migrated to LMEM, bind such allocations with
- * device atomics enabled.
- */
- else if (is_devmem && !xe_bo_has_single_placement(bo))
- xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+ if (range) {
+ /* Move this entire thing to xe_svm.c? */
+ xe_svm_notifier_lock(vm);
+ if (!xe_svm_range_pages_valid(range)) {
+ xe_svm_range_debug(range, "BIND PREPARE - RETRY");
+ xe_svm_notifier_unlock(vm);
+ return -EAGAIN;
+ }
+ if (xe_svm_range_has_dma_mapping(range)) {
+ xe_res_first_dma(range->base.dma_addr, 0,
+ range->base.itree.last + 1 - range->base.itree.start,
+ &curs);
+ xe_svm_range_debug(range, "BIND PREPARE - MIXED");
} else {
- xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+ xe_assert(xe, false);
}
-
- /**
- * Unset AE if the platform(PVC) doesn't support it on an
- * allocation
+ /*
+ * Note that once unlocked, the resource cursor's DMA addresses may become
+ * stale, but the bind will be aborted anyway at commit time.
*/
- if (!xe->info.has_device_atomics_on_smem && !is_devmem)
- xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE;
+ xe_svm_notifier_unlock(vm);
}
- if (is_devmem) {
- xe_walk.default_pte |= XE_PPGTT_PTE_DM;
- xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource);
- }
+ xe_walk.needs_64K = (vm->flags & XE_VM_FLAG_64K);
+ if (clear_pt)
+ goto walk_pt;
- if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo))
- xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
+ if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) {
+ xe_walk.default_vram_pte = xe_atomic_for_vram(vm) ? XE_USM_PPGTT_PTE_AE : 0;
+ xe_walk.default_system_pte = xe_atomic_for_system(vm, bo) ?
+ XE_USM_PPGTT_PTE_AE : 0;
+ }
- xe_bo_assert_held(bo);
+ xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM;
+ xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0;
+ if (!range)
+ xe_bo_assert_held(bo);
- if (!xe_vma_is_null(vma)) {
+ if (!xe_vma_is_null(vma) && !range) {
if (xe_vma_is_userptr(vma))
xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0,
xe_vma_size(vma), &curs);
@@ -680,12 +764,15 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
else
xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma),
xe_vma_size(vma), &curs);
- } else {
+ } else if (!range) {
curs.size = xe_vma_size(vma);
}
- ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma),
- xe_vma_end(vma), &xe_walk.base);
+walk_pt:
+ ret = xe_pt_walk_range(&pt->base, pt->level,
+ range ? range->base.itree.start : xe_vma_start(vma),
+ range ? range->base.itree.last + 1 : xe_vma_end(vma),
+ &xe_walk.base);
*num_entries = xe_walk.wupd.num_used_entries;
return ret;
@@ -829,6 +916,46 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
return xe_walk.needs_invalidate;
}
+/**
+ * xe_pt_zap_ptes_range() - Zap (zero) gpu ptes of an SVM range
+ * @tile: The tile we're zapping for.
+ * @vm: The VM we're zapping for.
+ * @range: The SVM range we're zapping for.
+ *
+ * SVM invalidation needs to be able to zap the gpu ptes of a given address
+ * range. In order to be able to do that, this function needs access to the
+ * shared page-table entries so it can either clear the leaf PTEs or
+ * clear the pointers to lower-level page-tables. The caller is required
+ * to hold the SVM notifier lock.
+ *
+ * Return: Whether ptes were actually updated and a TLB invalidation is
+ * required.
+ */
+bool xe_pt_zap_ptes_range(struct xe_tile *tile, struct xe_vm *vm,
+ struct xe_svm_range *range)
+{
+ struct xe_pt_zap_ptes_walk xe_walk = {
+ .base = {
+ .ops = &xe_pt_zap_ptes_ops,
+ .shifts = xe_normal_pt_shifts,
+ .max_level = XE_PT_HIGHEST_LEVEL,
+ },
+ .tile = tile,
+ };
+ struct xe_pt *pt = vm->pt_root[tile->id];
+ u8 pt_mask = (range->tile_present & ~range->tile_invalidated);
+
+ xe_svm_assert_in_notifier(vm);
+
+ if (!(pt_mask & BIT(tile->id)))
+ return false;
+
+ (void)xe_pt_walk_shared(&pt->base, pt->level, range->base.itree.start,
+ range->base.itree.last + 1, &xe_walk.base);
+
+ return xe_walk.needs_invalidate;
+}
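A hypothetical caller sketch for the function above, following the documented contract: hold the SVM notifier lock and only issue a TLB invalidation when the walk reports that PTEs actually changed. The invalidation itself is elided since it lives outside this file:

	/* Hypothetical usage sketch, assuming the SVM notifier lock is held. */
	xe_svm_assert_in_notifier(vm);
	if (xe_pt_zap_ptes_range(tile, vm, range)) {
		/* PTEs were zapped: a GT TLB invalidation covering the range is required. */
	}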
+
static void
xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile,
struct iosys_map *map, void *data,
@@ -872,18 +999,38 @@ static void xe_pt_cancel_bind(struct xe_vma *vma,
}
}
-static void xe_pt_commit_locks_assert(struct xe_vma *vma)
+#define XE_INVALID_VMA ((struct xe_vma *)(0xdeaddeadull))
+
+static void xe_pt_commit_prepare_locks_assert(struct xe_vma *vma)
{
- struct xe_vm *vm = xe_vma_vm(vma);
+ struct xe_vm *vm;
+ if (vma == XE_INVALID_VMA)
+ return;
+
+ vm = xe_vma_vm(vma);
lockdep_assert_held(&vm->lock);
- if (!xe_vma_is_userptr(vma) && !xe_vma_is_null(vma))
+ if (!xe_vma_has_no_bo(vma))
dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv);
xe_vm_assert_held(vm);
}
+static void xe_pt_commit_locks_assert(struct xe_vma *vma)
+{
+ struct xe_vm *vm;
+
+ if (vma == XE_INVALID_VMA)
+ return;
+
+ vm = xe_vma_vm(vma);
+ xe_pt_commit_prepare_locks_assert(vma);
+
+ if (xe_vma_is_userptr(vma))
+ lockdep_assert_held_read(&vm->userptr.notifier_lock);
+}
+
static void xe_pt_commit(struct xe_vma *vma,
struct xe_vm_pgtable_update *entries,
u32 num_entries, struct llist_head *deferred)
@@ -894,14 +1041,19 @@ static void xe_pt_commit(struct xe_vma *vma,
for (i = 0; i < num_entries; i++) {
struct xe_pt *pt = entries[i].pt;
+ struct xe_pt_dir *pt_dir;
if (!pt->level)
continue;
+ pt_dir = as_xe_pt_dir(pt);
for (j = 0; j < entries[i].qwords; j++) {
struct xe_pt *oldpte = entries[i].pt_entries[j].pt;
+ int j_ = j + entries[i].ofs;
- xe_pt_destroy(oldpte, xe_vma_vm(vma)->flags, deferred);
+ pt_dir->children[j_] = pt_dir->staging[j_];
+ xe_pt_destroy(oldpte, (vma == XE_INVALID_VMA) ? 0 :
+ xe_vma_vm(vma)->flags, deferred);
}
}
}
@@ -912,7 +1064,7 @@ static void xe_pt_abort_bind(struct xe_vma *vma,
{
int i, j;
- xe_pt_commit_locks_assert(vma);
+ xe_pt_commit_prepare_locks_assert(vma);
for (i = num_entries - 1; i >= 0; --i) {
struct xe_pt *pt = entries[i].pt;
@@ -927,10 +1079,10 @@ static void xe_pt_abort_bind(struct xe_vma *vma,
pt_dir = as_xe_pt_dir(pt);
for (j = 0; j < entries[i].qwords; j++) {
u32 j_ = j + entries[i].ofs;
- struct xe_pt *newpte = xe_pt_entry(pt_dir, j_);
+ struct xe_pt *newpte = xe_pt_entry_staging(pt_dir, j_);
struct xe_pt *oldpte = entries[i].pt_entries[j].pt;
- pt_dir->children[j_] = oldpte ? &oldpte->base : 0;
+ pt_dir->staging[j_] = oldpte ? &oldpte->base : 0;
xe_pt_destroy(newpte, xe_vma_vm(vma)->flags, NULL);
}
}
@@ -942,7 +1094,7 @@ static void xe_pt_commit_prepare_bind(struct xe_vma *vma,
{
u32 i, j;
- xe_pt_commit_locks_assert(vma);
+ xe_pt_commit_prepare_locks_assert(vma);
for (i = 0; i < num_entries; i++) {
struct xe_pt *pt = entries[i].pt;
@@ -960,10 +1112,10 @@ static void xe_pt_commit_prepare_bind(struct xe_vma *vma,
struct xe_pt *newpte = entries[i].pt_entries[j].pt;
struct xe_pt *oldpte = NULL;
- if (xe_pt_entry(pt_dir, j_))
- oldpte = xe_pt_entry(pt_dir, j_);
+ if (xe_pt_entry_staging(pt_dir, j_))
+ oldpte = xe_pt_entry_staging(pt_dir, j_);
- pt_dir->children[j_] = &newpte->base;
+ pt_dir->staging[j_] = &newpte->base;
entries[i].pt_entries[j].pt = oldpte;
}
}
@@ -980,12 +1132,15 @@ static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries,
static int
xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
- struct xe_vm_pgtable_update *entries, u32 *num_entries)
+ struct xe_svm_range *range,
+ struct xe_vm_pgtable_update *entries,
+ u32 *num_entries, bool invalidate_on_bind)
{
int err;
*num_entries = 0;
- err = xe_pt_stage_bind(tile, vma, entries, num_entries);
+ err = xe_pt_stage_bind(tile, vma, range, entries, num_entries,
+ invalidate_on_bind);
if (!err)
xe_tile_assert(tile, *num_entries);
@@ -1068,6 +1223,11 @@ static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op,
{
int err = 0;
+ /*
+ * No need to check for is_cpu_addr_mirror here as vma_add_deps is a
+ * NOP if the VMA is a CPU address mirror
+ */
+
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
if (!op->map.immediate && xe_vm_in_fault_mode(vm))
@@ -1086,6 +1246,8 @@ static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op,
case DRM_GPUVA_OP_PREFETCH:
err = vma_add_deps(gpuva_to_vma(op->base.prefetch.va), job);
break;
+ case DRM_GPUVA_OP_DRIVER:
+ break;
default:
drm_warn(&vm->xe->drm, "NOT POSSIBLE");
}
@@ -1212,42 +1374,22 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
return 0;
uvma = to_userptr_vma(vma);
- notifier_seq = uvma->userptr.notifier_seq;
+ if (xe_pt_userptr_inject_eagain(uvma))
+ xe_vma_userptr_force_invalidate(uvma);
- if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm))
- return 0;
+ notifier_seq = uvma->userptr.notifier_seq;
if (!mmu_interval_read_retry(&uvma->userptr.notifier,
- notifier_seq) &&
- !xe_pt_userptr_inject_eagain(uvma))
+ notifier_seq))
return 0;
- if (xe_vm_in_fault_mode(vm)) {
+ if (xe_vm_in_fault_mode(vm))
return -EAGAIN;
- } else {
- spin_lock(&vm->userptr.invalidated_lock);
- list_move_tail(&uvma->userptr.invalidate_link,
- &vm->userptr.invalidated);
- spin_unlock(&vm->userptr.invalidated_lock);
-
- if (xe_vm_in_preempt_fence_mode(vm)) {
- struct dma_resv_iter cursor;
- struct dma_fence *fence;
- long err;
-
- dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
- DMA_RESV_USAGE_BOOKKEEP);
- dma_resv_for_each_fence_unlocked(&cursor, fence)
- dma_fence_enable_sw_signaling(fence);
- dma_resv_iter_end(&cursor);
-
- err = dma_resv_wait_timeout(xe_vm_resv(vm),
- DMA_RESV_USAGE_BOOKKEEP,
- false, MAX_SCHEDULE_TIMEOUT);
- XE_WARN_ON(err <= 0);
- }
- }
+ /*
+ * Just continue the operation since exec or rebind worker
+ * will take care of rebinding.
+ */
return 0;
}
@@ -1310,6 +1452,42 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
return err;
}
+#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
+static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
+{
+ struct xe_vm *vm = pt_update->vops->vm;
+ struct xe_vma_ops *vops = pt_update->vops;
+ struct xe_vma_op *op;
+ int err;
+
+ err = xe_pt_pre_commit(pt_update);
+ if (err)
+ return err;
+
+ xe_svm_notifier_lock(vm);
+
+ list_for_each_entry(op, &vops->list, link) {
+ struct xe_svm_range *range = op->map_range.range;
+
+ if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE)
+ continue;
+
+ xe_svm_range_debug(range, "PRE-COMMIT");
+
+ xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
+ xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE);
+
+ if (!xe_svm_range_pages_valid(range)) {
+ xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
+ xe_svm_notifier_unlock(vm);
+ return -EAGAIN;
+ }
+ }
+
+ return 0;
+}
+#endif
+
struct invalidation_fence {
struct xe_gt_tlb_invalidation_fence base;
struct xe_gt *gt;
@@ -1333,8 +1511,7 @@ static void invalidation_fence_cb(struct dma_fence *fence,
queue_work(system_wq, &ifence->work);
} else {
ifence->base.base.error = ifence->fence->error;
- dma_fence_signal(&ifence->base.base);
- dma_fence_put(&ifence->base.base);
+ xe_gt_tlb_invalidation_fence_signal(&ifence->base);
}
dma_fence_put(ifence->fence);
}
@@ -1496,7 +1673,9 @@ static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = {
* xe_pt_stage_unbind() - Build page-table update structures for an unbind
* operation
* @tile: The tile we're unbinding for.
+ * @vm: The vm
* @vma: The vma we're unbinding.
+ * @range: The range we're unbinding.
* @entries: Caller-provided storage for the update structures.
*
* Builds page-table update structures for an unbind operation. The function
@@ -1506,24 +1685,30 @@ static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = {
*
* Return: The number of entries used.
*/
-static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma,
+static unsigned int xe_pt_stage_unbind(struct xe_tile *tile,
+ struct xe_vm *vm,
+ struct xe_vma *vma,
+ struct xe_svm_range *range,
struct xe_vm_pgtable_update *entries)
{
+ u64 start = range ? range->base.itree.start : xe_vma_start(vma);
+ u64 end = range ? range->base.itree.last + 1 : xe_vma_end(vma);
struct xe_pt_stage_unbind_walk xe_walk = {
.base = {
.ops = &xe_pt_stage_unbind_ops,
.shifts = xe_normal_pt_shifts,
.max_level = XE_PT_HIGHEST_LEVEL,
+ .staging = true,
},
.tile = tile,
- .modified_start = xe_vma_start(vma),
- .modified_end = xe_vma_end(vma),
+ .modified_start = start,
+ .modified_end = end,
.wupd.entries = entries,
};
- struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
+ struct xe_pt *pt = vm->pt_root[tile->id];
- (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma),
- xe_vma_end(vma), &xe_walk.base);
+ (void)xe_pt_walk_shared(&pt->base, pt->level, start, end,
+ &xe_walk.base);
return xe_walk.wupd.num_used_entries;
}
@@ -1555,7 +1740,7 @@ static void xe_pt_abort_unbind(struct xe_vma *vma,
{
int i, j;
- xe_pt_commit_locks_assert(vma);
+ xe_pt_commit_prepare_locks_assert(vma);
for (i = num_entries - 1; i >= 0; --i) {
struct xe_vm_pgtable_update *entry = &entries[i];
@@ -1568,7 +1753,7 @@ static void xe_pt_abort_unbind(struct xe_vma *vma,
continue;
for (j = entry->ofs; j < entry->ofs + entry->qwords; j++)
- pt_dir->children[j] =
+ pt_dir->staging[j] =
entries[i].pt_entries[j - entry->ofs].pt ?
&entries[i].pt_entries[j - entry->ofs].pt->base : NULL;
}
@@ -1581,7 +1766,7 @@ xe_pt_commit_prepare_unbind(struct xe_vma *vma,
{
int i, j;
- xe_pt_commit_locks_assert(vma);
+ xe_pt_commit_prepare_locks_assert(vma);
for (i = 0; i < num_entries; ++i) {
struct xe_vm_pgtable_update *entry = &entries[i];
@@ -1595,20 +1780,20 @@ xe_pt_commit_prepare_unbind(struct xe_vma *vma,
pt_dir = as_xe_pt_dir(pt);
for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) {
entry->pt_entries[j - entry->ofs].pt =
- xe_pt_entry(pt_dir, j);
- pt_dir->children[j] = NULL;
+ xe_pt_entry_staging(pt_dir, j);
+ pt_dir->staging[j] = NULL;
}
}
}
static void
xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops,
- struct xe_vma *vma)
+ u64 start, u64 end)
{
+ u64 last;
u32 current_op = pt_update_ops->current_op;
struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
int i, level = 0;
- u64 start, last;
for (i = 0; i < pt_op->num_entries; i++) {
const struct xe_vm_pgtable_update *entry = &pt_op->entries[i];
@@ -1618,8 +1803,8 @@ xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops,
}
/* Greedy (non-optimal) calculation but simple */
- start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level));
- last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1;
+ start = ALIGN_DOWN(start, 0x1ull << xe_pt_shift(level));
+ last = ALIGN(end, 0x1ull << xe_pt_shift(level)) - 1;
if (start < pt_update_ops->start)
pt_update_ops->start = start;
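To make the greedy expansion above concrete: assuming the usual 4K/512-entry layout where xe_pt_shift(level) == 12 + 9 * level, a walk whose deepest touched level is 1 (2MiB granularity) widens the fence interval as below. The addresses are purely illustrative:

	/* Illustrative only: level 1, so align to 0x1ull << 21 == 2MiB. */
	u64 start = ALIGN_DOWN(0x201000ull, 0x1ull << 21);	/* -> 0x200000 */
	u64 last  = ALIGN(0x403000ull, 0x1ull << 21) - 1;	/* -> 0x5fffff */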
@@ -1640,12 +1825,13 @@ static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma)
static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
- struct xe_vma *vma)
+ struct xe_vma *vma, bool invalidate_on_bind)
{
u32 current_op = pt_update_ops->current_op;
struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
int err;
+ xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));
xe_bo_assert_held(xe_vma_bo(vma));
vm_dbg(&xe_vma_vm(vma)->xe->drm,
@@ -1660,15 +1846,17 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
if (err)
return err;
- err = xe_pt_prepare_bind(tile, vma, pt_op->entries,
- &pt_op->num_entries);
+ err = xe_pt_prepare_bind(tile, vma, NULL, pt_op->entries,
+ &pt_op->num_entries, invalidate_on_bind);
if (!err) {
xe_tile_assert(tile, pt_op->num_entries <=
ARRAY_SIZE(pt_op->entries));
xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
pt_op->num_entries, true);
- xe_pt_update_ops_rfence_interval(pt_update_ops, vma);
+ xe_pt_update_ops_rfence_interval(pt_update_ops,
+ xe_vma_start(vma),
+ xe_vma_end(vma));
++pt_update_ops->current_op;
pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
@@ -1681,11 +1869,11 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
* If !rebind, and scratch enabled VMs, there is a chance the scratch
* PTE is already cached in the TLB so it needs to be invalidated.
* On !LR VMs this is done in the ring ops preceding a batch, but on
- * non-faulting LR, in particular on user-space batch buffer chaining,
- * it needs to be done here.
+ * LR, in particular on user-space batch buffer chaining, it needs to
+ * be done here.
*/
if ((!pt_op->rebind && xe_vm_has_scratch(vm) &&
- xe_vm_in_preempt_fence_mode(vm)))
+ xe_vm_in_lr_mode(vm)))
pt_update_ops->needs_invalidation = true;
else if (pt_op->rebind && !xe_vm_in_lr_mode(vm))
/* We bump also if batch_invalidate_tlb is true */
@@ -1702,6 +1890,48 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
return err;
}
+static int bind_range_prepare(struct xe_vm *vm, struct xe_tile *tile,
+ struct xe_vm_pgtable_update_ops *pt_update_ops,
+ struct xe_vma *vma, struct xe_svm_range *range)
+{
+ u32 current_op = pt_update_ops->current_op;
+ struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
+ int err;
+
+ xe_tile_assert(tile, xe_vma_is_cpu_addr_mirror(vma));
+
+ vm_dbg(&xe_vma_vm(vma)->xe->drm,
+ "Preparing bind, with range [%lx...%lx)\n",
+ range->base.itree.start, range->base.itree.last);
+
+ pt_op->vma = NULL;
+ pt_op->bind = true;
+ pt_op->rebind = BIT(tile->id) & range->tile_present;
+
+ err = xe_pt_prepare_bind(tile, vma, range, pt_op->entries,
+ &pt_op->num_entries, false);
+ if (!err) {
+ xe_tile_assert(tile, pt_op->num_entries <=
+ ARRAY_SIZE(pt_op->entries));
+ xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
+ pt_op->num_entries, true);
+
+ xe_pt_update_ops_rfence_interval(pt_update_ops,
+ range->base.itree.start,
+ range->base.itree.last + 1);
+ ++pt_update_ops->current_op;
+ pt_update_ops->needs_svm_lock = true;
+
+ pt_op->vma = vma;
+ xe_pt_commit_prepare_bind(vma, pt_op->entries,
+ pt_op->num_entries, pt_op->rebind);
+ } else {
+ xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries);
+ }
+
+ return err;
+}
+
static int unbind_op_prepare(struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
struct xe_vma *vma)
@@ -1713,19 +1943,13 @@ static int unbind_op_prepare(struct xe_tile *tile,
if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id)))
return 0;
+ xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));
xe_bo_assert_held(xe_vma_bo(vma));
vm_dbg(&xe_vma_vm(vma)->xe->drm,
"Preparing unbind, with range [%llx...%llx)\n",
xe_vma_start(vma), xe_vma_end(vma) - 1);
- /*
- * Wait for invalidation to complete. Can corrupt internal page table
- * state if an invalidation is running while preparing an unbind.
- */
- if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma)))
- mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier);
-
pt_op->vma = vma;
pt_op->bind = false;
pt_op->rebind = false;
@@ -1734,11 +1958,13 @@ static int unbind_op_prepare(struct xe_tile *tile,
if (err)
return err;
- pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries);
+ pt_op->num_entries = xe_pt_stage_unbind(tile, xe_vma_vm(vma),
+ vma, NULL, pt_op->entries);
xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
pt_op->num_entries, false);
- xe_pt_update_ops_rfence_interval(pt_update_ops, vma);
+ xe_pt_update_ops_rfence_interval(pt_update_ops, xe_vma_start(vma),
+ xe_vma_end(vma));
++pt_update_ops->current_op;
pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
pt_update_ops->needs_invalidation = true;
@@ -1748,6 +1974,42 @@ static int unbind_op_prepare(struct xe_tile *tile,
return 0;
}
+static int unbind_range_prepare(struct xe_vm *vm,
+ struct xe_tile *tile,
+ struct xe_vm_pgtable_update_ops *pt_update_ops,
+ struct xe_svm_range *range)
+{
+ u32 current_op = pt_update_ops->current_op;
+ struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
+
+ if (!(range->tile_present & BIT(tile->id)))
+ return 0;
+
+ vm_dbg(&vm->xe->drm,
+ "Preparing unbind, with range [%lx...%lx)\n",
+ range->base.itree.start, range->base.itree.last);
+
+ pt_op->vma = XE_INVALID_VMA;
+ pt_op->bind = false;
+ pt_op->rebind = false;
+
+ pt_op->num_entries = xe_pt_stage_unbind(tile, vm, NULL, range,
+ pt_op->entries);
+
+ xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
+ pt_op->num_entries, false);
+ xe_pt_update_ops_rfence_interval(pt_update_ops, range->base.itree.start,
+ range->base.itree.last + 1);
+ ++pt_update_ops->current_op;
+ pt_update_ops->needs_svm_lock = true;
+ pt_update_ops->needs_invalidation = true;
+
+ xe_pt_commit_prepare_unbind(XE_INVALID_VMA, pt_op->entries,
+ pt_op->num_entries);
+
+ return 0;
+}
+
static int op_prepare(struct xe_vm *vm,
struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
@@ -1759,36 +2021,69 @@ static int op_prepare(struct xe_vm *vm,
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
- if (!op->map.immediate && xe_vm_in_fault_mode(vm))
+ if ((!op->map.immediate && xe_vm_in_fault_mode(vm) &&
+ !op->map.invalidate_on_bind) ||
+ op->map.is_cpu_addr_mirror)
break;
- err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma);
+ err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma,
+ op->map.invalidate_on_bind);
pt_update_ops->wait_vm_kernel = true;
break;
case DRM_GPUVA_OP_REMAP:
- err = unbind_op_prepare(tile, pt_update_ops,
- gpuva_to_vma(op->base.remap.unmap->va));
+ {
+ struct xe_vma *old = gpuva_to_vma(op->base.remap.unmap->va);
+
+ if (xe_vma_is_cpu_addr_mirror(old))
+ break;
+
+ err = unbind_op_prepare(tile, pt_update_ops, old);
if (!err && op->remap.prev) {
err = bind_op_prepare(vm, tile, pt_update_ops,
- op->remap.prev);
+ op->remap.prev, false);
pt_update_ops->wait_vm_bookkeep = true;
}
if (!err && op->remap.next) {
err = bind_op_prepare(vm, tile, pt_update_ops,
- op->remap.next);
+ op->remap.next, false);
pt_update_ops->wait_vm_bookkeep = true;
}
break;
+ }
case DRM_GPUVA_OP_UNMAP:
- err = unbind_op_prepare(tile, pt_update_ops,
- gpuva_to_vma(op->base.unmap.va));
+ {
+ struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
+
+ if (xe_vma_is_cpu_addr_mirror(vma))
+ break;
+
+ err = unbind_op_prepare(tile, pt_update_ops, vma);
break;
+ }
case DRM_GPUVA_OP_PREFETCH:
- err = bind_op_prepare(vm, tile, pt_update_ops,
- gpuva_to_vma(op->base.prefetch.va));
+ {
+ struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+
+ if (xe_vma_is_cpu_addr_mirror(vma))
+ break;
+
+ err = bind_op_prepare(vm, tile, pt_update_ops, vma, false);
pt_update_ops->wait_vm_kernel = true;
break;
+ }
+ case DRM_GPUVA_OP_DRIVER:
+ if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
+ xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
+
+ err = bind_range_prepare(vm, tile, pt_update_ops,
+ op->map_range.vma,
+ op->map_range.range);
+ } else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) {
+ err = unbind_range_prepare(vm, tile, pt_update_ops,
+ op->unmap_range.range);
+ }
+ break;
default:
drm_warn(&vm->xe->drm, "NOT POSSIBLE");
}
@@ -1851,12 +2146,15 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO);
static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
struct xe_vma *vma, struct dma_fence *fence,
- struct dma_fence *fence2)
+ struct dma_fence *fence2, bool invalidate_on_bind)
{
+ xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));
+
if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) {
dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
pt_update_ops->wait_vm_bookkeep ?
@@ -1870,6 +2168,8 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
}
vma->tile_present |= BIT(tile->id);
vma->tile_staged &= ~BIT(tile->id);
+ if (invalidate_on_bind)
+ vma->tile_invalidated |= BIT(tile->id);
if (xe_vma_is_userptr(vma)) {
lockdep_assert_held_read(&vm->userptr.notifier_lock);
to_userptr_vma(vma)->userptr.initial_bind = true;
@@ -1890,6 +2190,8 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
struct xe_vma *vma, struct dma_fence *fence,
struct dma_fence *fence2)
{
+ xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));
+
if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) {
dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
pt_update_ops->wait_vm_bookkeep ?
@@ -1924,32 +2226,66 @@ static void op_commit(struct xe_vm *vm,
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
- if (!op->map.immediate && xe_vm_in_fault_mode(vm))
+ if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) ||
+ op->map.is_cpu_addr_mirror)
break;
bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence,
- fence2);
+ fence2, op->map.invalidate_on_bind);
break;
case DRM_GPUVA_OP_REMAP:
- unbind_op_commit(vm, tile, pt_update_ops,
- gpuva_to_vma(op->base.remap.unmap->va), fence,
- fence2);
+ {
+ struct xe_vma *old = gpuva_to_vma(op->base.remap.unmap->va);
+
+ if (xe_vma_is_cpu_addr_mirror(old))
+ break;
+
+ unbind_op_commit(vm, tile, pt_update_ops, old, fence, fence2);
if (op->remap.prev)
bind_op_commit(vm, tile, pt_update_ops, op->remap.prev,
- fence, fence2);
+ fence, fence2, false);
if (op->remap.next)
bind_op_commit(vm, tile, pt_update_ops, op->remap.next,
- fence, fence2);
+ fence, fence2, false);
break;
+ }
case DRM_GPUVA_OP_UNMAP:
- unbind_op_commit(vm, tile, pt_update_ops,
- gpuva_to_vma(op->base.unmap.va), fence, fence2);
+ {
+ struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
+
+ if (!xe_vma_is_cpu_addr_mirror(vma))
+ unbind_op_commit(vm, tile, pt_update_ops, vma, fence,
+ fence2);
break;
+ }
case DRM_GPUVA_OP_PREFETCH:
- bind_op_commit(vm, tile, pt_update_ops,
- gpuva_to_vma(op->base.prefetch.va), fence, fence2);
+ {
+ struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+
+ if (!xe_vma_is_cpu_addr_mirror(vma))
+ bind_op_commit(vm, tile, pt_update_ops, vma, fence,
+ fence2, false);
+ break;
+ }
+ case DRM_GPUVA_OP_DRIVER:
+ {
+ /* WRITE_ONCE pairs with READ_ONCE in xe_svm.c */
+
+ if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
+ WRITE_ONCE(op->map_range.range->tile_present,
+ op->map_range.range->tile_present |
+ BIT(tile->id));
+ WRITE_ONCE(op->map_range.range->tile_invalidated,
+ op->map_range.range->tile_invalidated &
+ ~BIT(tile->id));
+ } else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) {
+ WRITE_ONCE(op->unmap_range.range->tile_present,
+ op->unmap_range.range->tile_present &
+ ~BIT(tile->id));
+ }
break;
+ }
default:
drm_warn(&vm->xe->drm, "NOT POSSIBLE");
}
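The WRITE_ONCE publications above pair with READ_ONCE on the consumer side in xe_svm.c. As a purely hypothetical reader-side sketch (the helper name below is made up for illustration), a lockless check of the published tile bits would look like:

/* Hypothetical sketch: READ_ONCE pairs with the WRITE_ONCE stores in op_commit(). */
static bool range_present_on_tile(struct xe_svm_range *range, u8 tile_id)
{
	return READ_ONCE(range->tile_present) & BIT(tile_id);
}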
@@ -1967,6 +2303,16 @@ static const struct xe_migrate_pt_update_ops userptr_migrate_ops = {
.pre_commit = xe_pt_userptr_pre_commit,
};
+#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
+static const struct xe_migrate_pt_update_ops svm_migrate_ops = {
+ .populate = xe_vm_populate_pgtable,
+ .clear = xe_migrate_clear_pgtable_callback,
+ .pre_commit = xe_pt_svm_pre_commit,
+};
+#else
+static const struct xe_migrate_pt_update_ops svm_migrate_ops;
+#endif
+
/**
* xe_pt_update_ops_run() - Run PT update operations
* @tile: Tile of PT update operations
@@ -1992,7 +2338,9 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
struct xe_vma_op *op;
int err = 0, i;
struct xe_migrate_pt_update update = {
- .ops = pt_update_ops->needs_userptr_lock ?
+ .ops = pt_update_ops->needs_svm_lock ?
+ &svm_migrate_ops :
+ pt_update_ops->needs_userptr_lock ?
&userptr_migrate_ops :
&migrate_ops,
.vops = vops,
@@ -2113,6 +2461,8 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
&ifence->base.base, &mfence->base.base);
}
+ if (pt_update_ops->needs_svm_lock)
+ xe_svm_notifier_unlock(vm);
if (pt_update_ops->needs_userptr_lock)
up_read(&vm->userptr.notifier_lock);
@@ -2131,6 +2481,7 @@ kill_vm_tile1:
return ERR_PTR(err);
}
+ALLOW_ERROR_INJECTION(xe_pt_update_ops_run, ERRNO);
/**
* xe_pt_update_ops_fini() - Finish PT update operations