diff options
| author | Thomas Hellström <thomas.hellstrom@linux.intel.com> | 2025-12-19 14:33:20 +0300 |
|---|---|---|
| committer | Thomas Hellström <thomas.hellstrom@linux.intel.com> | 2025-12-23 12:00:49 +0300 |
| commit | 0620837490e0401cb4e9965a1e0c462dbff1c97b (patch) | |
| tree | 7d48d67bdc3a5796b4ebc8d86346f735c1a3fde7 | |
| parent | ec265e1f1cfcce6b03167868cadebf2d831e52b5 (diff) | |
| download | linux-0620837490e0401cb4e9965a1e0c462dbff1c97b.tar.xz | |
drm/xe/svm: Serialize migration to device if racing
Introduce an rw-semaphore to serialize migration to device if
it's likely that migration races with another device migration
of the same CPU address space range.
This is a temporary fix that attempts to mitigate a livelock that
might happen if many devices try to migrate a range at the same
time. It affects only devices using the xe driver.
A longer-term fix would probably involve improvements to the core mm
migration layer.
Suggested-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251219113320.183860-25-thomas.hellstrom@linux.intel.com
| -rw-r--r-- | drivers/gpu/drm/xe/xe_svm.c | 22 |
1 file changed, 20 insertions, 2 deletions
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 84ff99aa3e49..fa2ee2c08f31 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -1593,10 +1593,12 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx, struct drm_pagemap *dpagemap) { + static DECLARE_RWSEM(driver_migrate_lock); struct xe_vm *vm = range_to_vm(&range->base); enum drm_gpusvm_scan_result migration_state; struct xe_device *xe = vm->xe; int err, retries = 1; + bool write_locked = false; xe_assert(range_to_vm(&range->base)->xe, range->base.pages.flags.migrate_devmem); range_debug(range, "ALLOCATE VRAM"); @@ -1615,16 +1617,32 @@ int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *c drm_dbg(&xe->drm, "Request migration to device memory on \"%s\".\n", dpagemap->drm->unique); + err = down_read_interruptible(&driver_migrate_lock); + if (err) + return err; do { err = drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range), xe_svm_range_end(range), range->base.gpusvm->mm, ctx->timeslice_ms); - if (err == -EBUSY && retries) - drm_gpusvm_range_evict(range->base.gpusvm, &range->base); + if (err == -EBUSY && retries) { + if (!write_locked) { + int lock_err; + up_read(&driver_migrate_lock); + lock_err = down_write_killable(&driver_migrate_lock); + if (lock_err) + return lock_err; + write_locked = true; + } + drm_gpusvm_range_evict(range->base.gpusvm, &range->base); + } } while (err == -EBUSY && retries--); + if (write_locked) + up_write(&driver_migrate_lock); + else + up_read(&driver_migrate_lock); return err; } |
