summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_bo.c2
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_gpu_commands.h4
-rw-r--r--drivers/gpu/drm/xe/regs/xe_engine_regs.h4
-rw-r--r--drivers/gpu/drm/xe/regs/xe_lrc_layout.h3
-rw-r--r--drivers/gpu/drm/xe/regs/xe_oa_regs.h3
-rw-r--r--drivers/gpu/drm/xe/tests/xe_migrate.c2
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c22
-rw-r--r--drivers/gpu/drm/xe/xe_bo.h88
-rw-r--r--drivers/gpu/drm/xe/xe_bo_types.h28
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_device_wa_oob.rules2
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.c80
-rw-r--r--drivers/gpu/drm/xe/xe_eu_stall.c40
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c14
-rw-r--r--drivers/gpu/drm/xe/xe_gt.h15
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c8
-rw-r--r--drivers/gpu/drm/xe/xe_gt_types.h7
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads.c5
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture.c6
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c69
-rw-r--r--drivers/gpu/drm/xe/xe_hw_error.c32
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c247
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.h12
-rw-r--r--drivers/gpu/drm/xe/xe_lrc_types.h11
-rw-r--r--drivers/gpu/drm/xe/xe_memirq.c32
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c75
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c40
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c9
-rw-r--r--drivers/gpu/drm/xe/xe_reg_whitelist.c14
-rw-r--r--drivers/gpu/drm/xe/xe_res_cursor.h14
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops.c8
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_migration.c7
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode.c14
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode.h2
-rw-r--r--drivers/gpu/drm/xe/xe_tile_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_trace_lrc.h27
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c64
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h12
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr.c11
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c51
-rw-r--r--drivers/gpu/drm/xe/xe_vm_madvise.c162
-rw-r--r--drivers/gpu/drm/xe/xe_vm_madvise.h2
-rw-r--r--include/drm/intel/pciids.h6
43 files changed, 776 insertions, 483 deletions
diff --git a/drivers/gpu/drm/xe/display/xe_display_bo.c b/drivers/gpu/drm/xe/display/xe_display_bo.c
index dc0d78ff2d79..7fbac223b097 100644
--- a/drivers/gpu/drm/xe/display/xe_display_bo.c
+++ b/drivers/gpu/drm/xe/display/xe_display_bo.c
@@ -138,7 +138,7 @@ bool xe_display_bo_fbdev_prefer_stolen(struct xe_device *xe, unsigned int size)
* important and we should probably use that space with FBC or other
* features.
*/
- return stolen->size >= size * 2;
+ return stolen->size >= (size * 2) >> PAGE_SHIFT;
}
static struct drm_gem_object *xe_display_bo_fbdev_create(struct drm_device *drm, int size)
diff --git a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
index 885fcf211e6d..18d0fde8c98f 100644
--- a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
@@ -20,7 +20,6 @@
#define XY_FAST_COLOR_BLT_CMD (2 << 29 | 0x44 << 22)
#define XY_FAST_COLOR_BLT_DEPTH_32 (2 << 19)
-#define XY_FAST_COLOR_BLT_DW 16
#define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 22)
#define XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK GENMASK(27, 24)
#define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
@@ -31,14 +30,13 @@
#define XY_FAST_COPY_BLT_D1_DST_TILE4 REG_BIT(30)
#define XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK GENMASK(23, 20)
-#define MEM_COPY_CMD (2 << 29 | 0x5a << 22 | 0x8)
+#define MEM_COPY_CMD (2 << 29 | 0x5a << 22)
#define MEM_COPY_PAGE_COPY_MODE REG_BIT(19)
#define MEM_COPY_MATRIX_COPY REG_BIT(17)
#define MEM_COPY_SRC_MOCS_INDEX_MASK GENMASK(31, 28)
#define MEM_COPY_DST_MOCS_INDEX_MASK GENMASK(6, 3)
#define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22)
-#define PVC_MEM_SET_CMD_LEN_DW 7
#define PVC_MEM_SET_MATRIX REG_BIT(17)
#define PVC_MEM_SET_DATA_FIELD GENMASK(31, 24)
/* Bspec lists field as [6:0], but index alone is from [6:1] */
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index c4c879a9e555..94033982e694 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -170,6 +170,10 @@
#define GFX_DISABLE_LEGACY_MODE REG_BIT(3)
#define RING_CSMQDEBUG(base) XE_REG((base) + 0x2b0)
+#define CURRENT_ACTIVE_QUEUE_ID_MASK REG_GENMASK(7, 0)
+
+#define RING_QUEUE_TIMESTAMP(base) XE_REG((base) + 0x4c0)
+#define RING_QUEUE_TIMESTAMP_UDW(base) XE_REG((base) + 0x4c0 + 4)
#define RING_TIMESTAMP(base) XE_REG((base) + 0x358)
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
index b5eff383902c..4ab86fc369fd 100644
--- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -34,6 +34,9 @@
#define CTX_CS_INT_VEC_REG 0x5a
#define CTX_CS_INT_VEC_DATA (CTX_CS_INT_VEC_REG + 1)
+#define CTX_QUEUE_TIMESTAMP (0xd0 + 1)
+#define CTX_QUEUE_TIMESTAMP_UDW (0xd2 + 1)
+
#define INDIRECT_CTX_RING_HEAD (0x02 + 1)
#define INDIRECT_CTX_RING_TAIL (0x04 + 1)
#define INDIRECT_CTX_RING_START (0x06 + 1)
diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
index 04a729e610aa..aa66af7e99fe 100644
--- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
@@ -6,6 +6,9 @@
#ifndef __XE_OA_REGS__
#define __XE_OA_REGS__
+#define SYS_MEM_LAT_MEASURE XE_REG(0x145194)
+#define SYS_MEM_LAT_MEASURE_EN REG_BIT(31)
+
#define RPM_CONFIG1 XE_REG(0xd04)
#define GT_NOA_ENABLE REG_BIT(9)
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 50a97705e0ac..3c1be809be82 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -421,7 +421,7 @@ static struct dma_fence *blt_copy(struct xe_tile *tile,
avail_pts, avail_pts);
/* Add copy commands size here */
- batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
+ batch_size += ((copy_only_ccs) ? 0 : emit_copy_cmd_len(xe)) +
((xe_device_has_flat_ccs(xe) && copy_only_ccs) ? EMIT_COPY_CCS_DW : 0);
bb = xe_bb_new(gt, batch_size, xe->info.has_usm);
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 5ce60d161e09..4c80bac67622 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -586,11 +586,17 @@ static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
kfree(tt);
}
-static bool xe_ttm_resource_visible(struct ttm_resource *mem)
+static bool xe_ttm_resource_visible(struct xe_device *xe, struct ttm_resource *mem)
{
- struct xe_ttm_vram_mgr_resource *vres =
- to_xe_ttm_vram_mgr_resource(mem);
+ struct xe_ttm_vram_mgr_resource *vres;
+ if (mem->mem_type == XE_PL_STOLEN) {
+ struct xe_ttm_stolen_mgr *mgr = xe->mem.stolen_mgr;
+
+ return mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe);
+ }
+
+ vres = to_xe_ttm_vram_mgr_resource(mem);
return vres->used_visible_size == mem->size;
}
@@ -608,7 +614,7 @@ bool xe_bo_is_visible_vram(struct xe_bo *bo)
if (drm_WARN_ON(bo->ttm.base.dev, !xe_bo_is_vram(bo)))
return false;
- return xe_ttm_resource_visible(bo->ttm.resource);
+ return xe_ttm_resource_visible(xe_bo_device(bo), bo->ttm.resource);
}
static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
@@ -624,7 +630,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
case XE_PL_VRAM1: {
struct xe_vram_region *vram = xe_map_resource_to_region(mem);
- if (!xe_ttm_resource_visible(mem))
+ if (!xe_ttm_resource_visible(xe, mem))
return -EINVAL;
mem->bus.offset = mem->start << PAGE_SHIFT;
@@ -884,10 +890,10 @@ void xe_bo_set_purgeable_state(struct xe_bo *bo,
new_state == XE_MADV_PURGEABLE_PURGED);
/* Once purged, always purged - cannot transition out */
- xe_assert(xe, !(bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED &&
+ xe_assert(xe, !(bo->purgeable.state == XE_MADV_PURGEABLE_PURGED &&
new_state != XE_MADV_PURGEABLE_PURGED));
- bo->madv_purgeable = new_state;
+ bo->purgeable.state = new_state;
xe_bo_set_purgeable_shrinker(bo, new_state);
}
@@ -2355,7 +2361,7 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
INIT_LIST_HEAD(&bo->vram_userfault_link);
/* Initialize purge advisory state */
- bo->madv_purgeable = XE_MADV_PURGEABLE_WILLNEED;
+ bo->purgeable.state = XE_MADV_PURGEABLE_WILLNEED;
drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 68dea7d25a6b..6340317f7d2e 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -251,7 +251,7 @@ static inline bool xe_bo_is_protected(const struct xe_bo *bo)
static inline bool xe_bo_is_purged(struct xe_bo *bo)
{
xe_bo_assert_held(bo);
- return bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED;
+ return bo->purgeable.state == XE_MADV_PURGEABLE_PURGED;
}
/**
@@ -268,11 +268,95 @@ static inline bool xe_bo_is_purged(struct xe_bo *bo)
static inline bool xe_bo_madv_is_dontneed(struct xe_bo *bo)
{
xe_bo_assert_held(bo);
- return bo->madv_purgeable == XE_MADV_PURGEABLE_DONTNEED;
+ return bo->purgeable.state == XE_MADV_PURGEABLE_DONTNEED;
}
void xe_bo_set_purgeable_state(struct xe_bo *bo, enum xe_madv_purgeable_state new_state);
+/**
+ * xe_bo_willneed_get_locked() - Acquire a WILLNEED holder on a BO
+ * @bo: Buffer object
+ *
+ * Increments willneed_count and, on a 0->1 transition, promotes the BO
+ * from DONTNEED to WILLNEED. PURGED is terminal and is never modified.
+ *
+ * Caller must hold the BO's dma-resv lock.
+ */
+static inline void xe_bo_willneed_get_locked(struct xe_bo *bo)
+{
+ xe_bo_assert_held(bo);
+
+ /* Imported BOs are owned externally; do not track purgeability. */
+ if (drm_gem_is_imported(&bo->ttm.base))
+ return;
+
+ if (bo->purgeable.willneed_count++ == 0 && xe_bo_madv_is_dontneed(bo))
+ xe_bo_set_purgeable_state(bo, XE_MADV_PURGEABLE_WILLNEED);
+}
+
+/**
+ * xe_bo_willneed_put_locked() - Release a WILLNEED holder on a BO
+ * @bo: Buffer object
+ *
+ * Decrements willneed_count and, on a 1->0 transition, marks the BO
+ * DONTNEED only if it still has VMAs (implying all active VMAs are
+ * DONTNEED). If the last VMA is being removed, preserve the current BO
+ * state to match the previous VMA-walk semantics.
+ *
+ * PURGED is terminal and the BO state is never modified.
+ *
+ * Caller must hold the BO's dma-resv lock.
+ */
+static inline void xe_bo_willneed_put_locked(struct xe_bo *bo)
+{
+ xe_bo_assert_held(bo);
+
+ if (drm_gem_is_imported(&bo->ttm.base))
+ return;
+
+ xe_assert(xe_bo_device(bo), bo->purgeable.willneed_count > 0);
+ if (--bo->purgeable.willneed_count == 0 && bo->purgeable.vma_count > 0 &&
+ !xe_bo_is_purged(bo))
+ xe_bo_set_purgeable_state(bo, XE_MADV_PURGEABLE_DONTNEED);
+}
+
+/**
+ * xe_bo_vma_count_inc_locked() - Account a new VMA on a BO
+ * @bo: Buffer object
+ *
+ * Increments vma_count.
+ *
+ * Caller must hold the BO's dma-resv lock.
+ */
+static inline void xe_bo_vma_count_inc_locked(struct xe_bo *bo)
+{
+ xe_bo_assert_held(bo);
+
+ if (drm_gem_is_imported(&bo->ttm.base))
+ return;
+
+ bo->purgeable.vma_count++;
+}
+
+/**
+ * xe_bo_vma_count_dec_locked() - Account a VMA removal on a BO
+ * @bo: Buffer object
+ *
+ * Decrements vma_count.
+ *
+ * Caller must hold the BO's dma-resv lock.
+ */
+static inline void xe_bo_vma_count_dec_locked(struct xe_bo *bo)
+{
+ xe_bo_assert_held(bo);
+
+ if (drm_gem_is_imported(&bo->ttm.base))
+ return;
+
+ xe_assert(xe_bo_device(bo), bo->purgeable.vma_count > 0);
+ bo->purgeable.vma_count--;
+}
+
static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo)
{
if (likely(bo)) {
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 9c199badd9b2..fcc63ae3f455 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -111,10 +111,32 @@ struct xe_bo {
u64 min_align;
/**
- * @madv_purgeable: user space advise on BO purgeability, protected
- * by BO's dma-resv lock.
+ * @purgeable: Purgeability state and accounting.
+ *
+ * All fields are protected by the BO's dma-resv lock.
*/
- u32 madv_purgeable;
+ struct {
+ /**
+ * @purgeable.state: BO purgeability state
+ * (WILLNEED/DONTNEED/PURGED).
+ */
+ u32 state;
+
+ /**
+ * @purgeable.vma_count: Number of VMAs currently mapping this BO.
+ */
+ u32 vma_count;
+
+ /**
+ * @purgeable.willneed_count: Number of active WILLNEED holders.
+ *
+ * Counts WILLNEED VMAs plus active dma-buf exports for
+ * non-imported BOs. The BO flips to DONTNEED on a 1->0
+ * transition only when VMAs still exist; if the last VMA is
+ * removed, the previous BO state is preserved.
+ */
+ u32 willneed_count;
+ } purgeable;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 89437de3001a..32dd2ffbc796 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -42,6 +42,7 @@ struct xe_ggtt;
struct xe_i2c;
struct xe_pat_ops;
struct xe_pxp;
+struct xe_ttm_stolen_mgr;
struct xe_vram_region;
/**
@@ -276,6 +277,8 @@ struct xe_device {
struct ttm_resource_manager sys_mgr;
/** @mem.shrinker: system memory shrinker. */
struct xe_shrinker *shrinker;
+ /** @mem.stolen_mgr: stolen memory manager. */
+ struct xe_ttm_stolen_mgr *stolen_mgr;
} mem;
/** @sriov: device level virtualization data */
diff --git a/drivers/gpu/drm/xe/xe_device_wa_oob.rules b/drivers/gpu/drm/xe/xe_device_wa_oob.rules
index 92371c490529..d8dc41851425 100644
--- a/drivers/gpu/drm/xe/xe_device_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_device_wa_oob.rules
@@ -5,3 +5,5 @@
14022085890 SUBPLATFORM(BATTLEMAGE, G21)
14026539277 PLATFORM(NOVALAKE_P), PLATFORM_STEP(A0, B0)
14026633728 PLATFORM(CRESCENTISLAND)
+14026746987 PLATFORM(CRESCENTISLAND)
+14026779378 PLATFORM(CRESCENTISLAND)
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index b9828da15897..8a920e58245c 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -193,6 +193,18 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
return 0;
}
+static void xe_dma_buf_release(struct dma_buf *dmabuf)
+{
+ struct drm_gem_object *obj = dmabuf->priv;
+ struct xe_bo *bo = gem_to_xe_bo(obj);
+
+ xe_bo_lock(bo, false);
+ xe_bo_willneed_put_locked(bo);
+ xe_bo_unlock(bo);
+
+ drm_gem_dmabuf_release(dmabuf);
+}
+
static const struct dma_buf_ops xe_dmabuf_ops = {
.attach = xe_dma_buf_attach,
.detach = xe_dma_buf_detach,
@@ -200,7 +212,7 @@ static const struct dma_buf_ops xe_dmabuf_ops = {
.unpin = xe_dma_buf_unpin,
.map_dma_buf = xe_dma_buf_map,
.unmap_dma_buf = xe_dma_buf_unmap,
- .release = drm_gem_dmabuf_release,
+ .release = xe_dma_buf_release,
.begin_cpu_access = xe_dma_buf_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
.vmap = drm_gem_dmabuf_vmap,
@@ -241,33 +253,33 @@ struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags)
ret = -EINVAL;
goto out_unlock;
}
+
+ xe_bo_willneed_get_locked(bo);
xe_bo_unlock(bo);
ret = ttm_bo_setup_export(&bo->ttm, &ctx);
if (ret)
- return ERR_PTR(ret);
+ goto out_put;
buf = drm_gem_prime_export(obj, flags);
- if (!IS_ERR(buf))
- buf->ops = &xe_dmabuf_ops;
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ goto out_put;
+ }
+ buf->ops = &xe_dmabuf_ops;
return buf;
+out_put:
+ xe_bo_lock(bo, false);
+ xe_bo_willneed_put_locked(bo);
out_unlock:
xe_bo_unlock(bo);
return ERR_PTR(ret);
}
-/*
- * Takes ownership of @storage: on success it is transferred to the returned
- * drm_gem_object; on failure it is freed before returning the error.
- * This matches the contract of xe_bo_init_locked() which frees @storage on
- * its error paths, so callers need not (and must not) free @storage after
- * this call.
- */
static struct drm_gem_object *
-xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
- struct dma_buf *dma_buf)
+xe_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
{
struct dma_resv *resv = dma_buf->resv;
struct xe_device *xe = to_xe_device(dev);
@@ -278,10 +290,8 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
int ret = 0;
dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
- if (!dummy_obj) {
- xe_bo_free(storage);
+ if (!dummy_obj)
return ERR_PTR(-ENOMEM);
- }
dummy_obj->resv = resv;
xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) {
@@ -290,8 +300,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
if (ret)
break;
- /* xe_bo_init_locked() frees storage on error */
- bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
+ bo = xe_bo_init_locked(xe, NULL, NULL, resv, NULL, dma_buf->size,
0, /* Will require 1way or 2way for vm_bind */
ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec);
drm_exec_retry_on_contention(&exec);
@@ -342,7 +351,6 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
const struct dma_buf_attach_ops *attach_ops;
struct dma_buf_attachment *attach;
struct drm_gem_object *obj;
- struct xe_bo *bo;
if (dma_buf->ops == &xe_dmabuf_ops) {
obj = dma_buf->priv;
@@ -358,13 +366,15 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
}
/*
- * Don't publish the bo until we have a valid attachment, and a
- * valid attachment needs the bo address. So pre-create a bo before
- * creating the attachment and publish.
+ * This needs to happen before the attach, since it will create a new
+ * attachment for this, and add it to the list of attachments, at which
+ * point it is globally visible, and at any point the export side can
+ * call into the invalidate_mappings callback, which requires a working
+ * object.
*/
- bo = xe_bo_alloc();
- if (IS_ERR(bo))
- return ERR_CAST(bo);
+ obj = xe_dma_buf_create_obj(dev, dma_buf);
+ if (IS_ERR(obj))
+ return obj;
attach_ops = &xe_dma_buf_attach_ops;
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
@@ -372,29 +382,15 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
attach_ops = test->attach_ops;
#endif
- attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, &bo->ttm.base);
+ attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, obj);
if (IS_ERR(attach)) {
- obj = ERR_CAST(attach);
- goto out_err;
+ xe_bo_put(gem_to_xe_bo(obj));
+ return ERR_CAST(attach);
}
- /*
- * xe_dma_buf_init_obj() takes ownership of bo on both success
- * and failure, so we must not touch bo after this call.
- */
- obj = xe_dma_buf_init_obj(dev, bo, dma_buf);
- if (IS_ERR(obj)) {
- dma_buf_detach(dma_buf, attach);
- return obj;
- }
get_dma_buf(dma_buf);
obj->import_attach = attach;
return obj;
-
-out_err:
- xe_bo_free(bo);
-
- return obj;
}
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c
index dddcdd0bb7a3..297be3c42b20 100644
--- a/drivers/gpu/drm/xe/xe_eu_stall.c
+++ b/drivers/gpu/drm/xe/xe_eu_stall.c
@@ -44,6 +44,7 @@ struct per_xecore_buf {
struct xe_eu_stall_data_stream {
bool pollin;
bool enabled;
+ bool reset_detected;
int wait_num_reports;
int sampling_rate_mult;
wait_queue_head_t poll_wq;
@@ -428,9 +429,20 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream)
set_bit(xecore, stream->data_drop.mask);
xecore_buf->write = write_ptr;
}
+ /* If a GT or engine reset happens during EU stall sampling,
+ * all EU stall registers get reset to 0 and the cached values of
+ * the EU stall data buffers' read pointers are out of sync with
+ * the register values. This causes invalid data to be returned
+ * from read(). To prevent this, check the value of an EU stall base
+ * register. If it is zero, there has been a reset.
+ */
+ if (unlikely(!xe_gt_mcr_unicast_read_any(gt, XEHPC_EUSTALL_BASE)))
+ stream->reset_detected = true;
+
+ stream->pollin = min_data_present || stream->reset_detected;
mutex_unlock(&stream->xecore_buf_lock);
- return min_data_present;
+ return stream->pollin;
}
static void clear_dropped_eviction_line_bit(struct xe_gt *gt, u16 group, u16 instance)
@@ -544,6 +556,15 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st
int ret = 0;
mutex_lock(&stream->xecore_buf_lock);
+ /* If EU stall registers got reset due to a GT/engine reset,
+ * continuing with the read() will return invalid data to
+ * the user space. Just return -ENODEV instead.
+ */
+ if (unlikely(stream->reset_detected)) {
+ xe_gt_dbg(gt, "EU stall base register has been reset\n");
+ mutex_unlock(&stream->xecore_buf_lock);
+ return -ENODEV;
+ }
if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) {
if (!stream->data_drop.reported_to_user) {
stream->data_drop.reported_to_user = true;
@@ -554,7 +575,6 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st
}
stream->data_drop.reported_to_user = false;
}
-
for_each_dss_steering(xecore, gt, group, instance) {
ret = xe_eu_stall_data_buf_read(stream, buf, count, &total_size,
gt, group, instance, xecore);
@@ -609,7 +629,8 @@ static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf,
* We don't want to block the next read() when there is data in the buffer
* now, but couldn't be accommodated in the small user buffer.
*/
- stream->pollin = false;
+ if (!stream->reset_detected)
+ stream->pollin = false;
return ret;
}
@@ -692,6 +713,7 @@ static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream)
xecore_buf->write = write_ptr;
xecore_buf->read = write_ptr;
}
+ stream->reset_detected = false;
stream->data_drop.reported_to_user = false;
bitmap_zero(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS);
@@ -717,13 +739,13 @@ static void eu_stall_data_buf_poll_work_fn(struct work_struct *work)
container_of(work, typeof(*stream), buf_poll_work.work);
struct xe_gt *gt = stream->gt;
- if (eu_stall_data_buf_poll(stream)) {
- stream->pollin = true;
+ if (eu_stall_data_buf_poll(stream))
wake_up(&stream->poll_wq);
- }
- queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
- &stream->buf_poll_work,
- msecs_to_jiffies(POLL_PERIOD_MS));
+
+ if (!stream->reset_detected)
+ queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
+ &stream->buf_poll_work,
+ msecs_to_jiffies(POLL_PERIOD_MS));
}
static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 071b8c41df43..1b5ca3ce578a 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -275,8 +275,12 @@ static void xe_exec_queue_set_lrc(struct xe_exec_queue *q, struct xe_lrc *lrc, u
{
xe_assert(gt_to_xe(q->gt), idx < q->width);
- scoped_guard(spinlock, &q->lrc_lookup_lock)
+ scoped_guard(spinlock, &q->lrc_lookup_lock) {
q->lrc[idx] = lrc;
+ if (xe_exec_queue_is_multi_queue(q))
+ q->lrc[idx]->multi_queue.primary_lrc =
+ q->multi_queue.group->primary->lrc[0];
+ }
}
/**
@@ -852,11 +856,6 @@ static int xe_exec_queue_group_init(struct xe_device *xe, struct xe_exec_queue *
return 0;
}
-static inline bool xe_exec_queue_supports_multi_queue(struct xe_exec_queue *q)
-{
- return q->gt->info.multi_queue_engine_class_mask & BIT(q->class);
-}
-
static int xe_exec_queue_group_validate(struct xe_device *xe, struct xe_exec_queue *q,
u32 primary_id)
{
@@ -912,6 +911,7 @@ static int xe_exec_queue_group_add(struct xe_device *xe, struct xe_exec_queue *q
}
q->multi_queue.pos = pos;
+ q->lrc[0]->multi_queue.pos = pos;
return 0;
}
@@ -931,7 +931,7 @@ static void xe_exec_queue_group_delete(struct xe_device *xe, struct xe_exec_queu
static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue *q,
u64 value)
{
- if (XE_IOCTL_DBG(xe, !xe_exec_queue_supports_multi_queue(q)))
+ if (XE_IOCTL_DBG(xe, !xe_gt_supports_multi_queue(q->gt, q->class)))
return -ENODEV;
if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe)))
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index de7e47763411..4150aa594f05 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -155,4 +155,19 @@ static inline bool xe_gt_recovery_pending(struct xe_gt *gt)
xe_gt_sriov_vf_recovery_pending(gt);
}
+/**
+ * xe_gt_supports_multi_queue() - Check if gt supports multi queue for the
+ * specified engine class.
+ *
+ * @gt: the GT object
+ * @class: hwe class type
+ *
+ * Return: true if the hw engine class supports multi queue, else false
+ */
+static inline bool xe_gt_supports_multi_queue(const struct xe_gt *gt,
+ enum xe_engine_class class)
+{
+ return gt->info.multi_queue_engine_class_mask & BIT(class);
+}
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
index 87a164efcc33..01fe03b9efe8 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
@@ -385,10 +385,10 @@ static int pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid, void *buf
if (xe_gt_is_media_type(gt))
for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++)
- regs[n] = xe_mmio_read32(&gt->mmio, MED_VF_SW_FLAG(n));
+ regs[n] = xe_mmio_read32(&mmio, MED_VF_SW_FLAG(n));
else
for (n = 0; n < VF_SW_FLAG_COUNT; n++)
- regs[n] = xe_mmio_read32(&gt->mmio, VF_SW_FLAG(n));
+ regs[n] = xe_mmio_read32(&mmio, VF_SW_FLAG(n));
return 0;
}
@@ -407,10 +407,10 @@ static int pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid,
if (xe_gt_is_media_type(gt))
for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++)
- xe_mmio_write32(&gt->mmio, MED_VF_SW_FLAG(n), regs[n]);
+ xe_mmio_write32(&mmio, MED_VF_SW_FLAG(n), regs[n]);
else
for (n = 0; n < VF_SW_FLAG_COUNT; n++)
- xe_mmio_write32(&gt->mmio, VF_SW_FLAG(n), regs[n]);
+ xe_mmio_write32(&mmio, VF_SW_FLAG(n), regs[n]);
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 7351aadd238e..e5588c88800a 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -145,6 +145,13 @@ struct xe_gt {
/** @info.has_indirect_ring_state: GT has indirect ring state support */
u8 has_indirect_ring_state:1;
/**
+ * @info.has_xe2_blt_instructions: GT supports Xe2-style MEM_SET
+ * and MEM_COPY blitter functionality. Note that despite the
+ * name, some Xe1 platforms may also support this "Xe2-style"
+ * feature.
+ */
+ u8 has_xe2_blt_instructions:1;
+ /**
* @info.num_geometry_xecore_fuse_regs: Number of 32b-bit fuse
* registers the geometry XeCore mask spans.
*/
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index ce651da6f318..b9bca6084a4f 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -515,12 +515,9 @@ static void guc_golden_lrc_init(struct xe_guc_ads *ads)
* that starts after the execlists LRC registers. This is
* required to allow the GuC to restore just the engine state
* when a watchdog reset occurs.
- * We calculate the engine state size by removing the size of
- * what comes before it in the context image (which is identical
- * on all engines).
*/
ads_blob_write(ads, ads.eng_state_size[guc_class],
- real_size - xe_lrc_skip_size(xe));
+ xe_lrc_engine_state_size(gt, class));
ads_blob_write(ads, ads.golden_context_lrca[guc_class],
addr_ggtt);
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
index bc49e40165a3..21f7caf9ea08 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture.c
+++ b/drivers/gpu/drm/xe/xe_guc_capture.c
@@ -1841,12 +1841,6 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm
str_yes_no(snapshot->kernel_reserved));
for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
- /*
- * FIXME: During devcoredump print we should avoid accessing the
- * driver pointers for gt or engine. Printing should be done only
- * using the snapshot captured. Here we are accessing the gt
- * pointer. It should be fixed.
- */
list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
capture_class, false);
snapshot_print_by_list_order(snapshot, p, type, list);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index b1222b42174c..4171eff4e8ad 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -852,10 +852,27 @@ static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1);
}
-static void __register_exec_queue_group(struct xe_guc *guc,
- struct xe_exec_queue *q,
+static void guc_exec_queue_send_cgp_sync(struct xe_exec_queue *q)
+{
+#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2)
+ struct xe_guc *guc = exec_queue_to_guc(q);
+ struct xe_exec_queue_group *group = q->multi_queue.group;
+ u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
+ int len = 0;
+
+ action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
+ action[len++] = group->primary->guc->id;
+
+ xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
+#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE
+
+ xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
+}
+
+static void __register_exec_queue_group(struct xe_exec_queue *q,
struct guc_ctxt_registration_info *info)
{
+ struct xe_guc *guc = exec_queue_to_guc(q);
#define MAX_MULTI_QUEUE_REG_SIZE (8)
u32 action[MAX_MULTI_QUEUE_REG_SIZE];
int len = 0;
@@ -880,29 +897,6 @@ static void __register_exec_queue_group(struct xe_guc *guc,
xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}
-static void xe_guc_exec_queue_group_add(struct xe_guc *guc,
- struct xe_exec_queue *q)
-{
-#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2)
- u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
- int len = 0;
-
- xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q));
-
- action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
- action[len++] = q->multi_queue.group->primary->guc->id;
-
- xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
-#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE
-
- /*
- * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC do expect a
- * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
- * from guc.
- */
- xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
-}
-
static void __register_mlrc_exec_queue(struct xe_guc *guc,
struct xe_exec_queue *q,
struct guc_ctxt_registration_info *info)
@@ -1028,7 +1022,7 @@ static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
set_exec_queue_registered(q);
trace_xe_exec_queue_register(q);
if (xe_exec_queue_is_multi_queue_primary(q))
- __register_exec_queue_group(guc, q, &info);
+ __register_exec_queue_group(q, &info);
else if (xe_exec_queue_is_parallel(q))
__register_mlrc_exec_queue(guc, q, &info);
else if (!xe_exec_queue_is_multi_queue_secondary(q))
@@ -1038,7 +1032,7 @@ static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
init_policies(guc, q);
if (xe_exec_queue_is_multi_queue_secondary(q))
- xe_guc_exec_queue_group_add(guc, q);
+ guc_exec_queue_send_cgp_sync(q);
}
static u32 wq_space_until_wrap(struct xe_exec_queue *q)
@@ -1216,10 +1210,8 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
if (xe_exec_queue_is_multi_queue_secondary(q)) {
struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
- if (exec_queue_killed_or_banned_or_wedged(primary)) {
- killed_or_banned_or_wedged = true;
+ if (exec_queue_killed_or_banned_or_wedged(primary))
goto run_job_out;
- }
if (!exec_queue_registered(primary))
register_exec_queue(primary, GUC_CONTEXT_NORMAL);
@@ -1889,21 +1881,8 @@ static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sche
{
struct xe_exec_queue *q = msg->private_data;
- if (guc_exec_queue_allowed_to_change_state(q)) {
-#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2)
- struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_exec_queue_group *group = q->multi_queue.group;
- u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
- int len = 0;
-
- action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
- action[len++] = group->primary->guc->id;
-
- xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
-#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE
-
- xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
- }
+ if (guc_exec_queue_allowed_to_change_state(q))
+ guc_exec_queue_send_cgp_sync(q);
kfree(msg);
}
diff --git a/drivers/gpu/drm/xe/xe_hw_error.c b/drivers/gpu/drm/xe/xe_hw_error.c
index 2a31b430570e..5135e8e4093f 100644
--- a/drivers/gpu/drm/xe/xe_hw_error.c
+++ b/drivers/gpu/drm/xe/xe_hw_error.c
@@ -36,11 +36,6 @@ static const char * const hec_uncorrected_fw_errors[] = {
"Data Corruption"
};
-static const unsigned long xe_hw_error_map[] = {
- [XE_GT_ERROR] = DRM_XE_RAS_ERR_COMP_CORE_COMPUTE,
- [XE_SOC_ERROR] = DRM_XE_RAS_ERR_COMP_SOC_INTERNAL,
-};
-
enum gt_vector_regs {
ERR_STAT_GT_VECTOR0 = 0,
ERR_STAT_GT_VECTOR1,
@@ -65,6 +60,18 @@ static enum drm_xe_ras_error_severity hw_err_to_severity(const enum hardware_err
return DRM_XE_RAS_ERR_SEV_UNCORRECTABLE;
}
+static inline u32 err_src_to_id(u32 err_bit)
+{
+ switch (err_bit) {
+ case XE_GT_ERROR:
+ return DRM_XE_RAS_ERR_COMP_CORE_COMPUTE;
+ case XE_SOC_ERROR:
+ return DRM_XE_RAS_ERR_COMP_SOC_INTERNAL;
+ default:
+ return 0;
+ }
+}
+
static const char * const pvc_master_global_err_reg[] = {
[0 ... 1] = "Undefined",
[2] = "HBM SS0: Channel0",
@@ -169,11 +176,8 @@ static void csc_hw_error_work(struct work_struct *work)
{
struct xe_tile *tile = container_of(work, typeof(*tile), csc_hw_error_work);
struct xe_device *xe = tile_to_xe(tile);
- int ret;
- ret = xe_survivability_mode_runtime_enable(xe);
- if (ret)
- drm_err(&xe->drm, "Failed to enable runtime survivability mode\n");
+ xe_survivability_mode_runtime_enable(xe);
}
static void csc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err)
@@ -459,14 +463,8 @@ static void hw_error_source_handler(struct xe_tile *tile, const enum hardware_er
const char *name;
u32 error_id;
- /* Check error bit is within bounds */
- if (err_bit >= ARRAY_SIZE(xe_hw_error_map))
- break;
-
- error_id = xe_hw_error_map[err_bit];
-
- /* Check error component is within max */
- if (!error_id || error_id >= DRM_XE_RAS_ERR_COMP_MAX)
+ error_id = err_src_to_id(err_bit);
+ if (!error_id)
continue;
name = info[error_id].name;
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 9db914584347..a4292a11391d 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -21,8 +21,10 @@
#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_drm_client.h"
+#include "xe_exec_queue.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
+#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
@@ -727,9 +729,16 @@ size_t xe_lrc_reg_size(struct xe_device *xe)
return 80 * sizeof(u32);
}
-size_t xe_lrc_skip_size(struct xe_device *xe)
+/**
+ * xe_lrc_engine_state_size() - Get size of the engine state within LRC
+ * @gt: the &xe_gt struct instance
+ * @class: Hardware engine class
+ *
+ * Returns: Size of the engine state
+ */
+size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class)
{
- return LRC_PPHWSP_SIZE + xe_lrc_reg_size(xe);
+ return xe_gt_lrc_hang_replay_size(gt, class) - xe_lrc_reg_size(gt_to_xe(gt));
}
static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
@@ -769,6 +778,16 @@ static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc)
return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32);
}
+static u32 __xe_lrc_queue_timestamp_offset(struct xe_lrc *lrc)
+{
+ return __xe_lrc_regs_offset(lrc) + CTX_QUEUE_TIMESTAMP * sizeof(u32);
+}
+
+static u32 __xe_lrc_queue_timestamp_udw_offset(struct xe_lrc *lrc)
+{
+ return __xe_lrc_regs_offset(lrc) + CTX_QUEUE_TIMESTAMP_UDW * sizeof(u32);
+}
+
static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
u32 offset = xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE -
@@ -818,6 +837,8 @@ DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw, lrc->bo)
DECL_MAP_ADDR_HELPERS(parallel, lrc->bo)
DECL_MAP_ADDR_HELPERS(indirect_ring, lrc->bo)
DECL_MAP_ADDR_HELPERS(engine_id, lrc->bo)
+DECL_MAP_ADDR_HELPERS(queue_timestamp, lrc->bo)
+DECL_MAP_ADDR_HELPERS(queue_timestamp_udw, lrc->bo)
#undef DECL_MAP_ADDR_HELPERS
@@ -867,6 +888,29 @@ static u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
}
/**
+ * xe_lrc_queue_timestamp() - Read queue timestamp value
+ * @lrc: Pointer to the lrc.
+ *
+ * Returns: queue timestamp value
+ */
+static u64 xe_lrc_queue_timestamp(struct xe_lrc *lrc)
+{
+ struct xe_device *xe = lrc_to_xe(lrc);
+ struct iosys_map map;
+ u32 ldw, udw = 0;
+
+ xe_assert(xe, xe_lrc_is_multi_queue(lrc));
+
+ map = __xe_lrc_queue_timestamp_map(lrc);
+ ldw = xe_map_read32(xe, &map);
+
+ map = __xe_lrc_queue_timestamp_udw_map(lrc);
+ udw = xe_map_read32(xe, &map);
+
+ return (u64)udw << 32 | ldw;
+}
+
+/**
* xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
* @lrc: Pointer to the lrc.
*
@@ -1530,6 +1574,18 @@ static int xe_lrc_ctx_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct
if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0);
+ /*
+ * Note: It's possible that this LRC may belong to an exec_queue that is
+ * not part of a multi-queue group. That said, it doesn't hurt to set
+ * this field anyways since any class that supports multi-queue will
+ * have these LRC fields defined.
+ */
+ if (xe_gt_supports_multi_queue(gt, hwe->class)) {
+ lrc->queue_timestamp = 0;
+ xe_lrc_write_ctx_reg(lrc, CTX_QUEUE_TIMESTAMP, 0);
+ xe_lrc_write_ctx_reg(lrc, CTX_QUEUE_TIMESTAMP_UDW, 0);
+ }
+
if (xe->info.has_asid && vm)
xe_lrc_write_ctx_reg(lrc, CTX_ASID, vm->usm.asid);
@@ -2455,7 +2511,17 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
snapshot->replay_offset = 0;
snapshot->replay_size = lrc->replay_size;
snapshot->lrc_snapshot = NULL;
- snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
+ snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
+ snapshot->ctx_timestamp_ms =
+ xe_gt_clock_interval_to_ms(lrc->gt, xe_lrc_ctx_timestamp(lrc));
+ if (xe_lrc_is_multi_queue(lrc)) {
+ snapshot->queue_timestamp = xe_lrc_queue_timestamp(lrc);
+ snapshot->queue_timestamp_ms =
+ xe_gt_clock_interval_to_ms(lrc->gt, snapshot->queue_timestamp);
+ } else {
+ snapshot->queue_timestamp = 0;
+ snapshot->queue_timestamp_ms = 0;
+ }
snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
return snapshot;
}
@@ -2508,7 +2574,10 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer
drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start);
drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
- drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
+ drm_printf(p, "\tTimestamp: 0x%016llx\n", snapshot->ctx_timestamp);
+ drm_printf(p, "\tTimestamp ms: %llu\n", snapshot->ctx_timestamp_ms);
+ drm_printf(p, "\tQueue Timestamp: 0x%016llx\n", snapshot->queue_timestamp);
+ drm_printf(p, "\tQueue Timestamp ms: %llu\n", snapshot->queue_timestamp_ms);
drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);
if (!snapshot->lrc_snapshot)
@@ -2549,17 +2618,27 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
kfree(snapshot);
}
-static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts)
+static struct xe_hw_engine *engine_id_to_hwe(struct xe_gt *gt, u32 engine_id)
{
u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id);
u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id);
+ struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, class, instance, false);
+
+ if (xe_gt_WARN_ONCE(gt, !hwe || xe_hw_engine_is_reserved(hwe),
+ "Unexpected engine class:instance %d:%d for utilization\n",
+ class, instance))
+ return NULL;
+
+ return hwe;
+}
+
+static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts)
+{
struct xe_hw_engine *hwe;
u64 val;
- hwe = xe_gt_hw_engine(lrc->gt, class, instance, false);
- if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe),
- "Unexpected engine class:instance %d:%d for context utilization\n",
- class, instance))
+ hwe = engine_id_to_hwe(lrc->gt, engine_id);
+ if (!hwe)
return -1;
if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
@@ -2574,66 +2653,136 @@ static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts)
return 0;
}
-/**
- * xe_lrc_timestamp() - Current ctx timestamp
- * @lrc: Pointer to the lrc.
- *
- * Return latest ctx timestamp. With support for active contexts, the
- * calculation may be slightly racy, so follow a read-again logic to ensure that
- * the context is still active before returning the right timestamp.
- *
- * Returns: New ctx timestamp value
- */
-u64 xe_lrc_timestamp(struct xe_lrc *lrc)
+static u64 get_queue_timestamp(struct xe_hw_engine *hwe)
{
- u64 lrc_ts, reg_ts, new_ts = lrc->ctx_timestamp;
- u32 engine_id;
+ return xe_mmio_read64_2x32(&hwe->gt->mmio,
+ RING_QUEUE_TIMESTAMP(hwe->mmio_base));
+}
- lrc_ts = xe_lrc_ctx_timestamp(lrc);
- /* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */
- if (IS_SRIOV_VF(lrc_to_xe(lrc))) {
- new_ts = lrc_ts;
- goto done;
- }
+static u32 get_multi_queue_active_queue_id(struct xe_hw_engine *hwe)
+{
+ u32 val = xe_mmio_read32(&hwe->gt->mmio,
+ RING_CSMQDEBUG(hwe->mmio_base));
- if (lrc_ts == CONTEXT_ACTIVE) {
- engine_id = xe_lrc_engine_id(lrc);
- if (!get_ctx_timestamp(lrc, engine_id, &reg_ts))
- new_ts = reg_ts;
+ return REG_FIELD_GET(CURRENT_ACTIVE_QUEUE_ID_MASK, val);
+}
- /* read lrc again to ensure context is still active */
- lrc_ts = xe_lrc_ctx_timestamp(lrc);
- }
+static bool context_active(struct xe_lrc *lrc)
+{
+ return xe_lrc_ctx_timestamp(lrc) == CONTEXT_ACTIVE;
+}
+
+static u64 xe_lrc_multi_queue_timestamp(struct xe_lrc *lrc)
+{
+ struct xe_device *xe = lrc_to_xe(lrc);
+ struct xe_lrc *primary_lrc = lrc->multi_queue.primary_lrc;
+ struct xe_hw_engine *hwe;
+ u64 reg_queue_ts = lrc->queue_timestamp;
+
+ if (IS_SRIOV_VF(xe))
+ return xe_lrc_queue_timestamp(lrc);
+
+ xe_assert(xe, primary_lrc);
+
+ /* WA BB populates CONTEXT_ACTIVE cookie for primary context only */
+ if (!context_active(primary_lrc))
+ return xe_lrc_queue_timestamp(lrc);
+
+ /* WA BB populates engine id in PPHWSP of primary context only */
+ hwe = engine_id_to_hwe(primary_lrc->gt, xe_lrc_engine_id(primary_lrc));
+ if (!hwe)
+ return xe_lrc_queue_timestamp(lrc);
+
+ if (get_multi_queue_active_queue_id(hwe) != lrc->multi_queue.pos)
+ return xe_lrc_queue_timestamp(lrc);
+
+ /* queue is active, so store the queue timestamp register */
+ reg_queue_ts = get_queue_timestamp(hwe);
+
+ /* double check queue and primary queue are both still active */
+ if (get_multi_queue_active_queue_id(hwe) != lrc->multi_queue.pos ||
+ !context_active(primary_lrc))
+ return xe_lrc_queue_timestamp(lrc);
+
+ return reg_queue_ts;
+}
+
+static u64 xe_lrc_update_multi_queue_timestamp(struct xe_lrc *lrc, u64 *old_ts)
+{
+ *old_ts = lrc->queue_timestamp;
+ lrc->queue_timestamp = xe_lrc_multi_queue_timestamp(lrc);
+
+ trace_xe_lrc_update_queue_timestamp(lrc, *old_ts);
+
+ return lrc->queue_timestamp;
+}
+
+static u64 xe_lrc_context_timestamp(struct xe_lrc *lrc)
+{
+ u64 reg_ts, new_ts = lrc->ctx_timestamp;
+
+ /* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */
+ if (IS_SRIOV_VF(lrc_to_xe(lrc)))
+ return xe_lrc_ctx_timestamp(lrc);
+
+ if (context_active(lrc) &&
+ !get_ctx_timestamp(lrc, xe_lrc_engine_id(lrc), &reg_ts))
+ new_ts = reg_ts;
/*
- * If context switched out, just use the lrc_ts. Note that this needs to
- * be a separate if condition.
+ * If context switched out while we were here, just return the latest
+ * LRC CTX TIMESTAMP value.
*/
- if (lrc_ts != CONTEXT_ACTIVE)
- new_ts = lrc_ts;
+ if (!context_active(lrc))
+ return xe_lrc_ctx_timestamp(lrc);
-done:
return new_ts;
}
+static u64 xe_lrc_update_context_timestamp(struct xe_lrc *lrc, u64 *old_ts)
+{
+ *old_ts = lrc->ctx_timestamp;
+ lrc->ctx_timestamp = xe_lrc_context_timestamp(lrc);
+
+ trace_xe_lrc_update_timestamp(lrc, *old_ts);
+
+ return lrc->ctx_timestamp;
+}
+
+/**
+ * xe_lrc_timestamp() - Current lrc timestamp
+ * @lrc: Pointer to the lrc.
+ *
+ * Return latest lrc timestamp. With support for active contexts/queues, the
+ * calculation may be slightly racy, so follow a read-again logic to ensure that
+ * the context/queue is still active before returning the right timestamp.
+ *
+ * Returns: New lrc timestamp value
+ */
+u64 xe_lrc_timestamp(struct xe_lrc *lrc)
+{
+ if (xe_lrc_is_multi_queue(lrc))
+ return xe_lrc_multi_queue_timestamp(lrc);
+ else
+ return xe_lrc_context_timestamp(lrc);
+}
+
/**
- * xe_lrc_update_timestamp() - Update ctx timestamp
+ * xe_lrc_update_timestamp() - Update lrc timestamp
* @lrc: Pointer to the lrc.
* @old_ts: Old timestamp value
*
- * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and
+ * Populate @old_ts with current saved lrc timestamp, read new lrc timestamp and
* update saved value.
*
- * Returns: New ctx timestamp value
+ * Returns: New lrc timestamp value
*/
u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts)
{
- *old_ts = lrc->ctx_timestamp;
- lrc->ctx_timestamp = xe_lrc_timestamp(lrc);
-
- trace_xe_lrc_update_timestamp(lrc, *old_ts);
-
- return lrc->ctx_timestamp;
+ if (xe_lrc_is_multi_queue(lrc))
+ return xe_lrc_update_multi_queue_timestamp(lrc, old_ts);
+ else
+ return xe_lrc_update_context_timestamp(lrc, old_ts);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index e7c975f9e2d9..0a3a611391ee 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -37,7 +37,10 @@ struct xe_lrc_snapshot {
} tail;
u32 start_seqno;
u32 seqno;
- u32 ctx_timestamp;
+ u64 ctx_timestamp;
+ u64 ctx_timestamp_ms;
+ u64 queue_timestamp;
+ u64 queue_timestamp_ms;
u32 ctx_job_timestamp;
};
@@ -90,6 +93,11 @@ static inline size_t xe_lrc_ring_size(void)
return SZ_16K;
}
+static inline bool xe_lrc_is_multi_queue(struct xe_lrc *lrc)
+{
+ return lrc->multi_queue.primary_lrc;
+}
+
size_t xe_gt_lrc_hang_replay_size(struct xe_gt *gt, enum xe_engine_class class);
size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class);
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc);
@@ -130,7 +138,7 @@ u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc);
struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc);
size_t xe_lrc_reg_size(struct xe_device *xe);
-size_t xe_lrc_skip_size(struct xe_device *xe);
+size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class);
void xe_lrc_dump_default(struct drm_printer *p,
struct xe_gt *gt,
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
index 5a718f759ed6..53ef48feebfc 100644
--- a/drivers/gpu/drm/xe/xe_lrc_types.h
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -63,6 +63,17 @@ struct xe_lrc {
/** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */
u64 ctx_timestamp;
+
+ /** @queue_timestamp: value of QUEUE_TIMESTAMP on last update */
+ u64 queue_timestamp;
+
+ /** @multi_queue: Multi queue LRC related information */
+ struct {
+ /** @multi_queue.primary_lrc: Primary lrc of this multi-queue group */
+ struct xe_lrc *primary_lrc;
+ /** @multi_queue.pos: Position of LRC within the multi-queue group */
+ u8 pos;
+ } multi_queue;
};
struct xe_lrc_snapshot;
diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c
index 811e07136efb..3848ff81c1f9 100644
--- a/drivers/gpu/drm/xe/xe_memirq.c
+++ b/drivers/gpu/drm/xe/xe_memirq.c
@@ -212,7 +212,11 @@ out:
static void memirq_set_enable(struct xe_memirq *memirq, bool enable)
{
- iosys_map_wr(&memirq->mask, 0, u32, enable ? GENMASK(15, 0) : 0);
+ /*
+ * We only care about the GT_MI_USER_INTERRUPT from the engines and
+ * the GuC does not look at the ENABLE mask at all.
+ */
+ iosys_map_wr(&memirq->mask, 0, u32, enable ? GT_MI_USER_INTERRUPT : 0);
memirq->enabled = enable;
}
@@ -427,13 +431,25 @@ static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector,
return __memirq_received(memirq, vector, offset, name, true);
}
+static void memirq_assume_received(struct xe_memirq *memirq, const char *source,
+ u16 offset, const char *status)
+{
+ memirq_debug(memirq, "ASSUME %s %s(%u)\n", source, status, offset);
+}
+
static void memirq_dispatch_engine(struct xe_memirq *memirq, struct iosys_map *status,
struct xe_hw_engine *hwe)
{
memirq_debug(memirq, "STATUS %s %*ph\n", hwe->name, 16, status->vaddr);
- if (memirq_received(memirq, status, ilog2(GT_MI_USER_INTERRUPT), hwe->name))
- xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT);
+ /*
+ * The programming note says to assume that GT_MI_USER_INTERRUPT is always
+ * set. Check and clear the related status byte for debugging only.
+ */
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEMIRQ) &&
+ !memirq_received(memirq, status, ilog2(GT_MI_USER_INTERRUPT), hwe->name))
+ memirq_assume_received(memirq, hwe->name, ilog2(GT_MI_USER_INTERRUPT), "USER");
+ xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT);
}
static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *status,
@@ -443,8 +459,14 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat
memirq_debug(memirq, "STATUS %s %*ph\n", name, 16, status->vaddr);
- if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name))
- xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST);
+ /*
+ * The programming note says to assume that GUC_INTR_GUC2HOST is always
+ * set. Check and clear the related status byte for debugging only.
+ */
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEMIRQ) &&
+ !memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name))
+ memirq_assume_received(memirq, name, ilog2(GUC_INTR_GUC2HOST), "GUC2HOST");
+ xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST);
/*
* This is a software interrupt that must be cleared after it's consumed
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index a87fbc1e9fb1..9428dd5e7760 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -728,7 +728,22 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
bb->len = cs - bb->cs;
}
-#define EMIT_COPY_DW 10
+static u32 blt_fast_copy_cmd_len(struct xe_device *xe)
+{
+ return 10;
+}
+
+static u32 blt_mem_copy_cmd_len(struct xe_device *xe)
+{
+ return 10;
+}
+
+static u32 emit_copy_cmd_len(struct xe_device *xe)
+{
+ return (xe->info.has_mem_copy_instr) ? blt_mem_copy_cmd_len(xe) :
+ blt_fast_copy_cmd_len(xe);
+}
+
static void emit_xy_fast_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
u64 dst_ofs, unsigned int size,
unsigned int pitch)
@@ -736,6 +751,7 @@ static void emit_xy_fast_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
struct xe_device *xe = gt_to_xe(gt);
u32 mocs = 0;
u32 tile_y = 0;
+ u32 len;
xe_gt_assert(gt, !(pitch & 3));
xe_gt_assert(gt, size / pitch <= S16_MAX);
@@ -748,7 +764,8 @@ static void emit_xy_fast_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
if (GRAPHICS_VERx100(xe) >= 1250)
tile_y = XY_FAST_COPY_BLT_D1_SRC_TILE4 | XY_FAST_COPY_BLT_D1_DST_TILE4;
- bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
+ len = blt_fast_copy_cmd_len(xe);
+ bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (len - 2);
bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | tile_y | mocs;
bb->cs[bb->len++] = 0;
bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4;
@@ -765,6 +782,7 @@ static void emit_mem_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
u64 dst_ofs, unsigned int size, unsigned int pitch)
{
u32 mode, copy_type, width;
+ u32 len;
xe_gt_assert(gt, IS_ALIGNED(size, pitch));
xe_gt_assert(gt, pitch <= U16_MAX);
@@ -790,7 +808,9 @@ static void emit_mem_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
xe_gt_assert(gt, width <= U16_MAX);
- bb->cs[bb->len++] = MEM_COPY_CMD | mode | copy_type;
+ len = blt_mem_copy_cmd_len(gt_to_xe(gt));
+
+ bb->cs[bb->len++] = MEM_COPY_CMD | mode | copy_type | (len - 2);
bb->cs[bb->len++] = width - 1;
bb->cs[bb->len++] = size / pitch - 1; /* ignored by hw for page-copy/linear above */
bb->cs[bb->len++] = pitch - 1;
@@ -967,7 +987,7 @@ static struct dma_fence *__xe_migrate_copy(struct xe_migrate *m,
}
/* Add copy commands size here */
- batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
+ batch_size += ((copy_only_ccs) ? 0 : emit_copy_cmd_len(xe)) +
((needs_ccs_emit ? EMIT_COPY_CCS_DW : 0));
bb = xe_bb_new(gt, batch_size, usm);
@@ -1406,7 +1426,7 @@ struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_off
batch_size += pte_update_size(m, 0, sysmem, &sysmem_it, &vram_L0, &sysmem_L0_ofs,
&sysmem_L0_pt, 0, avail_pts, avail_pts);
- batch_size += EMIT_COPY_DW;
+ batch_size += emit_copy_cmd_len(xe);
bb = xe_bb_new(gt, batch_size, usm);
if (IS_ERR(bb)) {
@@ -1461,12 +1481,17 @@ struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_off
return fence;
}
+static u32 blt_mem_set_cmd_len(struct xe_device *xe)
+{
+ return 7;
+}
+
static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
u32 size, u32 pitch)
{
struct xe_device *xe = gt_to_xe(gt);
u32 *cs = bb->cs + bb->len;
- u32 len = PVC_MEM_SET_CMD_LEN_DW;
+ u32 len = blt_mem_set_cmd_len(xe);
*cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2);
*cs++ = pitch - 1;
@@ -1484,15 +1509,21 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs
bb->len += len;
}
+static u32 blt_fast_color_cmd_len(struct xe_device *xe)
+{
+ if (GRAPHICS_VERx100(xe) >= 1250)
+ return 16;
+ else
+ return 11;
+}
+
static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
u64 src_ofs, u32 size, u32 pitch, bool is_vram)
{
struct xe_device *xe = gt_to_xe(gt);
u32 *cs = bb->cs + bb->len;
- u32 len = XY_FAST_COLOR_BLT_DW;
+ u32 len = blt_fast_color_cmd_len(xe);
- if (GRAPHICS_VERx100(xe) < 1250)
- len = 11;
*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
(len - 2);
@@ -1525,32 +1556,20 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
bb->len += len;
}
-static bool has_service_copy_support(struct xe_gt *gt)
-{
- /*
- * What we care about is whether the architecture was designed with
- * service copy functionality (specifically the new MEM_SET / MEM_COPY
- * instructions) so check the architectural engine list rather than the
- * actual list since these instructions are usable on BCS0 even if
- * all of the actual service copy engines (BCS1-BCS8) have been fused
- * off.
- */
- return gt->info.engine_mask & GENMASK(XE_HW_ENGINE_BCS8,
- XE_HW_ENGINE_BCS1);
-}
-
static u32 emit_clear_cmd_len(struct xe_gt *gt)
{
- if (has_service_copy_support(gt))
- return PVC_MEM_SET_CMD_LEN_DW;
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (gt->info.has_xe2_blt_instructions)
+ return blt_mem_set_cmd_len(xe);
else
- return XY_FAST_COLOR_BLT_DW;
+ return blt_fast_color_cmd_len(xe);
}
static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
u32 size, u32 pitch, bool is_vram)
{
- if (has_service_copy_support(gt))
+ if (gt->info.has_xe2_blt_instructions)
emit_clear_link_copy(gt, bb, src_ofs, size, pitch);
else
emit_clear_main_copy(gt, bb, src_ofs, size, pitch,
@@ -2217,7 +2236,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
xe_assert(xe, npages * PAGE_SIZE <= MAX_PREEMPTDISABLE_TRANSFER);
batch_size += pte_update_cmd_size(npages << PAGE_SHIFT);
- batch_size += EMIT_COPY_DW;
+ batch_size += emit_copy_cmd_len(xe);
bb = xe_bb_new(gt, batch_size, use_usm_batch);
if (IS_ERR(bb)) {
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 5de5bf19240a..7c9071abb44f 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -1934,16 +1934,21 @@ static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent)
return div_u64(nom + den - 1, den);
}
-static bool oa_unit_supports_oa_format(struct xe_oa_open_param *param, int type)
+static bool oa_unit_supports_oa_format(struct xe_oa *oa, struct xe_oa_open_param *param)
{
+ const struct xe_oa_format *f = &oa->oa_formats[param->oa_format];
+
switch (param->oa_unit->type) {
case DRM_XE_OA_UNIT_TYPE_OAG:
- return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR ||
- type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC;
+ return f->type == DRM_XE_OA_FMT_TYPE_OAG || f->type == DRM_XE_OA_FMT_TYPE_OAR ||
+ f->type == DRM_XE_OA_FMT_TYPE_OAC || f->type == DRM_XE_OA_FMT_TYPE_PEC;
+ case DRM_XE_OA_UNIT_TYPE_MERT:
+ if (XE_DEVICE_WA(oa->xe, 14026746987))
+ return param->oa_format == XE_OAM_FORMAT_MPEC8u32_B8_C8;
+ fallthrough;
case DRM_XE_OA_UNIT_TYPE_OAM:
case DRM_XE_OA_UNIT_TYPE_OAM_SAG:
- case DRM_XE_OA_UNIT_TYPE_MERT:
- return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC;
+ return f->type == DRM_XE_OA_FMT_TYPE_OAM || f->type == DRM_XE_OA_FMT_TYPE_OAM_MPEC;
default:
return false;
}
@@ -2083,8 +2088,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
goto err_exec_q;
f = &oa->oa_formats[param.oa_format];
- if (!param.oa_format || !f->size ||
- !oa_unit_supports_oa_format(&param, f->type)) {
+ if (!param.oa_format || !f->size || !oa_unit_supports_oa_format(oa, &param)) {
drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n",
param.oa_format, f->type, f->size, param.hwe->class);
ret = -EINVAL;
@@ -2245,15 +2249,19 @@ static bool xe_oa_is_valid_mux_addr(struct xe_oa *oa, u32 addr)
return xe_oa_reg_in_range_table(addr, gen12_oa_mux_regs);
}
-static bool xe_oa_is_valid_config_reg_addr(struct xe_oa *oa, u32 addr)
+static bool xe_oa_is_valid_config_reg(struct xe_oa *oa, u32 addr, u32 val)
{
+ if (XE_DEVICE_WA(oa->xe, 14026779378) &&
+ addr == SYS_MEM_LAT_MEASURE.addr && val & SYS_MEM_LAT_MEASURE_EN)
+ return false;
+
return xe_oa_is_valid_flex_addr(oa, addr) ||
xe_oa_is_valid_b_counter_addr(oa, addr) ||
xe_oa_is_valid_mux_addr(oa, addr);
}
static struct xe_oa_reg *
-xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr),
+xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr, u32 val),
u32 __user *regs, u32 n_regs)
{
struct xe_oa_reg *oa_regs;
@@ -2271,16 +2279,16 @@ xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr),
if (err)
goto addr_err;
- if (!is_valid(oa, addr)) {
- drm_dbg(&oa->xe->drm, "Invalid oa_reg address: %X\n", addr);
- err = -EINVAL;
- goto addr_err;
- }
-
err = get_user(value, regs + 1);
if (err)
goto addr_err;
+ if (!is_valid(oa, addr, value)) {
+ drm_dbg(&oa->xe->drm, "Invalid oa_reg addr/value: %#x %#x\n", addr, value);
+ err = -EINVAL;
+ goto addr_err;
+ }
+
oa_regs[i].addr = XE_REG(addr);
oa_regs[i].value = value;
@@ -2379,7 +2387,7 @@ int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *fi
memcpy(oa_config->uuid, arg->uuid, sizeof(arg->uuid));
oa_config->regs_len = arg->n_regs;
- regs = xe_oa_alloc_regs(oa, xe_oa_is_valid_config_reg_addr,
+ regs = xe_oa_alloc_regs(oa, xe_oa_is_valid_config_reg,
u64_to_user_ptr(arg->regs_ptr),
arg->n_regs);
if (IS_ERR(regs)) {
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 41435f84aeb2..12d3be7f9f6c 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -850,6 +850,15 @@ static struct xe_gt *alloc_primary_gt(struct xe_tile *tile,
gt->info.num_compute_xecore_fuse_regs = graphics_desc->num_compute_xecore_fuse_regs;
/*
+ * Even if the service copy engines wind up being fused off, their
+ * presence in the IP descriptor indicates that the platform supports
+ * Xe2-style MEM_SET and MEM_COPY functionality.
+ */
+ if (graphics_desc->hw_engine_mask & GENMASK(XE_HW_ENGINE_BCS8,
+ XE_HW_ENGINE_BCS1))
+ gt->info.has_xe2_blt_instructions = true;
+
+ /*
* Before media version 13, the media IP was part of the primary GT
* so we need to add the media engines to the primary GT's engine list.
*/
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 8cc313182968..fb65940848d7 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -9,6 +9,7 @@
#include "regs/xe_gt_regs.h"
#include "regs/xe_oa_regs.h"
#include "xe_device.h"
+#include "xe_gt.h"
#include "xe_gt_types.h"
#include "xe_gt_printk.h"
#include "xe_platform_types.h"
@@ -33,6 +34,13 @@ static bool match_has_mert(const struct xe_device *xe,
return xe_device_has_mert((struct xe_device *)xe);
}
+static bool match_multi_queue_class(const struct xe_device *xe,
+ const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe)
+{
+ return xe_gt_supports_multi_queue(gt, hwe->class);
+}
+
static const struct xe_rtp_entry_sr register_whitelist[] = {
{ XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
@@ -54,6 +62,12 @@ static const struct xe_rtp_entry_sr register_whitelist[] = {
RING_FORCE_TO_NONPRIV_ACCESS_RD,
XE_RTP_ACTION_FLAG(ENGINE_BASE)))
},
+ { XE_RTP_NAME("allow_read_queue_timestamp"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3500, 3511), FUNC(match_multi_queue_class)),
+ XE_RTP_ACTIONS(WHITELIST(RING_QUEUE_TIMESTAMP(0),
+ RING_FORCE_TO_NONPRIV_ACCESS_RD,
+ XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+ },
{ XE_RTP_NAME("16014440446"),
XE_RTP_RULES(PLATFORM(PVC)),
XE_RTP_ACTIONS(WHITELIST(XE_REG(0x4400),
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
index 5f4ab08c0686..0522caafd89d 100644
--- a/drivers/gpu/drm/xe/xe_res_cursor.h
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -101,7 +101,15 @@ static inline void xe_res_first(struct ttm_resource *res,
cur->mem_type = res->mem_type;
switch (cur->mem_type) {
- case XE_PL_STOLEN:
+ case XE_PL_STOLEN: {
+ /* res->start is in pages (ttm_range_manager). */
+ cur->start = (res->start << PAGE_SHIFT) + start;
+ cur->size = size;
+ cur->remaining = size;
+ cur->node = NULL;
+ cur->mm = NULL;
+ break;
+ }
case XE_PL_VRAM0:
case XE_PL_VRAM1: {
struct gpu_buddy_block *block;
@@ -289,6 +297,10 @@ static inline void xe_res_next(struct xe_res_cursor *cur, u64 size)
switch (cur->mem_type) {
case XE_PL_STOLEN:
+ /* Just advance within the contiguous region. */
+ cur->start += size;
+ cur->size = cur->remaining;
+ break;
case XE_PL_VRAM0:
case XE_PL_VRAM1:
start = size - cur->size;
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index cfeb4fc7d217..39a670e91ba7 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -269,8 +269,12 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job)
static int emit_copy_timestamp(struct xe_device *xe, struct xe_lrc *lrc,
u32 *dw, int i)
{
+ const struct xe_reg reg = xe_lrc_is_multi_queue(lrc) ?
+ RING_QUEUE_TIMESTAMP(0) :
+ RING_CTX_TIMESTAMP(0);
+
dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
- dw[i++] = RING_CTX_TIMESTAMP(0).addr;
+ dw[i++] = reg.addr;
dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
dw[i++] = 0;
@@ -281,7 +285,7 @@ static int emit_copy_timestamp(struct xe_device *xe, struct xe_lrc *lrc,
if (IS_SRIOV_VF(xe)) {
dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT |
MI_SRM_ADD_CS_OFFSET;
- dw[i++] = RING_CTX_TIMESTAMP(0).addr;
+ dw[i++] = reg.addr;
dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc);
dw[i++] = 0;
}
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c
index 6c4b16409cc9..150a241110fb 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c
@@ -149,10 +149,11 @@ pf_migration_consume(struct xe_device *xe, unsigned int vfid)
for_each_gt(gt, xe, gt_id) {
data = xe_gt_sriov_pf_migration_save_consume(gt, vfid);
- if (data && PTR_ERR(data) != EAGAIN)
+ if (!data)
+ continue;
+ if (!IS_ERR(data) || PTR_ERR(data) != -EAGAIN)
return data;
- if (PTR_ERR(data) == -EAGAIN)
- more_data = true;
+ more_data = true;
}
if (!more_data)
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c
index db64cac39c94..427afd144f3a 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.c
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.c
@@ -396,25 +396,21 @@ bool xe_survivability_mode_is_requested(struct xe_device *xe)
* Runtime survivability mode is enabled when certain errors cause the device to be
* in non-recoverable state. The device is declared wedged with the appropriate
* recovery method and survivability mode sysfs exposed to userspace
- *
- * Return: 0 if runtime survivability mode is enabled, negative error code otherwise.
*/
-int xe_survivability_mode_runtime_enable(struct xe_device *xe)
+void xe_survivability_mode_runtime_enable(struct xe_device *xe)
{
struct xe_survivability *survivability = &xe->survivability;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
- int ret;
if (!IS_DGFX(xe) || IS_SRIOV_VF(xe) || xe->info.platform < XE_BATTLEMAGE) {
dev_err(&pdev->dev, "Runtime Survivability Mode not supported\n");
- return -EINVAL;
+ return;
}
populate_survivability_info(xe);
- ret = create_survivability_sysfs(pdev);
- if (ret)
- dev_err(&pdev->dev, "Failed to create survivability mode sysfs\n");
+ if (create_survivability_sysfs(pdev))
+ dev_err(&pdev->dev, "Failed to create survivability sysfs\n");
survivability->type = XE_SURVIVABILITY_TYPE_RUNTIME;
dev_err(&pdev->dev, "Runtime Survivability mode enabled\n");
@@ -422,8 +418,6 @@ int xe_survivability_mode_runtime_enable(struct xe_device *xe)
xe_device_set_wedged_method(xe, DRM_WEDGE_RECOVERY_VENDOR);
xe_device_declare_wedged(xe);
dev_err(&pdev->dev, "Firmware flash required, Please refer to the userspace documentation for more details!\n");
-
- return 0;
}
/**
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h
index 1cc94226aa82..cd040e4d18bb 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.h
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.h
@@ -11,7 +11,7 @@
struct xe_device;
int xe_survivability_mode_boot_enable(struct xe_device *xe);
-int xe_survivability_mode_runtime_enable(struct xe_device *xe);
+void xe_survivability_mode_runtime_enable(struct xe_device *xe);
bool xe_survivability_mode_is_boot_enabled(struct xe_device *xe);
bool xe_survivability_mode_is_requested(struct xe_device *xe);
diff --git a/drivers/gpu/drm/xe/xe_tile_types.h b/drivers/gpu/drm/xe/xe_tile_types.h
index 33932fd547d7..0048100ccb72 100644
--- a/drivers/gpu/drm/xe/xe_tile_types.h
+++ b/drivers/gpu/drm/xe/xe_tile_types.h
@@ -106,8 +106,6 @@ struct xe_tile {
struct xe_lmtt lmtt;
} pf;
struct {
- /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */
- struct xe_ggtt_node *ggtt_balloon[2];
/** @sriov.vf.self_config: VF configuration data */
struct xe_tile_sriov_vf_selfconfig self_config;
} vf;
diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.h b/drivers/gpu/drm/xe/xe_trace_lrc.h
index d525cbee1e34..5c4cfa0c1fe9 100644
--- a/drivers/gpu/drm/xe/xe_trace_lrc.h
+++ b/drivers/gpu/drm/xe/xe_trace_lrc.h
@@ -12,6 +12,7 @@
#include <linux/tracepoint.h>
#include <linux/types.h>
+#include "xe_exec_queue_types.h"
#include "xe_gt_types.h"
#include "xe_lrc.h"
#include "xe_lrc_types.h"
@@ -42,6 +43,32 @@ TRACE_EVENT(xe_lrc_update_timestamp,
__get_str(device_id))
);
+/*
+ * xe_lrc_update_queue_timestamp - tracepoint for an LRC's queue timestamp.
+ *
+ * Takes the LRC and a caller-supplied previous timestamp value (@old).
+ * The event records the LRC pointer, lrc->multi_queue.primary_lrc, @old,
+ * the current lrc->queue_timestamp (printed as "new"), the fence context
+ * name (lrc->fence_ctx.name) and the device name from __dev_name_lrc().
+ */
+TRACE_EVENT(xe_lrc_update_queue_timestamp,
+ TP_PROTO(struct xe_lrc *lrc, uint64_t old),
+ TP_ARGS(lrc, old),
+ TP_STRUCT__entry(
+ __field(struct xe_lrc *, lrc)
+ __field(struct xe_lrc *, primary_lrc)
+ __field(u64, old)
+ __field(u64, new)
+ __string(name, lrc->fence_ctx.name)
+ __string(device_id, __dev_name_lrc(lrc))
+ ),
+
+ TP_fast_assign(
+ __entry->lrc = lrc;
+ __entry->primary_lrc = lrc->multi_queue.primary_lrc;
+ __entry->old = old;
+ __entry->new = lrc->queue_timestamp;
+ __assign_str(name);
+ __assign_str(device_id);
+ ),
+ TP_printk("lrc=%p primary_lrc=%p lrc->name=%s old=%llu new=%llu device_id:%s",
+ __entry->lrc, __entry->primary_lrc, __get_str(name),
+ __entry->old, __entry->new,
+ __get_str(device_id))
+);
+
#endif
/* This part must be outside protection */
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 27c9d72222cf..5e9070739e65 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -19,30 +19,11 @@
#include "xe_device.h"
#include "xe_gt_printk.h"
#include "xe_mmio.h"
-#include "xe_res_cursor.h"
#include "xe_sriov.h"
#include "xe_ttm_stolen_mgr.h"
-#include "xe_ttm_vram_mgr.h"
#include "xe_vram.h"
#include "xe_wa.h"
-struct xe_ttm_stolen_mgr {
- struct xe_ttm_vram_mgr base;
-
- /* PCI base offset */
- resource_size_t io_base;
- /* GPU base offset */
- resource_size_t stolen_base;
-
- void __iomem *mapping;
-};
-
-static inline struct xe_ttm_stolen_mgr *
-to_stolen_mgr(struct ttm_resource_manager *man)
-{
- return container_of(man, struct xe_ttm_stolen_mgr, base.manager);
-}
-
/**
* xe_ttm_stolen_cpu_access_needs_ggtt() - If we can't directly CPU access
* stolen, can we then fallback to mapping through the GGTT.
@@ -210,12 +191,19 @@ static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
#endif
}
+/*
+ * DRM-managed release action for the stolen memory manager. It is
+ * registered by xe_ttm_stolen_mgr_init() through drmm_add_action_or_reset()
+ * and undoes the ttm_range_man_init_nocheck() done there by finalizing the
+ * TTM range manager for XE_PL_STOLEN. @arg is not used.
+ */
+static void xe_ttm_stolen_mgr_fini(struct drm_device *dev, void *arg)
+{
+ struct xe_device *xe = to_xe_device(dev);
+
+ ttm_range_man_fini_nocheck(&xe->ttm, XE_PL_STOLEN);
+}
+
int xe_ttm_stolen_mgr_init(struct xe_device *xe)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
struct xe_ttm_stolen_mgr *mgr;
u64 stolen_size, io_size;
- int err;
+ int ret;
mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
if (!mgr)
@@ -244,12 +232,12 @@ int xe_ttm_stolen_mgr_init(struct xe_device *xe)
if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
io_size = stolen_size;
- err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
- io_size, PAGE_SIZE);
- if (err) {
- drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
- return err;
- }
+ ret = ttm_range_man_init_nocheck(&xe->ttm, XE_PL_STOLEN, false,
+ stolen_size >> PAGE_SHIFT);
+ if (ret)
+ return ret;
+
+ xe->mem.stolen_mgr = mgr;
drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
stolen_size);
@@ -257,36 +245,32 @@ int xe_ttm_stolen_mgr_init(struct xe_device *xe)
if (io_size)
mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size);
- return 0;
+ return drmm_add_action_or_reset(&xe->drm, xe_ttm_stolen_mgr_fini, mgr);
}
u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
{
struct xe_device *xe = xe_bo_device(bo);
- struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
- struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr);
- struct xe_res_cursor cur;
+ struct xe_ttm_stolen_mgr *mgr = xe->mem.stolen_mgr;
XE_WARN_ON(!mgr->io_base);
if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;
- xe_res_first(bo->ttm.resource, offset, 4096, &cur);
- return mgr->io_base + cur.start;
+ /* Range allocator: res->start is in pages. */
+ return mgr->io_base + (bo->ttm.resource->start << PAGE_SHIFT) + offset;
}
static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
struct xe_ttm_stolen_mgr *mgr,
struct ttm_resource *mem)
{
- struct xe_res_cursor cur;
-
if (!mgr->io_base)
return -EIO;
- xe_res_first(mem, 0, 4096, &cur);
- mem->bus.offset = cur.start;
+ /* Range allocator always produces contiguous allocations. */
+ mem->bus.offset = mem->start << PAGE_SHIFT;
drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));
@@ -329,8 +313,7 @@ static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
{
- struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
- struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL;
+ struct xe_ttm_stolen_mgr *mgr = xe->mem.stolen_mgr;
if (!mgr || !mgr->io_base)
return -EIO;
@@ -343,8 +326,5 @@ int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
{
- struct xe_ttm_stolen_mgr *mgr =
- to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN));
-
- return mgr->stolen_base;
+ return xe->mem.stolen_mgr->stolen_base;
}
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h
index 8e877d1e839b..0675106d535b 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h
@@ -12,6 +12,18 @@ struct ttm_resource;
struct xe_bo;
struct xe_device;
+/**
+ * struct xe_ttm_stolen_mgr - Xe TTM stolen memory manager
+ *
+ * Driver-side bookkeeping for stolen memory. xe_ttm_stolen_mgr_init()
+ * allocates one instance with drmm_kzalloc() and publishes it through
+ * xe->mem.stolen_mgr once the XE_PL_STOLEN range manager is initialized.
+ */
+struct xe_ttm_stolen_mgr {
+ /**
+ * @io_base: PCI base offset for CPU I/O access. A zero value is
+ * treated as "no CPU I/O access": the io_mem_reserve paths return
+ * -EIO in that case.
+ */
+ resource_size_t io_base;
+ /** @stolen_base: GPU base offset, returned by xe_ttm_stolen_gpu_offset() */
+ resource_size_t stolen_base;
+ /**
+ * @mapping: I/O memory mapping for CPU access. Set up with
+ * devm_ioremap_wc() in xe_ttm_stolen_mgr_init() only when the
+ * computed io_size is non-zero.
+ */
+ void __iomem *mapping;
+};
+
int xe_ttm_stolen_mgr_init(struct xe_device *xe);
int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem);
bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe);
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 9f67df646955..b518f7dec680 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -299,14 +299,13 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
u64 default_page_size)
{
struct ttm_resource_manager *man = &mgr->manager;
+ const char *name;
int err;
- if (mem_type != XE_PL_STOLEN) {
- const char *name = mem_type == XE_PL_VRAM0 ? "vram0" : "vram1";
- man->cg = drmm_cgroup_register_region(&xe->drm, name, size);
- if (IS_ERR(man->cg))
- return PTR_ERR(man->cg);
- }
+ name = mem_type == XE_PL_VRAM0 ? "vram0" : "vram1";
+ man->cg = drmm_cgroup_register_region(&xe->drm, name, size);
+ if (IS_ERR(man->cg))
+ return PTR_ERR(man->cg);
man->func = &xe_ttm_vram_mgr_func;
mgr->mem_type = mem_type;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 43a578d9c067..b01f31ed4417 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1120,6 +1120,25 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
xe_bo_assert_held(bo);
+ /*
+ * Reject only WILLNEED mappings on DONTNEED/PURGED BOs. This
+ * gates new vm_bind ioctls (user supplies WILLNEED) while
+ * still allowing partial-unbind / remap splits whose new VMAs
+ * inherit the parent's DONTNEED attr. It must also run before
+ * xe_bo_willneed_get_locked() below so a 0->1 holder bump
+ * cannot silently promote DONTNEED back to WILLNEED.
+ */
+ if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) {
+ if (xe_bo_madv_is_dontneed(bo)) {
+ xe_vma_free(vma);
+ return ERR_PTR(-EBUSY);
+ }
+ if (xe_bo_is_purged(bo)) {
+ xe_vma_free(vma);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
vm_bo = drm_gpuvm_bo_obtain_locked(vma->gpuva.vm, &bo->ttm.base);
if (IS_ERR(vm_bo)) {
xe_vma_free(vma);
@@ -1131,6 +1150,10 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
vma->gpuva.gem.offset = bo_offset_or_userptr;
drm_gpuva_link(&vma->gpuva, vm_bo);
drm_gpuvm_bo_put(vm_bo);
+
+ xe_bo_vma_count_inc_locked(bo);
+ if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED)
+ xe_bo_willneed_get_locked(bo);
} else /* userptr or null */ {
if (!is_null && !is_cpu_addr_mirror) {
struct xe_userptr_vma *uvma = to_userptr_vma(vma);
@@ -1208,7 +1231,10 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
xe_bo_assert_held(bo);
drm_gpuva_unlink(&vma->gpuva);
- xe_bo_recompute_purgeable_state(bo);
+
+ xe_bo_vma_count_dec_locked(bo);
+ if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED)
+ xe_bo_willneed_put_locked(bo);
}
xe_vm_assert_held(vm);
@@ -3016,7 +3042,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
* @res_evict: Allow evicting resources during validation
* @validate: Perform BO validation
* @request_decompress: Request BO decompression
- * @check_purged: Reject operation if BO is purged
+ * @check_purged: Reject operation if BO is DONTNEED or PURGED
*/
struct xe_vma_lock_and_validate_flags {
u32 res_evict : 1;
@@ -3030,6 +3056,7 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
{
struct xe_bo *bo = xe_vma_bo(vma);
struct xe_vm *vm = xe_vma_vm(vma);
+ bool validate_bo = flags.validate;
int err = 0;
if (bo) {
@@ -3044,7 +3071,11 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
err = -EINVAL; /* BO already purged */
}
- if (!err && flags.validate)
+ /* Don't validate the BO for DONTNEED/PURGED remap remnants. */
+ if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_WILLNEED)
+ validate_bo = false;
+
+ if (!err && validate_bo)
err = xe_bo_validate(bo, vm,
xe_vm_allow_vm_eviction(vm) &&
flags.res_evict, exec);
@@ -3152,7 +3183,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
op->map.immediate,
.request_decompress =
op->map.request_decompress,
- .check_purged = true,
+ .check_purged = false,
});
break;
case DRM_GPUVA_OP_REMAP:
@@ -3174,7 +3205,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
.res_evict = res_evict,
.validate = true,
.request_decompress = false,
- .check_purged = true,
+ .check_purged = false,
});
if (!err && op->remap.next)
err = vma_lock_and_validate(exec, op->remap.next,
@@ -3182,7 +3213,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
.res_evict = res_evict,
.validate = true,
.request_decompress = false,
- .check_purged = true,
+ .check_purged = false,
});
break;
case DRM_GPUVA_OP_UNMAP:
@@ -3211,9 +3242,11 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
}
/*
- * Prefetch attempts to migrate BO's backing store without
- * repopulating it first. Purged BOs have no backing store
- * to migrate, so reject the operation.
+ * PREFETCH is the only op that still gates on BO purge state.
+ * MAP/REMAP handle this inside xe_vma_create() so partial
+ * unbind on a DONTNEED BO still works. PREFETCH skips
+ * xe_vma_create() and would migrate a BO with no backing
+ * store, so reject DONTNEED/PURGED here.
*/
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.prefetch.va),
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index c78906dea82b..c4fb29004195 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -186,147 +186,6 @@ static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
}
/**
- * xe_bo_is_dmabuf_shared() - Check if BO is shared via dma-buf
- * @bo: Buffer object
- *
- * Prevent marking imported or exported dma-bufs as purgeable.
- * For imported BOs, Xe doesn't own the backing store and cannot
- * safely reclaim pages (exporter or other devices may still be
- * using them). For exported BOs, external devices may have active
- * mappings we cannot track.
- *
- * Return: true if BO is imported or exported, false otherwise
- */
-static bool xe_bo_is_dmabuf_shared(struct xe_bo *bo)
-{
- struct drm_gem_object *obj = &bo->ttm.base;
-
- /* Imported: exporter owns backing store */
- if (drm_gem_is_imported(obj))
- return true;
-
- /* Exported: external devices may be accessing */
- if (obj->dma_buf)
- return true;
-
- return false;
-}
-
-/**
- * enum xe_bo_vmas_purge_state - VMA purgeable state aggregation
- *
- * Distinguishes whether a BO's VMAs are all DONTNEED, have at least
- * one WILLNEED, or have no VMAs at all.
- *
- * Enum values align with XE_MADV_PURGEABLE_* states for consistency.
- */
-enum xe_bo_vmas_purge_state {
- /** @XE_BO_VMAS_STATE_WILLNEED: At least one VMA is WILLNEED */
- XE_BO_VMAS_STATE_WILLNEED = 0,
- /** @XE_BO_VMAS_STATE_DONTNEED: All VMAs are DONTNEED */
- XE_BO_VMAS_STATE_DONTNEED = 1,
- /** @XE_BO_VMAS_STATE_NO_VMAS: BO has no VMAs */
- XE_BO_VMAS_STATE_NO_VMAS = 2,
-};
-
-/*
- * xe_bo_recompute_purgeable_state() casts between xe_bo_vmas_purge_state and
- * xe_madv_purgeable_state. Enforce that WILLNEED=0 and DONTNEED=1 match across
- * both enums so the single-line cast is always valid.
- */
-static_assert(XE_BO_VMAS_STATE_WILLNEED == (int)XE_MADV_PURGEABLE_WILLNEED,
- "VMA purge state WILLNEED must equal madv purgeable WILLNEED");
-static_assert(XE_BO_VMAS_STATE_DONTNEED == (int)XE_MADV_PURGEABLE_DONTNEED,
- "VMA purge state DONTNEED must equal madv purgeable DONTNEED");
-
-/**
- * xe_bo_all_vmas_dontneed() - Determine BO VMA purgeable state
- * @bo: Buffer object
- *
- * Check all VMAs across all VMs to determine aggregate purgeable state.
- * Shared BOs require unanimous DONTNEED state from all mappings.
- *
- * Caller must hold BO dma-resv lock.
- *
- * Return: XE_BO_VMAS_STATE_DONTNEED if all VMAs are DONTNEED,
- * XE_BO_VMAS_STATE_WILLNEED if at least one VMA is not DONTNEED,
- * XE_BO_VMAS_STATE_NO_VMAS if BO has no VMAs
- */
-static enum xe_bo_vmas_purge_state xe_bo_all_vmas_dontneed(struct xe_bo *bo)
-{
- struct drm_gpuvm_bo *vm_bo;
- struct drm_gpuva *gpuva;
- struct drm_gem_object *obj = &bo->ttm.base;
- bool has_vmas = false;
-
- xe_bo_assert_held(bo);
-
- /* Shared dma-bufs cannot be purgeable */
- if (xe_bo_is_dmabuf_shared(bo))
- return XE_BO_VMAS_STATE_WILLNEED;
-
- drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
- drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
- struct xe_vma *vma = gpuva_to_vma(gpuva);
-
- has_vmas = true;
-
- /* Any non-DONTNEED VMA prevents purging */
- if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_DONTNEED)
- return XE_BO_VMAS_STATE_WILLNEED;
- }
- }
-
- /*
- * No VMAs => preserve existing BO purgeable state.
- * Avoids incorrectly flipping DONTNEED -> WILLNEED when last VMA unmapped.
- */
- if (!has_vmas)
- return XE_BO_VMAS_STATE_NO_VMAS;
-
- return XE_BO_VMAS_STATE_DONTNEED;
-}
-
-/**
- * xe_bo_recompute_purgeable_state() - Recompute BO purgeable state from VMAs
- * @bo: Buffer object
- *
- * Walk all VMAs to determine if BO should be purgeable or not.
- * Shared BOs require unanimous DONTNEED state from all mappings.
- * If the BO has no VMAs the existing state is preserved.
- *
- * Locking: Caller must hold BO dma-resv lock. When iterating GPUVM lists,
- * VM lock must also be held (write) to prevent concurrent VMA modifications.
- * This is satisfied at both call sites:
- * - xe_vma_destroy(): holds vm->lock write
- * - madvise_purgeable(): holds vm->lock write (from madvise ioctl path)
- *
- * Return: nothing
- */
-void xe_bo_recompute_purgeable_state(struct xe_bo *bo)
-{
- enum xe_bo_vmas_purge_state vma_state;
-
- if (!bo)
- return;
-
- xe_bo_assert_held(bo);
-
- /*
- * Once purged, always purged. Cannot transition back to WILLNEED.
- * This matches i915 semantics where purged BOs are permanently invalid.
- */
- if (bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED)
- return;
-
- vma_state = xe_bo_all_vmas_dontneed(bo);
-
- if (vma_state != (enum xe_bo_vmas_purge_state)bo->madv_purgeable &&
- vma_state != XE_BO_VMAS_STATE_NO_VMAS)
- xe_bo_set_purgeable_state(bo, (enum xe_madv_purgeable_state)vma_state);
-}
-
-/**
* madvise_purgeable - Handle purgeable buffer object advice
* @xe: XE device
* @vm: VM
@@ -359,12 +218,6 @@ static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm,
/* BO must be locked before modifying madv state */
xe_bo_assert_held(bo);
- /* Skip shared dma-bufs - no PTEs to zap */
- if (xe_bo_is_dmabuf_shared(bo)) {
- vmas[i]->skip_invalidation = true;
- continue;
- }
-
/*
* Once purged, always purged. Cannot transition back to WILLNEED.
* This matches i915 semantics where purged BOs are permanently invalid.
@@ -377,13 +230,14 @@ static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm,
switch (op->purge_state_val.val) {
case DRM_XE_VMA_PURGEABLE_STATE_WILLNEED:
- vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_WILLNEED;
vmas[i]->skip_invalidation = true;
-
- xe_bo_recompute_purgeable_state(bo);
+ /* Only act on a real DONTNEED -> WILLNEED transition. */
+ if (vmas[i]->attr.purgeable_state == XE_MADV_PURGEABLE_DONTNEED) {
+ vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_WILLNEED;
+ xe_bo_willneed_get_locked(bo);
+ }
break;
case DRM_XE_VMA_PURGEABLE_STATE_DONTNEED:
- vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_DONTNEED;
/*
* Don't zap PTEs at DONTNEED time -- pages are still
* alive. The zap happens in xe_bo_move_notify() right
@@ -391,7 +245,11 @@ static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm,
*/
vmas[i]->skip_invalidation = true;
- xe_bo_recompute_purgeable_state(bo);
+ /* Only act on a real WILLNEED -> DONTNEED transition. */
+ if (vmas[i]->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) {
+ vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_DONTNEED;
+ xe_bo_willneed_put_locked(bo);
+ }
break;
default:
/* Should never hit - values validated in madvise_args_are_sane() */
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h
index 39acd2689ca0..a3078f634c7e 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.h
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.h
@@ -13,6 +13,4 @@ struct xe_bo;
int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
-void xe_bo_recompute_purgeable_state(struct xe_bo *bo);
-
#endif
diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h
index 33b91cb2e684..e32ef763427c 100644
--- a/include/drm/intel/pciids.h
+++ b/include/drm/intel/pciids.h
@@ -898,7 +898,11 @@
/* CRI */
#define INTEL_CRI_IDS(MACRO__, ...) \
- MACRO__(0x674C, ## __VA_ARGS__)
+ MACRO__(0x674C, ## __VA_ARGS__), \
+ MACRO__(0x674D, ## __VA_ARGS__), \
+ MACRO__(0x674E, ## __VA_ARGS__), \
+ MACRO__(0x674F, ## __VA_ARGS__), \
+ MACRO__(0x6750, ## __VA_ARGS__)
/* NVL-P */
#define INTEL_NVLP_IDS(MACRO__, ...) \