diff options
| author | Dave Airlie <airlied@redhat.com> | 2026-05-15 06:57:51 +0300 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2026-05-15 06:58:13 +0300 |
| commit | bfdb8fa114007403c5e4da594eda7f4feda65317 (patch) | |
| tree | 7aecfbee8c9bcd75ad415ad892d68f5bc6be2e0f | |
| parent | 15342d4aebcb4c02c37a62c9222ac5056eab6faf (diff) | |
| parent | 2ddedd4b7b7c329dd65358025cba8652675bec3d (diff) | |
| download | linux-bfdb8fa114007403c5e4da594eda7f4feda65317.tar.xz | |
Merge tag 'drm-xe-next-2026-05-14' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
Driver Changes:
- drm/xe/cri: Add new PCI IDs (Balasubramani Vivekanandan)
- drm/xe/memirq: Enable GT_MI_USER_INTERRUPT only (Michal Wajdeczko)
- drm/xe/memirq: Update interrupt handler logic (Michal Wajdeczko)
- drm/xe: Drop unused ggtt_balloon field (Michal Wajdeczko)
- drm/xe: Refactor emit_xy_fast_copy and emit_mem_copy functions (Balasubramani Vivekanandan)
- drm/xe: Refactor emit_clear_link_copy (Balasubramani Vivekanandan)
- drm/xe: Refactor emit_clear_main_copy (Balasubramani Vivekanandan)
- drm/xe/devcoredump: Drop a FIXME in devcoredump (Shekhar Chauhan)
- drm/xe/oa: MERTOA Wa_14026779378 (Ashutosh Dixit)
- drm/xe/oa: Add val arg to xe_oa_is_valid_config_reg (Ashutosh Dixit)
- drm/xe/oa: MERTOA Wa_14026746987 (Ashutosh Dixit)
- drm/xe/oa: Refactor oa_unit_supports_oa_format (Ashutosh Dixit)
- drm/xe/dma-buf: fix UAF with retry loop (Matthew Auld)
- drm/xe/dma-buf: handle empty bo and UAF races (Matthew Auld)
- drm/xe/multi_queue: Whitelist QUEUE_TIMESTAMP register (Umesh Nerlige Ramappa)
- drm/xe/multi_queue: Use QUEUE_TIMESTAMP as job timestamp for multi-queue (Umesh Nerlige Ramappa)
- drm/xe/multi_queue: Add trace event for the multi queue timestamp (Umesh Nerlige Ramappa)
- drm/xe/multi_queue: Capture queue run times for active queues (Umesh Nerlige Ramappa)
- drm/xe/lrc: Refactor out engine id to hwe conversion (Umesh Nerlige Ramappa)
- drm/xe/multi_queue: Add helpers to access CS QUEUE TIMESTAMP from lrc (Umesh Nerlige Ramappa)
- drm/xe/multi_queue: Store primary LRC and position info in LRC (Umesh Nerlige Ramappa)
- drm/xe/multi_queue: Refactor check for multi queue support for engine class (Umesh Nerlige Ramappa)
- drm/xe/lrc: Refactor xe_lrc_timestamp to simplify logic (Umesh Nerlige Ramappa)
- drm/xe: Add timestamp_ms to LRC snapshot (Matthew Brost)
- drm/xe/lrc: Use 64 bit ctx timestamp in the LRC snapshot (Umesh Nerlige Ramappa)
- drm/xe/eustall: Return ENODEV from read if EU stall registers get reset (Harish Chegondi)
- drm/xe/multi_queue: Refactor CGP_SYNC send path (Niranjana Vishwanathapura)
- drm/xe/multi_queue: Remove redundant assignment in guc_exec_queue_run_job (Niranjana Vishwanathapura)
- drm/xe: Make decision to use Xe2-style blitter instructions a feature flag (Matt Roper)
- drm/xe: Convert stolen memory over to ttm_range_manager (Sanjay Yadav)
- drm/xe/madvise: Track purgeability with BO-local counters (Arvind Yadav)
- drm/xe/xe_survivability: Simplify runtime survivability error handling (Mallesh Koujalagi)
- drm/xe/guc: Exclude indirect ring state page from ADS engine state size (Satyanarayana K V P)
- drm/xe/hw_error: Cleanup array map (Raag Jadav)
- drm/xe/pf: Fix MMIO access using PF view instead of VF view during migration (Shuicheng Lin)
- drm/xe/pf: Fix EAGAIN sign in pf_migration_consume() (Shuicheng Lin)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
Link: https://patch.msgid.link/agXMKRRl1oxB204x@fedora
43 files changed, 776 insertions, 483 deletions
diff --git a/drivers/gpu/drm/xe/display/xe_display_bo.c b/drivers/gpu/drm/xe/display/xe_display_bo.c index dc0d78ff2d79..7fbac223b097 100644 --- a/drivers/gpu/drm/xe/display/xe_display_bo.c +++ b/drivers/gpu/drm/xe/display/xe_display_bo.c @@ -138,7 +138,7 @@ bool xe_display_bo_fbdev_prefer_stolen(struct xe_device *xe, unsigned int size) * important and we should probably use that space with FBC or other * features. */ - return stolen->size >= size * 2; + return stolen->size >= (size * 2) >> PAGE_SHIFT; } static struct drm_gem_object *xe_display_bo_fbdev_create(struct drm_device *drm, int size) diff --git a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h index 885fcf211e6d..18d0fde8c98f 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h @@ -20,7 +20,6 @@ #define XY_FAST_COLOR_BLT_CMD (2 << 29 | 0x44 << 22) #define XY_FAST_COLOR_BLT_DEPTH_32 (2 << 19) -#define XY_FAST_COLOR_BLT_DW 16 #define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 22) #define XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK GENMASK(27, 24) #define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31 @@ -31,14 +30,13 @@ #define XY_FAST_COPY_BLT_D1_DST_TILE4 REG_BIT(30) #define XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK GENMASK(23, 20) -#define MEM_COPY_CMD (2 << 29 | 0x5a << 22 | 0x8) +#define MEM_COPY_CMD (2 << 29 | 0x5a << 22) #define MEM_COPY_PAGE_COPY_MODE REG_BIT(19) #define MEM_COPY_MATRIX_COPY REG_BIT(17) #define MEM_COPY_SRC_MOCS_INDEX_MASK GENMASK(31, 28) #define MEM_COPY_DST_MOCS_INDEX_MASK GENMASK(6, 3) #define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22) -#define PVC_MEM_SET_CMD_LEN_DW 7 #define PVC_MEM_SET_MATRIX REG_BIT(17) #define PVC_MEM_SET_DATA_FIELD GENMASK(31, 24) /* Bspec lists field as [6:0], but index alone is from [6:1] */ diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index c4c879a9e555..94033982e694 100644 --- 
a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -170,6 +170,10 @@ #define GFX_DISABLE_LEGACY_MODE REG_BIT(3) #define RING_CSMQDEBUG(base) XE_REG((base) + 0x2b0) +#define CURRENT_ACTIVE_QUEUE_ID_MASK REG_GENMASK(7, 0) + +#define RING_QUEUE_TIMESTAMP(base) XE_REG((base) + 0x4c0) +#define RING_QUEUE_TIMESTAMP_UDW(base) XE_REG((base) + 0x4c0 + 4) #define RING_TIMESTAMP(base) XE_REG((base) + 0x358) diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index b5eff383902c..4ab86fc369fd 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -34,6 +34,9 @@ #define CTX_CS_INT_VEC_REG 0x5a #define CTX_CS_INT_VEC_DATA (CTX_CS_INT_VEC_REG + 1) +#define CTX_QUEUE_TIMESTAMP (0xd0 + 1) +#define CTX_QUEUE_TIMESTAMP_UDW (0xd2 + 1) + #define INDIRECT_CTX_RING_HEAD (0x02 + 1) #define INDIRECT_CTX_RING_TAIL (0x04 + 1) #define INDIRECT_CTX_RING_START (0x06 + 1) diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index 04a729e610aa..aa66af7e99fe 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -6,6 +6,9 @@ #ifndef __XE_OA_REGS__ #define __XE_OA_REGS__ +#define SYS_MEM_LAT_MEASURE XE_REG(0x145194) +#define SYS_MEM_LAT_MEASURE_EN REG_BIT(31) + #define RPM_CONFIG1 XE_REG(0xd04) #define GT_NOA_ENABLE REG_BIT(9) diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 50a97705e0ac..3c1be809be82 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -421,7 +421,7 @@ static struct dma_fence *blt_copy(struct xe_tile *tile, avail_pts, avail_pts); /* Add copy commands size here */ - batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) + + batch_size += ((copy_only_ccs) ? 0 : emit_copy_cmd_len(xe)) + ((xe_device_has_flat_ccs(xe) && copy_only_ccs) ? 
EMIT_COPY_CCS_DW : 0); bb = xe_bb_new(gt, batch_size, xe->info.has_usm); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 5ce60d161e09..4c80bac67622 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -586,11 +586,17 @@ static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt) kfree(tt); } -static bool xe_ttm_resource_visible(struct ttm_resource *mem) +static bool xe_ttm_resource_visible(struct xe_device *xe, struct ttm_resource *mem) { - struct xe_ttm_vram_mgr_resource *vres = - to_xe_ttm_vram_mgr_resource(mem); + struct xe_ttm_vram_mgr_resource *vres; + if (mem->mem_type == XE_PL_STOLEN) { + struct xe_ttm_stolen_mgr *mgr = xe->mem.stolen_mgr; + + return mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe); + } + + vres = to_xe_ttm_vram_mgr_resource(mem); return vres->used_visible_size == mem->size; } @@ -608,7 +614,7 @@ bool xe_bo_is_visible_vram(struct xe_bo *bo) if (drm_WARN_ON(bo->ttm.base.dev, !xe_bo_is_vram(bo))) return false; - return xe_ttm_resource_visible(bo->ttm.resource); + return xe_ttm_resource_visible(xe_bo_device(bo), bo->ttm.resource); } static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, @@ -624,7 +630,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, case XE_PL_VRAM1: { struct xe_vram_region *vram = xe_map_resource_to_region(mem); - if (!xe_ttm_resource_visible(mem)) + if (!xe_ttm_resource_visible(xe, mem)) return -EINVAL; mem->bus.offset = mem->start << PAGE_SHIFT; @@ -884,10 +890,10 @@ void xe_bo_set_purgeable_state(struct xe_bo *bo, new_state == XE_MADV_PURGEABLE_PURGED); /* Once purged, always purged - cannot transition out */ - xe_assert(xe, !(bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED && + xe_assert(xe, !(bo->purgeable.state == XE_MADV_PURGEABLE_PURGED && new_state != XE_MADV_PURGEABLE_PURGED)); - bo->madv_purgeable = new_state; + bo->purgeable.state = new_state; xe_bo_set_purgeable_shrinker(bo, new_state); } @@ -2355,7 +2361,7 @@ struct xe_bo 
*xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, INIT_LIST_HEAD(&bo->vram_userfault_link); /* Initialize purge advisory state */ - bo->madv_purgeable = XE_MADV_PURGEABLE_WILLNEED; + bo->purgeable.state = XE_MADV_PURGEABLE_WILLNEED; drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 68dea7d25a6b..6340317f7d2e 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -251,7 +251,7 @@ static inline bool xe_bo_is_protected(const struct xe_bo *bo) static inline bool xe_bo_is_purged(struct xe_bo *bo) { xe_bo_assert_held(bo); - return bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED; + return bo->purgeable.state == XE_MADV_PURGEABLE_PURGED; } /** @@ -268,11 +268,95 @@ static inline bool xe_bo_is_purged(struct xe_bo *bo) static inline bool xe_bo_madv_is_dontneed(struct xe_bo *bo) { xe_bo_assert_held(bo); - return bo->madv_purgeable == XE_MADV_PURGEABLE_DONTNEED; + return bo->purgeable.state == XE_MADV_PURGEABLE_DONTNEED; } void xe_bo_set_purgeable_state(struct xe_bo *bo, enum xe_madv_purgeable_state new_state); +/** + * xe_bo_willneed_get_locked() - Acquire a WILLNEED holder on a BO + * @bo: Buffer object + * + * Increments willneed_count and, on a 0->1 transition, promotes the BO + * from DONTNEED to WILLNEED. PURGED is terminal and is never modified. + * + * Caller must hold the BO's dma-resv lock. + */ +static inline void xe_bo_willneed_get_locked(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + + /* Imported BOs are owned externally; do not track purgeability. 
*/ + if (drm_gem_is_imported(&bo->ttm.base)) + return; + + if (bo->purgeable.willneed_count++ == 0 && xe_bo_madv_is_dontneed(bo)) + xe_bo_set_purgeable_state(bo, XE_MADV_PURGEABLE_WILLNEED); +} + +/** + * xe_bo_willneed_put_locked() - Release a WILLNEED holder on a BO + * @bo: Buffer object + * + * Decrements willneed_count and, on a 1->0 transition, marks the BO + * DONTNEED only if it still has VMAs (implying all active VMAs are + * DONTNEED). If the last VMA is being removed, preserve the current BO + * state to match the previous VMA-walk semantics. + * + * PURGED is terminal and the BO state is never modified. + * + * Caller must hold the BO's dma-resv lock. + */ +static inline void xe_bo_willneed_put_locked(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + + if (drm_gem_is_imported(&bo->ttm.base)) + return; + + xe_assert(xe_bo_device(bo), bo->purgeable.willneed_count > 0); + if (--bo->purgeable.willneed_count == 0 && bo->purgeable.vma_count > 0 && + !xe_bo_is_purged(bo)) + xe_bo_set_purgeable_state(bo, XE_MADV_PURGEABLE_DONTNEED); +} + +/** + * xe_bo_vma_count_inc_locked() - Account a new VMA on a BO + * @bo: Buffer object + * + * Increments vma_count. + * + * Caller must hold the BO's dma-resv lock. + */ +static inline void xe_bo_vma_count_inc_locked(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + + if (drm_gem_is_imported(&bo->ttm.base)) + return; + + bo->purgeable.vma_count++; +} + +/** + * xe_bo_vma_count_dec_locked() - Account a VMA removal on a BO + * @bo: Buffer object + * + * Decrements vma_count. + * + * Caller must hold the BO's dma-resv lock. 
+ */ +static inline void xe_bo_vma_count_dec_locked(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + + if (drm_gem_is_imported(&bo->ttm.base)) + return; + + xe_assert(xe_bo_device(bo), bo->purgeable.vma_count > 0); + bo->purgeable.vma_count--; +} + static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo) { if (likely(bo)) { diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 9c199badd9b2..fcc63ae3f455 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -111,10 +111,32 @@ struct xe_bo { u64 min_align; /** - * @madv_purgeable: user space advise on BO purgeability, protected - * by BO's dma-resv lock. + * @purgeable: Purgeability state and accounting. + * + * All fields are protected by the BO's dma-resv lock. */ - u32 madv_purgeable; + struct { + /** + * @purgeable.state: BO purgeability state + * (WILLNEED/DONTNEED/PURGED). + */ + u32 state; + + /** + * @purgeable.vma_count: Number of VMAs currently mapping this BO. + */ + u32 vma_count; + + /** + * @purgeable.willneed_count: Number of active WILLNEED holders. + * + * Counts WILLNEED VMAs plus active dma-buf exports for + * non-imported BOs. The BO flips to DONTNEED on a 1->0 + * transition only when VMAs still exist; if the last VMA is + * removed, the previous BO state is preserved. + */ + u32 willneed_count; + } purgeable; }; #endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 89437de3001a..32dd2ffbc796 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -42,6 +42,7 @@ struct xe_ggtt; struct xe_i2c; struct xe_pat_ops; struct xe_pxp; +struct xe_ttm_stolen_mgr; struct xe_vram_region; /** @@ -276,6 +277,8 @@ struct xe_device { struct ttm_resource_manager sys_mgr; /** @mem.shrinker: system memory shrinker. */ struct xe_shrinker *shrinker; + /** @mem.stolen_mgr: stolen memory manager. 
*/ + struct xe_ttm_stolen_mgr *stolen_mgr; } mem; /** @sriov: device level virtualization data */ diff --git a/drivers/gpu/drm/xe/xe_device_wa_oob.rules b/drivers/gpu/drm/xe/xe_device_wa_oob.rules index 92371c490529..d8dc41851425 100644 --- a/drivers/gpu/drm/xe/xe_device_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_device_wa_oob.rules @@ -5,3 +5,5 @@ 14022085890 SUBPLATFORM(BATTLEMAGE, G21) 14026539277 PLATFORM(NOVALAKE_P), PLATFORM_STEP(A0, B0) 14026633728 PLATFORM(CRESCENTISLAND) +14026746987 PLATFORM(CRESCENTISLAND) +14026779378 PLATFORM(CRESCENTISLAND) diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index b9828da15897..8a920e58245c 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -193,6 +193,18 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, return 0; } +static void xe_dma_buf_release(struct dma_buf *dmabuf) +{ + struct drm_gem_object *obj = dmabuf->priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + + xe_bo_lock(bo, false); + xe_bo_willneed_put_locked(bo); + xe_bo_unlock(bo); + + drm_gem_dmabuf_release(dmabuf); +} + static const struct dma_buf_ops xe_dmabuf_ops = { .attach = xe_dma_buf_attach, .detach = xe_dma_buf_detach, @@ -200,7 +212,7 @@ static const struct dma_buf_ops xe_dmabuf_ops = { .unpin = xe_dma_buf_unpin, .map_dma_buf = xe_dma_buf_map, .unmap_dma_buf = xe_dma_buf_unmap, - .release = drm_gem_dmabuf_release, + .release = xe_dma_buf_release, .begin_cpu_access = xe_dma_buf_begin_cpu_access, .mmap = drm_gem_dmabuf_mmap, .vmap = drm_gem_dmabuf_vmap, @@ -241,33 +253,33 @@ struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags) ret = -EINVAL; goto out_unlock; } + + xe_bo_willneed_get_locked(bo); xe_bo_unlock(bo); ret = ttm_bo_setup_export(&bo->ttm, &ctx); if (ret) - return ERR_PTR(ret); + goto out_put; buf = drm_gem_prime_export(obj, flags); - if (!IS_ERR(buf)) - buf->ops = &xe_dmabuf_ops; + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto out_put; + } + 
buf->ops = &xe_dmabuf_ops; return buf; +out_put: + xe_bo_lock(bo, false); + xe_bo_willneed_put_locked(bo); out_unlock: xe_bo_unlock(bo); return ERR_PTR(ret); } -/* - * Takes ownership of @storage: on success it is transferred to the returned - * drm_gem_object; on failure it is freed before returning the error. - * This matches the contract of xe_bo_init_locked() which frees @storage on - * its error paths, so callers need not (and must not) free @storage after - * this call. - */ static struct drm_gem_object * -xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, - struct dma_buf *dma_buf) +xe_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) { struct dma_resv *resv = dma_buf->resv; struct xe_device *xe = to_xe_device(dev); @@ -278,10 +290,8 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, int ret = 0; dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm); - if (!dummy_obj) { - xe_bo_free(storage); + if (!dummy_obj) return ERR_PTR(-ENOMEM); - } dummy_obj->resv = resv; xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) { @@ -290,8 +300,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, if (ret) break; - /* xe_bo_init_locked() frees storage on error */ - bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size, + bo = xe_bo_init_locked(xe, NULL, NULL, resv, NULL, dma_buf->size, 0, /* Will require 1way or 2way for vm_bind */ ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec); drm_exec_retry_on_contention(&exec); @@ -342,7 +351,6 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, const struct dma_buf_attach_ops *attach_ops; struct dma_buf_attachment *attach; struct drm_gem_object *obj; - struct xe_bo *bo; if (dma_buf->ops == &xe_dmabuf_ops) { obj = dma_buf->priv; @@ -358,13 +366,15 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, } /* - * Don't publish the bo until we have a valid attachment, and a - * valid attachment needs the bo 
address. So pre-create a bo before - * creating the attachment and publish. + * This needs to happen before the attach, since it will create a new + * attachment for this, and add it to the list of attachments, at which + * point it is globally visible, and at any point the export side can + * call into on invalidate_mappings callback, which require a working + * object. */ - bo = xe_bo_alloc(); - if (IS_ERR(bo)) - return ERR_CAST(bo); + obj = xe_dma_buf_create_obj(dev, dma_buf); + if (IS_ERR(obj)) + return obj; attach_ops = &xe_dma_buf_attach_ops; #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) @@ -372,29 +382,15 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, attach_ops = test->attach_ops; #endif - attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, &bo->ttm.base); + attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, obj); if (IS_ERR(attach)) { - obj = ERR_CAST(attach); - goto out_err; + xe_bo_put(gem_to_xe_bo(obj)); + return ERR_CAST(attach); } - /* - * xe_dma_buf_init_obj() takes ownership of bo on both success - * and failure, so we must not touch bo after this call. 
- */ - obj = xe_dma_buf_init_obj(dev, bo, dma_buf); - if (IS_ERR(obj)) { - dma_buf_detach(dma_buf, attach); - return obj; - } get_dma_buf(dma_buf); obj->import_attach = attach; return obj; - -out_err: - xe_bo_free(bo); - - return obj; } #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index dddcdd0bb7a3..297be3c42b20 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -44,6 +44,7 @@ struct per_xecore_buf { struct xe_eu_stall_data_stream { bool pollin; bool enabled; + bool reset_detected; int wait_num_reports; int sampling_rate_mult; wait_queue_head_t poll_wq; @@ -428,9 +429,20 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) set_bit(xecore, stream->data_drop.mask); xecore_buf->write = write_ptr; } + /* If a GT or engine reset happens during EU stall sampling, + * all EU stall registers get reset to 0 and the cached values of + * the EU stall data buffers' read pointers are out of sync with + * the register values. This causes invalid data to be returned + * from read(). To prevent this, check the value of a EU stall base + * register. If it is zero, there has been a reset. + */ + if (unlikely(!xe_gt_mcr_unicast_read_any(gt, XEHPC_EUSTALL_BASE))) + stream->reset_detected = true; + + stream->pollin = min_data_present || stream->reset_detected; mutex_unlock(&stream->xecore_buf_lock); - return min_data_present; + return stream->pollin; } static void clear_dropped_eviction_line_bit(struct xe_gt *gt, u16 group, u16 instance) @@ -544,6 +556,15 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st int ret = 0; mutex_lock(&stream->xecore_buf_lock); + /* If EU stall registers got reset due to a GT/engine reset, + * continuing with the read() will return invalid data to + * the user space. Just return -ENODEV instead. 
+ */ + if (unlikely(stream->reset_detected)) { + xe_gt_dbg(gt, "EU stall base register has been reset\n"); + mutex_unlock(&stream->xecore_buf_lock); + return -ENODEV; + } if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) { if (!stream->data_drop.reported_to_user) { stream->data_drop.reported_to_user = true; @@ -554,7 +575,6 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st } stream->data_drop.reported_to_user = false; } - for_each_dss_steering(xecore, gt, group, instance) { ret = xe_eu_stall_data_buf_read(stream, buf, count, &total_size, gt, group, instance, xecore); @@ -609,7 +629,8 @@ static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf, * We don't want to block the next read() when there is data in the buffer * now, but couldn't be accommodated in the small user buffer. */ - stream->pollin = false; + if (!stream->reset_detected) + stream->pollin = false; return ret; } @@ -692,6 +713,7 @@ static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream) xecore_buf->write = write_ptr; xecore_buf->read = write_ptr; } + stream->reset_detected = false; stream->data_drop.reported_to_user = false; bitmap_zero(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS); @@ -717,13 +739,13 @@ static void eu_stall_data_buf_poll_work_fn(struct work_struct *work) container_of(work, typeof(*stream), buf_poll_work.work); struct xe_gt *gt = stream->gt; - if (eu_stall_data_buf_poll(stream)) { - stream->pollin = true; + if (eu_stall_data_buf_poll(stream)) wake_up(&stream->poll_wq); - } - queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq, - &stream->buf_poll_work, - msecs_to_jiffies(POLL_PERIOD_MS)); + + if (!stream->reset_detected) + queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq, + &stream->buf_poll_work, + msecs_to_jiffies(POLL_PERIOD_MS)); } static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream, diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 
071b8c41df43..1b5ca3ce578a 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -275,8 +275,12 @@ static void xe_exec_queue_set_lrc(struct xe_exec_queue *q, struct xe_lrc *lrc, u { xe_assert(gt_to_xe(q->gt), idx < q->width); - scoped_guard(spinlock, &q->lrc_lookup_lock) + scoped_guard(spinlock, &q->lrc_lookup_lock) { q->lrc[idx] = lrc; + if (xe_exec_queue_is_multi_queue(q)) + q->lrc[idx]->multi_queue.primary_lrc = + q->multi_queue.group->primary->lrc[0]; + } } /** @@ -852,11 +856,6 @@ static int xe_exec_queue_group_init(struct xe_device *xe, struct xe_exec_queue * return 0; } -static inline bool xe_exec_queue_supports_multi_queue(struct xe_exec_queue *q) -{ - return q->gt->info.multi_queue_engine_class_mask & BIT(q->class); -} - static int xe_exec_queue_group_validate(struct xe_device *xe, struct xe_exec_queue *q, u32 primary_id) { @@ -912,6 +911,7 @@ static int xe_exec_queue_group_add(struct xe_device *xe, struct xe_exec_queue *q } q->multi_queue.pos = pos; + q->lrc[0]->multi_queue.pos = pos; return 0; } @@ -931,7 +931,7 @@ static void xe_exec_queue_group_delete(struct xe_device *xe, struct xe_exec_queu static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue *q, u64 value) { - if (XE_IOCTL_DBG(xe, !xe_exec_queue_supports_multi_queue(q))) + if (XE_IOCTL_DBG(xe, !xe_gt_supports_multi_queue(q->gt, q->class))) return -ENODEV; if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe))) diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index de7e47763411..4150aa594f05 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -155,4 +155,19 @@ static inline bool xe_gt_recovery_pending(struct xe_gt *gt) xe_gt_sriov_vf_recovery_pending(gt); } +/** + * xe_gt_supports_multi_queue() - Check if gt supports multi queue for the + * specified engine class. 
+ * + * @gt: the GT object + * @class: hwe class type + * + * Return: true if the hw engine class supports multi queue, else false + */ +static inline bool xe_gt_supports_multi_queue(const struct xe_gt *gt, + enum xe_engine_class class) +{ + return gt->info.multi_queue_engine_class_mask & BIT(class); +} + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c index 87a164efcc33..01fe03b9efe8 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c @@ -385,10 +385,10 @@ static int pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid, void *buf if (xe_gt_is_media_type(gt)) for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) - regs[n] = xe_mmio_read32(&gt->mmio, MED_VF_SW_FLAG(n)); + regs[n] = xe_mmio_read32(&mmio, MED_VF_SW_FLAG(n)); else for (n = 0; n < VF_SW_FLAG_COUNT; n++) - regs[n] = xe_mmio_read32(&gt->mmio, VF_SW_FLAG(n)); + regs[n] = xe_mmio_read32(&mmio, VF_SW_FLAG(n)); return 0; } @@ -407,10 +407,10 @@ static int pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid, if (xe_gt_is_media_type(gt)) for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) - xe_mmio_write32(&gt->mmio, MED_VF_SW_FLAG(n), regs[n]); + xe_mmio_write32(&mmio, MED_VF_SW_FLAG(n), regs[n]); else for (n = 0; n < VF_SW_FLAG_COUNT; n++) - xe_mmio_write32(&gt->mmio, VF_SW_FLAG(n), regs[n]); + xe_mmio_write32(&mmio, VF_SW_FLAG(n), regs[n]); return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 7351aadd238e..e5588c88800a 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -145,6 +145,13 @@ struct xe_gt { /** @info.has_indirect_ring_state: GT has indirect ring state support */ u8 has_indirect_ring_state:1; /** + * @info.has_xe2_blt_instructions: GT supports Xe2-style MEM_SET + * and MEM_COPY blitter functionality. Note that despite the + * name, some Xe1 platforms may also support this "Xe2-style" + * feature. 
+ */ + u8 has_xe2_blt_instructions:1; + /** * @info.num_geometry_xecore_fuse_regs: Number of 32b-bit fuse * registers the geometry XeCore mask spans. */ diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index ce651da6f318..b9bca6084a4f 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -515,12 +515,9 @@ static void guc_golden_lrc_init(struct xe_guc_ads *ads) * that starts after the execlists LRC registers. This is * required to allow the GuC to restore just the engine state * when a watchdog reset occurs. - * We calculate the engine state size by removing the size of - * what comes before it in the context image (which is identical - * on all engines). */ ads_blob_write(ads, ads.eng_state_size[guc_class], - real_size - xe_lrc_skip_size(xe)); + xe_lrc_engine_state_size(gt, class)); ads_blob_write(ads, ads.golden_context_lrca[guc_class], addr_ggtt); diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index bc49e40165a3..21f7caf9ea08 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -1841,12 +1841,6 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm str_yes_no(snapshot->kernel_reserved)); for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) { - /* - * FIXME: During devcoredump print we should avoid accessing the - * driver pointers for gt or engine. Printing should be done only - * using the snapshot captured. Here we are accessing the gt - * pointer. It should be fixed. 
- */ list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type, capture_class, false); snapshot_print_by_list_order(snapshot, p, type, list); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index b1222b42174c..4171eff4e8ad 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -852,10 +852,27 @@ static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc, xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1); } -static void __register_exec_queue_group(struct xe_guc *guc, - struct xe_exec_queue *q, +static void guc_exec_queue_send_cgp_sync(struct xe_exec_queue *q) +{ +#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2) + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_exec_queue_group *group = q->multi_queue.group; + u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE]; + int len = 0; + + action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC; + action[len++] = group->primary->guc->id; + + xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE); +#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE + + xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); +} + +static void __register_exec_queue_group(struct xe_exec_queue *q, struct guc_ctxt_registration_info *info) { + struct xe_guc *guc = exec_queue_to_guc(q); #define MAX_MULTI_QUEUE_REG_SIZE (8) u32 action[MAX_MULTI_QUEUE_REG_SIZE]; int len = 0; @@ -880,29 +897,6 @@ static void __register_exec_queue_group(struct xe_guc *guc, xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); } -static void xe_guc_exec_queue_group_add(struct xe_guc *guc, - struct xe_exec_queue *q) -{ -#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2) - u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE]; - int len = 0; - - xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q)); - - action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC; - action[len++] = q->multi_queue.group->primary->guc->id; - - xe_gt_assert(guc_to_gt(guc), len <= 
MAX_MULTI_QUEUE_CGP_SYNC_SIZE); -#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE - - /* - * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC do expect a - * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response - * from guc. - */ - xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); -} - static void __register_mlrc_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q, struct guc_ctxt_registration_info *info) @@ -1028,7 +1022,7 @@ static void register_exec_queue(struct xe_exec_queue *q, int ctx_type) set_exec_queue_registered(q); trace_xe_exec_queue_register(q); if (xe_exec_queue_is_multi_queue_primary(q)) - __register_exec_queue_group(guc, q, &info); + __register_exec_queue_group(q, &info); else if (xe_exec_queue_is_parallel(q)) __register_mlrc_exec_queue(guc, q, &info); else if (!xe_exec_queue_is_multi_queue_secondary(q)) @@ -1038,7 +1032,7 @@ static void register_exec_queue(struct xe_exec_queue *q, int ctx_type) init_policies(guc, q); if (xe_exec_queue_is_multi_queue_secondary(q)) - xe_guc_exec_queue_group_add(guc, q); + guc_exec_queue_send_cgp_sync(q); } static u32 wq_space_until_wrap(struct xe_exec_queue *q) @@ -1216,10 +1210,8 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) if (xe_exec_queue_is_multi_queue_secondary(q)) { struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); - if (exec_queue_killed_or_banned_or_wedged(primary)) { - killed_or_banned_or_wedged = true; + if (exec_queue_killed_or_banned_or_wedged(primary)) goto run_job_out; - } if (!exec_queue_registered(primary)) register_exec_queue(primary, GUC_CONTEXT_NORMAL); @@ -1889,21 +1881,8 @@ static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sche { struct xe_exec_queue *q = msg->private_data; - if (guc_exec_queue_allowed_to_change_state(q)) { -#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2) - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_exec_queue_group *group = q->multi_queue.group; - u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE]; - int len = 0; 
- - action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC; - action[len++] = group->primary->guc->id; - - xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE); -#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE - - xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); - } + if (guc_exec_queue_allowed_to_change_state(q)) + guc_exec_queue_send_cgp_sync(q); kfree(msg); } diff --git a/drivers/gpu/drm/xe/xe_hw_error.c b/drivers/gpu/drm/xe/xe_hw_error.c index 2a31b430570e..5135e8e4093f 100644 --- a/drivers/gpu/drm/xe/xe_hw_error.c +++ b/drivers/gpu/drm/xe/xe_hw_error.c @@ -36,11 +36,6 @@ static const char * const hec_uncorrected_fw_errors[] = { "Data Corruption" }; -static const unsigned long xe_hw_error_map[] = { - [XE_GT_ERROR] = DRM_XE_RAS_ERR_COMP_CORE_COMPUTE, - [XE_SOC_ERROR] = DRM_XE_RAS_ERR_COMP_SOC_INTERNAL, -}; - enum gt_vector_regs { ERR_STAT_GT_VECTOR0 = 0, ERR_STAT_GT_VECTOR1, @@ -65,6 +60,18 @@ static enum drm_xe_ras_error_severity hw_err_to_severity(const enum hardware_err return DRM_XE_RAS_ERR_SEV_UNCORRECTABLE; } +static inline u32 err_src_to_id(u32 err_bit) +{ + switch (err_bit) { + case XE_GT_ERROR: + return DRM_XE_RAS_ERR_COMP_CORE_COMPUTE; + case XE_SOC_ERROR: + return DRM_XE_RAS_ERR_COMP_SOC_INTERNAL; + default: + return 0; + } +} + static const char * const pvc_master_global_err_reg[] = { [0 ... 
1] = "Undefined", [2] = "HBM SS0: Channel0", @@ -169,11 +176,8 @@ static void csc_hw_error_work(struct work_struct *work) { struct xe_tile *tile = container_of(work, typeof(*tile), csc_hw_error_work); struct xe_device *xe = tile_to_xe(tile); - int ret; - ret = xe_survivability_mode_runtime_enable(xe); - if (ret) - drm_err(&xe->drm, "Failed to enable runtime survivability mode\n"); + xe_survivability_mode_runtime_enable(xe); } static void csc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err) @@ -459,14 +463,8 @@ static void hw_error_source_handler(struct xe_tile *tile, const enum hardware_er const char *name; u32 error_id; - /* Check error bit is within bounds */ - if (err_bit >= ARRAY_SIZE(xe_hw_error_map)) - break; - - error_id = xe_hw_error_map[err_bit]; - - /* Check error component is within max */ - if (!error_id || error_id >= DRM_XE_RAS_ERR_COMP_MAX) + error_id = err_src_to_id(err_bit); + if (!error_id) continue; name = info[error_id].name; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 9db914584347..a4292a11391d 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -21,8 +21,10 @@ #include "xe_configfs.h" #include "xe_device.h" #include "xe_drm_client.h" +#include "xe_exec_queue.h" #include "xe_exec_queue_types.h" #include "xe_gt.h" +#include "xe_gt_clock.h" #include "xe_gt_printk.h" #include "xe_hw_fence.h" #include "xe_map.h" @@ -727,9 +729,16 @@ size_t xe_lrc_reg_size(struct xe_device *xe) return 80 * sizeof(u32); } -size_t xe_lrc_skip_size(struct xe_device *xe) +/** + * xe_lrc_engine_state_size() - Get size of the engine state within LRC + * @gt: the &xe_gt struct instance + * @class: Hardware engine class + * + * Returns: Size of the engine state + */ +size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class) { - return LRC_PPHWSP_SIZE + xe_lrc_reg_size(xe); + return xe_gt_lrc_hang_replay_size(gt, class) - xe_lrc_reg_size(gt_to_xe(gt)); } static inline u32 
__xe_lrc_seqno_offset(struct xe_lrc *lrc) @@ -769,6 +778,16 @@ static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc) return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32); } +static u32 __xe_lrc_queue_timestamp_offset(struct xe_lrc *lrc) +{ + return __xe_lrc_regs_offset(lrc) + CTX_QUEUE_TIMESTAMP * sizeof(u32); +} + +static u32 __xe_lrc_queue_timestamp_udw_offset(struct xe_lrc *lrc) +{ + return __xe_lrc_regs_offset(lrc) + CTX_QUEUE_TIMESTAMP_UDW * sizeof(u32); +} + static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) { u32 offset = xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - @@ -818,6 +837,8 @@ DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw, lrc->bo) DECL_MAP_ADDR_HELPERS(parallel, lrc->bo) DECL_MAP_ADDR_HELPERS(indirect_ring, lrc->bo) DECL_MAP_ADDR_HELPERS(engine_id, lrc->bo) +DECL_MAP_ADDR_HELPERS(queue_timestamp, lrc->bo) +DECL_MAP_ADDR_HELPERS(queue_timestamp_udw, lrc->bo) #undef DECL_MAP_ADDR_HELPERS @@ -867,6 +888,29 @@ static u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) } /** + * xe_lrc_queue_timestamp() - Read queue timestamp value + * @lrc: Pointer to the lrc. + * + * Returns: queue timestamp value + */ +static u64 xe_lrc_queue_timestamp(struct xe_lrc *lrc) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + u32 ldw, udw = 0; + + xe_assert(xe, xe_lrc_is_multi_queue(lrc)); + + map = __xe_lrc_queue_timestamp_map(lrc); + ldw = xe_map_read32(xe, &map); + + map = __xe_lrc_queue_timestamp_udw_map(lrc); + udw = xe_map_read32(xe, &map); + + return (u64)udw << 32 | ldw; +} + +/** * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address * @lrc: Pointer to the lrc. * @@ -1530,6 +1574,18 @@ static int xe_lrc_ctx_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct if (lrc_to_xe(lrc)->info.has_64bit_timestamp) xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0); + /* + * Note: It's possible that this LRC may belong to an exec_queue that is + * not part of a multi-queue group. 
That said, it doesn't hurt to set + * this field anyways since any class that supports multi-queue will + * have these LRC fields defined. + */ + if (xe_gt_supports_multi_queue(gt, hwe->class)) { + lrc->queue_timestamp = 0; + xe_lrc_write_ctx_reg(lrc, CTX_QUEUE_TIMESTAMP, 0); + xe_lrc_write_ctx_reg(lrc, CTX_QUEUE_TIMESTAMP_UDW, 0); + } + if (xe->info.has_asid && vm) xe_lrc_write_ctx_reg(lrc, CTX_ASID, vm->usm.asid); @@ -2455,7 +2511,17 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->replay_offset = 0; snapshot->replay_size = lrc->replay_size; snapshot->lrc_snapshot = NULL; - snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); + snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); + snapshot->ctx_timestamp_ms = + xe_gt_clock_interval_to_ms(lrc->gt, xe_lrc_ctx_timestamp(lrc)); + if (xe_lrc_is_multi_queue(lrc)) { + snapshot->queue_timestamp = xe_lrc_queue_timestamp(lrc); + snapshot->queue_timestamp_ms = + xe_gt_clock_interval_to_ms(lrc->gt, snapshot->queue_timestamp); + } else { + snapshot->queue_timestamp = 0; + snapshot->queue_timestamp_ms = 0; + } snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); return snapshot; } @@ -2508,7 +2574,10 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start); drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); - drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp); + drm_printf(p, "\tTimestamp: 0x%016llx\n", snapshot->ctx_timestamp); + drm_printf(p, "\tTimestamp ms: %llu\n", snapshot->ctx_timestamp_ms); + drm_printf(p, "\tQueue Timestamp: 0x%016llx\n", snapshot->queue_timestamp); + drm_printf(p, "\tQueue Timestamp ms: %llu\n", snapshot->queue_timestamp_ms); drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp); if (!snapshot->lrc_snapshot) @@ -2549,17 +2618,27 @@ void 
xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) kfree(snapshot); } -static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts) +static struct xe_hw_engine *engine_id_to_hwe(struct xe_gt *gt, u32 engine_id) { u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id); u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id); + struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, class, instance, false); + + if (xe_gt_WARN_ONCE(gt, !hwe || xe_hw_engine_is_reserved(hwe), + "Unexpected engine class:instance %d:%d for utilization\n", + class, instance)) + return NULL; + + return hwe; +} + +static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts) +{ struct xe_hw_engine *hwe; u64 val; - hwe = xe_gt_hw_engine(lrc->gt, class, instance, false); - if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe), - "Unexpected engine class:instance %d:%d for context utilization\n", - class, instance)) + hwe = engine_id_to_hwe(lrc->gt, engine_id); + if (!hwe) return -1; if (lrc_to_xe(lrc)->info.has_64bit_timestamp) @@ -2574,66 +2653,136 @@ static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts) return 0; } -/** - * xe_lrc_timestamp() - Current ctx timestamp - * @lrc: Pointer to the lrc. - * - * Return latest ctx timestamp. With support for active contexts, the - * calculation may be slightly racy, so follow a read-again logic to ensure that - * the context is still active before returning the right timestamp. 
- * - * Returns: New ctx timestamp value - */ -u64 xe_lrc_timestamp(struct xe_lrc *lrc) +static u64 get_queue_timestamp(struct xe_hw_engine *hwe) { - u64 lrc_ts, reg_ts, new_ts = lrc->ctx_timestamp; - u32 engine_id; + return xe_mmio_read64_2x32(&hwe->gt->mmio, + RING_QUEUE_TIMESTAMP(hwe->mmio_base)); +} - lrc_ts = xe_lrc_ctx_timestamp(lrc); - /* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */ - if (IS_SRIOV_VF(lrc_to_xe(lrc))) { - new_ts = lrc_ts; - goto done; - } +static u32 get_multi_queue_active_queue_id(struct xe_hw_engine *hwe) +{ + u32 val = xe_mmio_read32(&hwe->gt->mmio, + RING_CSMQDEBUG(hwe->mmio_base)); - if (lrc_ts == CONTEXT_ACTIVE) { - engine_id = xe_lrc_engine_id(lrc); - if (!get_ctx_timestamp(lrc, engine_id, &reg_ts)) - new_ts = reg_ts; + return REG_FIELD_GET(CURRENT_ACTIVE_QUEUE_ID_MASK, val); +} - /* read lrc again to ensure context is still active */ - lrc_ts = xe_lrc_ctx_timestamp(lrc); - } +static bool context_active(struct xe_lrc *lrc) +{ + return xe_lrc_ctx_timestamp(lrc) == CONTEXT_ACTIVE; +} + +static u64 xe_lrc_multi_queue_timestamp(struct xe_lrc *lrc) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct xe_lrc *primary_lrc = lrc->multi_queue.primary_lrc; + struct xe_hw_engine *hwe; + u64 reg_queue_ts = lrc->queue_timestamp; + + if (IS_SRIOV_VF(xe)) + return xe_lrc_queue_timestamp(lrc); + + xe_assert(xe, primary_lrc); + + /* WA BB populates CONTEXT_ACTIVE cookie for primary context only */ + if (!context_active(primary_lrc)) + return xe_lrc_queue_timestamp(lrc); + + /* WA BB populates engine id in PPHWSP of primary context only */ + hwe = engine_id_to_hwe(primary_lrc->gt, xe_lrc_engine_id(primary_lrc)); + if (!hwe) + return xe_lrc_queue_timestamp(lrc); + + if (get_multi_queue_active_queue_id(hwe) != lrc->multi_queue.pos) + return xe_lrc_queue_timestamp(lrc); + + /* queue is active, so store the queue timestamp register */ + reg_queue_ts = get_queue_timestamp(hwe); + + /* double check queue and primary queue are both still 
active */ + if (get_multi_queue_active_queue_id(hwe) != lrc->multi_queue.pos || + !context_active(primary_lrc)) + return xe_lrc_queue_timestamp(lrc); + + return reg_queue_ts; +} + +static u64 xe_lrc_update_multi_queue_timestamp(struct xe_lrc *lrc, u64 *old_ts) +{ + *old_ts = lrc->queue_timestamp; + lrc->queue_timestamp = xe_lrc_multi_queue_timestamp(lrc); + + trace_xe_lrc_update_queue_timestamp(lrc, *old_ts); + + return lrc->queue_timestamp; +} + +static u64 xe_lrc_context_timestamp(struct xe_lrc *lrc) +{ + u64 reg_ts, new_ts = lrc->ctx_timestamp; + + /* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */ + if (IS_SRIOV_VF(lrc_to_xe(lrc))) + return xe_lrc_ctx_timestamp(lrc); + + if (context_active(lrc) && + !get_ctx_timestamp(lrc, xe_lrc_engine_id(lrc), &reg_ts)) + new_ts = reg_ts; /* - * If context switched out, just use the lrc_ts. Note that this needs to - * be a separate if condition. + * If context switched out while we were here, just return the latest + * LRC CTX TIMESTAMP value. */ - if (lrc_ts != CONTEXT_ACTIVE) - new_ts = lrc_ts; + if (!context_active(lrc)) + return xe_lrc_ctx_timestamp(lrc); -done: return new_ts; } +static u64 xe_lrc_update_context_timestamp(struct xe_lrc *lrc, u64 *old_ts) +{ + *old_ts = lrc->ctx_timestamp; + lrc->ctx_timestamp = xe_lrc_context_timestamp(lrc); + + trace_xe_lrc_update_timestamp(lrc, *old_ts); + + return lrc->ctx_timestamp; +} + +/** + * xe_lrc_timestamp() - Current lrc timestamp + * @lrc: Pointer to the lrc. + * + * Return latest lrc timestamp. With support for active contexts/queues, the + * calculation may be slightly racy, so follow a read-again logic to ensure that + * the context/queue is still active before returning the right timestamp. 
+ * + * Returns: New lrc timestamp value + */ +u64 xe_lrc_timestamp(struct xe_lrc *lrc) +{ + if (xe_lrc_is_multi_queue(lrc)) + return xe_lrc_multi_queue_timestamp(lrc); + else + return xe_lrc_context_timestamp(lrc); +} + /** - * xe_lrc_update_timestamp() - Update ctx timestamp + * xe_lrc_update_timestamp() - Update lrc timestamp * @lrc: Pointer to the lrc. * @old_ts: Old timestamp value * - * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and + * Populate @old_ts with current saved lrc timestamp, read new lrc timestamp and * update saved value. * - * Returns: New ctx timestamp value + * Returns: New lrc timestamp value */ u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts) { - *old_ts = lrc->ctx_timestamp; - lrc->ctx_timestamp = xe_lrc_timestamp(lrc); - - trace_xe_lrc_update_timestamp(lrc, *old_ts); - - return lrc->ctx_timestamp; + if (xe_lrc_is_multi_queue(lrc)) + return xe_lrc_update_multi_queue_timestamp(lrc, old_ts); + else + return xe_lrc_update_context_timestamp(lrc, old_ts); } /** diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index e7c975f9e2d9..0a3a611391ee 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -37,7 +37,10 @@ struct xe_lrc_snapshot { } tail; u32 start_seqno; u32 seqno; - u32 ctx_timestamp; + u64 ctx_timestamp; + u64 ctx_timestamp_ms; + u64 queue_timestamp; + u64 queue_timestamp_ms; u32 ctx_job_timestamp; }; @@ -90,6 +93,11 @@ static inline size_t xe_lrc_ring_size(void) return SZ_16K; } +static inline bool xe_lrc_is_multi_queue(struct xe_lrc *lrc) +{ + return lrc->multi_queue.primary_lrc; +} + size_t xe_gt_lrc_hang_replay_size(struct xe_gt *gt, enum xe_engine_class class); size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class); u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc); @@ -130,7 +138,7 @@ u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc); struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc); size_t xe_lrc_reg_size(struct xe_device *xe); 
-size_t xe_lrc_skip_size(struct xe_device *xe); +size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class); void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 5a718f759ed6..53ef48feebfc 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -63,6 +63,17 @@ struct xe_lrc { /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */ u64 ctx_timestamp; + + /** @queue_timestamp: value of QUEUE_TIMESTAMP on last update */ + u64 queue_timestamp; + + /** @multi_queue: Multi queue LRC related information */ + struct { + /** @multi_queue.primary_lrc: Primary lrc of this multi-queue group*/ + struct xe_lrc *primary_lrc; + /** @multi_queue.pos: Position of LRC within the multi-queue group */ + u8 pos; + } multi_queue; }; struct xe_lrc_snapshot; diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c index 811e07136efb..3848ff81c1f9 100644 --- a/drivers/gpu/drm/xe/xe_memirq.c +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -212,7 +212,11 @@ out: static void memirq_set_enable(struct xe_memirq *memirq, bool enable) { - iosys_map_wr(&memirq->mask, 0, u32, enable ? GENMASK(15, 0) : 0); + /* + * We only care about the GT_MI_USER_INTERRUPT from the engines and + * the GuC does not look at the ENABLE mask at all. + */ + iosys_map_wr(&memirq->mask, 0, u32, enable ? 
GT_MI_USER_INTERRUPT : 0); memirq->enabled = enable; } @@ -427,13 +431,25 @@ static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector, return __memirq_received(memirq, vector, offset, name, true); } +static void memirq_assume_received(struct xe_memirq *memirq, const char *source, + u16 offset, const char *status) +{ + memirq_debug(memirq, "ASSUME %s %s(%u)\n", source, status, offset); +} + static void memirq_dispatch_engine(struct xe_memirq *memirq, struct iosys_map *status, struct xe_hw_engine *hwe) { memirq_debug(memirq, "STATUS %s %*ph\n", hwe->name, 16, status->vaddr); - if (memirq_received(memirq, status, ilog2(GT_MI_USER_INTERRUPT), hwe->name)) - xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT); + /* + * The programming note says to assume that GT_MI_USER_INTERRUPT is always + * set. Check and clear related status byte just for a debug. + */ + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEMIRQ) && + !memirq_received(memirq, status, ilog2(GT_MI_USER_INTERRUPT), hwe->name)) + memirq_assume_received(memirq, hwe->name, ilog2(GT_MI_USER_INTERRUPT), "USER"); + xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT); } static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *status, @@ -443,8 +459,14 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat memirq_debug(memirq, "STATUS %s %*ph\n", name, 16, status->vaddr); - if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name)) - xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST); + /* + * The programming note says to assume that GUC_INTR_GUC2HOST is always + * set. Check and clear related status byte just for a debug. 
+ */ + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEMIRQ) && + !memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name)) + memirq_assume_received(memirq, name, ilog2(GUC_INTR_GUC2HOST), "GUC2HOST"); + xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST); /* * This is a software interrupt that must be cleared after it's consumed diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index a87fbc1e9fb1..9428dd5e7760 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -728,7 +728,22 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb, bb->len = cs - bb->cs; } -#define EMIT_COPY_DW 10 +static u32 blt_fast_copy_cmd_len(struct xe_device *xe) +{ + return 10; +} + +static u32 blt_mem_copy_cmd_len(struct xe_device *xe) +{ + return 10; +} + +static u32 emit_copy_cmd_len(struct xe_device *xe) +{ + return (xe->info.has_mem_copy_instr) ? blt_mem_copy_cmd_len(xe) : + blt_fast_copy_cmd_len(xe); +} + static void emit_xy_fast_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, u64 dst_ofs, unsigned int size, unsigned int pitch) @@ -736,6 +751,7 @@ static void emit_xy_fast_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, struct xe_device *xe = gt_to_xe(gt); u32 mocs = 0; u32 tile_y = 0; + u32 len; xe_gt_assert(gt, !(pitch & 3)); xe_gt_assert(gt, size / pitch <= S16_MAX); @@ -748,7 +764,8 @@ static void emit_xy_fast_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, if (GRAPHICS_VERx100(xe) >= 1250) tile_y = XY_FAST_COPY_BLT_D1_SRC_TILE4 | XY_FAST_COPY_BLT_D1_DST_TILE4; - bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2); + len = blt_fast_copy_cmd_len(xe); + bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (len - 2); bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | tile_y | mocs; bb->cs[bb->len++] = 0; bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4; @@ -765,6 +782,7 @@ static void emit_mem_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, u64 dst_ofs, unsigned int size, unsigned int pitch) { u32 mode, 
copy_type, width; + u32 len; xe_gt_assert(gt, IS_ALIGNED(size, pitch)); xe_gt_assert(gt, pitch <= U16_MAX); @@ -790,7 +808,9 @@ static void emit_mem_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, xe_gt_assert(gt, width <= U16_MAX); - bb->cs[bb->len++] = MEM_COPY_CMD | mode | copy_type; + len = blt_mem_copy_cmd_len(gt_to_xe(gt)); + + bb->cs[bb->len++] = MEM_COPY_CMD | mode | copy_type | (len - 2); bb->cs[bb->len++] = width - 1; bb->cs[bb->len++] = size / pitch - 1; /* ignored by hw for page-copy/linear above */ bb->cs[bb->len++] = pitch - 1; @@ -967,7 +987,7 @@ static struct dma_fence *__xe_migrate_copy(struct xe_migrate *m, } /* Add copy commands size here */ - batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) + + batch_size += ((copy_only_ccs) ? 0 : emit_copy_cmd_len(xe)) + ((needs_ccs_emit ? EMIT_COPY_CCS_DW : 0)); bb = xe_bb_new(gt, batch_size, usm); @@ -1406,7 +1426,7 @@ struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_off batch_size += pte_update_size(m, 0, sysmem, &sysmem_it, &vram_L0, &sysmem_L0_ofs, &sysmem_L0_pt, 0, avail_pts, avail_pts); - batch_size += EMIT_COPY_DW; + batch_size += emit_copy_cmd_len(xe); bb = xe_bb_new(gt, batch_size, usm); if (IS_ERR(bb)) { @@ -1461,12 +1481,17 @@ struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_off return fence; } +static u32 blt_mem_set_cmd_len(struct xe_device *xe) +{ + return 7; +} + static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, u32 size, u32 pitch) { struct xe_device *xe = gt_to_xe(gt); u32 *cs = bb->cs + bb->len; - u32 len = PVC_MEM_SET_CMD_LEN_DW; + u32 len = blt_mem_set_cmd_len(xe); *cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2); *cs++ = pitch - 1; @@ -1484,15 +1509,21 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs bb->len += len; } +static u32 blt_fast_color_cmd_len(struct xe_device *xe) +{ + if (GRAPHICS_VERx100(xe) >= 1250) + return 16; + else + return 11; +} + 
static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, u32 size, u32 pitch, bool is_vram) { struct xe_device *xe = gt_to_xe(gt); u32 *cs = bb->cs + bb->len; - u32 len = XY_FAST_COLOR_BLT_DW; + u32 len = blt_fast_color_cmd_len(xe); - if (GRAPHICS_VERx100(xe) < 1250) - len = 11; *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 | (len - 2); @@ -1525,32 +1556,20 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb, bb->len += len; } -static bool has_service_copy_support(struct xe_gt *gt) -{ - /* - * What we care about is whether the architecture was designed with - * service copy functionality (specifically the new MEM_SET / MEM_COPY - * instructions) so check the architectural engine list rather than the - * actual list since these instructions are usable on BCS0 even if - * all of the actual service copy engines (BCS1-BCS8) have been fused - * off. - */ - return gt->info.engine_mask & GENMASK(XE_HW_ENGINE_BCS8, - XE_HW_ENGINE_BCS1); -} - static u32 emit_clear_cmd_len(struct xe_gt *gt) { - if (has_service_copy_support(gt)) - return PVC_MEM_SET_CMD_LEN_DW; + struct xe_device *xe = gt_to_xe(gt); + + if (gt->info.has_xe2_blt_instructions) + return blt_mem_set_cmd_len(xe); else - return XY_FAST_COLOR_BLT_DW; + return blt_fast_color_cmd_len(xe); } static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, u32 size, u32 pitch, bool is_vram) { - if (has_service_copy_support(gt)) + if (gt->info.has_xe2_blt_instructions) emit_clear_link_copy(gt, bb, src_ofs, size, pitch); else emit_clear_main_copy(gt, bb, src_ofs, size, pitch, @@ -2217,7 +2236,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, xe_assert(xe, npages * PAGE_SIZE <= MAX_PREEMPTDISABLE_TRANSFER); batch_size += pte_update_cmd_size(npages << PAGE_SHIFT); - batch_size += EMIT_COPY_DW; + batch_size += emit_copy_cmd_len(xe); bb = xe_bb_new(gt, batch_size, use_usm_batch); if (IS_ERR(bb)) { diff --git a/drivers/gpu/drm/xe/xe_oa.c 
b/drivers/gpu/drm/xe/xe_oa.c index 5de5bf19240a..7c9071abb44f 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1934,16 +1934,21 @@ static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent) return div_u64(nom + den - 1, den); } -static bool oa_unit_supports_oa_format(struct xe_oa_open_param *param, int type) +static bool oa_unit_supports_oa_format(struct xe_oa *oa, struct xe_oa_open_param *param) { + const struct xe_oa_format *f = &oa->oa_formats[param->oa_format]; + switch (param->oa_unit->type) { case DRM_XE_OA_UNIT_TYPE_OAG: - return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR || - type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; + return f->type == DRM_XE_OA_FMT_TYPE_OAG || f->type == DRM_XE_OA_FMT_TYPE_OAR || + f->type == DRM_XE_OA_FMT_TYPE_OAC || f->type == DRM_XE_OA_FMT_TYPE_PEC; + case DRM_XE_OA_UNIT_TYPE_MERT: + if (XE_DEVICE_WA(oa->xe, 14026746987)) + return param->oa_format == XE_OAM_FORMAT_MPEC8u32_B8_C8; + fallthrough; case DRM_XE_OA_UNIT_TYPE_OAM: case DRM_XE_OA_UNIT_TYPE_OAM_SAG: - case DRM_XE_OA_UNIT_TYPE_MERT: - return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; + return f->type == DRM_XE_OA_FMT_TYPE_OAM || f->type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; default: return false; } @@ -2083,8 +2088,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f goto err_exec_q; f = &oa->oa_formats[param.oa_format]; - if (!param.oa_format || !f->size || - !oa_unit_supports_oa_format(&param, f->type)) { + if (!param.oa_format || !f->size || !oa_unit_supports_oa_format(oa, &param)) { drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n", param.oa_format, f->type, f->size, param.hwe->class); ret = -EINVAL; @@ -2245,15 +2249,19 @@ static bool xe_oa_is_valid_mux_addr(struct xe_oa *oa, u32 addr) return xe_oa_reg_in_range_table(addr, gen12_oa_mux_regs); } -static bool xe_oa_is_valid_config_reg_addr(struct xe_oa *oa, u32 addr) +static bool 
xe_oa_is_valid_config_reg(struct xe_oa *oa, u32 addr, u32 val) { + if (XE_DEVICE_WA(oa->xe, 14026779378) && + addr == SYS_MEM_LAT_MEASURE.addr && val & SYS_MEM_LAT_MEASURE_EN) + return false; + return xe_oa_is_valid_flex_addr(oa, addr) || xe_oa_is_valid_b_counter_addr(oa, addr) || xe_oa_is_valid_mux_addr(oa, addr); } static struct xe_oa_reg * -xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr), +xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr, u32 val), u32 __user *regs, u32 n_regs) { struct xe_oa_reg *oa_regs; @@ -2271,16 +2279,16 @@ xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr), if (err) goto addr_err; - if (!is_valid(oa, addr)) { - drm_dbg(&oa->xe->drm, "Invalid oa_reg address: %X\n", addr); - err = -EINVAL; - goto addr_err; - } - err = get_user(value, regs + 1); if (err) goto addr_err; + if (!is_valid(oa, addr, value)) { + drm_dbg(&oa->xe->drm, "Invalid oa_reg addr/value: %#x %#x\n", addr, value); + err = -EINVAL; + goto addr_err; + } + oa_regs[i].addr = XE_REG(addr); oa_regs[i].value = value; @@ -2379,7 +2387,7 @@ int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *fi memcpy(oa_config->uuid, arg->uuid, sizeof(arg->uuid)); oa_config->regs_len = arg->n_regs; - regs = xe_oa_alloc_regs(oa, xe_oa_is_valid_config_reg_addr, + regs = xe_oa_alloc_regs(oa, xe_oa_is_valid_config_reg, u64_to_user_ptr(arg->regs_ptr), arg->n_regs); if (IS_ERR(regs)) { diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 41435f84aeb2..12d3be7f9f6c 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -850,6 +850,15 @@ static struct xe_gt *alloc_primary_gt(struct xe_tile *tile, gt->info.num_compute_xecore_fuse_regs = graphics_desc->num_compute_xecore_fuse_regs; /* + * Even if the service copy engines wind up being fused off, their + * presence in the IP descriptor indicates that the platform supports + * Xe2-style MEM_SET and 
MEM_COPY functionality. + */ + if (graphics_desc->hw_engine_mask & GENMASK(XE_HW_ENGINE_BCS8, + XE_HW_ENGINE_BCS1)) + gt->info.has_xe2_blt_instructions = true; + + /* * Before media version 13, the media IP was part of the primary GT * so we need to add the media engines to the primary GT's engine list. */ diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 8cc313182968..fb65940848d7 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -9,6 +9,7 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_oa_regs.h" #include "xe_device.h" +#include "xe_gt.h" #include "xe_gt_types.h" #include "xe_gt_printk.h" #include "xe_platform_types.h" @@ -33,6 +34,13 @@ static bool match_has_mert(const struct xe_device *xe, return xe_device_has_mert((struct xe_device *)xe); } +static bool match_multi_queue_class(const struct xe_device *xe, + const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return xe_gt_supports_multi_queue(gt, hwe->class); +} + static const struct xe_rtp_entry_sr register_whitelist[] = { { XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), @@ -54,6 +62,12 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { RING_FORCE_TO_NONPRIV_ACCESS_RD, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + { XE_RTP_NAME("allow_read_queue_timestamp"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3500, 3511), FUNC(match_multi_queue_class)), + XE_RTP_ACTIONS(WHITELIST(RING_QUEUE_TIMESTAMP(0), + RING_FORCE_TO_NONPRIV_ACCESS_RD, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, { XE_RTP_NAME("16014440446"), XE_RTP_RULES(PLATFORM(PVC)), XE_RTP_ACTIONS(WHITELIST(XE_REG(0x4400), diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index 5f4ab08c0686..0522caafd89d 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -101,7 +101,15 @@ static inline 
void xe_res_first(struct ttm_resource *res, cur->mem_type = res->mem_type; switch (cur->mem_type) { - case XE_PL_STOLEN: + case XE_PL_STOLEN: { + /* res->start is in pages (ttm_range_manager). */ + cur->start = (res->start << PAGE_SHIFT) + start; + cur->size = size; + cur->remaining = size; + cur->node = NULL; + cur->mm = NULL; + break; + } case XE_PL_VRAM0: case XE_PL_VRAM1: { struct gpu_buddy_block *block; @@ -289,6 +297,10 @@ static inline void xe_res_next(struct xe_res_cursor *cur, u64 size) switch (cur->mem_type) { case XE_PL_STOLEN: + /* Just advance within the contiguous region. */ + cur->start += size; + cur->size = cur->remaining; + break; case XE_PL_VRAM0: case XE_PL_VRAM1: start = size - cur->size; diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index cfeb4fc7d217..39a670e91ba7 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -269,8 +269,12 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job) static int emit_copy_timestamp(struct xe_device *xe, struct xe_lrc *lrc, u32 *dw, int i) { + const struct xe_reg reg = xe_lrc_is_multi_queue(lrc) ? 
+ RING_QUEUE_TIMESTAMP(0) : + RING_CTX_TIMESTAMP(0); + dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; - dw[i++] = RING_CTX_TIMESTAMP(0).addr; + dw[i++] = reg.addr; dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); dw[i++] = 0; @@ -281,7 +285,7 @@ static int emit_copy_timestamp(struct xe_device *xe, struct xe_lrc *lrc, if (IS_SRIOV_VF(xe)) { dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; - dw[i++] = RING_CTX_TIMESTAMP(0).addr; + dw[i++] = reg.addr; dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc); dw[i++] = 0; } diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c index 6c4b16409cc9..150a241110fb 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c @@ -149,10 +149,11 @@ pf_migration_consume(struct xe_device *xe, unsigned int vfid) for_each_gt(gt, xe, gt_id) { data = xe_gt_sriov_pf_migration_save_consume(gt, vfid); - if (data && PTR_ERR(data) != EAGAIN) + if (!data) + continue; + if (!IS_ERR(data) || PTR_ERR(data) != -EAGAIN) return data; - if (PTR_ERR(data) == -EAGAIN) - more_data = true; + more_data = true; } if (!more_data) diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c index db64cac39c94..427afd144f3a 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.c +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -396,25 +396,21 @@ bool xe_survivability_mode_is_requested(struct xe_device *xe) * Runtime survivability mode is enabled when certain errors cause the device to be * in non-recoverable state. The device is declared wedged with the appropriate * recovery method and survivability mode sysfs exposed to userspace - * - * Return: 0 if runtime survivability mode is enabled, negative error code otherwise. 
*/ -int xe_survivability_mode_runtime_enable(struct xe_device *xe) +void xe_survivability_mode_runtime_enable(struct xe_device *xe) { struct xe_survivability *survivability = &xe->survivability; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - int ret; if (!IS_DGFX(xe) || IS_SRIOV_VF(xe) || xe->info.platform < XE_BATTLEMAGE) { dev_err(&pdev->dev, "Runtime Survivability Mode not supported\n"); - return -EINVAL; + return; } populate_survivability_info(xe); - ret = create_survivability_sysfs(pdev); - if (ret) - dev_err(&pdev->dev, "Failed to create survivability mode sysfs\n"); + if (create_survivability_sysfs(pdev)) + dev_err(&pdev->dev, "Failed to create survivability sysfs\n"); survivability->type = XE_SURVIVABILITY_TYPE_RUNTIME; dev_err(&pdev->dev, "Runtime Survivability mode enabled\n"); @@ -422,8 +418,6 @@ int xe_survivability_mode_runtime_enable(struct xe_device *xe) xe_device_set_wedged_method(xe, DRM_WEDGE_RECOVERY_VENDOR); xe_device_declare_wedged(xe); dev_err(&pdev->dev, "Firmware flash required, Please refer to the userspace documentation for more details!\n"); - - return 0; } /** diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h index 1cc94226aa82..cd040e4d18bb 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.h +++ b/drivers/gpu/drm/xe/xe_survivability_mode.h @@ -11,7 +11,7 @@ struct xe_device; int xe_survivability_mode_boot_enable(struct xe_device *xe); -int xe_survivability_mode_runtime_enable(struct xe_device *xe); +void xe_survivability_mode_runtime_enable(struct xe_device *xe); bool xe_survivability_mode_is_boot_enabled(struct xe_device *xe); bool xe_survivability_mode_is_requested(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_tile_types.h b/drivers/gpu/drm/xe/xe_tile_types.h index 33932fd547d7..0048100ccb72 100644 --- a/drivers/gpu/drm/xe/xe_tile_types.h +++ b/drivers/gpu/drm/xe/xe_tile_types.h @@ -106,8 +106,6 @@ struct xe_tile { struct xe_lmtt lmtt; } pf; struct { - /** 
@sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ - struct xe_ggtt_node *ggtt_balloon[2]; /** @sriov.vf.self_config: VF configuration data */ struct xe_tile_sriov_vf_selfconfig self_config; } vf; diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.h b/drivers/gpu/drm/xe/xe_trace_lrc.h index d525cbee1e34..5c4cfa0c1fe9 100644 --- a/drivers/gpu/drm/xe/xe_trace_lrc.h +++ b/drivers/gpu/drm/xe/xe_trace_lrc.h @@ -12,6 +12,7 @@ #include <linux/tracepoint.h> #include <linux/types.h> +#include "xe_exec_queue_types.h" #include "xe_gt_types.h" #include "xe_lrc.h" #include "xe_lrc_types.h" @@ -42,6 +43,32 @@ TRACE_EVENT(xe_lrc_update_timestamp, __get_str(device_id)) ); +TRACE_EVENT(xe_lrc_update_queue_timestamp, + TP_PROTO(struct xe_lrc *lrc, uint64_t old), + TP_ARGS(lrc, old), + TP_STRUCT__entry( + __field(struct xe_lrc *, lrc) + __field(struct xe_lrc *, primary_lrc) + __field(u64, old) + __field(u64, new) + __string(name, lrc->fence_ctx.name) + __string(device_id, __dev_name_lrc(lrc)) + ), + + TP_fast_assign( + __entry->lrc = lrc; + __entry->primary_lrc = lrc->multi_queue.primary_lrc; + __entry->old = old; + __entry->new = lrc->queue_timestamp; + __assign_str(name); + __assign_str(device_id); + ), + TP_printk("lrc=%p primary_lrc=%p lrc->name=%s old=%llu new=%llu device_id:%s", + __entry->lrc, __entry->primary_lrc, __get_str(name), + __entry->old, __entry->new, + __get_str(device_id)) +); + #endif /* This part must be outside protection */ diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 27c9d72222cf..5e9070739e65 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -19,30 +19,11 @@ #include "xe_device.h" #include "xe_gt_printk.h" #include "xe_mmio.h" -#include "xe_res_cursor.h" #include "xe_sriov.h" #include "xe_ttm_stolen_mgr.h" -#include "xe_ttm_vram_mgr.h" #include "xe_vram.h" #include "xe_wa.h" -struct xe_ttm_stolen_mgr { - struct xe_ttm_vram_mgr base; - - /* PCI base offset 
*/ - resource_size_t io_base; - /* GPU base offset */ - resource_size_t stolen_base; - - void __iomem *mapping; -}; - -static inline struct xe_ttm_stolen_mgr * -to_stolen_mgr(struct ttm_resource_manager *man) -{ - return container_of(man, struct xe_ttm_stolen_mgr, base.manager); -} - /** * xe_ttm_stolen_cpu_access_needs_ggtt() - If we can't directly CPU access * stolen, can we then fallback to mapping through the GGTT. @@ -210,12 +191,19 @@ static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) #endif } +static void xe_ttm_stolen_mgr_fini(struct drm_device *dev, void *arg) +{ + struct xe_device *xe = to_xe_device(dev); + + ttm_range_man_fini_nocheck(&xe->ttm, XE_PL_STOLEN); +} + int xe_ttm_stolen_mgr_init(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct xe_ttm_stolen_mgr *mgr; u64 stolen_size, io_size; - int err; + int ret; mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL); if (!mgr) @@ -244,12 +232,12 @@ int xe_ttm_stolen_mgr_init(struct xe_device *xe) if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe)) io_size = stolen_size; - err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size, - io_size, PAGE_SIZE); - if (err) { - drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err); - return err; - } + ret = ttm_range_man_init_nocheck(&xe->ttm, XE_PL_STOLEN, false, + stolen_size >> PAGE_SHIFT); + if (ret) + return ret; + + xe->mem.stolen_mgr = mgr; drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n", stolen_size); @@ -257,36 +245,32 @@ int xe_ttm_stolen_mgr_init(struct xe_device *xe) if (io_size) mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size); - return 0; + return drmm_add_action_or_reset(&xe->drm, xe_ttm_stolen_mgr_fini, mgr); } u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset) { struct xe_device *xe = xe_bo_device(bo); - struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN); - struct xe_ttm_stolen_mgr *mgr = 
to_stolen_mgr(ttm_mgr); - struct xe_res_cursor cur; + struct xe_ttm_stolen_mgr *mgr = xe->mem.stolen_mgr; XE_WARN_ON(!mgr->io_base); if (xe_ttm_stolen_cpu_access_needs_ggtt(xe)) return mgr->io_base + xe_bo_ggtt_addr(bo) + offset; - xe_res_first(bo->ttm.resource, offset, 4096, &cur); - return mgr->io_base + cur.start; + /* Range allocator: res->start is in pages. */ + return mgr->io_base + (bo->ttm.resource->start << PAGE_SHIFT) + offset; } static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr, struct ttm_resource *mem) { - struct xe_res_cursor cur; - if (!mgr->io_base) return -EIO; - xe_res_first(mem, 0, 4096, &cur); - mem->bus.offset = cur.start; + /* Range allocator always produces contiguous allocations. */ + mem->bus.offset = mem->start << PAGE_SHIFT; drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS)); @@ -329,8 +313,7 @@ static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe, int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem) { - struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN); - struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? 
to_stolen_mgr(ttm_mgr) : NULL; + struct xe_ttm_stolen_mgr *mgr = xe->mem.stolen_mgr; if (!mgr || !mgr->io_base) return -EIO; @@ -343,8 +326,5 @@ int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem) u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe) { - struct xe_ttm_stolen_mgr *mgr = - to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN)); - - return mgr->stolen_base; + return xe->mem.stolen_mgr->stolen_base; } diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h index 8e877d1e839b..0675106d535b 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h @@ -12,6 +12,18 @@ struct ttm_resource; struct xe_bo; struct xe_device; +/** + * struct xe_ttm_stolen_mgr - Xe TTM stolen memory manager + */ +struct xe_ttm_stolen_mgr { + /** @io_base: PCI base offset for CPU I/O access */ + resource_size_t io_base; + /** @stolen_base: GPU base offset */ + resource_size_t stolen_base; + /** @mapping: I/O memory mapping for CPU access */ + void __iomem *mapping; +}; + int xe_ttm_stolen_mgr_init(struct xe_device *xe); int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem); bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 9f67df646955..b518f7dec680 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -299,14 +299,13 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, u64 default_page_size) { struct ttm_resource_manager *man = &mgr->manager; + const char *name; int err; - if (mem_type != XE_PL_STOLEN) { - const char *name = mem_type == XE_PL_VRAM0 ? "vram0" : "vram1"; - man->cg = drmm_cgroup_register_region(&xe->drm, name, size); - if (IS_ERR(man->cg)) - return PTR_ERR(man->cg); - } + name = mem_type == XE_PL_VRAM0 ? 
"vram0" : "vram1"; + man->cg = drmm_cgroup_register_region(&xe->drm, name, size); + if (IS_ERR(man->cg)) + return PTR_ERR(man->cg); man->func = &xe_ttm_vram_mgr_func; mgr->mem_type = mem_type; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 43a578d9c067..b01f31ed4417 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1120,6 +1120,25 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, xe_bo_assert_held(bo); + /* + * Reject only WILLNEED mappings on DONTNEED/PURGED BOs. This + * gates new vm_bind ioctls (user supplies WILLNEED) while + * still allowing partial-unbind / remap splits whose new VMAs + * inherit the parent's DONTNEED attr. It must also run before + * xe_bo_willneed_get_locked() below so a 0->1 holder bump + * cannot silently promote DONTNEED back to WILLNEED. + */ + if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) { + if (xe_bo_madv_is_dontneed(bo)) { + xe_vma_free(vma); + return ERR_PTR(-EBUSY); + } + if (xe_bo_is_purged(bo)) { + xe_vma_free(vma); + return ERR_PTR(-EINVAL); + } + } + vm_bo = drm_gpuvm_bo_obtain_locked(vma->gpuva.vm, &bo->ttm.base); if (IS_ERR(vm_bo)) { xe_vma_free(vma); @@ -1131,6 +1150,10 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->gpuva.gem.offset = bo_offset_or_userptr; drm_gpuva_link(&vma->gpuva, vm_bo); drm_gpuvm_bo_put(vm_bo); + + xe_bo_vma_count_inc_locked(bo); + if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) + xe_bo_willneed_get_locked(bo); } else /* userptr or null */ { if (!is_null && !is_cpu_addr_mirror) { struct xe_userptr_vma *uvma = to_userptr_vma(vma); @@ -1208,7 +1231,10 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) xe_bo_assert_held(bo); drm_gpuva_unlink(&vma->gpuva); - xe_bo_recompute_purgeable_state(bo); + + xe_bo_vma_count_dec_locked(bo); + if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) + xe_bo_willneed_put_locked(bo); } xe_vm_assert_held(vm); @@ -3016,7 +3042,7 @@ static 
void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, * @res_evict: Allow evicting resources during validation * @validate: Perform BO validation * @request_decompress: Request BO decompression - * @check_purged: Reject operation if BO is purged + * @check_purged: Reject operation if BO is DONTNEED or PURGED */ struct xe_vma_lock_and_validate_flags { u32 res_evict : 1; @@ -3030,6 +3056,7 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); + bool validate_bo = flags.validate; int err = 0; if (bo) { @@ -3044,7 +3071,11 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, err = -EINVAL; /* BO already purged */ } - if (!err && flags.validate) + /* Don't validate the BO for DONTNEED/PURGED remap remnants. */ + if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_WILLNEED) + validate_bo = false; + + if (!err && validate_bo) err = xe_bo_validate(bo, vm, xe_vm_allow_vm_eviction(vm) && flags.res_evict, exec); @@ -3152,7 +3183,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, op->map.immediate, .request_decompress = op->map.request_decompress, - .check_purged = true, + .check_purged = false, }); break; case DRM_GPUVA_OP_REMAP: @@ -3174,7 +3205,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, .res_evict = res_evict, .validate = true, .request_decompress = false, - .check_purged = true, + .check_purged = false, }); if (!err && op->remap.next) err = vma_lock_and_validate(exec, op->remap.next, @@ -3182,7 +3213,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, .res_evict = res_evict, .validate = true, .request_decompress = false, - .check_purged = true, + .check_purged = false, }); break; case DRM_GPUVA_OP_UNMAP: @@ -3211,9 +3242,11 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, } /* - * Prefetch attempts to migrate BO's backing store without - * repopulating it first. 
Purged BOs have no backing store - * to migrate, so reject the operation. + * PREFETCH is the only op that still gates on BO purge state. + * MAP/REMAP handle this inside xe_vma_create() so partial + * unbind on a DONTNEED BO still works. PREFETCH skips + * xe_vma_create() and would migrate a BO with no backing + * store, so reject DONTNEED/PURGED here. */ err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.prefetch.va), diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index c78906dea82b..c4fb29004195 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -186,147 +186,6 @@ static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm, } /** - * xe_bo_is_dmabuf_shared() - Check if BO is shared via dma-buf - * @bo: Buffer object - * - * Prevent marking imported or exported dma-bufs as purgeable. - * For imported BOs, Xe doesn't own the backing store and cannot - * safely reclaim pages (exporter or other devices may still be - * using them). For exported BOs, external devices may have active - * mappings we cannot track. - * - * Return: true if BO is imported or exported, false otherwise - */ -static bool xe_bo_is_dmabuf_shared(struct xe_bo *bo) -{ - struct drm_gem_object *obj = &bo->ttm.base; - - /* Imported: exporter owns backing store */ - if (drm_gem_is_imported(obj)) - return true; - - /* Exported: external devices may be accessing */ - if (obj->dma_buf) - return true; - - return false; -} - -/** - * enum xe_bo_vmas_purge_state - VMA purgeable state aggregation - * - * Distinguishes whether a BO's VMAs are all DONTNEED, have at least - * one WILLNEED, or have no VMAs at all. - * - * Enum values align with XE_MADV_PURGEABLE_* states for consistency. 
- */ -enum xe_bo_vmas_purge_state { - /** @XE_BO_VMAS_STATE_WILLNEED: At least one VMA is WILLNEED */ - XE_BO_VMAS_STATE_WILLNEED = 0, - /** @XE_BO_VMAS_STATE_DONTNEED: All VMAs are DONTNEED */ - XE_BO_VMAS_STATE_DONTNEED = 1, - /** @XE_BO_VMAS_STATE_NO_VMAS: BO has no VMAs */ - XE_BO_VMAS_STATE_NO_VMAS = 2, -}; - -/* - * xe_bo_recompute_purgeable_state() casts between xe_bo_vmas_purge_state and - * xe_madv_purgeable_state. Enforce that WILLNEED=0 and DONTNEED=1 match across - * both enums so the single-line cast is always valid. - */ -static_assert(XE_BO_VMAS_STATE_WILLNEED == (int)XE_MADV_PURGEABLE_WILLNEED, - "VMA purge state WILLNEED must equal madv purgeable WILLNEED"); -static_assert(XE_BO_VMAS_STATE_DONTNEED == (int)XE_MADV_PURGEABLE_DONTNEED, - "VMA purge state DONTNEED must equal madv purgeable DONTNEED"); - -/** - * xe_bo_all_vmas_dontneed() - Determine BO VMA purgeable state - * @bo: Buffer object - * - * Check all VMAs across all VMs to determine aggregate purgeable state. - * Shared BOs require unanimous DONTNEED state from all mappings. - * - * Caller must hold BO dma-resv lock. 
- * - * Return: XE_BO_VMAS_STATE_DONTNEED if all VMAs are DONTNEED, - * XE_BO_VMAS_STATE_WILLNEED if at least one VMA is not DONTNEED, - * XE_BO_VMAS_STATE_NO_VMAS if BO has no VMAs - */ -static enum xe_bo_vmas_purge_state xe_bo_all_vmas_dontneed(struct xe_bo *bo) -{ - struct drm_gpuvm_bo *vm_bo; - struct drm_gpuva *gpuva; - struct drm_gem_object *obj = &bo->ttm.base; - bool has_vmas = false; - - xe_bo_assert_held(bo); - - /* Shared dma-bufs cannot be purgeable */ - if (xe_bo_is_dmabuf_shared(bo)) - return XE_BO_VMAS_STATE_WILLNEED; - - drm_gem_for_each_gpuvm_bo(vm_bo, obj) { - drm_gpuvm_bo_for_each_va(gpuva, vm_bo) { - struct xe_vma *vma = gpuva_to_vma(gpuva); - - has_vmas = true; - - /* Any non-DONTNEED VMA prevents purging */ - if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_DONTNEED) - return XE_BO_VMAS_STATE_WILLNEED; - } - } - - /* - * No VMAs => preserve existing BO purgeable state. - * Avoids incorrectly flipping DONTNEED -> WILLNEED when last VMA unmapped. - */ - if (!has_vmas) - return XE_BO_VMAS_STATE_NO_VMAS; - - return XE_BO_VMAS_STATE_DONTNEED; -} - -/** - * xe_bo_recompute_purgeable_state() - Recompute BO purgeable state from VMAs - * @bo: Buffer object - * - * Walk all VMAs to determine if BO should be purgeable or not. - * Shared BOs require unanimous DONTNEED state from all mappings. - * If the BO has no VMAs the existing state is preserved. - * - * Locking: Caller must hold BO dma-resv lock. When iterating GPUVM lists, - * VM lock must also be held (write) to prevent concurrent VMA modifications. - * This is satisfied at both call sites: - * - xe_vma_destroy(): holds vm->lock write - * - madvise_purgeable(): holds vm->lock write (from madvise ioctl path) - * - * Return: nothing - */ -void xe_bo_recompute_purgeable_state(struct xe_bo *bo) -{ - enum xe_bo_vmas_purge_state vma_state; - - if (!bo) - return; - - xe_bo_assert_held(bo); - - /* - * Once purged, always purged. Cannot transition back to WILLNEED. 
- * This matches i915 semantics where purged BOs are permanently invalid. - */ - if (bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED) - return; - - vma_state = xe_bo_all_vmas_dontneed(bo); - - if (vma_state != (enum xe_bo_vmas_purge_state)bo->madv_purgeable && - vma_state != XE_BO_VMAS_STATE_NO_VMAS) - xe_bo_set_purgeable_state(bo, (enum xe_madv_purgeable_state)vma_state); -} - -/** * madvise_purgeable - Handle purgeable buffer object advice * @xe: XE device * @vm: VM @@ -359,12 +218,6 @@ static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm, /* BO must be locked before modifying madv state */ xe_bo_assert_held(bo); - /* Skip shared dma-bufs - no PTEs to zap */ - if (xe_bo_is_dmabuf_shared(bo)) { - vmas[i]->skip_invalidation = true; - continue; - } - /* * Once purged, always purged. Cannot transition back to WILLNEED. * This matches i915 semantics where purged BOs are permanently invalid. @@ -377,13 +230,14 @@ static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm, switch (op->purge_state_val.val) { case DRM_XE_VMA_PURGEABLE_STATE_WILLNEED: - vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_WILLNEED; vmas[i]->skip_invalidation = true; - - xe_bo_recompute_purgeable_state(bo); + /* Only act on a real DONTNEED -> WILLNEED transition. */ + if (vmas[i]->attr.purgeable_state == XE_MADV_PURGEABLE_DONTNEED) { + vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_WILLNEED; + xe_bo_willneed_get_locked(bo); + } break; case DRM_XE_VMA_PURGEABLE_STATE_DONTNEED: - vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_DONTNEED; /* * Don't zap PTEs at DONTNEED time -- pages are still * alive. The zap happens in xe_bo_move_notify() right @@ -391,7 +245,11 @@ static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm, */ vmas[i]->skip_invalidation = true; - xe_bo_recompute_purgeable_state(bo); + /* Only act on a real WILLNEED -> DONTNEED transition. 
*/ + if (vmas[i]->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) { + vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_DONTNEED; + xe_bo_willneed_put_locked(bo); + } break; default: /* Should never hit - values validated in madvise_args_are_sane() */ diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h index 39acd2689ca0..a3078f634c7e 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.h +++ b/drivers/gpu/drm/xe/xe_vm_madvise.h @@ -13,6 +13,4 @@ struct xe_bo; int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -void xe_bo_recompute_purgeable_state(struct xe_bo *bo); - #endif diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index 33b91cb2e684..e32ef763427c 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -898,7 +898,11 @@ /* CRI */ #define INTEL_CRI_IDS(MACRO__, ...) \ - MACRO__(0x674C, ## __VA_ARGS__) + MACRO__(0x674C, ## __VA_ARGS__), \ + MACRO__(0x674D, ## __VA_ARGS__), \ + MACRO__(0x674E, ## __VA_ARGS__), \ + MACRO__(0x674F, ## __VA_ARGS__), \ + MACRO__(0x6750, ## __VA_ARGS__) /* NVL-P */ #define INTEL_NVLP_IDS(MACRO__, ...) \ |
