diff options
Diffstat (limited to 'drivers/gpu/drm/xe')
59 files changed, 600 insertions, 134 deletions
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index b3921dbc52ff..b735e30953ce 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -346,7 +346,8 @@ static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime) xe_display_flush_cleanup_work(xe); - intel_hpd_cancel_work(xe); + if (!runtime) + intel_hpd_cancel_work(xe); if (!runtime && has_display(xe)) { intel_display_driver_suspend_access(display); diff --git a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h index a255946b6f77..8cfcd3360896 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h @@ -41,6 +41,7 @@ #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) +#define PIPE_CONTROL0_L3_READ_ONLY_CACHE_INVALIDATE BIT(10) /* gen12 */ #define PIPE_CONTROL0_HDC_PIPELINE_FLUSH BIT(9) /* gen12 */ #define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index 10ec2920d31b..d4033278be9f 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -47,6 +47,10 @@ #define MI_LRI_FORCE_POSTED REG_BIT(12) #define MI_LRI_LEN(x) (((x) & 0xff) + 1) +#define MI_STORE_REGISTER_MEM (__MI_INSTR(0x24) | XE_INSTR_NUM_DW(4)) +#define MI_SRM_USE_GGTT REG_BIT(22) +#define MI_SRM_ADD_CS_OFFSET REG_BIT(19) + #define MI_FLUSH_DW __MI_INSTR(0x26) #define MI_FLUSH_DW_STORE_INDEX REG_BIT(21) #define MI_INVALIDATE_TLB REG_BIT(18) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 162f18e975da..ab95d3545a72 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -157,6 +157,7 @@ #define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108) #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED) +#define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) #define COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED) @@ -475,6 +476,7 @@ #define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED) #define STK_ID_RESTRICT REG_BIT(12) #define SLM_WMTP_RESTORE REG_BIT(11) +#define RES_CHK_SPR_DIS REG_BIT(6) #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) #define UGM_BACKUP_MODE REG_BIT(13) @@ -500,6 +502,9 @@ #define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) #define DIS_ATOMIC_CHAINING_TYPED_WRITES REG_BIT(3) +#define TDL_CHICKEN XE_REG_MCR(0xe5f4, XE_REG_OPTION_MASKED) +#define QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE REG_BIT(12) + #define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) #define DISABLE_D8_D16_COASLESCE REG_BIT(30) #define WR_REQ_CHAINING_DIS REG_BIT(26) diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index ef1e5256c56a..0e502feaca81 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -46,8 +46,11 @@ static void read_l3cc_table(struct xe_gt *gt, unsigned int fw_ref, i; u32 reg_val; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n"); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + xe_force_wake_put(gt_to_fw(gt), fw_ref); + KUNIT_ASSERT_TRUE_MSG(test, true, "Forcewake Failed.\n"); + } for (i = 0; i < info->num_mocs_regs; i++) { if (!(i & 1)) { diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index 36a3b5420fef..b0254b014fe4 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -320,7 +320,7 @@ static void xe_rtp_process_to_sr_tests(struct kunit *test) count_rtp_entries++; xe_rtp_process_ctx_enable_active_tracking(&ctx, &active, count_rtp_entries); - xe_rtp_process_to_sr(&ctx, param->entries, reg_sr); + xe_rtp_process_to_sr(&ctx, param->entries, count_rtp_entries, reg_sr); xa_for_each(®_sr->xa, idx, sre) { if (idx == param->expected_reg.addr) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 3f5391d416d4..2070aa12059c 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -713,6 +713,21 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, goto out; } + /* Reject BO eviction if BO is bound to current VM. */ + if (evict && ctx->resv) { + struct drm_gpuvm_bo *vm_bo; + + drm_gem_for_each_gpuvm_bo(vm_bo, &bo->ttm.base) { + struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); + + if (xe_vm_resv(vm) == ctx->resv && + xe_vm_in_preempt_fence_mode(vm)) { + ret = -EBUSY; + goto out; + } + } + } + /* * Failed multi-hop where the old_mem is still marked as * TTM_PL_FLAG_TEMPORARY, should just be a dummy move. @@ -2142,6 +2157,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, struct xe_file *xef = to_xe_file(file); struct drm_xe_gem_create *args = data; struct xe_vm *vm = NULL; + ktime_t end = 0; struct xe_bo *bo; unsigned int bo_flags; u32 handle; @@ -2214,6 +2230,10 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, vm = xe_vm_lookup(xef, args->vm_id); if (XE_IOCTL_DBG(xe, !vm)) return -ENOENT; + } + +retry: + if (vm) { err = xe_vm_lock(vm, true); if (err) goto out_vm; @@ -2227,6 +2247,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(bo)) { err = PTR_ERR(bo); + if (xe_vm_validate_should_retry(NULL, err, &end)) + goto retry; goto out_vm; } diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 492b4877433f..92e6fa8fe3a1 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -166,7 +166,7 @@ static ssize_t wedged_mode_set(struct file *f, const char __user *ubuf, return -EINVAL; if (xe->wedged.mode == wedged_mode) - return 0; + return size; xe->wedged.mode = wedged_mode; @@ -175,6 +175,7 @@ static ssize_t wedged_mode_set(struct file *f, const char __user *ubuf, ret = xe_guc_ads_scheduler_policy_toggle_reset(>->uc.guc.ads); if (ret) { xe_gt_err(gt, "Failed to update GuC ADS scheduler policy. GuC may still cause engine reset even with wedged_mode=2\n"); + xe_pm_runtime_put(xe); return -EIO; } } diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 4e1839b483a0..74516e73ba4e 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -722,7 +722,9 @@ int xe_device_probe(struct xe_device *xe) } /* Allocate and map stolen after potential VRAM resize */ - xe_ttm_stolen_mgr_init(xe); + err = xe_ttm_stolen_mgr_init(xe); + if (err) + return err; /* * Now that GT is initialized (TTM in particular), @@ -734,6 +736,12 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err; + for_each_tile(tile, xe, id) { + err = xe_tile_init(tile); + if (err) + goto err; + } + for_each_gt(gt, xe, id) { last_gt = id; diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 2d4874d2b922..31f688e953d7 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -325,6 +325,14 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) unsigned int fw_ref; /* + * RING_TIMESTAMP registers are inaccessible in VF mode. + * Without drm-total-cycles-*, other keys provide little value. + * Show all or none of the optional "run_ticks" keys in this case. + */ + if (IS_SRIOV_VF(xe)) + return; + + /* * Wait for any exec queue going away: their cycles will get updated on * context switch out, so wait for that to happen */ diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c index 904cf47925aa..ed9183599e31 100644 --- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c +++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c @@ -28,10 +28,10 @@ "\n" \ "#endif\n" -static void print_usage(FILE *f) +static void print_usage(FILE *f, const char *progname) { fprintf(f, "usage: %s <input-rule-file> <generated-c-source-file> <generated-c-header-file>\n", - program_invocation_short_name); + progname); } static void print_parse_error(const char *err_msg, const char *line, @@ -144,7 +144,7 @@ int main(int argc, const char *argv[]) if (argc < 3) { fprintf(stderr, "ERROR: wrong arguments\n"); - print_usage(stderr); + print_usage(stderr, argv[0]); return 1; } diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 1eb791ddc375..0f11b1c00960 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -564,6 +564,28 @@ void xe_gsc_remove(struct xe_gsc *gsc) xe_gsc_proxy_remove(gsc); } +void xe_gsc_stop_prepare(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + int ret; + + if (!xe_uc_fw_is_loadable(&gsc->fw) || xe_uc_fw_is_in_error_state(&gsc->fw)) + return; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GSC); + + /* + * If the GSC FW load or the proxy init are interrupted, the only way + * to recover it is to do an FLR and reload the GSC from scratch. + * Therefore, let's wait for the init to complete before stopping + * operations. The proxy init is the last step, so we can just wait on + * that + */ + ret = xe_gsc_wait_for_proxy_init_done(gsc); + if (ret) + xe_gt_err(gt, "failed to wait for GSC init completion before uc stop\n"); +} + /* * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a * GSC engine reset by writing a notification bit in the GS1 register and then diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index e282b9ef6ec4..c31fe24c4b66 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -16,6 +16,7 @@ struct xe_hw_engine; int xe_gsc_init(struct xe_gsc *gsc); int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); +void xe_gsc_stop_prepare(struct xe_gsc *gsc); void xe_gsc_load_start(struct xe_gsc *gsc); void xe_gsc_remove(struct xe_gsc *gsc); void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 24cc6a4f9a96..76636da3d06c 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -71,6 +71,17 @@ bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) HECI1_FWSTS1_PROXY_STATE_NORMAL; } +int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + + /* Proxy init can take up to 500ms, so wait double that for safety */ + return xe_mmio_wait32(>->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE), + HECI1_FWSTS1_CURRENT_STATE, + HECI1_FWSTS1_PROXY_STATE_NORMAL, + USEC_PER_SEC, NULL, false); +} + static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set) { struct xe_gt *gt = gsc_to_gt(gsc); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h index c511ade6b863..e2498aa6de18 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.h +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h @@ -13,6 +13,7 @@ struct xe_gsc; int xe_gsc_proxy_init(struct xe_gsc *gsc); bool xe_gsc_proxy_init_done(struct xe_gsc *gsc); void xe_gsc_proxy_remove(struct xe_gsc *gsc); +int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc); int xe_gsc_proxy_start(struct xe_gsc *gsc); int xe_gsc_proxy_request_handler(struct xe_gsc *gsc); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 8a20e6744836..6b4b9eca2c38 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -381,6 +381,10 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; + err = xe_tuning_init(gt); + if (err) + return err; + xe_wa_process_oob(gt); xe_force_wake_init_gt(gt, gt_to_fw(gt)); @@ -646,6 +650,9 @@ void xe_gt_mmio_init(struct xe_gt *gt) if (gt->info.type == XE_GT_TYPE_MEDIA) { gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH; + } else { + gt->mmio.adj_offset = 0; + gt->mmio.adj_limit = 0; } if (IS_SRIOV_VF(gt_to_xe(gt))) @@ -858,7 +865,7 @@ void xe_gt_suspend_prepare(struct xe_gt *gt) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_uc_stop_prepare(>->uc); + xe_uc_suspend_prepare(>->uc); xe_force_wake_put(gt_to_fw(gt), fw_ref); } diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index e7792858b1e4..f7005a3643e6 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -30,6 +30,7 @@ #include "xe_reg_sr.h" #include "xe_reg_whitelist.h" #include "xe_sriov.h" +#include "xe_tuning.h" #include "xe_uc_debugfs.h" #include "xe_wa.h" @@ -91,22 +92,23 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p) struct xe_hw_engine *hwe; enum xe_hw_engine_id id; unsigned int fw_ref; + int ret = 0; xe_pm_runtime_get(xe); fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - xe_pm_runtime_put(xe); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto fw_put; } for_each_hw_engine(hwe, gt, id) xe_hw_engine_print(hwe, p); +fw_put: xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(xe); - return 0; + return ret; } static int powergate_info(struct xe_gt *gt, struct drm_printer *p) @@ -217,6 +219,15 @@ static int workarounds(struct xe_gt *gt, struct drm_printer *p) return 0; } +static int tunings(struct xe_gt *gt, struct drm_printer *p) +{ + xe_pm_runtime_get(gt_to_xe(gt)); + xe_tuning_dump(gt, p); + xe_pm_runtime_put(gt_to_xe(gt)); + + return 0; +} + static int pat(struct xe_gt *gt, struct drm_printer *p) { xe_pm_runtime_get(gt_to_xe(gt)); @@ -300,6 +311,7 @@ static const struct drm_info_list debugfs_list[] = { {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info}, {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds}, + {"tunings", .show = xe_gt_debugfs_simple_show, .data = tunings}, {"pat", .show = xe_gt_debugfs_simple_show, .data = pat}, {"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs}, {"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc}, diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index ffd3ba7f6656..fbbace7b0b12 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -69,6 +69,8 @@ static u64 get_residency_ms(struct xe_gt_idle *gtidle, u64 cur_residency) { u64 delta, overflow_residency, prev_residency; + lockdep_assert_held(>idle->lock); + overflow_residency = BIT_ULL(32); /* @@ -275,8 +277,21 @@ static ssize_t idle_status_show(struct device *dev, return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state)); } -static DEVICE_ATTR_RO(idle_status); +u64 xe_gt_idle_residency_msec(struct xe_gt_idle *gtidle) +{ + struct xe_guc_pc *pc = gtidle_to_pc(gtidle); + u64 residency; + unsigned long flags; + + raw_spin_lock_irqsave(>idle->lock, flags); + residency = get_residency_ms(gtidle, gtidle->idle_residency(pc)); + raw_spin_unlock_irqrestore(>idle->lock, flags); + + return residency; +} + +static DEVICE_ATTR_RO(idle_status); static ssize_t idle_residency_ms_show(struct device *dev, struct device_attribute *attr, char *buff) { @@ -285,10 +300,10 @@ static ssize_t idle_residency_ms_show(struct device *dev, u64 residency; xe_pm_runtime_get(pc_to_xe(pc)); - residency = gtidle->idle_residency(pc); + residency = xe_gt_idle_residency_msec(gtidle); xe_pm_runtime_put(pc_to_xe(pc)); - return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, residency)); + return sysfs_emit(buff, "%llu\n", residency); } static DEVICE_ATTR_RO(idle_residency_ms); @@ -331,6 +346,8 @@ int xe_gt_idle_init(struct xe_gt_idle *gtidle) if (!kobj) return -ENOMEM; + raw_spin_lock_init(>idle->lock); + if (xe_gt_is_media_type(gt)) { snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-mc", gt->info.id); gtidle->idle_residency = xe_guc_pc_mc6_residency; diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h index 4455a6501cb0..591a01e181bc 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.h +++ b/drivers/gpu/drm/xe/xe_gt_idle.h @@ -17,5 +17,6 @@ void xe_gt_idle_disable_c6(struct xe_gt *gt); void xe_gt_idle_enable_pg(struct xe_gt *gt); void xe_gt_idle_disable_pg(struct xe_gt *gt); int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p); +u64 xe_gt_idle_residency_msec(struct xe_gt_idle *gtidle); #endif /* _XE_GT_IDLE_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_idle_types.h b/drivers/gpu/drm/xe/xe_gt_idle_types.h index b8b297a3f884..a3667c567f8a 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle_types.h +++ b/drivers/gpu/drm/xe/xe_gt_idle_types.h @@ -6,6 +6,7 @@ #ifndef _XE_GT_IDLE_SYSFS_TYPES_H_ #define _XE_GT_IDLE_SYSFS_TYPES_H_ +#include <linux/spinlock.h> #include <linux/types.h> struct xe_guc_pc; @@ -31,6 +32,8 @@ struct xe_gt_idle { u64 cur_residency; /** @prev_residency: previous residency counter */ u64 prev_residency; + /** @lock: Lock protecting idle residency counters */ + raw_spinlock_t lock; /** @idle_status: get the current idle state */ enum xe_gt_idle_state (*idle_status)(struct xe_guc_pc *pc); /** @idle_residency: get idle residency counter */ diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 2606cd396df5..0d0207be93ed 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -422,9 +422,16 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, XE_MAX_EU_FUSE_BITS) * num_dss; - /* user can issue separate page faults per EU and per CS */ + /* + * user can issue separate page faults per EU and per CS + * + * XXX: Multiplier required as compute UMD are getting PF queue errors + * without it. Follow on why this multiplier is required. + */ +#define PF_MULTIPLIER 8 pf_queue->num_dw = - (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW; + (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; +#undef PF_MULTIPLIER pf_queue->gt = gt; pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw, diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index 6f906c8e8108..c08efca6420e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -15,7 +15,11 @@ #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_pf_service.h" +#include "xe_gt_sriov_printk.h" #include "xe_mmio.h" +#include "xe_pm.h" + +static void pf_worker_restart_func(struct work_struct *w); /* * VF's metadata is maintained in the flexible array where: @@ -41,6 +45,11 @@ static int pf_alloc_metadata(struct xe_gt *gt) return 0; } +static void pf_init_workers(struct xe_gt *gt) +{ + INIT_WORK(>->sriov.pf.workers.restart, pf_worker_restart_func); +} + /** * xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on PF. * @gt: the &xe_gt to initialize @@ -65,6 +74,8 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) if (err) return err; + pf_init_workers(gt); + return 0; } @@ -78,6 +89,12 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) */ int xe_gt_sriov_pf_init(struct xe_gt *gt) { + int err; + + err = xe_gt_sriov_pf_config_init(gt); + if (err) + return err; + return xe_gt_sriov_pf_migration_init(gt); } @@ -155,6 +172,35 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid) pf_clear_vf_scratch_regs(gt, vfid); } +static void pf_restart(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_pm_runtime_get(xe); + xe_gt_sriov_pf_config_restart(gt); + xe_gt_sriov_pf_control_restart(gt); + xe_pm_runtime_put(xe); + + xe_gt_sriov_dbg(gt, "restart completed\n"); +} + +static void pf_worker_restart_func(struct work_struct *w) +{ + struct xe_gt *gt = container_of(w, typeof(*gt), sriov.pf.workers.restart); + + pf_restart(gt); +} + +static void pf_queue_restart(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + + if (!queue_work(xe->sriov.wq, >->sriov.pf.workers.restart)) + xe_gt_sriov_dbg(gt, "restart already in queue!\n"); +} + /** * xe_gt_sriov_pf_restart - Restart SR-IOV support after a GT reset. * @gt: the &xe_gt @@ -163,6 +209,5 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid) */ void xe_gt_sriov_pf_restart(struct xe_gt *gt) { - xe_gt_sriov_pf_config_restart(gt); - xe_gt_sriov_pf_control_restart(gt); + pf_queue_restart(gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 4bd255adfb40..27f309e3a76e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -336,6 +336,26 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid) return err; } +static int pf_push_vf_cfg(struct xe_gt *gt, unsigned int vfid, bool reset) +{ + int err = 0; + + xe_gt_assert(gt, vfid); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + if (reset) + err = pf_send_vf_cfg_reset(gt, vfid); + if (!err) + err = pf_push_full_vf_config(gt, vfid); + + return err; +} + +static int pf_refresh_vf_cfg(struct xe_gt *gt, unsigned int vfid) +{ + return pf_push_vf_cfg(gt, vfid, true); +} + static u64 pf_get_ggtt_alignment(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -432,6 +452,10 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) return err; pf_release_vf_config_ggtt(gt, config); + + err = pf_refresh_vf_cfg(gt, vfid); + if (unlikely(err)) + return err; } xe_gt_assert(gt, !xe_ggtt_node_allocated(config->ggtt_region)); @@ -757,6 +781,10 @@ static int pf_provision_vf_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctx return ret; pf_release_config_ctxs(gt, config); + + ret = pf_refresh_vf_cfg(gt, vfid); + if (unlikely(ret)) + return ret; } if (!num_ctxs) @@ -1054,6 +1082,10 @@ static int pf_provision_vf_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs) return ret; pf_release_config_dbs(gt, config); + + ret = pf_refresh_vf_cfg(gt, vfid); + if (unlikely(ret)) + return ret; } if (!num_dbs) @@ -2085,10 +2117,7 @@ int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh xe_gt_assert(gt, vfid); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); - if (refresh) - err = pf_send_vf_cfg_reset(gt, vfid); - if (!err) - err = pf_push_full_vf_config(gt, vfid); + err = pf_push_vf_cfg(gt, vfid, refresh); mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); if (unlikely(err)) { @@ -2320,6 +2349,35 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return err; } +static void fini_config(void *arg) +{ + struct xe_gt *gt = arg; + struct xe_device *xe = gt_to_xe(gt); + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = 1; n <= total_vfs; n++) + pf_release_vf_config(gt, n); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); +} + +/** + * xe_gt_sriov_pf_config_init - Initialize SR-IOV configuration data. + * @gt: the &xe_gt + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + + return devm_add_action_or_reset(xe->drm.dev, fini_config, gt); +} + /** * xe_gt_sriov_pf_config_restart - Restart SR-IOV configurations after a GT reset. * @gt: the &xe_gt diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index f894e9d4abba..513e6512a575 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -63,6 +63,7 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_init(struct xe_gt *gt); void xe_gt_sriov_pf_config_restart(struct xe_gt *gt); int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h index 0426b1a77069..a64a6835ad65 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -36,7 +36,16 @@ struct xe_gt_sriov_metadata { }; /** + * struct xe_gt_sriov_pf_workers - GT level workers used by the PF. + */ +struct xe_gt_sriov_pf_workers { + /** @restart: worker that executes actions post GT reset */ + struct work_struct restart; +}; + +/** * struct xe_gt_sriov_pf - GT level PF virtualization data. + * @workers: workers data. * @service: service data. * @control: control data. * @policy: policy data. @@ -45,6 +54,7 @@ struct xe_gt_sriov_metadata { * @vfs: metadata for all VFs. */ struct xe_gt_sriov_pf { + struct xe_gt_sriov_pf_workers workers; struct xe_gt_sriov_pf_service service; struct xe_gt_sriov_pf_control control; struct xe_gt_sriov_pf_policy policy; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 9c30cbd9af6e..a439261bf4d7 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -47,12 +47,19 @@ static int guc_action_vf_reset(struct xe_guc *guc) return ret > 0 ? -EPROTO : ret; } +#define GUC_RESET_VF_STATE_RETRY_MAX 10 static int vf_reset_guc_state(struct xe_gt *gt) { + unsigned int retry = GUC_RESET_VF_STATE_RETRY_MAX; struct xe_guc *guc = >->uc.guc; int err; - err = guc_action_vf_reset(guc); + do { + err = guc_action_vf_reset(guc); + if (!err || err != -ETIMEDOUT) + break; + } while (--retry); + if (unlikely(err)) xe_gt_sriov_err(gt, "Failed to reset GuC state (%pe)\n", ERR_PTR(err)); return err; @@ -229,6 +236,9 @@ int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt) { int err; + if (!xe_device_uc_enabled(gt_to_xe(gt))) + return -ENODEV; + err = vf_reset_guc_state(gt); if (unlikely(err)) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 9405d83d4db2..084cbdeba8ea 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -419,6 +419,28 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, } /** + * xe_gt_tlb_invalidation_vm - Issue a TLB invalidation on this GT for a VM + * @gt: graphics tile + * @vm: VM to invalidate + * + * Invalidate entire VM's address space + */ +void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm) +{ + struct xe_gt_tlb_invalidation_fence fence; + u64 range = 1ull << vm->xe->info.va_bits; + int ret; + + xe_gt_tlb_invalidation_fence_init(gt, &fence, true); + + ret = xe_gt_tlb_invalidation_range(gt, &fence, 0, range, vm->usm.asid); + if (ret < 0) + return; + + xe_gt_tlb_invalidation_fence_wait(&fence); +} + +/** * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA * @gt: GT structure * @fence: invalidation fence which will be signal on TLB invalidation diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index 672acfcdf0d7..abe9b03d543e 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -12,6 +12,7 @@ struct xe_gt; struct xe_guc; +struct xe_vm; struct xe_vma; int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt); @@ -21,6 +22,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, struct xe_vma *vma); +void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm); int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, u64 start, u64 end, u32 asid); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 6e66bf0e8b3f..dd2969a1846d 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -413,6 +413,16 @@ struct xe_gt { bool oob_initialized; } wa_active; + /** @tuning_active: keep track of active tunings */ + struct { + /** @tuning_active.gt: bitmap with active GT tunings */ + unsigned long *gt; + /** @tuning_active.engine: bitmap with active engine tunings */ + unsigned long *engine; + /** @tuning_active.lrc: bitmap with active LRC tunings */ + unsigned long *lrc; + } tuning_active; + /** @user_engines: engines present in GT and available to userspace */ struct { /** diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index f6d523e4c5fe..9095618648bc 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -359,7 +359,7 @@ static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, ext->reg = XE_REG(extlist->reg.__reg.addr); ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1); - ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); + ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id); ext->regname = extlist->name; } diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 0ca3056d8bd3..80514a446ba2 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -149,16 +149,12 @@ struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, size_t remain; int i; - if (!log->bo) { - xe_gt_err(gt, "GuC log buffer not allocated\n"); + if (!log->bo) return NULL; - } snapshot = xe_guc_log_snapshot_alloc(log, atomic); - if (!snapshot) { - xe_gt_err(gt, "GuC log snapshot not allocated\n"); + if (!snapshot) return NULL; - } remain = snapshot->size; for (i = 0; i < snapshot->num_chunks; i++) { diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index f382f5d53ca8..2276d85926fc 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -371,16 +371,17 @@ static void tgl_update_rpa_value(struct xe_guc_pc *pc) u32 reg; /* - * For PVC we still need to use fused RP1 as the approximation for RPe - * For other platforms than PVC we get the resolved RPe directly from + * For PVC we still need to use fused RP0 as the approximation for RPa + * For other platforms than PVC we get the resolved RPa directly from * PCODE at a different register */ - if (xe->info.platform == XE_PVC) + if (xe->info.platform == XE_PVC) { reg = xe_mmio_read32(>->mmio, PVC_RP_STATE_CAP); - else + pc->rpa_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + } else { reg = xe_mmio_read32(>->mmio, FREQ_INFO_REC); - - pc->rpa_freq = REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + pc->rpa_freq = REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + } } static void tgl_update_rpe_value(struct xe_guc_pc *pc) @@ -394,12 +395,13 @@ static void tgl_update_rpe_value(struct xe_guc_pc *pc) * For other platforms than PVC we get the resolved RPe directly from * PCODE at a different register */ - if (xe->info.platform == XE_PVC) + if (xe->info.platform == XE_PVC) { reg = xe_mmio_read32(>->mmio, PVC_RP_STATE_CAP); - else + pc->rpe_freq = REG_FIELD_GET(RP1_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + } else { reg = xe_mmio_read32(>->mmio, FREQ_INFO_REC); - - pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + } } static void pc_update_rp_values(struct xe_guc_pc *pc) diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c index 8f62de026724..e5dc94f3e618 100644 --- a/drivers/gpu/drm/xe/xe_guc_relay.c +++ b/drivers/gpu/drm/xe/xe_guc_relay.c @@ -225,7 +225,7 @@ __relay_get_transaction(struct xe_guc_relay *relay, bool incoming, u32 remote, u * with CTB lock held which is marked as used in the reclaim path. * Btw, that's one of the reason why we use mempool here! */ - txn = mempool_alloc(&relay->pool, incoming ? GFP_ATOMIC : GFP_KERNEL); + txn = mempool_alloc(&relay->pool, incoming ? GFP_ATOMIC : GFP_NOWAIT); if (!txn) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index fc447751fe78..b26b6fb5cdb5 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -386,12 +386,6 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) blit_cctl_val, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - /* Use Fixed slice CCS mode */ - { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), - XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), - XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, - RCU_MODE_FIXED_SLICE_CCS_MODE)) - }, /* Disable WMTP if HW doesn't support it */ { XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"), XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)), @@ -400,10 +394,9 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) PREEMPT_GPGPU_THREAD_GROUP_LEVEL)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE) }, - {} }; - xe_rtp_process_to_sr(&ctx, lrc_setup, &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_setup, ARRAY_SIZE(lrc_setup), &hwe->reg_lrc); } static void @@ -459,10 +452,15 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - {} + /* Use Fixed slice CCS mode */ + { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), + XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), + XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, + RCU_MODE_FIXED_SLICE_CCS_MODE)) + }, }; - xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr); + xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), &hwe->reg_sr); } static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index bbb9ffbf6367..5d7629bb6b8d 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -684,7 +684,7 @@ static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) { - /* The start seqno is stored in the driver-defined portion of PPHWSP */ + /* This is stored in the driver-defined portion of PPHWSP */ return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; } @@ -864,7 +864,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe) static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) { - u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile); + u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt)); xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); @@ -895,6 +895,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, int err; kref_init(&lrc->refcount); + lrc->gt = gt; lrc->flags = 0; lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); if (xe_gt_has_indirect_ring_state(gt)) @@ -913,7 +914,6 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, return PTR_ERR(lrc->bo); lrc->size = lrc_size; - lrc->tile = gt_to_tile(hwe->gt); lrc->ring.size = ring_size; lrc->ring.tail = 0; lrc->ctx_timestamp = 0; diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 71ecb453f811..cd38586ae989 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -25,8 +25,8 @@ struct xe_lrc { /** @size: size of lrc including any indirect ring state page */ u32 size; - /** @tile: tile which this LRC belongs to */ - struct xe_tile *tile; + /** @gt: gt which this LRC belongs to */ + struct xe_gt *gt; /** @flags: LRC flags */ #define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index a48f239cad1c..9c2f60ce0c94 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -76,12 +76,12 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) * is fine as it's going to the root tile's mmio, that's * guaranteed to be initialized earlier in xe_mmio_init() */ - mtcfg = xe_mmio_read64_2x32(mmio, XEHP_MTCFG_ADDR); + mtcfg = xe_mmio_read32(mmio, XEHP_MTCFG_ADDR); tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; if (tile_count < xe->info.tile_count) { drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", - xe->info.tile_count, tile_count); + xe->info.tile_count, tile_count); xe->info.tile_count = tile_count; /* @@ -173,7 +173,7 @@ int xe_mmio_init(struct xe_device *xe) */ xe->mmio.size = pci_resource_len(pdev, GTTMMADR_BAR); xe->mmio.regs = pci_iomap(pdev, GTTMMADR_BAR, 0); - if (xe->mmio.regs == NULL) { + if (!xe->mmio.regs) { drm_err(&xe->drm, "failed to map registers\n"); return -EIO; } @@ -338,8 +338,8 @@ u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg) return (u64)udw << 32 | ldw; } -static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, - u32 *out_val, bool atomic, bool expect_match) +static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, + u32 timeout_us, u32 *out_val, bool atomic, bool expect_match) { ktime_t cur = ktime_get_raw(); const ktime_t end = ktime_add_us(cur, timeout_us); diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index eb6cd91e1e22..abf37d9ab221 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -548,6 +548,7 @@ static ssize_t xe_oa_read(struct file *file, char __user *buf, mutex_unlock(&stream->stream_lock); } while (!offset && !ret); } else { + xe_oa_buffer_check_unlocked(stream); mutex_lock(&stream->stream_lock); ret = __xe_oa_read(stream, buf, count, &offset); mutex_unlock(&stream->stream_lock); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 39be74848e44..d92b2e5885b9 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -150,7 +150,6 @@ static const struct xe_graphics_desc graphics_xehpc = { }; static const struct xe_graphics_desc graphics_xelpg = { - .name = "Xe_LPG", .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_CCS0), @@ -174,8 +173,6 @@ static const struct xe_graphics_desc graphics_xelpg = { GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0) static const struct xe_graphics_desc graphics_xe2 = { - .name = "Xe2_LPG / Xe2_HPG / Xe3_LPG", - XE2_GFX_FEATURES, }; @@ -200,15 +197,6 @@ static const struct xe_media_desc media_xehpm = { }; static const struct xe_media_desc media_xelpmp = { - .name = "Xe_LPM+", - .hw_engine_mask = - GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) | - GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0) | - BIT(XE_HW_ENGINE_GSCCS0) -}; - -static const struct xe_media_desc media_xe2 = { - .name = "Xe2_LPM / Xe2_HPM / Xe3_LPM", .hw_engine_mask = GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) | GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0) | @@ -357,21 +345,21 @@ __diag_pop(); /* Map of GMD_ID values to graphics IP */ static const struct gmdid_map graphics_ip_map[] = { - { 1270, &graphics_xelpg }, - { 1271, &graphics_xelpg }, - { 1274, &graphics_xelpg }, /* Xe_LPG+ */ - { 2001, &graphics_xe2 }, - { 2004, &graphics_xe2 }, - { 3000, &graphics_xe2 }, - { 3001, &graphics_xe2 }, + { 1270, "Xe_LPG", &graphics_xelpg }, + { 1271, "Xe_LPG", &graphics_xelpg }, + { 1274, "Xe_LPG+", &graphics_xelpg }, + { 2001, "Xe2_HPG", &graphics_xe2 }, + { 2004, "Xe2_LPG", &graphics_xe2 }, + { 3000, "Xe3_LPG", &graphics_xe2 }, + { 3001, "Xe3_LPG", &graphics_xe2 }, }; /* Map of GMD_ID values to media IP */ static const struct gmdid_map media_ip_map[] = { - { 1300, &media_xelpmp }, - { 1301, &media_xe2 }, - { 2000, &media_xe2 }, - { 3000, &media_xe2 }, + { 1300, "Xe_LPM+", &media_xelpmp }, + { 1301, "Xe2_HPM", &media_xelpmp }, + { 2000, "Xe2_LPM", &media_xelpmp }, + { 3000, "Xe3_LPM", &media_xelpmp }, }; /* @@ -502,6 +490,7 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, gt->info.type = XE_GT_TYPE_MAIN; } + xe_gt_mmio_init(gt); xe_guc_comm_init_early(>->uc.guc); /* Don't bother with GMDID if failed to negotiate the GuC ABI */ @@ -566,6 +555,7 @@ static void handle_gmdid(struct xe_device *xe, for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) { if (ver == graphics_ip_map[i].ver) { xe->info.graphics_verx100 = ver; + xe->info.graphics_name = graphics_ip_map[i].name; *graphics = graphics_ip_map[i].ip; break; @@ -586,6 +576,7 @@ static void handle_gmdid(struct xe_device *xe, for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) { if (ver == media_ip_map[i].ver) { xe->info.media_verx100 = ver; + xe->info.media_name = media_ip_map[i].name; *media = media_ip_map[i].ip; break; diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index aaceee748287..09ee8a06fe2e 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -62,6 +62,55 @@ static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs) xe_gt_sriov_pf_control_trigger_flr(gt, n); } +static struct pci_dev *xe_pci_pf_get_vf_dev(struct xe_device *xe, unsigned int vf_id) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + + xe_assert(xe, IS_SRIOV_PF(xe)); + + /* caller must use pci_dev_put() */ + return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus), + pdev->bus->number, + pci_iov_virtfn_devfn(pdev, vf_id)); +} + +static void pf_link_vfs(struct xe_device *xe, int num_vfs) +{ + struct pci_dev *pdev_pf = to_pci_dev(xe->drm.dev); + struct device_link *link; + struct pci_dev *pdev_vf; + unsigned int n; + + /* + * When both PF and VF devices are enabled on the host, during system + * resume they are resuming in parallel. + * + * But PF has to complete the provision of VF first to allow any VFs to + * successfully resume. + * + * Create a parent-child device link between PF and VF devices that will + * enforce correct resume order. + */ + for (n = 1; n <= num_vfs; n++) { + pdev_vf = xe_pci_pf_get_vf_dev(xe, n - 1); + + /* unlikely, something weird is happening, abort */ + if (!pdev_vf) { + xe_sriov_err(xe, "Cannot find VF%u device, aborting link%s creation!\n", + n, str_plural(num_vfs)); + break; + } + + link = device_link_add(&pdev_vf->dev, &pdev_pf->dev, + DL_FLAG_AUTOREMOVE_CONSUMER); + /* unlikely and harmless, continue with other VFs */ + if (!link) + xe_sriov_notice(xe, "Failed linking VF%u\n", n); + + pci_dev_put(pdev_vf); + } +} + static int pf_enable_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -92,6 +141,8 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) if (err < 0) goto failed; + pf_link_vfs(xe, num_vfs); + xe_sriov_info(xe, "Enabled %u of %u VF%s\n", num_vfs, total_vfs, str_plural(total_vfs)); return num_vfs; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index 79b0f80376a4..665b4447b2eb 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -44,6 +44,7 @@ struct xe_media_desc { struct gmdid_map { unsigned int ver; + const char *name; const void *ip; }; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index dc24baa84092..148d90611eeb 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -219,6 +219,20 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) } /** + * xe_pt_clear() - Clear a page-table. + * @xe: xe device. + * @pt: The page-table. + * + * Clears page-table by setting to zero. + */ +void xe_pt_clear(struct xe_device *xe, struct xe_pt *pt) +{ + struct iosys_map *map = &pt->bo->vmap; + + xe_map_memset(xe, map, 0, 0, SZ_4K); +} + +/** * DOC: Pagetable building * * Below we use the term "page-table" for both page-directories, containing diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h index 9ab386431cad..8e43912ae8e9 100644 --- a/drivers/gpu/drm/xe/xe_pt.h +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -13,6 +13,7 @@ struct dma_fence; struct xe_bo; struct xe_device; struct xe_exec_queue; +struct xe_svm_range; struct xe_sync_entry; struct xe_tile; struct xe_vm; @@ -35,6 +36,8 @@ void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred); +void xe_pt_clear(struct xe_device *xe, struct xe_pt *pt); + int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops); struct dma_fence *xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops); diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index edab5d4e3ba5..23f6c81d9994 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -88,7 +88,6 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { RING_FORCE_TO_NONPRIV_ACCESS_RD | RING_FORCE_TO_NONPRIV_RANGE_4)) }, - {} }; static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe) @@ -137,7 +136,8 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); - xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist); + xe_rtp_process_to_sr(&ctx, register_whitelist, ARRAY_SIZE(register_whitelist), + &hwe->reg_whitelist); whitelist_apply_to_hwe(hwe); } diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 9f327f27c072..349317794768 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -141,7 +141,8 @@ emit_pipe_control(u32 *dw, int i, u32 bit_group_0, u32 bit_group_1, u32 offset, static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, int i) { - u32 flags = PIPE_CONTROL_CS_STALL | + u32 flags0 = 0; + u32 flags1 = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_COMMAND_CACHE_INVALIDATE | PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | @@ -152,11 +153,15 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, PIPE_CONTROL_STORE_DATA_INDEX; if (invalidate_tlb) - flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags1 |= PIPE_CONTROL_TLB_INVALIDATE; - flags &= ~mask_flags; + flags1 &= ~mask_flags; - return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_SCRATCH_ADDR, 0); + if (flags1 & PIPE_CONTROL_VF_CACHE_INVALIDATE) + flags0 |= PIPE_CONTROL0_L3_READ_ONLY_CACHE_INVALIDATE; + + return emit_pipe_control(dw, i, flags0, flags1, + LRC_PPHWSP_SCRATCH_ADDR, 0); } static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, @@ -229,13 +234,10 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job) static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) { - dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT | - MI_COPY_MEM_MEM_DST_GGTT; + dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; + dw[i++] = RING_CTX_TIMESTAMP(0).addr; dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); dw[i++] = 0; - dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc); - dw[i++] = 0; - dw[i++] = MI_NOOP; return i; } diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 7a1c78fdfc92..13bb62d3e615 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -237,6 +237,7 @@ static void rtp_mark_active(struct xe_device *xe, * the save-restore argument. * @ctx: The context for processing the table, with one of device, gt or hwe * @entries: Table with RTP definitions + * @n_entries: Number of entries to process, usually ARRAY_SIZE(entries) * @sr: Save-restore struct where matching rules execute the action. This can be * viewed as the "coalesced view" of multiple the tables. The bits for each * register set are expected not to collide with previously added entries @@ -247,6 +248,7 @@ static void rtp_mark_active(struct xe_device *xe, */ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry_sr *entries, + size_t n_entries, struct xe_reg_sr *sr) { const struct xe_rtp_entry_sr *entry; @@ -259,7 +261,9 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, if (IS_SRIOV_VF(xe)) return; - for (entry = entries; entry && entry->name; entry++) { + xe_assert(xe, entries); + + for (entry = entries; entry - entries < n_entries; entry++) { bool match = false; if (entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) { diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 38b9f13bba5e..4fe736a11c42 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -430,7 +430,7 @@ void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry_sr *entries, - struct xe_reg_sr *sr); + size_t n_entries, struct xe_reg_sr *sr); void xe_rtp_process(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry *entries); diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index e055bed7ae55..4e7aba445ebc 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -57,8 +57,6 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 } sa_manager->bo = bo; sa_manager->is_iomem = bo->vmap.is_iomem; - - drm_suballoc_manager_init(&sa_manager->base, managed_size, align); sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); if (bo->vmap.is_iomem) { @@ -72,6 +70,7 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size); } + drm_suballoc_manager_init(&sa_manager->base, managed_size, align); ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini, sa_manager); if (ret) diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 07cf7cfe4abd..377438ea6b83 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -170,17 +170,19 @@ int xe_tile_init_noalloc(struct xe_tile *tile) if (err) return err; + xe_wa_apply_tile_workarounds(tile); + + return xe_tile_sysfs_init(tile); +} + +int xe_tile_init(struct xe_tile *tile) +{ tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); if (IS_ERR(tile->mem.kernel_bb_pool)) return PTR_ERR(tile->mem.kernel_bb_pool); - xe_wa_apply_tile_workarounds(tile); - - err = xe_tile_sysfs_init(tile); - return 0; } - void xe_tile_migrate_wait(struct xe_tile *tile) { xe_migrate_wait(tile->migrate); diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index 1c9e42ade6b0..eb939316d55b 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -12,6 +12,7 @@ struct xe_tile; int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id); int xe_tile_init_noalloc(struct xe_tile *tile); +int xe_tile_init(struct xe_tile *tile); void xe_tile_migrate_wait(struct xe_tile *tile); diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index d414421f8c13..d9c9d2547aad 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -207,17 +207,16 @@ static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) #endif } -void xe_ttm_stolen_mgr_init(struct xe_device *xe) +int xe_ttm_stolen_mgr_init(struct xe_device *xe) { - struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct xe_ttm_stolen_mgr *mgr; u64 stolen_size, io_size; int err; - if (!mgr) { - drm_dbg_kms(&xe->drm, "Stolen mgr init failed\n"); - return; - } + mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL); + if (!mgr) + return -ENOMEM; if (IS_SRIOV_VF(xe)) stolen_size = 0; @@ -230,7 +229,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) if (!stolen_size) { drm_dbg_kms(&xe->drm, "No stolen memory support\n"); - return; + return 0; } /* @@ -246,7 +245,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) io_size, PAGE_SIZE); if (err) { drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err); - return; + return err; } drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n", @@ -254,6 +253,8 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) if (io_size) mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size); + + return 0; } u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h index 1777245ff810..8e877d1e839b 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h @@ -12,7 +12,7 @@ struct ttm_resource; struct xe_bo; struct xe_device; -void xe_ttm_stolen_mgr_init(struct xe_device *xe); +int xe_ttm_stolen_mgr_init(struct xe_device *xe); int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem); bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe); u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset); diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 3c78f3d71559..a61a2917590f 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -7,6 +7,8 @@ #include <kunit/visibility.h> +#include <drm/drm_managed.h> + #include "regs/xe_gt_regs.h" #include "xe_gt_types.h" #include "xe_platform_types.h" @@ -83,8 +85,6 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_RULES(MEDIA_VERSION(2000)), XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN)) }, - - {} }; static const struct xe_rtp_entry_sr engine_tunings[] = { @@ -93,7 +93,6 @@ static const struct xe_rtp_entry_sr engine_tunings[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) }, - {} }; static const struct xe_rtp_entry_sr lrc_tunings[] = { @@ -131,15 +130,47 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = { XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK, REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f))) }, - - {} }; +/** + * xe_tuning_init - initialize gt with tunings bookkeeping + * @gt: GT instance to initialize + * + * Returns 0 for success, negative error code otherwise. + */ +int xe_tuning_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + size_t n_lrc, n_engine, n_gt, total; + unsigned long *p; + + n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_tunings)); + n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_tunings)); + n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_tunings)); + total = n_gt + n_engine + n_lrc; + + p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL); + if (!p) + return -ENOMEM; + + gt->tuning_active.gt = p; + p += n_gt; + gt->tuning_active.engine = p; + p += n_engine; + gt->tuning_active.lrc = p; + + return 0; +} +ALLOW_ERROR_INJECTION(xe_tuning_init, ERRNO); /* See xe_pci_probe() */ + void xe_tuning_process_gt(struct xe_gt *gt) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); - xe_rtp_process_to_sr(&ctx, gt_tunings, >->reg_sr); + xe_rtp_process_ctx_enable_active_tracking(&ctx, + gt->tuning_active.gt, + ARRAY_SIZE(gt_tunings)); + xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), >->reg_sr); } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); @@ -147,7 +178,11 @@ void xe_tuning_process_engine(struct xe_hw_engine *hwe) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); - xe_rtp_process_to_sr(&ctx, engine_tunings, &hwe->reg_sr); + xe_rtp_process_ctx_enable_active_tracking(&ctx, + hwe->gt->tuning_active.engine, + ARRAY_SIZE(engine_tunings)); + xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings), + &hwe->reg_sr); } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine); @@ -163,5 +198,25 @@ void xe_tuning_process_lrc(struct xe_hw_engine *hwe) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); - xe_rtp_process_to_sr(&ctx, lrc_tunings, &hwe->reg_lrc); + xe_rtp_process_ctx_enable_active_tracking(&ctx, + hwe->gt->tuning_active.lrc, + ARRAY_SIZE(lrc_tunings)); + xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), &hwe->reg_lrc); +} + +void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p) +{ + size_t idx; + + drm_printf(p, "GT Tunings\n"); + for_each_set_bit(idx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings)) + drm_printf_indent(p, 1, "%s\n", gt_tunings[idx].name); + + drm_printf(p, "\nEngine Tunings\n"); + for_each_set_bit(idx, gt->tuning_active.engine, ARRAY_SIZE(engine_tunings)) + drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name); + + drm_printf(p, "\nLRC Tunings\n"); + for_each_set_bit(idx, gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings)) + drm_printf_indent(p, 1, "%s\n", lrc_tunings[idx].name); } diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h index 4f9c3ac3b516..dd0d3ccc9c65 100644 --- a/drivers/gpu/drm/xe/xe_tuning.h +++ b/drivers/gpu/drm/xe/xe_tuning.h @@ -6,11 +6,14 @@ #ifndef _XE_TUNING_ #define _XE_TUNING_ +struct drm_printer; struct xe_gt; struct xe_hw_engine; +int xe_tuning_init(struct xe_gt *gt); void xe_tuning_process_gt(struct xe_gt *gt); void xe_tuning_process_engine(struct xe_hw_engine *hwe); void xe_tuning_process_lrc(struct xe_hw_engine *hwe); +void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 0d073a9987c2..bb03c524613f 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -241,7 +241,7 @@ void xe_uc_gucrc_disable(struct xe_uc *uc) void xe_uc_stop_prepare(struct xe_uc *uc) { - xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_gsc_stop_prepare(&uc->gsc); xe_guc_stop_prepare(&uc->guc); } @@ -275,6 +275,12 @@ again: goto again; } +void xe_uc_suspend_prepare(struct xe_uc *uc) +{ + xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_guc_stop_prepare(&uc->guc); +} + int xe_uc_suspend(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 506517c11333..ba2937ab94cf 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -18,6 +18,7 @@ int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); void xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); +void xe_uc_suspend_prepare(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); void xe_uc_remove(struct xe_uc *uc); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 5956631c0d40..785b8960050b 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -8,6 +8,7 @@ #include <linux/dma-fence-array.h> #include <linux/nospec.h> +#include <drm/drm_drv.h> #include <drm/drm_exec.h> #include <drm/drm_print.h> #include <drm/ttm/ttm_tt.h> @@ -1582,9 +1583,40 @@ err_no_resv: static void xe_vm_close(struct xe_vm *vm) { + struct xe_device *xe = vm->xe; + bool bound; + int idx; + + bound = drm_dev_enter(&xe->drm, &idx); + down_write(&vm->lock); + vm->size = 0; + + if (!((vm->flags & XE_VM_FLAG_MIGRATION))) { + struct xe_tile *tile; + struct xe_gt *gt; + u8 id; + + /* Wait for pending binds */ + dma_resv_wait_timeout(xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + + if (bound) { + for_each_tile(tile, xe, id) + if (vm->pt_root[id]) + xe_pt_clear(xe, vm->pt_root[id]); + + for_each_gt(gt, xe, id) + xe_gt_tlb_invalidation_vm(gt, vm); + } + } + up_write(&vm->lock); + + if (bound) + drm_dev_exit(idx); } void xe_vm_close_and_put(struct xe_vm *vm) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 2553accf8c51..56257430b364 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -279,8 +279,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), RAMDFTUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, - - {} }; static const struct xe_rtp_entry_sr engine_was[] = { @@ -613,8 +611,16 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(FIELD_SET(SAMPLER_MODE, SMP_WAIT_FETCH_MERGING_COUNTER, SMP_FORCE_128B_OVERFETCH)) }, - - {} + { XE_RTP_NAME("14023061436"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE)) + }, + { XE_RTP_NAME("13012615864"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) + }, }; static const struct xe_rtp_entry_sr lrc_was[] = { @@ -795,6 +801,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) }, + { XE_RTP_NAME("22021007897"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) + }, /* Xe3_LPG */ { XE_RTP_NAME("14021490052"), @@ -807,8 +817,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_PARTIAL_AUTOSTRIP | DIS_AUTOSTRIP)) }, - - {} }; static __maybe_unused const struct xe_rtp_entry oob_was[] = { @@ -850,7 +858,7 @@ void xe_wa_process_gt(struct xe_gt *gt) xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.gt, ARRAY_SIZE(gt_was)); - xe_rtp_process_to_sr(&ctx, gt_was, >->reg_sr); + xe_rtp_process_to_sr(&ctx, gt_was, ARRAY_SIZE(gt_was), >->reg_sr); } EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt); @@ -868,7 +876,7 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.engine, ARRAY_SIZE(engine_was)); - xe_rtp_process_to_sr(&ctx, engine_was, &hwe->reg_sr); + xe_rtp_process_to_sr(&ctx, engine_was, ARRAY_SIZE(engine_was), &hwe->reg_sr); } /** @@ -885,7 +893,7 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.lrc, ARRAY_SIZE(lrc_was)); - xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_was, ARRAY_SIZE(lrc_was), &hwe->reg_lrc); } /** diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 40438c3d9b72..32d3853b08ec 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -30,8 +30,10 @@ 13011645652 GRAPHICS_VERSION(2004) 14022293748 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) + GRAPHICS_VERSION_RANGE(3000, 3001) 22019794406 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) + GRAPHICS_VERSION_RANGE(3000, 3001) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) |