diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_gt.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_gt.c | 304 |
1 files changed, 196 insertions, 108 deletions
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index d5fd6a089b7c..6c4cb9576fb6 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -12,13 +12,16 @@ #include <generated/xe_wa_oob.h> +#include "instructions/xe_alu_commands.h" #include "instructions/xe_gfxpipe_commands.h" #include "instructions/xe_mi_commands.h" +#include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_bb.h" #include "xe_bo.h" #include "xe_device.h" +#include "xe_eu_stall.h" #include "xe_exec_queue.h" #include "xe_execlist.h" #include "xe_force_wake.h" @@ -32,6 +35,7 @@ #include "xe_gt_pagefault.h" #include "xe_gt_printk.h" #include "xe_gt_sriov_pf.h" +#include "xe_gt_sriov_vf.h" #include "xe_gt_sysfs.h" #include "xe_gt_tlb_invalidation.h" #include "xe_gt_topology.h" @@ -77,7 +81,8 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile) return ERR_PTR(-ENOMEM); gt->tile = tile; - gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0); + gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", + WQ_MEM_RECLAIM); err = drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_fini, gt); if (err) @@ -97,14 +102,14 @@ void xe_gt_sanitize(struct xe_gt *gt) static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) { + unsigned int fw_ref; u32 reg; - int err; if (!XE_WA(gt, 16023588340)) return; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (WARN_ON(err)) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) return; if (!xe_gt_is_media_type(gt)) { @@ -113,14 +118,14 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); } - xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) { + unsigned int fw_ref; u32 reg; - int err; if (!XE_WA(gt, 16023588340)) return; @@ -128,35 +133,15 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) if (xe_gt_is_media_type(gt)) return; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (WARN_ON(err)) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) return; reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg &= ~CG_DIS_CNTLBUS; xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -} - -/** - * xe_gt_remove() - Clean up the GT structures before driver removal - * @gt: the GT object - * - * This function should only act on objects/structures that must be cleaned - * before the driver removal callback is complete and therefore can't be - * deferred to a drmm action. - */ -void xe_gt_remove(struct xe_gt *gt) -{ - int i; - - xe_uc_remove(>->uc); - - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(>->fence_irq[i]); - - xe_gt_disable_host_l2_vram(gt); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } static void gt_reset_worker(struct work_struct *w); @@ -193,15 +178,6 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) return 0; } -/* - * Convert back from encoded value to type-safe, only to be used when reg.mcr - * is true - */ -static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg) -{ - return (const struct xe_reg_mcr){.__reg.raw = reg.raw }; -} - static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) { struct xe_reg_sr *sr = &q->hwe->reg_lrc; @@ -211,6 +187,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) struct xe_bb *bb; struct dma_fence *fence; long timeout; + int count_rmw = 0; int count = 0; if (q->hwe->class == XE_ENGINE_CLASS_RENDER) @@ -223,30 +200,32 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) if (IS_ERR(bb)) return PTR_ERR(bb); - xa_for_each(&sr->xa, idx, entry) - ++count; + /* count RMW registers as those will be handled separately */ + xa_for_each(&sr->xa, idx, entry) { + if (entry->reg.masked || entry->clr_bits == ~0) + ++count; + else + ++count_rmw; + } - if (count) { + if (count || count_rmw) xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name); + if (count) { + /* emit single LRI with all non RMW regs */ + bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); xa_for_each(&sr->xa, idx, entry) { struct xe_reg reg = entry->reg; - struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg); u32 val; - /* - * Skip reading the register if it's not really needed - */ if (reg.masked) val = entry->clr_bits << 16; - else if (entry->clr_bits + 1) - val = (reg.mcr ? - xe_gt_mcr_unicast_read_any(gt, reg_mcr) : - xe_mmio_read32(gt, reg)) & (~entry->clr_bits); - else + else if (entry->clr_bits == ~0) val = 0; + else + continue; val |= entry->set_bits; @@ -256,6 +235,52 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) } } + if (count_rmw) { + /* emit MI_MATH for each RMW reg */ + + xa_for_each(&sr->xa, idx, entry) { + if (entry->reg.masked || entry->clr_bits == ~0) + continue; + + bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; + bb->cs[bb->len++] = entry->reg.addr; + bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; + + bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | + MI_LRI_LRM_CS_MMIO; + bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr; + bb->cs[bb->len++] = entry->clr_bits; + bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr; + bb->cs[bb->len++] = entry->set_bits; + + bb->cs[bb->len++] = MI_MATH(8); + bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0); + bb->cs[bb->len++] = CS_ALU_INSTR_LOADINV(SRCB, REG1); + bb->cs[bb->len++] = CS_ALU_INSTR_AND; + bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU); + bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0); + bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCB, REG2); + bb->cs[bb->len++] = CS_ALU_INSTR_OR; + bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU); + + bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO; + bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; + bb->cs[bb->len++] = entry->reg.addr; + + xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n", + entry->reg.addr, entry->clr_bits, entry->set_bits); + } + + /* reset used GPR */ + bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | MI_LRI_LRM_CS_MMIO; + bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; + bb->cs[bb->len++] = 0; + bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr; + bb->cs[bb->len++] = 0; + bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr; + bb->cs[bb->len++] = 0; + } + xe_lrc_emit_hwe_state_instructions(q, bb); job = xe_bb_create_job(q, bb); @@ -379,13 +404,19 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; - xe_wa_process_gt(gt); + err = xe_tuning_init(gt); + if (err) + return err; + xe_wa_process_oob(gt); - xe_tuning_process_gt(gt); xe_force_wake_init_gt(gt, gt_to_fw(gt)); spin_lock_init(>->global_invl_lock); + err = xe_gt_tlb_invalidation_init_early(gt); + if (err) + return err; + return 0; } @@ -402,11 +433,12 @@ static void dump_pat_on_error(struct xe_gt *gt) static int gt_fw_domain_init(struct xe_gt *gt) { - int err, i; + unsigned int fw_ref; + int err; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - goto err_hw_fence_irq; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; if (!xe_gt_is_media_type(gt)) { err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); @@ -439,32 +471,32 @@ static int gt_fw_domain_init(struct xe_gt *gt) * Stash hardware-reported version. Since this register does not exist * on pre-MTL platforms, reading it there will (correctly) return 0. */ - gt->info.gmdid = xe_mmio_read32(gt, GMD_ID); - - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - XE_WARN_ON(err); + gt->info.gmdid = xe_mmio_read32(>->mmio, GMD_ID); + xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; err_force_wake: dump_pat_on_error(gt); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -err_hw_fence_irq: - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(>->fence_irq[i]); + xe_force_wake_put(gt_to_fw(gt), fw_ref); return err; } static int all_fw_domain_init(struct xe_gt *gt) { - int err, i; + unsigned int fw_ref; + int err; - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) - goto err_hw_fence_irq; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + err = -ETIMEDOUT; + goto err_force_wake; + } xe_gt_mcr_set_implicit_defaults(gt); + xe_wa_process_gt(gt); + xe_tuning_process_gt(gt); xe_reg_sr_apply_mmio(>->reg_sr, gt); err = xe_gt_clock_init(gt); @@ -523,19 +555,17 @@ static int all_fw_domain_init(struct xe_gt *gt) if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); - if (IS_SRIOV_PF(gt_to_xe(gt))) + if (IS_SRIOV_PF(gt_to_xe(gt))) { + xe_gt_sriov_pf_init(gt); xe_gt_sriov_pf_init_hw(gt); + } - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - XE_WARN_ON(err); + xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; err_force_wake: - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); -err_hw_fence_irq: - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(>->fence_irq[i]); + xe_force_wake_put(gt_to_fw(gt), fw_ref); return err; } @@ -546,11 +576,12 @@ err_hw_fence_irq: */ int xe_gt_init_hwconfig(struct xe_gt *gt) { + unsigned int fw_ref; int err; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - goto out; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; xe_gt_mcr_init_early(gt); xe_pat_init(gt); @@ -568,11 +599,21 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) xe_gt_enable_host_l2_vram(gt); out_fw: - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -out: + xe_force_wake_put(gt_to_fw(gt), fw_ref); return err; } +static void xe_gt_fini(void *arg) +{ + struct xe_gt *gt = arg; + int i; + + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) + xe_hw_fence_irq_finish(>->fence_irq[i]); + + xe_gt_disable_host_l2_vram(gt); +} + int xe_gt_init(struct xe_gt *gt) { int err; @@ -585,7 +626,7 @@ int xe_gt_init(struct xe_gt *gt) xe_hw_fence_irq_init(>->fence_irq[i]); } - err = xe_gt_tlb_invalidation_init(gt); + err = devm_add_action_or_reset(gt_to_xe(gt)->drm.dev, xe_gt_fini, gt); if (err) return err; @@ -619,9 +660,39 @@ int xe_gt_init(struct xe_gt *gt) xe_gt_record_user_engines(gt); + err = xe_eu_stall_init(gt); + if (err) + return err; + return 0; } +/** + * xe_gt_mmio_init() - Initialize GT's MMIO access + * @gt: the GT object + * + * Initialize GT's MMIO accessor, which will be used to access registers inside + * this GT. + */ +void xe_gt_mmio_init(struct xe_gt *gt) +{ + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = tile_to_xe(tile); + + xe_mmio_init(>->mmio, tile, tile->mmio.regs, tile->mmio.regs_size); + + if (gt->info.type == XE_GT_TYPE_MEDIA) { + gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; + gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH; + } else { + gt->mmio.adj_offset = 0; + gt->mmio.adj_limit = 0; + } + + if (IS_SRIOV_VF(xe)) + gt->mmio.sriov_vf_gt = gt; +} + void xe_gt_record_user_engines(struct xe_gt *gt) { struct xe_hw_engine *hwe; @@ -647,10 +718,13 @@ static int do_gt_reset(struct xe_gt *gt) { int err; + if (IS_SRIOV_VF(gt_to_xe(gt))) + return xe_gt_sriov_vf_reset(gt); + xe_gsc_wa_14015076503(gt, true); - xe_mmio_write32(gt, GDRST, GRDOM_FULL); - err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false); + xe_mmio_write32(>->mmio, GDRST, GRDOM_FULL); + err = xe_mmio_wait32(>->mmio, GDRST, GRDOM_FULL, 0, 5000, NULL, false); if (err) xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n", ERR_PTR(err)); @@ -721,10 +795,8 @@ static int do_gt_restart(struct xe_gt *gt) if (err) return err; - for_each_hw_engine(hwe, gt, id) { + for_each_hw_engine(hwe, gt, id) xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); - xe_reg_sr_apply_whitelist(hwe); - } /* Get CCS mode in sync between sw/hw */ xe_gt_apply_ccs_mode(gt); @@ -740,6 +812,7 @@ static int do_gt_restart(struct xe_gt *gt) static int gt_reset(struct xe_gt *gt) { + unsigned int fw_ref; int err; if (xe_device_wedged(gt_to_xe(gt))) @@ -760,9 +833,11 @@ static int gt_reset(struct xe_gt *gt) xe_gt_sanitize(gt); - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) - goto err_msg; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + err = -ETIMEDOUT; + goto err_out; + } xe_uc_gucrc_disable(>->uc); xe_uc_stop_prepare(>->uc); @@ -780,8 +855,7 @@ static int gt_reset(struct xe_gt *gt) if (err) goto err_out; - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - XE_WARN_ON(err); + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(gt_to_xe(gt)); xe_gt_info(gt, "reset done\n"); @@ -789,8 +863,7 @@ static int gt_reset(struct xe_gt *gt) return 0; err_out: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -err_msg: + xe_force_wake_put(gt_to_fw(gt), fw_ref); XE_WARN_ON(xe_uc_start(>->uc)); err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); @@ -810,7 +883,7 @@ static void gt_reset_worker(struct work_struct *w) void xe_gt_reset_async(struct xe_gt *gt) { - xe_gt_info(gt, "trying reset\n"); + xe_gt_info(gt, "trying reset from %ps\n", __builtin_return_address(0)); /* Don't do a reset while one is already in flight */ if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(>->uc)) @@ -822,22 +895,25 @@ void xe_gt_reset_async(struct xe_gt *gt) void xe_gt_suspend_prepare(struct xe_gt *gt) { - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + unsigned int fw_ref; - xe_uc_stop_prepare(>->uc); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_uc_suspend_prepare(>->uc); + + xe_force_wake_put(gt_to_fw(gt), fw_ref); } int xe_gt_suspend(struct xe_gt *gt) { + unsigned int fw_ref; int err; xe_gt_dbg(gt, "suspending\n"); xe_gt_sanitize(gt); - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) goto err_msg; err = xe_uc_suspend(>->uc); @@ -848,19 +924,29 @@ int xe_gt_suspend(struct xe_gt *gt) xe_gt_disable_host_l2_vram(gt); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_gt_dbg(gt, "suspended\n"); return 0; -err_force_wake: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: + err = -ETIMEDOUT; +err_force_wake: + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err)); return err; } +void xe_gt_shutdown(struct xe_gt *gt) +{ + unsigned int fw_ref; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + do_gt_reset(gt); + xe_force_wake_put(gt_to_fw(gt), fw_ref); +} + /** * xe_gt_sanitize_freq() - Restore saved frequencies if necessary. * @gt: the GT object @@ -883,11 +969,12 @@ int xe_gt_sanitize_freq(struct xe_gt *gt) int xe_gt_resume(struct xe_gt *gt) { + unsigned int fw_ref; int err; xe_gt_dbg(gt, "resuming\n"); - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) goto err_msg; err = do_gt_restart(gt); @@ -896,14 +983,15 @@ int xe_gt_resume(struct xe_gt *gt) xe_gt_idle_enable_pg(gt); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_gt_dbg(gt, "resumed\n"); return 0; -err_force_wake: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: + err = -ETIMEDOUT; +err_force_wake: + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err)); return err; |