Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 30
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/imu_v11_0.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 1624
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 43
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 11
-rw-r--r--  drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c | 3
-rw-r--r--  drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 35
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 8
-rw-r--r--  drivers/gpu/drm/bridge/aux-hpd-bridge.c | 3
-rw-r--r--  drivers/gpu/drm/bridge/ti-sn65dsi86.c | 2
-rw-r--r--  drivers/gpu/drm/drm_buddy.c | 43
-rw-r--r--  drivers/gpu/drm/drm_framebuffer.c | 31
-rw-r--r--  drivers/gpu/drm/drm_gem.c | 74
-rw-r--r--  drivers/gpu/drm/drm_internal.h | 2
-rw-r--r--  drivers/gpu/drm/exynos/exynos7_drm_decon.c | 4
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_fimd.c | 12
-rw-r--r--  drivers/gpu/drm/i915/display/intel_dp.c | 24
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gsc.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring_submission.c | 3
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_request.c | 20
-rw-r--r--  drivers/gpu/drm/i915/selftests/mock_request.c | 2
-rw-r--r--  drivers/gpu/drm/imagination/pvr_power.c | 4
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_crtc.c | 36
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_crtc.h | 1
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_ddp_comp.c | 1
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_ddp_comp.h | 9
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_disp_drv.h | 1
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_disp_ovl.c | 7
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_plane.c | 12
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_plane.h | 3
-rw-r--r--  drivers/gpu/drm/msm/msm_gem_submit.c | 17
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 20
-rw-r--r--  drivers/gpu/drm/scheduler/sched_entity.c | 21
-rw-r--r--  drivers/gpu/drm/tegra/nvdec.c | 6
-rw-r--r--  drivers/gpu/drm/tiny/simpledrm.c | 4
-rw-r--r--  drivers/gpu/drm/ttm/ttm_bo_util.c | 13
-rw-r--r--  drivers/gpu/drm/v3d/v3d_drv.h | 8
-rw-r--r--  drivers/gpu/drm/v3d/v3d_gem.c | 2
-rw-r--r--  drivers/gpu/drm/v3d/v3d_irq.c | 37
-rw-r--r--  drivers/gpu/drm/xe/Kconfig | 3
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h | 1
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h | 2
-rw-r--r--  drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 18
-rw-r--r--  drivers/gpu/drm/xe/display/xe_fb_pin.c | 17
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_reg_defs.h | 2
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_mocs.c | 21
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.c | 78
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.h | 40
-rw-r--r--  drivers/gpu/drm/xe/xe_bo_evict.c | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_bo_types.h | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_devcoredump.c | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_device.c | 55
-rw-r--r--  drivers/gpu/drm/xe/xe_force_wake.h | 16
-rw-r--r--  drivers/gpu/drm/xe/xe_ggtt.c | 35
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.c | 118
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_pagefault.c | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 114
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_guc.c | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct.c | 350
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct_types.h | 23
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_pc.c | 144
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_pc.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_pc_types.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_irq.c | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_lmtt.c | 11
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_pci.c | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_pm.c | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_trace_bo.h | 2
84 files changed, 3032 insertions(+), 355 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a55e611605fc..8cf224fd4ff2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4144,7 +4144,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->grbm_idx_mutex);
mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock);
- mutex_init(&adev->virt.rlcg_reg_lock);
hash_init(adev->mn_hash);
mutex_init(&adev->psp.mutex);
mutex_init(&adev->notifier_lock);
@@ -4170,6 +4169,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
spin_lock_init(&adev->se_cac_idx_lock);
spin_lock_init(&adev->audio_endpt_idx_lock);
spin_lock_init(&adev->mm_stats.lock);
+ spin_lock_init(&adev->virt.rlcg_reg_lock);
spin_lock_init(&adev->wb.lock);
INIT_LIST_HEAD(&adev->reset_list);
@@ -4954,6 +4954,8 @@ exit:
dev->dev->power.disable_depth--;
#endif
}
+
+ amdgpu_vram_mgr_clear_reset_blocks(adev);
adev->in_suspend = false;
if (adev->enable_mes)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 34d41e3ce347..eee434743deb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -112,6 +112,14 @@
#endif
MODULE_FIRMWARE("amdgpu/ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega10_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega12_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega20_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/raven_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/raven2_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_ip_discovery.bin");
#define mmIP_DISCOVERY_VERSION 0x16A00
#define mmRCC_CONFIG_MEMSIZE 0xde3
@@ -400,7 +408,27 @@ static const char *amdgpu_discovery_get_fw_name(struct amdgpu_device *adev)
if (amdgpu_discovery == 2)
return "amdgpu/ip_discovery.bin";
- return NULL;
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ return "amdgpu/vega10_ip_discovery.bin";
+ case CHIP_VEGA12:
+ return "amdgpu/vega12_ip_discovery.bin";
+ case CHIP_RAVEN:
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "amdgpu/raven2_ip_discovery.bin";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "amdgpu/picasso_ip_discovery.bin";
+ else
+ return "amdgpu/raven_ip_discovery.bin";
+ case CHIP_VEGA20:
+ return "amdgpu/vega20_ip_discovery.bin";
+ case CHIP_ARCTURUS:
+ return "amdgpu/arcturus_ip_discovery.bin";
+ case CHIP_ALDEBARAN:
+ return "amdgpu/aldebaran_ip_discovery.bin";
+ default:
+ return NULL;
+ }
}
static int amdgpu_discovery_init(struct amdgpu_device *adev)
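A NULL return preserves the legacy behavior: ASICs not listed here read the IP discovery table out of VRAM instead of loading a binary. A minimal sketch of the consuming side, assuming the usual request_firmware() flow (load_ip_discovery() is hypothetical, condensed from what amdgpu_discovery_init() does with the returned name):

static int load_ip_discovery(struct amdgpu_device *adev)
{
        const struct firmware *fw;
        const char *fw_name = amdgpu_discovery_get_fw_name(adev);
        int r;

        if (!fw_name)
                return -ENOENT; /* fall back to reading discovery from VRAM */

        r = request_firmware(&fw, fw_name, adev->dev);
        if (r)
                return r;

        /* ... copy fw->data into the discovery binary buffer ... */
        release_firmware(fw);
        return 0;
}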
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 7d4b540340e0..41b88e0ea98b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -860,7 +860,9 @@ int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
queue_input.wptr_addr = ring->wptr_gpu_addr;
+ amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
if (r)
DRM_ERROR("failed to map legacy queue\n");
@@ -883,7 +885,9 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
queue_input.trail_fence_addr = gpu_addr;
queue_input.trail_fence_data = seq;
+ amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
if (r)
DRM_ERROR("failed to unmap legacy queue\n");
@@ -910,7 +914,9 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
queue_input.vmid = vmid;
queue_input.use_mmio = use_mmio;
+ amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->reset_legacy_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
if (r)
DRM_ERROR("failed to reset legacy queue\n");
@@ -931,7 +937,9 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
goto error;
}
+ amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
if (r)
DRM_ERROR("failed to read reg (0x%x)\n", reg);
else
@@ -957,7 +965,9 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev,
goto error;
}
+ amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
if (r)
DRM_ERROR("failed to write reg (0x%x)\n", reg);
@@ -984,7 +994,9 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
goto error;
}
+ amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
if (r)
DRM_ERROR("failed to reg_write_reg_wait\n");
@@ -1009,7 +1021,9 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
goto error;
}
+ amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
if (r)
DRM_ERROR("failed to reg_write_reg_wait\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 48e30e5f8338..3d42f6c3308e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -3430,7 +3430,10 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name)
uint8_t *ucode_array_start_addr;
int err = 0;
- err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name);
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos_kicker.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name);
if (err)
goto out;
@@ -3672,7 +3675,10 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name)
struct amdgpu_device *adev = psp->adev;
int err;
- err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, "amdgpu/%s_ta.bin", chip_name);
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, "amdgpu/%s_ta_kicker.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, "amdgpu/%s_ta.bin", chip_name);
if (err)
return err;
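The same kicker-versus-production split recurs below for RLC (gfx_v11_0.c) and IMU (imu_v11_0.c) firmware. A hypothetical generic helper showing the shape, assuming amdgpu_ucode_request()'s printf-style format argument:

static int amdgpu_ucode_request_maybe_kicker(struct amdgpu_device *adev,
                                             const struct firmware **fw,
                                             const char *chip_name,
                                             const char *ucode)
{
        if (amdgpu_is_kicker_fw(adev))
                return amdgpu_ucode_request(adev, fw, "amdgpu/%s_%s_kicker.bin",
                                            chip_name, ucode);

        return amdgpu_ucode_request(adev, fw, "amdgpu/%s_%s.bin",
                                    chip_name, ucode);
}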
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 690976665cf6..10da6e550d76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -439,6 +439,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
{
unsigned long flags;
ktime_t deadline;
+ bool ret;
if (unlikely(ring->adev->debug_disable_soft_recovery))
return false;
@@ -453,12 +454,16 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
dma_fence_set_error(fence, -ENODATA);
spin_unlock_irqrestore(fence->lock, flags);
- atomic_inc(&ring->adev->gpu_reset_counter);
while (!dma_fence_is_signaled(fence) &&
ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
ring->funcs->soft_recovery(ring, vmid);
- return dma_fence_is_signaled(fence);
+ ret = dma_fence_is_signaled(fence);
+ /* increment the counter only if soft reset worked */
+ if (ret)
+ atomic_inc(&ring->adev->gpu_reset_counter);
+
+ return ret;
}
/*
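The recovery loop is a bounded busy-wait against a ktime deadline, and the reset counter now only ticks when that wait actually rescued the fence. A standalone sketch of the deadline pattern (helper name illustrative):

static bool poll_fence_with_deadline(struct amdgpu_ring *ring,
                                     struct dma_fence *fence,
                                     unsigned int vmid, u64 timeout_ns)
{
        ktime_t deadline = ktime_add_ns(ktime_get(), timeout_ns);

        while (!dma_fence_is_signaled(fence) &&
               ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
                ring->funcs->soft_recovery(ring, vmid); /* poke the hardware */

        return dma_fence_is_signaled(fence);
}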
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index b7742fa74e1d..3c883f1cf068 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -153,6 +153,7 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
uint64_t start, uint64_t size);
int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
uint64_t start);
+void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev);
bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
struct ttm_resource *res);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index b6397d3229e1..01dccd489a80 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -1010,6 +1010,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f
void *scratch_reg2;
void *scratch_reg3;
void *spare_int;
+ unsigned long flags;
if (!adev->gfx.rlc.rlcg_reg_access_supported) {
dev_err(adev->dev,
@@ -1031,7 +1032,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f
scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2;
scratch_reg3 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg3;
- mutex_lock(&adev->virt.rlcg_reg_lock);
+ spin_lock_irqsave(&adev->virt.rlcg_reg_lock, flags);
if (reg_access_ctrl->spare_int)
spare_int = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->spare_int;
@@ -1090,7 +1091,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f
ret = readl(scratch_reg0);
- mutex_unlock(&adev->virt.rlcg_reg_lock);
+ spin_unlock_irqrestore(&adev->virt.rlcg_reg_lock, flags);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index b650a2032c42..6a2087abfb7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -275,7 +275,8 @@ struct amdgpu_virt {
/* the ucode id to signal the autoload */
uint32_t autoload_ucode_id;
- struct mutex rlcg_reg_lock;
+ /* Spinlock to protect access to the RLCG register interface */
+ spinlock_t rlcg_reg_lock;
};
struct amdgpu_video_codec_info;
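The mutex-to-spinlock conversion matters because the RLCG register path can be reached from contexts where sleeping is forbidden; a mutex may sleep, while spin_lock_irqsave() may not and additionally masks local interrupts. A hedged illustration of the resulting discipline (the accessor is hypothetical):

static u32 rlcg_scratch_read(struct amdgpu_device *adev, void __iomem *scratch)
{
        unsigned long flags;
        u32 val;

        /* no allocation, no waiting, no long MMIO polling in this window */
        spin_lock_irqsave(&adev->virt.rlcg_reg_lock, flags);
        val = readl(scratch);
        spin_unlock_irqrestore(&adev->virt.rlcg_reg_lock, flags);

        return val;
}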
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8f58ec6f1400..732c79e201c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -783,6 +783,23 @@ uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr)
}
/**
+ * amdgpu_vram_mgr_clear_reset_blocks - reset clear blocks
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Reset the cleared drm buddy blocks.
+ */
+void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev)
+{
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct drm_buddy *mm = &mgr->mm;
+
+ mutex_lock(&mgr->lock);
+ drm_buddy_reset_clear(mm, false);
+ mutex_unlock(&mgr->lock);
+}
+
+/**
* amdgpu_vram_mgr_intersects - test each drm buddy block for intersection
*
* @man: TTM memory type manager
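drm_buddy_reset_clear() (extended in drm_buddy.c in this same series) re-flags the cleared state of every free block; passing false marks them dirty, so buffers handed out after a GPU reset are cleared again rather than trusted. Any further caller would follow the same locking shape, sketched hypothetically:

static void example_mark_vram_dirty(struct amdgpu_vram_mgr *mgr)
{
        mutex_lock(&mgr->lock);
        drm_buddy_reset_clear(&mgr->mm, false);
        mutex_unlock(&mgr->lock);
}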
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 1f06b22dbe7c..96e5c520af31 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -84,6 +84,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
@@ -734,6 +735,9 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
adev->pdev->revision == 0xCE)
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
"amdgpu/gc_11_0_0_rlc_1.bin");
+ else if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
else
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
"amdgpu/%s_rlc.bin", ucode_prefix);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 9d741695ca07..1f675d67a1a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4652,6 +4652,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
/* reset ring buffer */
ring->wptr = 0;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
amdgpu_ring_clear_ring(ring);
}
return 0;
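Resetting only ring->wptr leaves the CPU-visible shadow stale, so the queue could resume from a bogus write offset after re-init. A minimal sketch of the invariant the one-line fix restores (helper name illustrative):

static void example_reset_ring_pointers(struct amdgpu_ring *ring)
{
        ring->wptr = 0;                                      /* driver-side copy */
        atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);  /* HW-visible shadow */
        amdgpu_ring_clear_ring(ring);                        /* refill with NOPs */
}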
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
index d4f72e47ae9e..c4f5cbf1ecd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
@@ -32,6 +32,7 @@
#include "gc/gc_11_0_0_sh_mask.h"
MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu_kicker.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin");
@@ -50,7 +51,10 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev)
DRM_DEBUG("\n");
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu.bin", ucode_prefix);
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu_kicker.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu.bin", ucode_prefix);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index bf00de763acb..124f74e862d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -42,7 +42,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos_kicker.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta_kicker.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin");
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
new file mode 100644
index 000000000000..cdefd7fcb0da
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
@@ -0,0 +1,1624 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_hw_ip.h"
+#include "vcn_v2_0.h"
+#include "vcn_v4_0_3.h"
+#include "mmsch_v5_0.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+#include "vcn_v5_0_0.h"
+#include "vcn_v5_0_1.h"
+
+#include <drm/drm_drv.h>
+
+static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev);
+static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring);
+static void vcn_v5_0_1_set_ras_funcs(struct amdgpu_device *adev);
+/**
+ * vcn_v5_0_1_early_init - set function pointers and load microcode
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set ring and irq function pointers
+ * Load microcode from filesystem
+ */
+static int vcn_v5_0_1_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ /* re-use enc ring as unified ring */
+ adev->vcn.inst[i].num_enc_rings = 1;
+
+ vcn_v5_0_1_set_unified_ring_funcs(adev);
+ vcn_v5_0_1_set_irq_funcs(adev);
+ vcn_v5_0_1_set_ras_funcs(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].set_pg_state = vcn_v5_0_1_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void vcn_v5_0_1_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
+{
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+
+ fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+
+ if (fw_shared->sq.is_enabled)
+ return;
+ fw_shared->present_flag_0 =
+ cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+ fw_shared->sq.is_enabled = 1;
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]);
+}
+
+/**
+ * vcn_v5_0_1_sw_init - sw init for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and sw initialization
+ */
+static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, r, vcn_inst;
+
+ /* VCN UNIFIED TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq);
+ if (r)
+ return r;
+
+ /* VCN POISON TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_5_0__SRCID_UVD_POISON, &adev->vcn.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ vcn_inst = GET_INST(VCN, i);
+
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->use_doorbell = true;
+ if (!amdgpu_sriov_vf(adev))
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 11 * vcn_inst;
+ else
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 32 * vcn_inst;
+
+ ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
+ sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
+
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score);
+ if (r)
+ return r;
+
+ vcn_v5_0_1_fw_shared_init(adev, i);
+ }
+
+ /* TODO: Add queue reset mask when FW fully supports it */
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_alloc_mm_table(adev);
+ if (r)
+ return r;
+ }
+
+ vcn_v5_0_0_alloc_ip_dump(adev);
+
+ return amdgpu_vcn_sysfs_reset_mask_init(adev);
+}
+
+/**
+ * vcn_v5_0_1_sw_fini - sw fini for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * VCN suspend and free up sw allocation
+ */
+static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r, idx;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ fw_shared->sq.is_enabled = 0;
+ }
+
+ drm_dev_exit(idx);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_free_mm_table(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_sw_fini(adev, i);
+ if (r)
+ return r;
+ }
+
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
+
+ kfree(adev->vcn.ip_dump);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_hw_init - start and test VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, r, vcn_inst;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = vcn_v5_0_1_start_sriov(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ vcn_v5_0_1_unified_ring_set_wptr(ring);
+ ring->sched.ready = true;
+ }
+ } else {
+ if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
+ adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ vcn_inst = GET_INST(VCN, i);
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ if (ring->use_doorbell)
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 11 * vcn_inst),
+ adev->vcn.inst[i].aid_id);
+
+ /* Re-init fw_shared, if required */
+ vcn_v5_0_1_fw_shared_init(adev, i);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Stop the VCN block, mark ring as not ready any more
+ */
+static int vcn_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
+ if (vinst->cur_state != AMD_PG_STATE_GATE)
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
+ }
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
+ amdgpu_irq_put(adev, &adev->vcn.inst->ras_poison_irq, 0);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_suspend - suspend VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * HW fini and suspend VCN block
+ */
+static int vcn_v5_0_1_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
+
+ r = vcn_v5_0_1_hw_fini(ip_block);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(ip_block->adev, i);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+/**
+ * vcn_v5_0_1_resume - resume VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume firmware and hw init VCN block
+ */
+static int vcn_v5_0_1_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ if (amdgpu_in_reset(adev))
+ vinst->cur_state = AMD_PG_STATE_GATE;
+
+ r = amdgpu_vcn_resume(ip_block->adev, i);
+ if (r)
+ return r;
+ }
+
+ r = vcn_v5_0_1_hw_init(ip_block);
+
+ return r;
+}
+
+/**
+ * vcn_v5_0_1_mc_resume - memory controller programming
+ *
+ * @vinst: VCN instance
+ *
+ * Let the VCN memory controller know its offsets
+ */
+static void vcn_v5_0_1_mc_resume(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t offset, size, vcn_inst;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ vcn_inst = GET_INST(VCN, inst);
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr));
+ offset = size;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)));
+}
+
+/**
+ * vcn_v5_0_1_mc_resume_dpg_mode - memory controller programming for dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Let the VCN memory controller know its offsets with dpg mode
+ */
+static void vcn_v5_0_1_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v5_0_1_disable_clock_gating - disable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Disable clock gating for VCN block
+ */
+static void vcn_v5_0_1_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+}
+
+/**
+ * vcn_v5_0_1_enable_clock_gating - enable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Enable clock gating for VCN block
+ */
+static void vcn_v5_0_1_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+}
+
+/**
+ * vcn_v5_0_1_pause_dpg_mode - VCN pause with dpg mode
+ *
+ * @vinst: VCN instance
+ * @new_state: pause state
+ *
+ * Pause dpg mode for VCN block
+ */
+static int vcn_v5_0_1_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t reg_data = 0;
+ int vcn_inst;
+
+ vcn_inst = GET_INST(VCN, vinst->inst);
+
+ /* pause/unpause if state is changed */
+ if (vinst->pause_state.fw_based != new_state->fw_based) {
+ DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d %s\n",
+ vinst->pause_state.fw_based, new_state->fw_based,
+ new_state->fw_based ? "VCN_DPG_STATE__PAUSE" : "VCN_DPG_STATE__UNPAUSE");
+ reg_data = RREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ /* pause DPG */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data);
+
+ /* wait for ACK */
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+ } else {
+ /* unpause DPG, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data);
+ }
+ vinst->pause_state.fw_based = new_state->fw_based;
+ }
+
+ return 0;
+}
+
+
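A hypothetical caller of the pause helper: request a pause before reprogramming the ring, then unpause; the helper is a no-op when the state does not change:

static void example_toggle_dpg_pause(struct amdgpu_vcn_inst *vinst)
{
        struct dpg_pause_state pause = { .fw_based = VCN_DPG_STATE__PAUSE };
        struct dpg_pause_state unpause = { .fw_based = VCN_DPG_STATE__UNPAUSE };

        vcn_v5_0_1_pause_dpg_mode(vinst, &pause);
        /* ... safe to reprogram the ring registers here ... */
        vcn_v5_0_1_pause_dpg_mode(vinst, &unpause);
}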
+/**
+ * vcn_v5_0_1_start_dpg_mode - VCN start with dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Start VCN block with dpg mode
+ */
+static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ volatile struct amdgpu_vcn5_fw_shared *fw_shared =
+ adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_ring *ring;
+ struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE};
+ int vcn_inst;
+ uint32_t tmp;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp);
+
+ if (indirect) {
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+ /* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */
+ WREG32_SOC24_DPG_MODE(inst_idx, 0xDEADBEEF,
+ adev->vcn.inst[inst_idx].aid_id, 0, true);
+ }
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interrupt */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect);
+
+ vcn_v5_0_1_mc_resume_dpg_mode(vinst, indirect);
+
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ if (indirect)
+ amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM);
+
+ /* resetting ring, fw should not check RB ring */
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+
+ /* Pause dpg */
+ vcn_v5_0_1_pause_dpg_mode(vinst, &state);
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / sizeof(uint32_t));
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ /* resetting done, fw can check RB ring */
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL);
+
+ return 0;
+}
+
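The trailing RREG32_SOC15() of regVCN_RB1_DB_CTRL is a posted-write flush: MMIO writes may still be buffered when the function returns unless something on the same path reads them back. The generic shape, as a sketch:

static void example_write_then_flush(void __iomem *reg, u32 val)
{
        writel(val, reg);  /* posted write, may still be in flight */
        (void)readl(reg);  /* read-back forces completion before we proceed */
}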
+static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev)
+{
+ int i, vcn_inst;
+ struct amdgpu_ring *ring_enc;
+ uint64_t cache_addr;
+ uint64_t rb_enc_addr;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t offset, cache_size;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw;
+ uint32_t init_status;
+ uint32_t enabled_vcn;
+
+ struct mmsch_v5_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v5_0_cmd_direct_read_modify_write
+ direct_rd_mod_wt = { {0} };
+ struct mmsch_v5_0_cmd_end end = { {0} };
+ struct mmsch_v5_0_init_header header;
+
+ volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+ volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ vcn_inst = GET_INST(VCN, i);
+
+ vcn_v5_0_1_fw_shared_init(adev, vcn_inst);
+
+ memset(&header, 0, sizeof(struct mmsch_v5_0_init_header));
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v5_0_init_header) >> 2;
+
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ table_size = 0;
+
+ MMSCH_V5_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
+ ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+
+ offset = 0;
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET0), 0);
+ } else {
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = cache_size;
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE0),
+ cache_size);
+
+ cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET1), 0);
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
+
+ cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE;
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET2), 0);
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
+
+ fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
+ rb_setup = &fw_shared->rb_setup;
+
+ ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
+ ring_enc->wptr = 0;
+ rb_enc_addr = ring_enc->gpu_addr;
+
+ rb_setup->is_rb_enabled_flags |= RB_ENABLED;
+ rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+ rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+ rb_setup->rb_size = ring_enc->ring_size / 4;
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+ MMSCH_V5_0_INSERT_END();
+
+ header.vcn0.init_status = 0;
+ header.vcn0.table_offset = header.total_size;
+ header.vcn0.table_size = table_size;
+ header.total_size += table_size;
+
+ /* Send init table to mmsch */
+ size = sizeof(struct mmsch_v5_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
+
+ size = header.total_size;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
+
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ param = 0x00000001;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
+ if (resp != 0)
+ break;
+
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+
+ enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
+ init_status = ((struct mmsch_v5_0_init_header *)(table_loc))->vcn0.init_status;
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+ && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
+ "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
+ }
+ }
+
+ return 0;
+}
+
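The mailbox handshake above is a bounded poll: up to 1000 microseconds in 10 microsecond steps, bailing out early on any non-zero response. Condensed into a hypothetical helper:

static int example_mmsch_wait_resp(struct amdgpu_device *adev, int vcn_inst)
{
        uint32_t resp, waited;

        for (waited = 0; waited < 1000; waited += 10) {
                resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
                if (resp)
                        return resp == MMSCH_VF_MAILBOX_RESP__OK ? 0 : -EINVAL;
                udelay(10);
        }

        return -EBUSY; /* timed out waiting for MMSCH */
}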
+/**
+ * vcn_v5_0_1_start - VCN start
+ *
+ * @vinst: VCN instance
+ *
+ * Start VCN block
+ */
+static int vcn_v5_0_1_start(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+ struct amdgpu_ring *ring;
+ uint32_t tmp;
+ int j, k, r, vcn_inst;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v5_0_1_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
+
+ vcn_inst = GET_INST(VCN, i);
+
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ vcn_v5_0_1_mc_resume(vinst);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(100);
+ if (amdgpu_emu_mode == 1)
+ msleep(20);
+ }
+
+ if (amdgpu_emu_mode == 1) {
+ r = -1;
+ if (status & 2) {
+ r = 0;
+ break;
+ }
+ } else {
+ r = 0;
+ if (status & 2)
+ break;
+
+ dev_err(adev->dev,
+ "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
+ }
+ }
+
+ if (r) {
+ dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL);
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_stop_dpg_mode - VCN stop with dpg mode
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block with dpg mode
+ */
+static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t tmp;
+ int vcn_inst;
+ struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* Unpause dpg */
+ vcn_v5_0_1_pause_dpg_mode(vinst, &state);
+
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+}
+
+/**
+ * vcn_v5_0_1_stop - VCN stop
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block
+ */
+static int vcn_v5_0_1_stop(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+ uint32_t tmp;
+ int r = 0, vcn_inst;
+
+ vcn_inst = GET_INST(VCN, i);
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v5_0_1_stop_dpg_mode(vinst);
+ return 0;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ return r;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* apply soft reset */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* clear status */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_get_rptr - get unified read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified read pointer
+ */
+static uint64_t vcn_v5_0_1_unified_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR);
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_get_wptr - get unified write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified write pointer
+ */
+static uint64_t vcn_v5_0_1_unified_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR);
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the unified write pointer to the hardware
+ */
+static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ }
+}
+
+static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v5_0_1_unified_ring_get_rptr,
+ .get_wptr = vcn_v5_0_1_unified_ring_get_wptr,
+ .set_wptr = vcn_v5_0_1_unified_ring_set_wptr,
+ .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 +
+ 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush,
+ .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_unified_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+/**
+ * vcn_v5_0_1_set_unified_ring_funcs - set unified ring functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set unified ring functions
+ */
+static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, vcn_inst;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v5_0_1_unified_ring_vm_funcs;
+ adev->vcn.inst[i].ring_enc[0].me = i;
+ vcn_inst = GET_INST(VCN, i);
+ adev->vcn.inst[i].aid_id = vcn_inst / adev->vcn.num_inst_per_aid;
+ }
+}
+
+/**
+ * vcn_v5_0_1_is_idle - check VCN block is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block structure
+ *
+ * Check whether VCN block is idle
+ */
+static bool vcn_v5_0_1_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, ret = 1;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) == UVD_STATUS__IDLE);
+
+ return ret;
+}
+
+/**
+ * vcn_v5_0_1_wait_for_idle - wait for VCN block idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Wait for VCN block idle
+ */
+static int vcn_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, ret = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+/**
+ * vcn_v5_0_1_set_clockgating_state - set VCN block clockgating state
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * @state: clock gating state
+ *
+ * Set VCN block clockgating state
+ */
+static int vcn_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = state == AMD_CG_STATE_GATE;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ if (enable) {
+ if (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) != UVD_STATUS__IDLE)
+ return -EBUSY;
+ vcn_v5_0_1_enable_clock_gating(vinst);
+ } else {
+ vcn_v5_0_1_disable_clock_gating(vinst);
+ }
+ }
+
+ return 0;
+}
+
+static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int ret = 0;
+
+ /* for SRIOV, guest should not control VCN Power-gating
+ * MMSCH FW should control Power-gating and clock-gating
+ * guest should avoid touching CGC and PG
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ vinst->cur_state = AMD_PG_STATE_UNGATE;
+ return 0;
+ }
+
+ if (state == vinst->cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = vcn_v5_0_1_stop(vinst);
+ else
+ ret = vcn_v5_0_1_start(vinst);
+
+ if (!ret)
+ vinst->cur_state = state;
+
+ return ret;
+}
+
+/**
+ * vcn_v5_0_1_process_interrupt - process VCN block interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @entry: interrupt entry from clients and sources
+ *
+ * Process VCN block interrupt
+ */
+static int vcn_v5_0_1_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t i, inst;
+
+ i = node_id_to_phys_map[entry->node_id];
+
+ DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n");
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst)
+ if (adev->vcn.inst[inst].aid_id == i)
+ break;
+ if (inst >= adev->vcn.num_vcn_inst) {
+ dev_WARN_ONCE(adev->dev, 1,
+ "Interrupt received for unknown VCN instance %d",
+ entry->node_id);
+ return 0;
+ }
+
+ switch (entry->src_id) {
+ case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int vcn_v5_0_1_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v5_0_1_irq_funcs = {
+ .process = vcn_v5_0_1_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs vcn_v5_0_1_ras_irq_funcs = {
+ .set = vcn_v5_0_1_set_ras_interrupt_state,
+ .process = amdgpu_vcn_process_poison_irq,
+};
+
+/**
+ * vcn_v5_0_1_set_irq_funcs - set VCN block interrupt irq functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set VCN block interrupt irq functions
+ */
+static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ adev->vcn.inst->irq.num_types++;
+
+ adev->vcn.inst->irq.funcs = &vcn_v5_0_1_irq_funcs;
+
+ adev->vcn.inst->ras_poison_irq.num_types = 1;
+ adev->vcn.inst->ras_poison_irq.funcs = &vcn_v5_0_1_ras_irq_funcs;
+}
+
+static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = {
+ .name = "vcn_v5_0_1",
+ .early_init = vcn_v5_0_1_early_init,
+ .late_init = NULL,
+ .sw_init = vcn_v5_0_1_sw_init,
+ .sw_fini = vcn_v5_0_1_sw_fini,
+ .hw_init = vcn_v5_0_1_hw_init,
+ .hw_fini = vcn_v5_0_1_hw_fini,
+ .suspend = vcn_v5_0_1_suspend,
+ .resume = vcn_v5_0_1_resume,
+ .is_idle = vcn_v5_0_1_is_idle,
+ .wait_for_idle = vcn_v5_0_1_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = vcn_v5_0_1_set_clockgating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = vcn_v5_0_0_dump_ip_state,
+ .print_ip_state = vcn_v5_0_0_print_ip_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 5,
+ .minor = 0,
+ .rev = 1,
+ .funcs = &vcn_v5_0_1_ip_funcs,
+};
+
+static uint32_t vcn_v5_0_1_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_VCN_V5_0_1_VCPU_VCODEC:
+ reg_value = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool vcn_v5_0_1_query_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst, sub;
+ uint32_t poison_stat = 0;
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
+ for (sub = 0; sub < AMDGPU_VCN_V5_0_1_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ vcn_v5_0_1_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
+static const struct amdgpu_ras_block_hw_ops vcn_v5_0_1_ras_hw_ops = {
+ .query_poison_status = vcn_v5_0_1_query_poison_status,
+};
+
+static int vcn_v5_0_1_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* error codes are defined in the SMU driver interface (if) header file */
+static int vcn_v5_0_1_err_codes[] = {
+ 14, 15, /* VCN */
+};
+
+static bool vcn_v5_0_1_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ vcn_v5_0_1_err_codes,
+ ARRAY_SIZE(vcn_v5_0_1_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops vcn_v5_0_1_aca_bank_ops = {
+ .aca_bank_parser = vcn_v5_0_1_aca_bank_parser,
+ .aca_bank_is_valid = vcn_v5_0_1_aca_bank_is_valid,
+};
+
+static const struct aca_info vcn_v5_0_1_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &vcn_v5_0_1_aca_bank_ops,
+};
+
+static int vcn_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__VCN,
+ &vcn_v5_0_1_aca_info, NULL);
+ if (r)
+ goto late_fini;
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
+static struct amdgpu_vcn_ras vcn_v5_0_1_ras = {
+ .ras_block = {
+ .hw_ops = &vcn_v5_0_1_ras_hw_ops,
+ .ras_late_init = vcn_v5_0_1_ras_late_init,
+ },
+};
+
+static void vcn_v5_0_1_set_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->vcn.ras = &vcn_v5_0_1_ras;
+}
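Both stop paths above end by reading regUVD_STATUS and discarding the result. This is the usual MMIO posted-write flush idiom: writes to device registers may be posted, so a read on the same path forces them to complete before execution continues. A minimal sketch of the idiom, using plain io accessors and a placeholder register offset rather than the amdgpu SOC15 macros:

#include <linux/io.h>

#define REG_STATUS 0x0	/* placeholder offset, for illustration only */

static void stop_block(void __iomem *regs)
{
	writel(0, regs + REG_STATUS);	/* posted write: may still be in flight */
	(void)readl(regs + REG_STATUS);	/* read-back forces it to complete */
}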
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f00d41be7fca..3e9e0f36cd3f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1170,13 +1170,12 @@ svm_range_split_head(struct svm_range *prange, uint64_t new_start,
}
static void
-svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
- struct svm_range *pchild, enum svm_work_list_ops op)
+svm_range_add_child(struct svm_range *prange, struct svm_range *pchild, enum svm_work_list_ops op)
{
pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n",
pchild, pchild->start, pchild->last, prange, op);
- pchild->work_item.mm = mm;
+ pchild->work_item.mm = NULL;
pchild->work_item.op = op;
list_add_tail(&pchild->child_list, &prange->child_list);
}
@@ -2384,15 +2383,17 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
prange->work_item.op != SVM_OP_UNMAP_RANGE)
prange->work_item.op = op;
} else {
- prange->work_item.op = op;
-
- /* Pairs with mmput in deferred_list_work */
- mmget(mm);
- prange->work_item.mm = mm;
- list_add_tail(&prange->deferred_list,
- &prange->svms->deferred_range_list);
- pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
- prange, prange->start, prange->last, op);
+ /* Pairs with mmput in deferred_list_work.
+ * If process is exiting and mm is gone, don't update mmu notifier.
+ */
+ if (mmget_not_zero(mm)) {
+ prange->work_item.mm = mm;
+ prange->work_item.op = op;
+ list_add_tail(&prange->deferred_list,
+ &prange->svms->deferred_range_list);
+ pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
+ prange, prange->start, prange->last, op);
+ }
}
spin_unlock(&svms->deferred_list_lock);
}
@@ -2406,8 +2407,7 @@ void schedule_deferred_list_work(struct svm_range_list *svms)
}
static void
-svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
- struct svm_range *prange, unsigned long start,
+svm_range_unmap_split(struct svm_range *parent, struct svm_range *prange, unsigned long start,
unsigned long last)
{
struct svm_range *head;
@@ -2428,12 +2428,12 @@ svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
svm_range_split(tail, last + 1, tail->last, &head);
if (head != prange && tail != prange) {
- svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
- svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
+ svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, tail, SVM_OP_ADD_RANGE);
} else if (tail != prange) {
- svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, tail, SVM_OP_UNMAP_RANGE);
} else if (head != prange) {
- svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE);
} else if (parent != prange) {
prange->work_item.op = SVM_OP_UNMAP_RANGE;
}
@@ -2510,14 +2510,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
l = min(last, pchild->last);
if (l >= s)
svm_range_unmap_from_gpus(pchild, s, l, trigger);
- svm_range_unmap_split(mm, prange, pchild, start, last);
+ svm_range_unmap_split(prange, pchild, start, last);
mutex_unlock(&pchild->lock);
}
s = max(start, prange->start);
l = min(last, prange->last);
if (l >= s)
svm_range_unmap_from_gpus(prange, s, l, trigger);
- svm_range_unmap_split(mm, prange, prange, start, last);
+ svm_range_unmap_split(prange, prange, start, last);
if (unmap_parent)
svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE);
@@ -2560,8 +2560,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
if (range->event == MMU_NOTIFY_RELEASE)
return true;
- if (!mmget_not_zero(mni->mm))
- return true;
start = mni->interval_tree.start;
last = mni->interval_tree.last;
@@ -2588,7 +2586,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
}
svm_range_unlock(prange);
- mmput(mni->mm);
return true;
}
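The reworked svm_range_add_list_work() queues deferred work only when mmget_not_zero() succeeds, so an exiting process, whose mm_users count has already dropped to zero, is skipped instead of having its mm resurrected. A minimal sketch of the take-a-reference-or-skip pattern, with hypothetical context and work types:

#include <linux/sched/mm.h>
#include <linux/workqueue.h>

struct deferred_ctx {			/* hypothetical, for illustration */
	struct work_struct work;
	struct mm_struct *mm;
};

static void deferred_fn(struct work_struct *w)
{
	struct deferred_ctx *ctx = container_of(w, struct deferred_ctx, work);

	/* ... operate on ctx->mm ... */
	mmput(ctx->mm);			/* pairs with mmget_not_zero() */
}

static void queue_deferred(struct deferred_ctx *ctx, struct mm_struct *mm)
{
	if (!mmget_not_zero(mm))	/* process exiting: mm already dead */
		return;
	ctx->mm = mm;
	schedule_work(&ctx->work);
}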
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 2ac56e79df05..9a31e5da3687 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -731,7 +731,16 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
* support programmable degamma anywhere.
*/
is_dcn = dm->adev->dm.dc->caps.color.dpp.dcn_arch;
- drm_crtc_enable_color_mgmt(&acrtc->base, is_dcn ? MAX_COLOR_LUT_ENTRIES : 0,
+ /* Don't enable the DRM CRTC degamma property for DCN401, since the
+ * pre-blending degamma LUT doesn't apply to the cursor and therefore
+ * can't work like a post-blending degamma LUT as on other hw
+ * versions.
+ * TODO: revisit once the KMS plane color API is merged.
+ */
+ drm_crtc_enable_color_mgmt(&acrtc->base,
+ (is_dcn &&
+ dm->adev->dm.dc->ctx->dce_version != DCN_VERSION_4_01) ?
+ MAX_COLOR_LUT_ENTRIES : 0,
true, MAX_COLOR_LUT_ENTRIES);
drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
index 313e52997596..2ee034879f9f 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
@@ -1700,7 +1700,7 @@ struct clk_mgr_internal *dcn401_clk_mgr_construct(
clk_mgr->base.bw_params = kzalloc(sizeof(*clk_mgr->base.bw_params), GFP_KERNEL);
if (!clk_mgr->base.bw_params) {
BREAK_TO_DEBUGGER();
- kfree(clk_mgr);
+ kfree(clk_mgr401);
return NULL;
}
@@ -1711,6 +1711,7 @@ struct clk_mgr_internal *dcn401_clk_mgr_construct(
if (!clk_mgr->wm_range_table) {
BREAK_TO_DEBUGGER();
kfree(clk_mgr->base.bw_params);
+ kfree(clk_mgr401);
return NULL;
}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index ca446e08f6a2..21aff7fa6375 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -1019,8 +1019,22 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
if (pipe_ctx->plane_res.dpp || pipe_ctx->stream_res.opp)
update_state->pg_pipe_res_update[PG_MPCC][pipe_ctx->plane_res.mpcc_inst] = false;
- if (pipe_ctx->stream_res.dsc)
+ if (pipe_ctx->stream_res.dsc) {
update_state->pg_pipe_res_update[PG_DSC][pipe_ctx->stream_res.dsc->inst] = false;
+ if (dc->caps.sequential_ono) {
+ update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->stream_res.dsc->inst] = false;
+ update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->stream_res.dsc->inst] = false;
+
+ /* All HUBP/DPP instances must be powered if the DSC inst != HUBP inst */
+ if (!pipe_ctx->top_pipe && pipe_ctx->plane_res.hubp &&
+ pipe_ctx->plane_res.hubp->inst != pipe_ctx->stream_res.dsc->inst) {
+ for (j = 0; j < dc->res_pool->pipe_count; ++j) {
+ update_state->pg_pipe_res_update[PG_HUBP][j] = false;
+ update_state->pg_pipe_res_update[PG_DPP][j] = false;
+ }
+ }
+ }
+ }
if (pipe_ctx->stream_res.opp)
update_state->pg_pipe_res_update[PG_OPP][pipe_ctx->stream_res.opp->inst] = false;
@@ -1165,6 +1179,25 @@ void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context,
update_state->pg_pipe_res_update[PG_HDMISTREAM][0] = true;
if (dc->caps.sequential_ono) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (new_pipe->stream_res.dsc && !new_pipe->top_pipe &&
+ update_state->pg_pipe_res_update[PG_DSC][new_pipe->stream_res.dsc->inst]) {
+ update_state->pg_pipe_res_update[PG_HUBP][new_pipe->stream_res.dsc->inst] = true;
+ update_state->pg_pipe_res_update[PG_DPP][new_pipe->stream_res.dsc->inst] = true;
+
+ /* All HUBP/DPP instances must be powered if the DSC inst != HUBP inst */
+ if (new_pipe->plane_res.hubp &&
+ new_pipe->plane_res.hubp->inst != new_pipe->stream_res.dsc->inst) {
+ for (j = 0; j < dc->res_pool->pipe_count; ++j) {
+ update_state->pg_pipe_res_update[PG_HUBP][j] = true;
+ update_state->pg_pipe_res_update[PG_DPP][j] = true;
+ }
+ }
+ }
+ }
+
for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) {
if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
update_state->pg_pipe_res_update[PG_DPP][i]) {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 4f78c84da780..c5bca3019de0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -58,6 +58,7 @@
MODULE_FIRMWARE("amdgpu/aldebaran_smc.bin");
MODULE_FIRMWARE("amdgpu/smu_13_0_0.bin");
+MODULE_FIRMWARE("amdgpu/smu_13_0_0_kicker.bin");
MODULE_FIRMWARE("amdgpu/smu_13_0_7.bin");
MODULE_FIRMWARE("amdgpu/smu_13_0_10.bin");
@@ -92,7 +93,7 @@ const int pmfw_decoded_link_width[7] = {0, 1, 2, 4, 8, 12, 16};
int smu_v13_0_init_microcode(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
- char ucode_prefix[15];
+ char ucode_prefix[30];
int err = 0;
const struct smc_firmware_header_v1_0 *hdr;
const struct common_firmware_header *header;
@@ -103,7 +104,10 @@ int smu_v13_0_init_microcode(struct smu_context *smu)
return 0;
amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s.bin", ucode_prefix);
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s_kicker.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s.bin", ucode_prefix);
if (err)
goto out;
diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c
index 6886db2d9e00..8e889a38fad0 100644
--- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c
+++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c
@@ -64,10 +64,11 @@ struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, str
adev->id = ret;
adev->name = "dp_hpd_bridge";
adev->dev.parent = parent;
- adev->dev.of_node = of_node_get(parent->of_node);
adev->dev.release = drm_aux_hpd_bridge_release;
adev->dev.platform_data = of_node_get(np);
+ device_set_of_node_from_dev(&adev->dev, parent);
+
ret = auxiliary_device_init(adev);
if (ret) {
of_node_put(adev->dev.platform_data);
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index 5500767cda7e..4d17d1e1c38b 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -1352,7 +1352,7 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev,
regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG,
HPD_DISABLE, 0);
mutex_unlock(&pdata->comms_mutex);
- };
+ }
drm_bridge_add(&pdata->bridge);
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index ca42e6081d27..16dea3f2fb11 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -401,6 +401,49 @@ drm_get_buddy(struct drm_buddy_block *block)
EXPORT_SYMBOL(drm_get_buddy);
/**
+ * drm_buddy_reset_clear - reset blocks clear state
+ *
+ * @mm: DRM buddy manager
+ * @is_clear: blocks clear state
+ *
+ * Reset the clear state based on @is_clear value for each block
+ * in the freelist.
+ */
+void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
+{
+ u64 root_size, size, start;
+ unsigned int order;
+ int i;
+
+ size = mm->size;
+ for (i = 0; i < mm->n_roots; ++i) {
+ order = ilog2(size) - ilog2(mm->chunk_size);
+ start = drm_buddy_block_offset(mm->roots[i]);
+ __force_merge(mm, start, start + size, order);
+
+ root_size = mm->chunk_size << order;
+ size -= root_size;
+ }
+
+ for (i = 0; i <= mm->max_order; ++i) {
+ struct drm_buddy_block *block;
+
+ list_for_each_entry_reverse(block, &mm->free_list[i], link) {
+ if (is_clear != drm_buddy_block_is_clear(block)) {
+ if (is_clear) {
+ mark_cleared(block);
+ mm->clear_avail += drm_buddy_block_size(mm, block);
+ } else {
+ clear_reset(block);
+ mm->clear_avail -= drm_buddy_block_size(mm, block);
+ }
+ }
+ }
+ }
+}
+EXPORT_SYMBOL(drm_buddy_reset_clear);
+
+/**
* drm_buddy_free_block - free a block
*
* @mm: DRM buddy manager
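drm_buddy_reset_clear() gives drivers a bulk toggle for the per-block cleared state: it force-merges the freelist back toward the root blocks, then walks every free order marking (or unmarking) blocks while keeping mm->clear_avail in sync. One plausible use, sketched with a hypothetical hook name, is dropping the clear state after a power transition that may have scrambled VRAM contents:

/* Hypothetical resume hook: VRAM content is no longer trustworthy,
 * so stop treating any free block as already cleared.
 */
static void vram_mgr_resume(struct drm_buddy *mm)
{
	drm_buddy_reset_clear(mm, false);
}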
diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index 888aadb6a4ac..d6550b54fac1 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -860,11 +860,23 @@ void drm_framebuffer_free(struct kref *kref)
int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
const struct drm_framebuffer_funcs *funcs)
{
+ unsigned int i;
int ret;
+ bool exists;
if (WARN_ON_ONCE(fb->dev != dev || !fb->format))
return -EINVAL;
+ for (i = 0; i < fb->format->num_planes; i++) {
+ if (drm_WARN_ON_ONCE(dev, fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)))
+ fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i);
+ if (fb->obj[i]) {
+ exists = drm_gem_object_handle_get_if_exists_unlocked(fb->obj[i]);
+ if (exists)
+ fb->internal_flags |= DRM_FRAMEBUFFER_HAS_HANDLE_REF(i);
+ }
+ }
+
INIT_LIST_HEAD(&fb->filp_head);
fb->funcs = funcs;
@@ -873,7 +885,7 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
ret = __drm_mode_object_add(dev, &fb->base, DRM_MODE_OBJECT_FB,
false, drm_framebuffer_free);
if (ret)
- goto out;
+ goto err;
mutex_lock(&dev->mode_config.fb_lock);
dev->mode_config.num_fb++;
@@ -881,7 +893,16 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
mutex_unlock(&dev->mode_config.fb_lock);
drm_mode_object_register(dev, &fb->base);
-out:
+
+ return 0;
+
+err:
+ for (i = 0; i < fb->format->num_planes; i++) {
+ if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)) {
+ drm_gem_object_handle_put_unlocked(fb->obj[i]);
+ fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i);
+ }
+ }
return ret;
}
EXPORT_SYMBOL(drm_framebuffer_init);
@@ -958,6 +979,12 @@ EXPORT_SYMBOL(drm_framebuffer_unregister_private);
void drm_framebuffer_cleanup(struct drm_framebuffer *fb)
{
struct drm_device *dev = fb->dev;
+ unsigned int i;
+
+ for (i = 0; i < fb->format->num_planes; i++) {
+ if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i))
+ drm_gem_object_handle_put_unlocked(fb->obj[i]);
+ }
mutex_lock(&dev->mode_config.fb_lock);
list_del(&fb->head);
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 426d0867882d..9e8a4da313a0 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -186,6 +186,46 @@ void drm_gem_private_object_fini(struct drm_gem_object *obj)
}
EXPORT_SYMBOL(drm_gem_private_object_fini);
+static void drm_gem_object_handle_get(struct drm_gem_object *obj)
+{
+ struct drm_device *dev = obj->dev;
+
+ drm_WARN_ON(dev, !mutex_is_locked(&dev->object_name_lock));
+
+ if (obj->handle_count++ == 0)
+ drm_gem_object_get(obj);
+}
+
+/**
+ * drm_gem_object_handle_get_if_exists_unlocked - acquire reference on user-space handle, if any
+ * @obj: GEM object
+ *
+ * Acquires a reference on the GEM buffer object's handle. Required to keep
+ * the GEM object alive. Call drm_gem_object_handle_put_unlocked()
+ * to release the reference. Does nothing if the buffer object has no handle.
+ *
+ * Returns:
+ * True if a handle exists, or false otherwise
+ */
+bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj)
+{
+ struct drm_device *dev = obj->dev;
+
+ guard(mutex)(&dev->object_name_lock);
+
+ /*
+ * First ref taken during GEM object creation, if any. Some
+ * drivers set up internal framebuffers with GEM objects that
+ * do not have a GEM handle. Hence, this counter can be zero.
+ */
+ if (!obj->handle_count)
+ return false;
+
+ drm_gem_object_handle_get(obj);
+
+ return true;
+}
+
/**
* drm_gem_object_handle_free - release resources bound to userspace handles
* @obj: GEM object to clean up.
@@ -216,20 +256,26 @@ static void drm_gem_object_exported_dma_buf_free(struct drm_gem_object *obj)
}
}
-static void
-drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj)
+/**
+ * drm_gem_object_handle_put_unlocked - releases reference on user-space handle
+ * @obj: GEM object
+ *
+ * Releases a reference on the GEM buffer object's handle. Possibly releases
+ * the GEM buffer object and associated dma-buf objects.
+ */
+void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj)
{
struct drm_device *dev = obj->dev;
bool final = false;
- if (WARN_ON(READ_ONCE(obj->handle_count) == 0))
+ if (drm_WARN_ON(dev, READ_ONCE(obj->handle_count) == 0))
return;
/*
- * Must bump handle count first as this may be the last
- * ref, in which case the object would disappear before we
- * checked for a name
- */
+ * Must bump handle count first as this may be the last
+ * ref, in which case the object would disappear before
+ * we checked for a name.
+ */
mutex_lock(&dev->object_name_lock);
if (--obj->handle_count == 0) {
@@ -253,6 +299,9 @@ drm_gem_object_release_handle(int id, void *ptr, void *data)
struct drm_file *file_priv = data;
struct drm_gem_object *obj = ptr;
+ if (drm_WARN_ON(obj->dev, !data))
+ return 0;
+
if (obj->funcs->close)
obj->funcs->close(obj, file_priv);
@@ -363,8 +412,8 @@ drm_gem_handle_create_tail(struct drm_file *file_priv,
int ret;
WARN_ON(!mutex_is_locked(&dev->object_name_lock));
- if (obj->handle_count++ == 0)
- drm_gem_object_get(obj);
+
+ drm_gem_object_handle_get(obj);
/*
* Get the user-visible handle using idr. Preload and perform
@@ -373,7 +422,7 @@ drm_gem_handle_create_tail(struct drm_file *file_priv,
idr_preload(GFP_KERNEL);
spin_lock(&file_priv->table_lock);
- ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT);
+ ret = idr_alloc(&file_priv->object_idr, NULL, 1, 0, GFP_NOWAIT);
spin_unlock(&file_priv->table_lock);
idr_preload_end();
@@ -394,6 +443,11 @@ drm_gem_handle_create_tail(struct drm_file *file_priv,
goto err_revoke;
}
+ /* mirrors drm_gem_handle_delete to avoid races */
+ spin_lock(&file_priv->table_lock);
+ obj = idr_replace(&file_priv->object_idr, obj, handle);
+ WARN_ON(obj != NULL);
+ spin_unlock(&file_priv->table_lock);
*handlep = handle;
return 0;
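The handle-creation fix reserves the id with a NULL pointer and publishes the object via idr_replace() only after setup has succeeded, so concurrent lookups see an empty slot rather than a partially set-up object. The reserve-then-publish core, reduced to a sketch with hypothetical locking context:

#include <linux/idr.h>
#include <linux/spinlock.h>

static int publish_object(struct idr *idr, spinlock_t *lock, void *obj)
{
	int id;

	idr_preload(GFP_KERNEL);
	spin_lock(lock);
	id = idr_alloc(idr, NULL, 1, 0, GFP_NOWAIT); /* reserve, publish nothing */
	spin_unlock(lock);
	idr_preload_end();
	if (id < 0)
		return id;

	/* ... fallible setup; on failure idr_remove(idr, id) and bail ... */

	spin_lock(lock);
	WARN_ON(idr_replace(idr, obj, id) != NULL);  /* publish atomically */
	spin_unlock(lock);

	return id;
}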
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 1705bfc90b1e..98b73c581c42 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -153,6 +153,8 @@ void drm_sysfs_lease_event(struct drm_device *dev);
/* drm_gem.c */
int drm_gem_init(struct drm_device *dev);
+bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj);
+void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj);
int drm_gem_handle_create_tail(struct drm_file *file_priv,
struct drm_gem_object *obj,
u32 *handlep);
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index 0d185c0564b9..9eeba254cf45 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -601,6 +601,10 @@ static irqreturn_t decon_irq_handler(int irq, void *dev_id)
if (!ctx->drm_dev)
goto out;
+ /* check if crtc and vblank have been initialized properly */
+ if (!drm_dev_has_vblank(ctx->drm_dev))
+ goto out;
+
if (!ctx->i80_if) {
drm_crtc_handle_vblank(&ctx->crtc->base);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index f57df8c48139..05e4a5a63f5d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -187,6 +187,7 @@ struct fimd_context {
u32 i80ifcon;
bool i80_if;
bool suspended;
+ bool dp_clk_enabled;
wait_queue_head_t wait_vsync_queue;
atomic_t wait_vsync_event;
atomic_t win_updated;
@@ -1047,7 +1048,18 @@ static void fimd_dp_clock_enable(struct exynos_drm_clk *clk, bool enable)
struct fimd_context *ctx = container_of(clk, struct fimd_context,
dp_clk);
u32 val = enable ? DP_MIE_CLK_DP_ENABLE : DP_MIE_CLK_DISABLE;
+
+ if (enable == ctx->dp_clk_enabled)
+ return;
+
+ if (enable)
+ pm_runtime_resume_and_get(ctx->dev);
+
+ ctx->dp_clk_enabled = enable;
writel(val, ctx->regs + DP_MIE_CLKCON);
+
+ if (!enable)
+ pm_runtime_put(ctx->dev);
}
static const struct exynos_drm_crtc_ops fimd_crtc_ops = {
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 45cca965c11b..af80f1ac8880 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1506,6 +1506,12 @@ int intel_dp_rate_select(struct intel_dp *intel_dp, int rate)
void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock,
u8 *link_bw, u8 *rate_select)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+
+ /* FIXME g4x can't generate an exact 2.7GHz with the 96MHz non-SSC refclk */
+ if (IS_G4X(i915) && port_clock == 268800)
+ port_clock = 270000;
+
/* eDP 1.4 rate select method. */
if (intel_dp->use_rate_select) {
*link_bw = 0;
@@ -4300,6 +4306,24 @@ intel_dp_mst_disconnect(struct intel_dp *intel_dp)
static bool
intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *esi)
{
+ struct intel_display *display = to_intel_display(intel_dp);
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+
+ /*
+ * Display WA for HSD #13013007775: mtl/arl/lnl
+ * Read the sink count and link service IRQ registers in separate
+ * transactions to prevent disconnecting the sink on a TBT link
+ * inadvertently.
+ */
+ if (IS_DISPLAY_VER(display, 14, 20) && !IS_BATTLEMAGE(i915)) {
+ if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT_ESI, esi, 3) != 3)
+ return false;
+
+ /* DP_SINK_COUNT_ESI + 3 == DP_LINK_SERVICE_IRQ_VECTOR_ESI0 */
+ return drm_dp_dpcd_readb(&intel_dp->aux, DP_LINK_SERVICE_IRQ_VECTOR_ESI0,
+ &esi[3]) == 1;
+ }
+
return drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT_ESI, esi, 4) == 4;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c
index 1e925c75fb08..c43febc862dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gsc.c
+++ b/drivers/gpu/drm/i915/gt/intel_gsc.c
@@ -284,7 +284,7 @@ static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id)
if (gt->gsc.intf[intf_id].irq < 0)
return;
- ret = generic_handle_irq(gt->gsc.intf[intf_id].irq);
+ ret = generic_handle_irq_safe(gt->gsc.intf[intf_id].irq);
if (ret)
gt_err_ratelimited(gt, "error handling GSC irq: %d\n", ret);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 72277bc8322e..f84fa09cdb33 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -575,7 +575,6 @@ static int ring_context_alloc(struct intel_context *ce)
/* One ringbuffer to rule them all */
GEM_BUG_ON(!engine->legacy.ring);
ce->ring = engine->legacy.ring;
- ce->timeline = intel_timeline_get(engine->legacy.timeline);
GEM_BUG_ON(ce->state);
if (engine->context_size) {
@@ -588,6 +587,8 @@ static int ring_context_alloc(struct intel_context *ce)
ce->state = vma;
}
+ ce->timeline = intel_timeline_get(engine->legacy.timeline);
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index acae30a04a94..0122719ee921 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -73,8 +73,8 @@ static int igt_add_request(void *arg)
/* Basic preliminary test to create a request and let it loose! */
request = mock_request(rcs0(i915)->kernel_context, HZ / 10);
- if (!request)
- return -ENOMEM;
+ if (IS_ERR(request))
+ return PTR_ERR(request);
i915_request_add(request);
@@ -91,8 +91,8 @@ static int igt_wait_request(void *arg)
/* Submit a request, then wait upon it */
request = mock_request(rcs0(i915)->kernel_context, T);
- if (!request)
- return -ENOMEM;
+ if (IS_ERR(request))
+ return PTR_ERR(request);
i915_request_get(request);
@@ -160,8 +160,8 @@ static int igt_fence_wait(void *arg)
/* Submit a request, treat it as a fence and wait upon it */
request = mock_request(rcs0(i915)->kernel_context, T);
- if (!request)
- return -ENOMEM;
+ if (IS_ERR(request))
+ return PTR_ERR(request);
if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
pr_err("fence wait success before submit (expected timeout)!\n");
@@ -219,8 +219,8 @@ static int igt_request_rewind(void *arg)
GEM_BUG_ON(IS_ERR(ce));
request = mock_request(ce, 2 * HZ);
intel_context_put(ce);
- if (!request) {
- err = -ENOMEM;
+ if (IS_ERR(request)) {
+ err = PTR_ERR(request);
goto err_context_0;
}
@@ -237,8 +237,8 @@ static int igt_request_rewind(void *arg)
GEM_BUG_ON(IS_ERR(ce));
vip = mock_request(ce, 0);
intel_context_put(ce);
- if (!vip) {
- err = -ENOMEM;
+ if (IS_ERR(vip)) {
+ err = PTR_ERR(vip);
goto err_context_1;
}
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
index 09f747228dff..1b0cf073e964 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.c
+++ b/drivers/gpu/drm/i915/selftests/mock_request.c
@@ -35,7 +35,7 @@ mock_request(struct intel_context *ce, unsigned long delay)
/* NB the i915->requests slab cache is enlarged to fit mock_request */
request = intel_context_create_request(ce);
if (IS_ERR(request))
- return NULL;
+ return request;
request->mock.delay = delay;
return request;
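The selftest updates follow from mock_request() no longer flattening an ERR_PTR into NULL: a function should return either NULL on failure or an encoded errno, never a mix, and every caller must test with the matching predicate. The shape of the convention, with hypothetical types:

#include <linux/err.h>

struct mock_ctx;			/* hypothetical, for illustration */
struct mock_req;
struct mock_req *mock_create(struct mock_ctx *ctx);

static int run_mock(struct mock_ctx *ctx)
{
	struct mock_req *rq = mock_create(ctx);

	if (IS_ERR(rq))			/* not: if (!rq) */
		return PTR_ERR(rq);	/* propagate the encoded errno */

	/* ... use rq ... */
	return 0;
}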
diff --git a/drivers/gpu/drm/imagination/pvr_power.c b/drivers/gpu/drm/imagination/pvr_power.c
index ba7816fd28ec..850b318605da 100644
--- a/drivers/gpu/drm/imagination/pvr_power.c
+++ b/drivers/gpu/drm/imagination/pvr_power.c
@@ -363,13 +363,13 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
if (!err) {
if (hard_reset) {
pvr_dev->fw_dev.booted = false;
- WARN_ON(pm_runtime_force_suspend(from_pvr_device(pvr_dev)->dev));
+ WARN_ON(pvr_power_device_suspend(from_pvr_device(pvr_dev)->dev));
err = pvr_fw_hard_reset(pvr_dev);
if (err)
goto err_device_lost;
- err = pm_runtime_force_resume(from_pvr_device(pvr_dev)->dev);
+ err = pvr_power_device_resume(from_pvr_device(pvr_dev)->dev);
pvr_dev->fw_dev.booted = true;
if (err)
goto err_device_lost;
diff --git a/drivers/gpu/drm/mediatek/mtk_crtc.c b/drivers/gpu/drm/mediatek/mtk_crtc.c
index 8f6fba4217ec..bc7527542fdc 100644
--- a/drivers/gpu/drm/mediatek/mtk_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_crtc.c
@@ -719,6 +719,39 @@ int mtk_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane,
return 0;
}
+void mtk_crtc_plane_disable(struct drm_crtc *crtc, struct drm_plane *plane)
+{
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+ struct mtk_crtc *mtk_crtc = to_mtk_crtc(crtc);
+ struct mtk_plane_state *plane_state = to_mtk_plane_state(plane->state);
+ int i;
+
+ /* no need to wait when the plane is disabled by the CPU */
+ if (!mtk_crtc->cmdq_client.chan)
+ return;
+
+ if (!mtk_crtc->enabled)
+ return;
+
+ /* set pending plane state to disabled */
+ for (i = 0; i < mtk_crtc->layer_nr; i++) {
+ struct drm_plane *mtk_plane = &mtk_crtc->planes[i];
+ struct mtk_plane_state *mtk_plane_state = to_mtk_plane_state(mtk_plane->state);
+
+ if (mtk_plane->index == plane->index) {
+ memcpy(mtk_plane_state, plane_state, sizeof(*plane_state));
+ break;
+ }
+ }
+ mtk_crtc_update_config(mtk_crtc, false);
+
+ /* wait for planes to be disabled by CMDQ */
+ wait_event_timeout(mtk_crtc->cb_blocking_queue,
+ mtk_crtc->cmdq_vblank_cnt == 0,
+ msecs_to_jiffies(500));
+#endif
+}
+
void mtk_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane,
struct drm_atomic_state *state)
{
@@ -930,7 +963,8 @@ static int mtk_crtc_init_comp_planes(struct drm_device *drm_dev,
mtk_ddp_comp_supported_rotations(comp),
mtk_ddp_comp_get_blend_modes(comp),
mtk_ddp_comp_get_formats(comp),
- mtk_ddp_comp_get_num_formats(comp), i);
+ mtk_ddp_comp_get_num_formats(comp),
+ mtk_ddp_comp_is_afbc_supported(comp), i);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/mediatek/mtk_crtc.h b/drivers/gpu/drm/mediatek/mtk_crtc.h
index 388e900b6f4d..828f109b83e7 100644
--- a/drivers/gpu/drm/mediatek/mtk_crtc.h
+++ b/drivers/gpu/drm/mediatek/mtk_crtc.h
@@ -21,6 +21,7 @@ int mtk_crtc_create(struct drm_device *drm_dev, const unsigned int *path,
unsigned int num_conn_routes);
int mtk_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane,
struct mtk_plane_state *state);
+void mtk_crtc_plane_disable(struct drm_crtc *crtc, struct drm_plane *plane);
void mtk_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane,
struct drm_atomic_state *plane_state);
struct device *mtk_crtc_dma_dev_get(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/mediatek/mtk_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_ddp_comp.c
index edc6417639e6..ac6620e10262 100644
--- a/drivers/gpu/drm/mediatek/mtk_ddp_comp.c
+++ b/drivers/gpu/drm/mediatek/mtk_ddp_comp.c
@@ -366,6 +366,7 @@ static const struct mtk_ddp_comp_funcs ddp_ovl = {
.get_blend_modes = mtk_ovl_get_blend_modes,
.get_formats = mtk_ovl_get_formats,
.get_num_formats = mtk_ovl_get_num_formats,
+ .is_afbc_supported = mtk_ovl_is_afbc_supported,
};
static const struct mtk_ddp_comp_funcs ddp_postmask = {
diff --git a/drivers/gpu/drm/mediatek/mtk_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_ddp_comp.h
index 39720b27f4e9..7289b3dcf22f 100644
--- a/drivers/gpu/drm/mediatek/mtk_ddp_comp.h
+++ b/drivers/gpu/drm/mediatek/mtk_ddp_comp.h
@@ -83,6 +83,7 @@ struct mtk_ddp_comp_funcs {
u32 (*get_blend_modes)(struct device *dev);
const u32 *(*get_formats)(struct device *dev);
size_t (*get_num_formats)(struct device *dev);
+ bool (*is_afbc_supported)(struct device *dev);
void (*connect)(struct device *dev, struct device *mmsys_dev, unsigned int next);
void (*disconnect)(struct device *dev, struct device *mmsys_dev, unsigned int next);
void (*add)(struct device *dev, struct mtk_mutex *mutex);
@@ -294,6 +295,14 @@ size_t mtk_ddp_comp_get_num_formats(struct mtk_ddp_comp *comp)
return 0;
}
+static inline bool mtk_ddp_comp_is_afbc_supported(struct mtk_ddp_comp *comp)
+{
+ if (comp->funcs && comp->funcs->is_afbc_supported)
+ return comp->funcs->is_afbc_supported(comp->dev);
+
+ return false;
+}
+
static inline bool mtk_ddp_comp_add(struct mtk_ddp_comp *comp, struct mtk_mutex *mutex)
{
if (comp->funcs && comp->funcs->add) {
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
index 04154db9085c..c0f7f77e0574 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h
+++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
@@ -106,6 +106,7 @@ void mtk_ovl_disable_vblank(struct device *dev);
u32 mtk_ovl_get_blend_modes(struct device *dev);
const u32 *mtk_ovl_get_formats(struct device *dev);
size_t mtk_ovl_get_num_formats(struct device *dev);
+bool mtk_ovl_is_afbc_supported(struct device *dev);
void mtk_ovl_adaptor_add_comp(struct device *dev, struct mtk_mutex *mutex);
void mtk_ovl_adaptor_remove_comp(struct device *dev, struct mtk_mutex *mutex);
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
index 19b0d5083981..ca4a9a60b890 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
@@ -236,6 +236,13 @@ size_t mtk_ovl_get_num_formats(struct device *dev)
return ovl->data->num_formats;
}
+bool mtk_ovl_is_afbc_supported(struct device *dev)
+{
+ struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
+
+ return ovl->data->supports_afbc;
+}
+
int mtk_ovl_clk_enable(struct device *dev)
{
struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
diff --git a/drivers/gpu/drm/mediatek/mtk_plane.c b/drivers/gpu/drm/mediatek/mtk_plane.c
index 8a48b3b0a956..74c2704efb66 100644
--- a/drivers/gpu/drm/mediatek/mtk_plane.c
+++ b/drivers/gpu/drm/mediatek/mtk_plane.c
@@ -285,9 +285,14 @@ static void mtk_plane_atomic_disable(struct drm_plane *plane,
struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state,
plane);
struct mtk_plane_state *mtk_plane_state = to_mtk_plane_state(new_state);
+ struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state,
+ plane);
+
mtk_plane_state->pending.enable = false;
wmb(); /* Make sure the above parameter is set before update */
mtk_plane_state->pending.dirty = true;
+
+ mtk_crtc_plane_disable(old_state->crtc, plane);
}
static void mtk_plane_atomic_update(struct drm_plane *plane,
@@ -321,7 +326,8 @@ static const struct drm_plane_helper_funcs mtk_plane_helper_funcs = {
int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane,
unsigned long possible_crtcs, enum drm_plane_type type,
unsigned int supported_rotations, const u32 blend_modes,
- const u32 *formats, size_t num_formats, unsigned int plane_idx)
+ const u32 *formats, size_t num_formats,
+ bool supports_afbc, unsigned int plane_idx)
{
int err;
@@ -332,7 +338,9 @@ int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane,
err = drm_universal_plane_init(dev, plane, possible_crtcs,
&mtk_plane_funcs, formats,
- num_formats, modifiers, type, NULL);
+ num_formats,
+ supports_afbc ? modifiers : NULL,
+ type, NULL);
if (err) {
DRM_ERROR("failed to initialize plane\n");
return err;
diff --git a/drivers/gpu/drm/mediatek/mtk_plane.h b/drivers/gpu/drm/mediatek/mtk_plane.h
index 3b13b89989c7..95c5fa5295d8 100644
--- a/drivers/gpu/drm/mediatek/mtk_plane.h
+++ b/drivers/gpu/drm/mediatek/mtk_plane.h
@@ -49,5 +49,6 @@ to_mtk_plane_state(struct drm_plane_state *state)
int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane,
unsigned long possible_crtcs, enum drm_plane_type type,
unsigned int supported_rotations, const u32 blend_modes,
- const u32 *formats, size_t num_formats, unsigned int plane_idx);
+ const u32 *formats, size_t num_formats,
+ bool supports_afbc, unsigned int plane_idx);
#endif
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index f775638d239a..4b3a8ee8e278 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -85,6 +85,15 @@ void __msm_gem_submit_destroy(struct kref *kref)
container_of(kref, struct msm_gem_submit, ref);
unsigned i;
+ /*
+ * In error paths, we could unref the submit without calling
+ * drm_sched_entity_push_job(), so msm_job_free() will never
+ * get called. Since drm_sched_job_cleanup() will NULL out
+ * s_fence, we can use that to detect this case.
+ */
+ if (submit->base.s_fence)
+ drm_sched_job_cleanup(&submit->base);
+
if (submit->fence_id) {
spin_lock(&submit->queue->idr_lock);
idr_remove(&submit->queue->fence_idr, submit->fence_id);
@@ -658,6 +667,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
struct msm_ringbuffer *ring;
struct msm_submit_post_dep *post_deps = NULL;
struct drm_syncobj **syncobjs_to_reset = NULL;
+ struct sync_file *sync_file = NULL;
int out_fence_fd = -1;
unsigned i;
int ret;
@@ -868,7 +878,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
}
if (ret == 0 && args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
- struct sync_file *sync_file = sync_file_create(submit->user_fence);
+ sync_file = sync_file_create(submit->user_fence);
if (!sync_file) {
ret = -ENOMEM;
} else {
@@ -902,8 +912,11 @@ out:
out_unlock:
mutex_unlock(&queue->lock);
out_post_unlock:
- if (ret && (out_fence_fd >= 0))
+ if (ret && (out_fence_fd >= 0)) {
put_unused_fd(out_fence_fd);
+ if (sync_file)
+ fput(sync_file->file);
+ }
if (!IS_ERR_OR_NULL(submit)) {
msm_gem_submit_put(submit);
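The msm change makes the error path symmetric with fd_install(): until the fd is installed, the descriptor owns no reference, so both the reserved fd slot and the sync_file (which holds its own fence reference) must be released individually. A condensed sketch of the idiom:

#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/sync_file.h>

static int export_fence_fd(struct dma_fence *fence)
{
	struct sync_file *sf;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		return fd;

	sf = sync_file_create(fence);	/* takes its own fence reference */
	if (!sf) {
		put_unused_fd(fd);	/* fd was never installed */
		return -ENOMEM;
	}

	fd_install(fd, sf->file);	/* fd now owns the sync_file */
	return fd;
}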
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index fc84ca214f24..3ad4f6e9a8ac 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -1454,7 +1454,6 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps)
union acpi_object argv4 = {
.buffer.type = ACPI_TYPE_BUFFER,
.buffer.length = 4,
- .buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL),
}, *obj;
caps->status = 0xffff;
@@ -1462,17 +1461,22 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps)
if (!acpi_check_dsm(handle, &NVOP_DSM_GUID, NVOP_DSM_REV, BIT_ULL(0x1a)))
return;
+ argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL);
+ if (!argv4.buffer.pointer)
+ return;
+
obj = acpi_evaluate_dsm(handle, &NVOP_DSM_GUID, NVOP_DSM_REV, 0x1a, &argv4);
if (!obj)
- return;
+ goto done;
if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) ||
WARN_ON(obj->buffer.length != 4))
- return;
+ goto done;
caps->status = 0;
caps->optimusCaps = *(u32 *)obj->buffer.pointer;
+done:
ACPI_FREE(obj);
kfree(argv4.buffer.pointer);
@@ -1489,24 +1493,28 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt)
union acpi_object argv4 = {
.buffer.type = ACPI_TYPE_BUFFER,
.buffer.length = sizeof(caps),
- .buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL),
}, *obj;
jt->status = 0xffff;
+ argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL);
+ if (!argv4.buffer.pointer)
+ return;
+
obj = acpi_evaluate_dsm(handle, &JT_DSM_GUID, JT_DSM_REV, 0x1, &argv4);
if (!obj)
- return;
+ goto done;
if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) ||
WARN_ON(obj->buffer.length != 4))
- return;
+ goto done;
jt->status = 0;
jt->jtCaps = *(u32 *)obj->buffer.pointer;
jt->jtRevId = (jt->jtCaps & 0xfff00000) >> 20;
jt->bSBIOSCaps = 0;
+done:
ACPI_FREE(obj);
kfree(argv4.buffer.pointer);
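The nouveau change fixes two defects in one move: the removed initializer allocated the DSM argument buffer while reading argv4.buffer.length from the very object being initialized (initializer-list expressions are indeterminately sequenced, so the length may not be stored yet), and the kmalloc() result was never checked. The corrected shape, reduced to its essentials:

#include <linux/acpi.h>
#include <linux/slab.h>

static void eval_dsm_arg(void)
{
	union acpi_object argv4 = {
		.buffer.type   = ACPI_TYPE_BUFFER,
		.buffer.length = 4,
		/* do NOT kmalloc() here: .buffer.length may not be set yet */
	};

	argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL);
	if (!argv4.buffer.pointer)
		return;

	/* ... acpi_evaluate_dsm(...), then kfree(argv4.buffer.pointer) ... */
}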
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index c9c50e3b18a2..3e75fc1f6607 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -368,17 +368,6 @@ void drm_sched_entity_destroy(struct drm_sched_entity *entity)
}
EXPORT_SYMBOL(drm_sched_entity_destroy);
-/* drm_sched_entity_clear_dep - callback to clear the entities dependency */
-static void drm_sched_entity_clear_dep(struct dma_fence *f,
- struct dma_fence_cb *cb)
-{
- struct drm_sched_entity *entity =
- container_of(cb, struct drm_sched_entity, cb);
-
- entity->dependency = NULL;
- dma_fence_put(f);
-}
-
/*
* drm_sched_entity_clear_dep - callback to clear the entities dependency and
* wake up scheduler
@@ -389,7 +378,8 @@ static void drm_sched_entity_wakeup(struct dma_fence *f,
struct drm_sched_entity *entity =
container_of(cb, struct drm_sched_entity, cb);
- drm_sched_entity_clear_dep(f, cb);
+ entity->dependency = NULL;
+ dma_fence_put(f);
drm_sched_wakeup(entity->rq->sched);
}
@@ -442,13 +432,6 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
fence = dma_fence_get(&s_fence->scheduled);
dma_fence_put(entity->dependency);
entity->dependency = fence;
- if (!dma_fence_add_callback(fence, &entity->cb,
- drm_sched_entity_clear_dep))
- return true;
-
- /* Ignore it when it is already scheduled */
- dma_fence_put(fence);
- return false;
}
if (!dma_fence_add_callback(entity->dependency, &entity->cb,
diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c
index 4860790666af..14ef61b44f47 100644
--- a/drivers/gpu/drm/tegra/nvdec.c
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -261,10 +261,8 @@ static int nvdec_load_falcon_firmware(struct nvdec *nvdec)
if (!client->group) {
virt = dma_alloc_coherent(nvdec->dev, size, &iova, GFP_KERNEL);
-
- err = dma_mapping_error(nvdec->dev, iova);
- if (err < 0)
- return err;
+ if (!virt)
+ return -ENOMEM;
} else {
virt = tegra_drm_alloc(tegra, size, &iova);
if (IS_ERR(virt))
diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c
index d19e10289428..07abaf27315f 100644
--- a/drivers/gpu/drm/tiny/simpledrm.c
+++ b/drivers/gpu/drm/tiny/simpledrm.c
@@ -284,7 +284,7 @@ static struct simpledrm_device *simpledrm_device_of_dev(struct drm_device *dev)
static void simpledrm_device_release_clocks(void *res)
{
- struct simpledrm_device *sdev = simpledrm_device_of_dev(res);
+ struct simpledrm_device *sdev = res;
unsigned int i;
for (i = 0; i < sdev->clk_count; ++i) {
@@ -382,7 +382,7 @@ static int simpledrm_device_init_clocks(struct simpledrm_device *sdev)
static void simpledrm_device_release_regulators(void *res)
{
- struct simpledrm_device *sdev = simpledrm_device_of_dev(res);
+ struct simpledrm_device *sdev = res;
unsigned int i;
for (i = 0; i < sdev->regulator_count; ++i) {
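The simpledrm fix hinges on what a devres release callback actually receives: devm_add_action() hands the callback exactly the cookie it was registered with, so funnelling that void pointer through a drm_device-based container_of() helper computed a bogus address. A sketch of the contract, with hypothetical names:

#include <linux/device.h>

struct my_state {			/* hypothetical, for illustration */
	int clk_count;
	/* ... */
};

static void my_release(void *res)
{
	struct my_state *state = res;	/* res IS the registered pointer */

	/* ... undo setup using state ... */
}

static int my_init(struct device *dev, struct my_state *state)
{
	/* whatever pointer is registered here arrives in my_release() */
	return devm_add_action_or_reset(dev, my_release, state);
}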
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 3c07f4712d5c..b600be2a5c84 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -254,6 +254,13 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
ret = dma_resv_trylock(&fbo->base.base._resv);
WARN_ON(!ret);
+ ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
+ if (ret) {
+ dma_resv_unlock(&fbo->base.base._resv);
+ kfree(fbo);
+ return ret;
+ }
+
if (fbo->base.resource) {
ttm_resource_set_bo(fbo->base.resource, &fbo->base);
bo->resource = NULL;
@@ -262,12 +269,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
fbo->base.bulk_move = NULL;
}
- ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
- if (ret) {
- kfree(fbo);
- return ret;
- }
-
ttm_bo_get(bo);
fbo->bo = bo;
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 75b4725d49c7..d4b0549205c2 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -95,6 +95,12 @@ struct v3d_perfmon {
u64 values[] __counted_by(ncounters);
};
+enum v3d_irq {
+ V3D_CORE_IRQ,
+ V3D_HUB_IRQ,
+ V3D_MAX_IRQS,
+};
+
struct v3d_dev {
struct drm_device drm;
@@ -106,6 +112,8 @@ struct v3d_dev {
bool single_irq_line;
+ int irq[V3D_MAX_IRQS];
+
struct v3d_perfmon_info perfmon_info;
void __iomem *hub_regs;
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index da8faf3b9011..6b6ba7a68fcb 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -118,6 +118,8 @@ v3d_reset(struct v3d_dev *v3d)
if (false)
v3d_idle_axi(v3d, 0);
+ v3d_irq_disable(v3d);
+
v3d_idle_gca(v3d);
v3d_reset_v3d(v3d);
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index 72b6a119412f..b98e1a4b33c7 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -228,7 +228,7 @@ v3d_hub_irq(int irq, void *arg)
int
v3d_irq_init(struct v3d_dev *v3d)
{
- int irq1, ret, core;
+ int irq, ret, core;
INIT_WORK(&v3d->overflow_mem_work, v3d_overflow_mem_work);
@@ -239,17 +239,24 @@ v3d_irq_init(struct v3d_dev *v3d)
V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver));
V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS(v3d->ver));
- irq1 = platform_get_irq_optional(v3d_to_pdev(v3d), 1);
- if (irq1 == -EPROBE_DEFER)
- return irq1;
- if (irq1 > 0) {
- ret = devm_request_irq(v3d->drm.dev, irq1,
+ irq = platform_get_irq_optional(v3d_to_pdev(v3d), 1);
+ if (irq == -EPROBE_DEFER)
+ return irq;
+ if (irq > 0) {
+ v3d->irq[V3D_CORE_IRQ] = irq;
+
+ ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_CORE_IRQ],
v3d_irq, IRQF_SHARED,
"v3d_core0", v3d);
if (ret)
goto fail;
- ret = devm_request_irq(v3d->drm.dev,
- platform_get_irq(v3d_to_pdev(v3d), 0),
+
+ irq = platform_get_irq(v3d_to_pdev(v3d), 0);
+ if (irq < 0)
+ return irq;
+ v3d->irq[V3D_HUB_IRQ] = irq;
+
+ ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_HUB_IRQ],
v3d_hub_irq, IRQF_SHARED,
"v3d_hub", v3d);
if (ret)
@@ -257,8 +264,12 @@ v3d_irq_init(struct v3d_dev *v3d)
} else {
v3d->single_irq_line = true;
- ret = devm_request_irq(v3d->drm.dev,
- platform_get_irq(v3d_to_pdev(v3d), 0),
+ irq = platform_get_irq(v3d_to_pdev(v3d), 0);
+ if (irq < 0)
+ return irq;
+ v3d->irq[V3D_CORE_IRQ] = irq;
+
+ ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_CORE_IRQ],
v3d_irq, IRQF_SHARED,
"v3d", v3d);
if (ret)
@@ -299,6 +310,12 @@ v3d_irq_disable(struct v3d_dev *v3d)
V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~0);
V3D_WRITE(V3D_HUB_INT_MSK_SET, ~0);
+ /* Finish any interrupt handler still in flight. */
+ for (int i = 0; i < V3D_MAX_IRQS; i++) {
+ if (v3d->irq[i])
+ synchronize_irq(v3d->irq[i]);
+ }
+
/* Clear any pending interrupts we might have left. */
for (core = 0; core < v3d->cores; core++)
V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver));
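Storing the IRQ numbers is what enables the ordered teardown added to v3d_irq_disable(): mask the sources in hardware, let any in-flight handler drain with synchronize_irq(), then clear whatever latched in the meantime. The skeleton of that sequence, with placeholder register offsets:

#include <linux/interrupt.h>
#include <linux/io.h>

#define INT_MSK_SET	0x0	/* placeholder offsets, for illustration */
#define INT_CLR		0x4

static void irqs_disable(void __iomem *regs, const int *irqs, int n)
{
	int i;

	writel(~0, regs + INT_MSK_SET);		/* 1. mask every source */

	for (i = 0; i < n; i++)			/* 2. drain running handlers */
		if (irqs[i])
			synchronize_irq(irqs[i]);

	writel(~0, regs + INT_CLR);		/* 3. clear latched leftovers */
}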
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 7bbe46a98ff1..93e742c1f21e 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
config DRM_XE
tristate "Intel Xe Graphics"
- depends on DRM && PCI && MMU && (m || (y && KUNIT=y))
+ depends on DRM && PCI && MMU
+ depends on KUNIT || !KUNIT
select INTERVAL_TREE
# we need shmfs for the swappable backing store, and in particular
# the shmem_readpage() which depends upon tmpfs
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
index 8f86a16dc577..f58198cf2cf6 100644
--- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
@@ -52,6 +52,7 @@ struct guc_ct_buffer_desc {
#define GUC_CTB_STATUS_OVERFLOW (1 << 0)
#define GUC_CTB_STATUS_UNDERFLOW (1 << 1)
#define GUC_CTB_STATUS_MISMATCH (1 << 2)
+#define GUC_CTB_STATUS_DISABLED (1 << 3)
u32 reserved[13];
} __packed;
static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
index cb6c7598824b..9c4cf050059a 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
@@ -29,7 +29,7 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe,
bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe),
NULL, size, start, end,
- ttm_bo_type_kernel, flags);
+ ttm_bo_type_kernel, flags, 0);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
bo = NULL;
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
index f99d901a3214..9f941fc2e36b 100644
--- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -17,10 +17,7 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
{
- struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
-
iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
- xe_device_l2_flush(xe);
}
u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
@@ -30,12 +27,9 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
{
- struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
-
WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));
iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size);
- xe_device_l2_flush(xe);
}
bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
@@ -48,11 +42,12 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d
if (!vma)
return false;
+ /* Set scanout flag for WC mapping */
obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
NULL, PAGE_ALIGN(size),
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
- XE_BO_FLAG_GGTT);
+ XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT);
if (IS_ERR(obj)) {
kfree(vma);
return false;
@@ -73,5 +68,12 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
{
- /* TODO: add xe specific flush_map() for dsb buffer object. */
+ struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
+
+ /*
+ * The memory barrier here is to ensure coherency of DSB vs MMIO,
+	 * both for weakly ordered archs and discrete cards.
+ */
+ xe_device_wmb(xe);
+ xe_device_l2_flush(xe);
}
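
With the per-access L2 flushes removed above, callers are expected to batch their DSB writes and flush once via intel_dsb_buffer_flush_map() before the buffer is executed. A sketch of a hypothetical caller (demo_fill_dsb is illustrative, not code from this series):

static void demo_fill_dsb(struct intel_dsb_buffer *dsb_buf,
			  const u32 *cmds, u32 n_cmds)
{
	u32 i;

	for (i = 0; i < n_cmds; i++)
		intel_dsb_buffer_write(dsb_buf, i, cmds[i]);

	/* one wmb + L2 flush for the whole batch, not one per dword */
	intel_dsb_buffer_flush_map(dsb_buf);
}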
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index b58fc4ba2aac..0558b106f8b6 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -153,7 +153,10 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
}
vma->dpt = dpt;
- vma->node = dpt->ggtt_node;
+ vma->node = dpt->ggtt_node[tile0->id];
+
+ /* Ensure DPT writes are flushed */
+ xe_device_l2_flush(xe);
return 0;
}
@@ -203,8 +206,8 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
align = max_t(u32, align, SZ_64K);
- if (bo->ggtt_node && view->type == I915_GTT_VIEW_NORMAL) {
- vma->node = bo->ggtt_node;
+ if (bo->ggtt_node[ggtt->tile->id] && view->type == I915_GTT_VIEW_NORMAL) {
+ vma->node = bo->ggtt_node[ggtt->tile->id];
} else if (view->type == I915_GTT_VIEW_NORMAL) {
u32 x, size = bo->ttm.base.size;
@@ -318,8 +321,6 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
if (ret)
goto err_unpin;
- /* Ensure DPT writes are flushed */
- xe_device_l2_flush(xe);
return vma;
err_unpin:
@@ -333,10 +334,12 @@ err:
static void __xe_unpin_fb_vma(struct i915_vma *vma)
{
+ u8 tile_id = vma->node->ggtt->tile->id;
+
if (vma->dpt)
xe_bo_unpin_map_no_vm(vma->dpt);
- else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node) ||
- vma->bo->ggtt_node->base.start != vma->node->base.start)
+ else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node[tile_id]) ||
+ vma->bo->ggtt_node[tile_id]->base.start != vma->node->base.start)
xe_ggtt_node_remove(vma->node, false);
ttm_bo_reserve(&vma->bo->ttm, false, false, NULL);
diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
index 23f7dc5bbe99..51fd40ffafcb 100644
--- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -128,7 +128,7 @@ struct xe_reg_mcr {
* options.
*/
#define XE_REG_MCR(r_, ...) ((const struct xe_reg_mcr){ \
- .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \
+ .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \
})
static inline bool xe_reg_is_valid(struct xe_reg r)
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
index 61a7d20ce42b..bf3f97d0c9c7 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -43,14 +43,12 @@ static void read_l3cc_table(struct xe_gt *gt,
{
struct kunit *test = kunit_get_current_test();
u32 l3cc, l3cc_expected;
- unsigned int fw_ref, i;
+ unsigned int i;
u32 reg_val;
+	int ret;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- KUNIT_ASSERT_TRUE_MSG(test, true, "Forcewake Failed.\n");
- }
+ ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
for (i = 0; i < info->num_mocs_regs; i++) {
if (!(i & 1)) {
@@ -74,7 +72,7 @@ static void read_l3cc_table(struct xe_gt *gt,
KUNIT_EXPECT_EQ_MSG(test, l3cc_expected, l3cc,
"l3cc idx=%u has incorrect val.\n", i);
}
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
}
static void read_mocs_table(struct xe_gt *gt,
@@ -82,14 +80,15 @@ static void read_mocs_table(struct xe_gt *gt,
{
struct kunit *test = kunit_get_current_test();
u32 mocs, mocs_expected;
- unsigned int fw_ref, i;
+ unsigned int i;
u32 reg_val;
+	int ret;
KUNIT_EXPECT_TRUE_MSG(test, info->unused_entries_index,
"Unused entries index should have been defined\n");
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n");
+ ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
for (i = 0; i < info->num_mocs_regs; i++) {
if (regs_are_mcr(gt))
@@ -107,7 +106,7 @@ static void read_mocs_table(struct xe_gt *gt,
"mocs reg 0x%x has incorrect val.\n", i);
}
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}
static int mocs_kernel_test_run_device(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 8acc4640f0a2..5f745d9ed6cc 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1130,6 +1130,8 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
{
struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+ struct xe_tile *tile;
+ u8 id;
if (bo->ttm.base.import_attach)
drm_prime_gem_destroy(&bo->ttm.base, NULL);
@@ -1137,8 +1139,9 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
- if (bo->ggtt_node && bo->ggtt_node->base.size)
- xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
+ for_each_tile(tile, xe, id)
+ if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size)
+ xe_ggtt_remove_bo(tile->mem.ggtt, bo);
#ifdef CONFIG_PROC_FS
if (bo->client)
@@ -1308,6 +1311,10 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
return ERR_PTR(-EINVAL);
}
+	/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT to also be set */
+ if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
+ return ERR_PTR(-EINVAL);
+
if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
!(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
@@ -1454,7 +1461,8 @@ static struct xe_bo *
__xe_bo_create_locked(struct xe_device *xe,
struct xe_tile *tile, struct xe_vm *vm,
size_t size, u64 start, u64 end,
- u16 cpu_caching, enum ttm_bo_type type, u32 flags)
+ u16 cpu_caching, enum ttm_bo_type type, u32 flags,
+ u64 alignment)
{
struct xe_bo *bo = NULL;
int err;
@@ -1483,6 +1491,8 @@ __xe_bo_create_locked(struct xe_device *xe,
if (IS_ERR(bo))
return bo;
+ bo->min_align = alignment;
+
/*
	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
* to ensure the shared resv doesn't disappear under the bo, the bo
@@ -1495,19 +1505,29 @@ __xe_bo_create_locked(struct xe_device *xe,
bo->vm = vm;
if (bo->flags & XE_BO_FLAG_GGTT) {
- if (!tile && flags & XE_BO_FLAG_STOLEN)
- tile = xe_device_get_root_tile(xe);
+ struct xe_tile *t;
+ u8 id;
- xe_assert(xe, tile);
+ if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) {
+ if (!tile && flags & XE_BO_FLAG_STOLEN)
+ tile = xe_device_get_root_tile(xe);
- if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
- err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
- start + bo->size, U64_MAX);
- } else {
- err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
+ xe_assert(xe, tile);
+ }
+
+ for_each_tile(t, xe, id) {
+ if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t)))
+ continue;
+
+ if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
+ err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
+ start + bo->size, U64_MAX);
+ } else {
+ err = xe_ggtt_insert_bo(t->mem.ggtt, bo);
+ }
+ if (err)
+ goto err_unlock_put_bo;
}
- if (err)
- goto err_unlock_put_bo;
}
return bo;
@@ -1523,16 +1543,18 @@ struct xe_bo *
xe_bo_create_locked_range(struct xe_device *xe,
struct xe_tile *tile, struct xe_vm *vm,
size_t size, u64 start, u64 end,
- enum ttm_bo_type type, u32 flags)
+ enum ttm_bo_type type, u32 flags, u64 alignment)
{
- return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags);
+ return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
+ flags, alignment);
}
struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
enum ttm_bo_type type, u32 flags)
{
- return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags);
+ return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
+ flags, 0);
}
struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
@@ -1542,7 +1564,7 @@ struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
{
struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
cpu_caching, ttm_bo_type_device,
- flags | XE_BO_FLAG_USER);
+ flags | XE_BO_FLAG_USER, 0);
if (!IS_ERR(bo))
xe_bo_unlock_vm_held(bo);
@@ -1566,6 +1588,17 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile
size_t size, u64 offset,
enum ttm_bo_type type, u32 flags)
{
+ return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset,
+ type, flags, 0);
+}
+
+struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
+ struct xe_tile *tile,
+ struct xe_vm *vm,
+ size_t size, u64 offset,
+ enum ttm_bo_type type, u32 flags,
+ u64 alignment)
+{
struct xe_bo *bo;
int err;
u64 start = offset == ~0ull ? 0 : offset;
@@ -1576,7 +1609,8 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile
flags |= XE_BO_FLAG_GGTT;
bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
- flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ flags | XE_BO_FLAG_NEEDS_CPU_ACCESS,
+ alignment);
if (IS_ERR(bo))
return bo;
@@ -2355,14 +2389,18 @@ void xe_bo_put_commit(struct llist_head *deferred)
void xe_bo_put(struct xe_bo *bo)
{
+ struct xe_tile *tile;
+ u8 id;
+
might_sleep();
if (bo) {
#ifdef CONFIG_PROC_FS
if (bo->client)
might_lock(&bo->client->bos_lock);
#endif
- if (bo->ggtt_node && bo->ggtt_node->ggtt)
- might_lock(&bo->ggtt_node->ggtt->lock);
+ for_each_tile(tile, xe_bo_device(bo), id)
+ if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt)
+ might_lock(&bo->ggtt_node[id]->ggtt->lock);
drm_gem_object_put(&bo->ttm.base);
}
}
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index d22269a230aa..d04159c59846 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -39,10 +39,22 @@
#define XE_BO_FLAG_NEEDS_64K BIT(15)
#define XE_BO_FLAG_NEEDS_2M BIT(16)
#define XE_BO_FLAG_GGTT_INVALIDATE BIT(17)
+#define XE_BO_FLAG_GGTT0 BIT(18)
+#define XE_BO_FLAG_GGTT1 BIT(19)
+#define XE_BO_FLAG_GGTT2 BIT(20)
+#define XE_BO_FLAG_GGTT3 BIT(21)
+#define XE_BO_FLAG_GGTT_ALL (XE_BO_FLAG_GGTT0 | \
+ XE_BO_FLAG_GGTT1 | \
+ XE_BO_FLAG_GGTT2 | \
+ XE_BO_FLAG_GGTT3)
+
/* this one is triggered internally only */
#define XE_BO_FLAG_INTERNAL_TEST BIT(30)
#define XE_BO_FLAG_INTERNAL_64K BIT(31)
+#define XE_BO_FLAG_GGTTx(tile) \
+ (XE_BO_FLAG_GGTT0 << (tile)->id)
+
#define XE_PTE_SHIFT 12
#define XE_PAGE_SIZE (1 << XE_PTE_SHIFT)
#define XE_PTE_MASK (XE_PAGE_SIZE - 1)
@@ -77,7 +89,7 @@ struct xe_bo *
xe_bo_create_locked_range(struct xe_device *xe,
struct xe_tile *tile, struct xe_vm *vm,
size_t size, u64 start, u64 end,
- enum ttm_bo_type type, u32 flags);
+ enum ttm_bo_type type, u32 flags, u64 alignment);
struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
enum ttm_bo_type type, u32 flags);
@@ -94,6 +106,12 @@ struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size, u64 offset,
enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
+ struct xe_tile *tile,
+ struct xe_vm *vm,
+ size_t size, u64 offset,
+ enum ttm_bo_type type, u32 flags,
+ u64 alignment);
struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
const void *data, size_t size,
enum ttm_bo_type type, u32 flags);
@@ -188,14 +206,24 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size)
}
static inline u32
-xe_bo_ggtt_addr(struct xe_bo *bo)
+__xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id)
{
- if (XE_WARN_ON(!bo->ggtt_node))
+ struct xe_ggtt_node *ggtt_node = bo->ggtt_node[tile_id];
+
+ if (XE_WARN_ON(!ggtt_node))
return 0;
- XE_WARN_ON(bo->ggtt_node->base.size > bo->size);
- XE_WARN_ON(bo->ggtt_node->base.start + bo->ggtt_node->base.size > (1ull << 32));
- return bo->ggtt_node->base.start;
+ XE_WARN_ON(ggtt_node->base.size > bo->size);
+ XE_WARN_ON(ggtt_node->base.start + ggtt_node->base.size > (1ull << 32));
+ return ggtt_node->base.start;
+}
+
+static inline u32
+xe_bo_ggtt_addr(struct xe_bo *bo)
+{
+ xe_assert(xe_bo_device(bo), bo->tile);
+
+ return __xe_bo_ggtt_addr(bo, bo->tile->id);
}
int xe_bo_vmap(struct xe_bo *bo);
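
The per-tile flag scheme above packs one GGTT bit per tile id starting at XE_BO_FLAG_GGTT0, so XE_BO_FLAG_GGTTx(tile) is just a shift. A runnable userspace sketch (the XE_BO_FLAG_GGTT value and the tile struct are stand-ins, not the real definitions):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BIT(n)			(1u << (n))
#define XE_BO_FLAG_GGTT		BIT(13)	/* placeholder; real value lives elsewhere in xe_bo.h */
#define XE_BO_FLAG_GGTT0	BIT(18)
#define XE_BO_FLAG_GGTT_ALL	(BIT(18) | BIT(19) | BIT(20) | BIT(21))

struct tile { uint8_t id; };

#define XE_BO_FLAG_GGTTx(t)	(XE_BO_FLAG_GGTT0 << (t)->id)

int main(void)
{
	struct tile t1 = { .id = 1 };
	uint32_t flags = XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTTx(&t1);

	/* the validation rule added in ___xe_bo_create_locked() */
	assert(!((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)));
	printf("tile1 flag: 0x%x\n", (unsigned int)XE_BO_FLAG_GGTTx(&t1));
	return 0;
}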
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 8fb2be061003..6a40eedd9db1 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -152,11 +152,17 @@ int xe_bo_restore_kernel(struct xe_device *xe)
}
if (bo->flags & XE_BO_FLAG_GGTT) {
- struct xe_tile *tile = bo->tile;
+ struct xe_tile *tile;
+ u8 id;
- mutex_lock(&tile->mem.ggtt->lock);
- xe_ggtt_map_bo(tile->mem.ggtt, bo);
- mutex_unlock(&tile->mem.ggtt->lock);
+ for_each_tile(tile, xe, id) {
+ if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile)))
+ continue;
+
+ mutex_lock(&tile->mem.ggtt->lock);
+ xe_ggtt_map_bo(tile->mem.ggtt, bo);
+ mutex_unlock(&tile->mem.ggtt->lock);
+ }
}
/*
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 2ed558ac2264..aa298d33c250 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -13,6 +13,7 @@
#include <drm/ttm/ttm_execbuf_util.h>
#include <drm/ttm/ttm_placement.h>
+#include "xe_device_types.h"
#include "xe_ggtt_types.h"
struct xe_device;
@@ -39,8 +40,8 @@ struct xe_bo {
struct ttm_place placements[XE_BO_MAX_PLACEMENTS];
/** @placement: current placement for this BO */
struct ttm_placement placement;
- /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */
- struct xe_ggtt_node *ggtt_node;
+ /** @ggtt_node: Array of GGTT nodes if this BO is mapped in the GGTTs */
+ struct xe_ggtt_node *ggtt_node[XE_MAX_TILES_PER_DEVICE];
/** @vmap: iosys map of this buffer */
struct iosys_map vmap;
/** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */
@@ -76,6 +77,11 @@ struct xe_bo {
/** @vram_userfault_link: Link into @mem_access.vram_userfault.list */
struct list_head vram_userfault_link;
+
+	/**
+	 * @min_align: minimum alignment needed for this BO, if different
+	 * from the default
+	 */
+ u64 min_align;
};
#define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 8050938389b6..e412a70323cc 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -197,7 +197,6 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
struct xe_device *xe = coredump_to_xe(coredump);
- unsigned int fw_ref;
/*
* NB: Despite passing a GFP_ flags parameter here, more allocations are done
@@ -211,12 +210,11 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
xe_pm_runtime_get(xe);
/* keep going if fw fails as we still want to save the memory and SW data */
- fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
+ if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL))
xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
xe_vm_snapshot_capture_delayed(ss->vm);
xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
- xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
+ xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
xe_pm_runtime_put(xe);
@@ -243,9 +241,8 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
u32 width_mask = (0x1 << q->width) - 1;
const char *process_name = "no process";
- unsigned int fw_ref;
- bool cookie;
int i;
+ bool cookie;
ss->snapshot_time = ktime_get_real();
ss->boot_time = ktime_get_boottime();
@@ -268,7 +265,8 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
}
/* keep going if fw fails as we still want to save the memory and SW data */
- fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+ if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL))
+ xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
ss->ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
ss->ge = xe_guc_exec_queue_snapshot_capture(q);
@@ -286,7 +284,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
queue_work(system_unbound_wq, &ss->work);
- xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
+ xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
dma_fence_end_signalling(cookie);
}
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 0c3db53b93d8..82da51a6616a 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -37,6 +37,7 @@
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
+#include "xe_guc_pc.h"
#include "xe_hw_engine_group.h"
#include "xe_hwmon.h"
#include "xe_irq.h"
@@ -871,31 +872,37 @@ void xe_device_td_flush(struct xe_device *xe)
if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
return;
- if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
+ gt = xe_root_mmio_gt(xe);
+ if (XE_WA(gt, 16023588340)) {
+ /* A transient flush is not sufficient: flush the L2 */
xe_device_l2_flush(xe);
- return;
- }
-
- for_each_gt(gt, xe, id) {
- if (xe_gt_is_media_type(gt))
- continue;
-
- if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
- return;
-
- xe_mmio_write32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
- /*
- * FIXME: We can likely do better here with our choice of
- * timeout. Currently we just assume the worst case, i.e. 150us,
- * which is believed to be sufficient to cover the worst case
- * scenario on current platforms if all cache entries are
- * transient and need to be flushed..
- */
- if (xe_mmio_wait32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
- 150, NULL, false))
- xe_gt_err_once(gt, "TD flush timeout\n");
-
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ } else {
+ xe_guc_pc_apply_flush_freq_limit(&gt->uc.guc.pc);
+
+ /* Execute TDF flush on all graphics GTs */
+ for_each_gt(gt, xe, id) {
+ if (xe_gt_is_media_type(gt))
+ continue;
+
+ if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
+ return;
+
+ xe_mmio_write32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+ /*
+ * FIXME: We can likely do better here with our choice of
+ * timeout. Currently we just assume the worst case, i.e. 150us,
+ * which is believed to be sufficient to cover the worst case
+ * scenario on current platforms if all cache entries are
+			 * transient and need to be flushed.
+ */
+ if (xe_mmio_wait32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
+ 150, NULL, false))
+ xe_gt_err_once(gt, "TD flush timeout\n");
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ }
+
+ xe_guc_pc_remove_flush_freq_limit(&xe_root_mmio_gt(xe)->uc.guc.pc);
}
}
diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h
index 1608a55edc84..a2577672f4e3 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.h
+++ b/drivers/gpu/drm/xe/xe_force_wake.h
@@ -46,20 +46,4 @@ xe_force_wake_assert_held(struct xe_force_wake *fw,
xe_gt_assert(fw->gt, fw->awake_domains & domain);
}
-/**
- * xe_force_wake_ref_has_domain - verifies if the domains are in fw_ref
- * @fw_ref : the force_wake reference
- * @domain : forcewake domain to verify
- *
- * This function confirms whether the @fw_ref includes a reference to the
- * specified @domain.
- *
- * Return: true if domain is refcounted.
- */
-static inline bool
-xe_force_wake_ref_has_domain(unsigned int fw_ref, enum xe_force_wake_domains domain)
-{
- return fw_ref & domain;
-}
-
#endif
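
With xe_force_wake_ref_has_domain() gone, callers across this series revert to the errno-returning convention: check the result of get(), and pass the same domain mask to put(). A minimal sketch of the resulting call shape (demo_with_forcewake is illustrative):

static int demo_with_forcewake(struct xe_gt *gt)
{
	int err;

	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
	if (err)
		return err;

	/* ... MMIO access that requires the GT domain awake ... */

	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
	return 0;
}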
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index e9820126feb9..76e1092f51d9 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -605,10 +605,10 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
u64 start;
u64 offset, pte;
- if (XE_WARN_ON(!bo->ggtt_node))
+ if (XE_WARN_ON(!bo->ggtt_node[ggtt->tile->id]))
return;
- start = bo->ggtt_node->base.start;
+ start = bo->ggtt_node[ggtt->tile->id]->base.start;
for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) {
pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index);
@@ -619,15 +619,16 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
u64 start, u64 end)
{
+ u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE;
+ u8 tile_id = ggtt->tile->id;
int err;
- u64 alignment = XE_PAGE_SIZE;
if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
alignment = SZ_64K;
- if (XE_WARN_ON(bo->ggtt_node)) {
+ if (XE_WARN_ON(bo->ggtt_node[tile_id])) {
/* Someone's already inserted this BO in the GGTT */
- xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size);
+ xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size);
return 0;
}
@@ -637,19 +638,19 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
- bo->ggtt_node = xe_ggtt_node_init(ggtt);
- if (IS_ERR(bo->ggtt_node)) {
- err = PTR_ERR(bo->ggtt_node);
- bo->ggtt_node = NULL;
+ bo->ggtt_node[tile_id] = xe_ggtt_node_init(ggtt);
+ if (IS_ERR(bo->ggtt_node[tile_id])) {
+ err = PTR_ERR(bo->ggtt_node[tile_id]);
+ bo->ggtt_node[tile_id] = NULL;
goto out;
}
mutex_lock(&ggtt->lock);
- err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size,
- alignment, 0, start, end, 0);
+ err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base,
+ bo->size, alignment, 0, start, end, 0);
if (err) {
- xe_ggtt_node_fini(bo->ggtt_node);
- bo->ggtt_node = NULL;
+ xe_ggtt_node_fini(bo->ggtt_node[tile_id]);
+ bo->ggtt_node[tile_id] = NULL;
} else {
xe_ggtt_map_bo(ggtt, bo);
}
@@ -698,13 +699,15 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
*/
void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
- if (XE_WARN_ON(!bo->ggtt_node))
+ u8 tile_id = ggtt->tile->id;
+
+ if (XE_WARN_ON(!bo->ggtt_node[tile_id]))
return;
/* This BO is not currently in the GGTT */
- xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size);
+ xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size);
- xe_ggtt_node_remove(bo->ggtt_node,
+ xe_ggtt_node_remove(bo->ggtt_node[tile_id],
bo->flags & XE_BO_FLAG_GGTT_INVALIDATE);
}
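
The alignment selection in __xe_ggtt_insert_bo_at() now honors a per-BO min_align, with the 64K VRAM granule still taking precedence. A runnable userspace sketch of just that decision (constants mirror the kernel's, the function name is illustrative):

#include <stdint.h>
#include <stdio.h>

#define XE_PAGE_SIZE	4096ull
#define SZ_64K		65536ull

static uint64_t pick_alignment(uint64_t min_align, int is_vram, int ggtt_64k)
{
	uint64_t alignment = min_align > 0 ? min_align : XE_PAGE_SIZE;

	/* matches the code above: the 64K granule overrides min_align */
	if (is_vram && ggtt_64k)
		alignment = SZ_64K;
	return alignment;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)pick_alignment(0, 1, 1));		/* 65536 */
	printf("%llu\n", (unsigned long long)pick_alignment(2ull << 20, 0, 0));	/* 2 MiB */
	return 0;
}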
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 231ed53cf907..3b53d46aad54 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -98,14 +98,14 @@ void xe_gt_sanitize(struct xe_gt *gt)
static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
{
- unsigned int fw_ref;
u32 reg;
+ int err;
if (!XE_WA(gt, 16023588340))
return;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (WARN_ON(err))
return;
if (!xe_gt_is_media_type(gt)) {
@@ -115,13 +115,13 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
}
xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}
static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
{
- unsigned int fw_ref;
u32 reg;
+ int err;
if (!XE_WA(gt, 16023588340))
return;
@@ -129,15 +129,15 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
if (xe_gt_is_media_type(gt))
return;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (WARN_ON(err))
return;
reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
reg &= ~CG_DIS_CNTLBUS;
xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}
/**
@@ -389,6 +389,8 @@ int xe_gt_init_early(struct xe_gt *gt)
if (err)
return err;
+ xe_mocs_init_early(gt);
+
return 0;
}
@@ -405,14 +407,11 @@ static void dump_pat_on_error(struct xe_gt *gt)
static int gt_fw_domain_init(struct xe_gt *gt)
{
- unsigned int fw_ref;
int err, i;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref) {
- err = -ETIMEDOUT;
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
goto err_hw_fence_irq;
- }
if (!xe_gt_is_media_type(gt)) {
err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
@@ -447,12 +446,14 @@ static int gt_fw_domain_init(struct xe_gt *gt)
*/
gt->info.gmdid = xe_mmio_read32(gt, GMD_ID);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ XE_WARN_ON(err);
+
return 0;
err_force_wake:
dump_pat_on_error(gt);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
err_hw_fence_irq:
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
@@ -462,14 +463,11 @@ err_hw_fence_irq:
static int all_fw_domain_init(struct xe_gt *gt)
{
- unsigned int fw_ref;
int err, i;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
- err = -ETIMEDOUT;
- goto err_force_wake;
- }
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (err)
+ goto err_hw_fence_irq;
xe_gt_mcr_set_implicit_defaults(gt);
xe_wa_process_gt(gt);
@@ -535,12 +533,14 @@ static int all_fw_domain_init(struct xe_gt *gt)
if (IS_SRIOV_PF(gt_to_xe(gt)))
xe_gt_sriov_pf_init_hw(gt);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ XE_WARN_ON(err);
return 0;
err_force_wake:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+err_hw_fence_irq:
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
@@ -553,12 +553,11 @@ err_force_wake:
*/
int xe_gt_init_hwconfig(struct xe_gt *gt)
{
- unsigned int fw_ref;
int err;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
- return -ETIMEDOUT;
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ goto out;
xe_gt_mcr_init_early(gt);
xe_pat_init(gt);
@@ -576,7 +575,8 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
xe_gt_enable_host_l2_vram(gt);
out_fw:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+out:
return err;
}
@@ -592,17 +592,15 @@ int xe_gt_init(struct xe_gt *gt)
xe_hw_fence_irq_init(&gt->fence_irq[i]);
}
- err = xe_gt_pagefault_init(gt);
+ err = xe_gt_sysfs_init(gt);
if (err)
return err;
- xe_mocs_init_early(gt);
-
- err = xe_gt_sysfs_init(gt);
+ err = gt_fw_domain_init(gt);
if (err)
return err;
- err = gt_fw_domain_init(gt);
+ err = xe_gt_pagefault_init(gt);
if (err)
return err;
@@ -746,7 +744,6 @@ static int do_gt_restart(struct xe_gt *gt)
static int gt_reset(struct xe_gt *gt)
{
- unsigned int fw_ref;
int err;
if (xe_device_wedged(gt_to_xe(gt)))
@@ -767,11 +764,12 @@ static int gt_reset(struct xe_gt *gt)
xe_gt_sanitize(gt);
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
- err = -ETIMEDOUT;
- goto err_out;
- }
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (err)
+ goto err_msg;
+
+ if (IS_SRIOV_PF(gt_to_xe(gt)))
+ xe_gt_sriov_pf_stop_prepare(gt);
xe_uc_gucrc_disable(&gt->uc);
xe_uc_stop_prepare(&gt->uc);
@@ -789,7 +787,8 @@ static int gt_reset(struct xe_gt *gt)
if (err)
goto err_out;
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ XE_WARN_ON(err);
xe_pm_runtime_put(gt_to_xe(gt));
xe_gt_info(gt, "reset done\n");
@@ -797,7 +796,8 @@ static int gt_reset(struct xe_gt *gt)
return 0;
err_out:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
XE_WARN_ON(xe_uc_start(&gt->uc));
err_fail:
xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
@@ -829,25 +829,22 @@ void xe_gt_reset_async(struct xe_gt *gt)
void xe_gt_suspend_prepare(struct xe_gt *gt)
{
- unsigned int fw_ref;
-
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
xe_uc_suspend_prepare(&gt->uc);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
}
int xe_gt_suspend(struct xe_gt *gt)
{
- unsigned int fw_ref;
int err;
xe_gt_dbg(gt, "suspending\n");
xe_gt_sanitize(gt);
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (err)
goto err_msg;
err = xe_uc_suspend(&gt->uc);
@@ -858,15 +855,14 @@ int xe_gt_suspend(struct xe_gt *gt)
xe_gt_disable_host_l2_vram(gt);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
xe_gt_dbg(gt, "suspended\n");
return 0;
-err_msg:
- err = -ETIMEDOUT;
err_force_wake:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
return err;
@@ -874,11 +870,9 @@ err_force_wake:
void xe_gt_shutdown(struct xe_gt *gt)
{
- unsigned int fw_ref;
-
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
do_gt_reset(gt);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
}
/**
@@ -903,12 +897,11 @@ int xe_gt_sanitize_freq(struct xe_gt *gt)
int xe_gt_resume(struct xe_gt *gt)
{
- unsigned int fw_ref;
int err;
xe_gt_dbg(gt, "resuming\n");
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (err)
goto err_msg;
err = do_gt_restart(gt);
@@ -917,15 +910,14 @@ int xe_gt_resume(struct xe_gt *gt)
xe_gt_idle_enable_pg(gt);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
xe_gt_dbg(gt, "resumed\n");
return 0;
-err_msg:
- err = -ETIMEDOUT;
err_force_wake:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err));
return err;
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index db540c8be6c7..656c2ab6ca9f 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -432,6 +432,7 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue)
#define PF_MULTIPLIER 8
pf_queue->num_dw =
(num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER;
+ pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw);
#undef PF_MULTIPLIER
pf_queue->gt = gt;
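
Rounding num_dw up to a power of two matters for the usual circular-queue reasons: a power-of-two size is always a whole multiple of the 4-dword PF_MSG_LEN_DW, so a message never straddles the wrap point, and index arithmetic can reduce to masking. A runnable userspace sketch of the effect (roundup_pow_of_two_u32 stands in for the kernel's roundup_pow_of_two()):

#include <stdint.h>
#include <stdio.h>

static uint32_t roundup_pow_of_two_u32(uint32_t n)
{
	uint32_t p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

int main(void)
{
	uint32_t num_dw = (1024 + 67) * 4 * 8;	/* arbitrary non-power-of-two input */
	uint32_t size = roundup_pow_of_two_u32(num_dw);

	/* with a power-of-two size, "(head + n) % size" becomes "(head + n) & (size - 1)" */
	printf("%u -> %u, index mask 0x%x\n", num_dw, size, size - 1);
	return 0;
}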
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
index 905f409db74b..57e9eddc092e 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -5,14 +5,20 @@
#include <drm/drm_managed.h>
+#include "regs/xe_guc_regs.h"
#include "regs/xe_regs.h"
+#include "xe_gt.h"
#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_pf_config.h"
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_helpers.h"
#include "xe_gt_sriov_pf_service.h"
+#include "xe_gt_sriov_printk.h"
#include "xe_mmio.h"
+#include "xe_pm.h"
+
+static void pf_worker_restart_func(struct work_struct *w);
/*
* VF's metadata is maintained in the flexible array where:
@@ -38,6 +44,11 @@ static int pf_alloc_metadata(struct xe_gt *gt)
return 0;
}
+static void pf_init_workers(struct xe_gt *gt)
+{
+ INIT_WORK(&gt->sriov.pf.workers.restart, pf_worker_restart_func);
+}
+
/**
* xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on PF.
* @gt: the &xe_gt to initialize
@@ -62,6 +73,8 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
if (err)
return err;
+ pf_init_workers(gt);
+
return 0;
}
@@ -89,14 +102,111 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt)
xe_gt_sriov_pf_service_update(gt);
}
+static u32 pf_get_vf_regs_stride(struct xe_device *xe)
+{
+ return GRAPHICS_VERx100(xe) > 1200 ? 0x400 : 0x1000;
+}
+
+static struct xe_reg xe_reg_vf_to_pf(struct xe_reg vf_reg, unsigned int vfid, u32 stride)
+{
+ struct xe_reg pf_reg = vf_reg;
+
+ pf_reg.vf = 0;
+ pf_reg.addr += stride * vfid;
+
+ return pf_reg;
+}
+
+static void pf_clear_vf_scratch_regs(struct xe_gt *gt, unsigned int vfid)
+{
+ u32 stride = pf_get_vf_regs_stride(gt_to_xe(gt));
+ struct xe_reg scratch;
+ int n, count;
+
+ if (xe_gt_is_media_type(gt)) {
+ count = MED_VF_SW_FLAG_COUNT;
+ for (n = 0; n < count; n++) {
+ scratch = xe_reg_vf_to_pf(MED_VF_SW_FLAG(n), vfid, stride);
+ xe_mmio_write32(gt, scratch, 0);
+ }
+ } else {
+ count = VF_SW_FLAG_COUNT;
+ for (n = 0; n < count; n++) {
+ scratch = xe_reg_vf_to_pf(VF_SW_FLAG(n), vfid, stride);
+ xe_mmio_write32(gt, scratch, 0);
+ }
+ }
+}
+
/**
- * xe_gt_sriov_pf_restart - Restart SR-IOV support after a GT reset.
+ * xe_gt_sriov_pf_sanitize_hw() - Reset hardware state related to a VF.
* @gt: the &xe_gt
+ * @vfid: the VF identifier
*
* This function can only be called on PF.
*/
-void xe_gt_sriov_pf_restart(struct xe_gt *gt)
+void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ pf_clear_vf_scratch_regs(gt, vfid);
+}
+
+static void pf_cancel_restart(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ if (cancel_work_sync(&gt->sriov.pf.workers.restart))
+ xe_gt_sriov_dbg_verbose(gt, "pending restart canceled!\n");
+}
+
+/**
+ * xe_gt_sriov_pf_stop_prepare() - Prepare to stop SR-IOV support.
+ * @gt: the &xe_gt
+ *
+ * This function can only be called on the PF.
+ */
+void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt)
{
+ pf_cancel_restart(gt);
+}
+
+static void pf_restart(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ xe_pm_runtime_get(xe);
xe_gt_sriov_pf_config_restart(gt);
xe_gt_sriov_pf_control_restart(gt);
+ xe_pm_runtime_put(xe);
+
+ xe_gt_sriov_dbg(gt, "restart completed\n");
+}
+
+static void pf_worker_restart_func(struct work_struct *w)
+{
+ struct xe_gt *gt = container_of(w, typeof(*gt), sriov.pf.workers.restart);
+
+ pf_restart(gt);
+}
+
+static void pf_queue_restart(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ xe_gt_assert(gt, IS_SRIOV_PF(xe));
+
+ if (!queue_work(xe->sriov.wq, &gt->sriov.pf.workers.restart))
+ xe_gt_sriov_dbg(gt, "restart already in queue!\n");
+}
+
+/**
+ * xe_gt_sriov_pf_restart - Restart SR-IOV support after a GT reset.
+ * @gt: the &xe_gt
+ *
+ * This function can only be called on PF.
+ */
+void xe_gt_sriov_pf_restart(struct xe_gt *gt)
+{
+ pf_queue_restart(gt);
}
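
Deferring the restart to a worker keeps the heavy config/control reprovisioning out of the GT reset path; queue_work() naturally coalesces repeated resets, and cancel_work_sync() gives stop_prepare a clean barrier. A kernel-style sketch of the same pattern with illustrative names:

#include <linux/printk.h>
#include <linux/workqueue.h>

struct demo_pf {
	struct work_struct restart;
};

static void demo_restart_func(struct work_struct *w)
{
	struct demo_pf *pf = container_of(w, struct demo_pf, restart);

	/* heavy post-reset fixups run here, out of the reset path */
	(void)pf;
}

static void demo_init(struct demo_pf *pf)
{
	INIT_WORK(&pf->restart, demo_restart_func);
}

static void demo_queue_restart(struct demo_pf *pf)
{
	/* returns false when the work is already pending: resets coalesce */
	if (!queue_work(system_unbound_wq, &pf->restart))
		pr_debug("restart already in queue\n");
}

static void demo_stop_prepare(struct demo_pf *pf)
{
	/* cancels a pending restart and waits out a running one */
	if (cancel_work_sync(&pf->restart))
		pr_debug("pending restart canceled\n");
}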
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
index f0cb726a6919..165ba31d0391 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
@@ -11,6 +11,8 @@ struct xe_gt;
#ifdef CONFIG_PCI_IOV
int xe_gt_sriov_pf_init_early(struct xe_gt *gt);
void xe_gt_sriov_pf_init_hw(struct xe_gt *gt);
+void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid);
+void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt);
void xe_gt_sriov_pf_restart(struct xe_gt *gt);
#else
static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
@@ -22,6 +24,10 @@ static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt)
{
}
+static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt)
+{
+}
+
static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt)
{
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
index 02f7328bd6ce..b4fd5a81aff1 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
@@ -9,6 +9,7 @@
#include "xe_device.h"
#include "xe_gt.h"
+#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_pf_config.h"
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_helpers.h"
@@ -1008,7 +1009,7 @@ static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
return false;
- /* XXX: placeholder */
+ xe_gt_sriov_pf_sanitize_hw(gt, vfid);
pf_enter_vf_flr_send_finish(gt, vfid);
return true;
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
index 28e1b130bf87..a69d128c4f45 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
@@ -32,7 +32,16 @@ struct xe_gt_sriov_metadata {
};
/**
+ * struct xe_gt_sriov_pf_workers - GT level workers used by the PF.
+ */
+struct xe_gt_sriov_pf_workers {
+ /** @restart: worker that executes actions post GT reset */
+ struct work_struct restart;
+};
+
+/**
* struct xe_gt_sriov_pf - GT level PF virtualization data.
+ * @workers: workers data.
* @service: service data.
* @control: control data.
* @policy: policy data.
@@ -40,6 +49,7 @@ struct xe_gt_sriov_metadata {
* @vfs: metadata for all VFs.
*/
struct xe_gt_sriov_pf {
+ struct xe_gt_sriov_pf_workers workers;
struct xe_gt_sriov_pf_service service;
struct xe_gt_sriov_pf_control control;
struct xe_gt_sriov_pf_policy policy;
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 52df28032a6f..96373cdb366b 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -985,7 +985,7 @@ timeout:
BUILD_BUG_ON(FIELD_MAX(GUC_HXG_MSG_0_TYPE) != GUC_HXG_TYPE_RESPONSE_SUCCESS);
BUILD_BUG_ON((GUC_HXG_TYPE_RESPONSE_SUCCESS ^ GUC_HXG_TYPE_RESPONSE_FAILURE) != 1);
- ret = xe_mmio_wait32(gt, reply_reg, resp_mask, resp_mask,
+ ret = xe_mmio_wait32(gt, reply_reg, resp_mask, resp_mask,
1000000, &header, false);
if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
@@ -1175,7 +1175,7 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
- xe_guc_ct_print(&guc->ct, p, false);
+ xe_guc_ct_print(&guc->ct, p);
xe_guc_submit_print(guc, p);
}
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 1f74f6bd50f3..f1ce4e14dcb5 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -25,12 +25,53 @@
#include "xe_gt_sriov_pf_monitor.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_guc.h"
+#include "xe_guc_log.h"
#include "xe_guc_relay.h"
#include "xe_guc_submit.h"
#include "xe_map.h"
#include "xe_pm.h"
#include "xe_trace_guc.h"
+static void receive_g2h(struct xe_guc_ct *ct);
+static void g2h_worker_func(struct work_struct *w);
+static void safe_mode_worker_func(struct work_struct *w);
+static void ct_exit_safe_mode(struct xe_guc_ct *ct);
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+enum {
+ /* Internal states, not error conditions */
+ CT_DEAD_STATE_REARM, /* 0x0001 */
+ CT_DEAD_STATE_CAPTURE, /* 0x0002 */
+
+ /* Error conditions */
+ CT_DEAD_SETUP, /* 0x0004 */
+ CT_DEAD_H2G_WRITE, /* 0x0008 */
+ CT_DEAD_H2G_HAS_ROOM, /* 0x0010 */
+ CT_DEAD_G2H_READ, /* 0x0020 */
+ CT_DEAD_G2H_RECV, /* 0x0040 */
+ CT_DEAD_G2H_RELEASE, /* 0x0080 */
+ CT_DEAD_DEADLOCK, /* 0x0100 */
+ CT_DEAD_PROCESS_FAILED, /* 0x0200 */
+ CT_DEAD_FAST_G2H, /* 0x0400 */
+ CT_DEAD_PARSE_G2H_RESPONSE, /* 0x0800 */
+ CT_DEAD_PARSE_G2H_UNKNOWN, /* 0x1000 */
+ CT_DEAD_PARSE_G2H_ORIGIN, /* 0x2000 */
+ CT_DEAD_PARSE_G2H_TYPE, /* 0x4000 */
+};
+
+static void ct_dead_worker_func(struct work_struct *w);
+static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code);
+
+#define CT_DEAD(ct, ctb, reason_code) ct_dead_capture((ct), (ctb), CT_DEAD_##reason_code)
+#else
+#define CT_DEAD(ct, ctb, reason) \
+ do { \
+ struct guc_ctb *_ctb = (ctb); \
+ if (_ctb) \
+ _ctb->info.broken = true; \
+ } while (0)
+#endif
+
/* Used when a CT send wants to block and / or receive data */
struct g2h_fence {
u32 *response_buffer;
@@ -147,14 +188,11 @@ static void guc_ct_fini(struct drm_device *drm, void *arg)
{
struct xe_guc_ct *ct = arg;
+ ct_exit_safe_mode(ct);
destroy_workqueue(ct->g2h_wq);
xa_destroy(&ct->fence_lookup);
}
-static void receive_g2h(struct xe_guc_ct *ct);
-static void g2h_worker_func(struct work_struct *w);
-static void safe_mode_worker_func(struct work_struct *w);
-
static void primelockdep(struct xe_guc_ct *ct)
{
if (!IS_ENABLED(CONFIG_LOCKDEP))
@@ -182,7 +220,11 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
spin_lock_init(&ct->fast_lock);
xa_init(&ct->fence_lookup);
INIT_WORK(&ct->g2h_worker, g2h_worker_func);
- INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func);
+ INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func);
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ spin_lock_init(&ct->dead.lock);
+ INIT_WORK(&ct->dead.worker, ct_dead_worker_func);
+#endif
init_waitqueue_head(&ct->wq);
init_waitqueue_head(&ct->g2h_fence_wq);
@@ -419,10 +461,22 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
if (ct_needs_safe_mode(ct))
ct_enter_safe_mode(ct);
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ /*
+ * The CT has now been reset so the dumper can be re-armed
+ * after any existing dead state has been dumped.
+ */
+ spin_lock_irq(&ct->dead.lock);
+ if (ct->dead.reason)
+ ct->dead.reason |= (1 << CT_DEAD_STATE_REARM);
+ spin_unlock_irq(&ct->dead.lock);
+#endif
+
return 0;
err_out:
xe_gt_err(gt, "Failed to enable GuC CT (%pe)\n", ERR_PTR(err));
+ CT_DEAD(ct, NULL, SETUP);
return err;
}
@@ -469,6 +523,19 @@ static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
if (cmd_len > h2g->info.space) {
h2g->info.head = desc_read(ct_to_xe(ct), h2g, head);
+
+ if (h2g->info.head > h2g->info.size) {
+ struct xe_device *xe = ct_to_xe(ct);
+ u32 desc_status = desc_read(xe, h2g, status);
+
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+
+		xe_gt_err(ct_to_gt(ct), "CT: invalid head offset %u > %u\n",
+ h2g->info.head, h2g->info.size);
+ CT_DEAD(ct, h2g, H2G_HAS_ROOM);
+ return false;
+ }
+
h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
h2g->info.size) -
h2g->info.resv_space;
@@ -524,10 +591,24 @@ static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
{
+ bool bad = false;
+
lockdep_assert_held(&ct->fast_lock);
- xe_gt_assert(ct_to_gt(ct), ct->ctbs.g2h.info.space + g2h_len <=
- ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
- xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding);
+
+ bad = ct->ctbs.g2h.info.space + g2h_len >
+ ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space;
+ bad |= !ct->g2h_outstanding;
+
+ if (bad) {
+ xe_gt_err(ct_to_gt(ct), "Invalid G2H release: %d + %d vs %d - %d -> %d vs %d, outstanding = %d!\n",
+ ct->ctbs.g2h.info.space, g2h_len,
+ ct->ctbs.g2h.info.size, ct->ctbs.g2h.info.resv_space,
+ ct->ctbs.g2h.info.space + g2h_len,
+ ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space,
+ ct->g2h_outstanding);
+ CT_DEAD(ct, &ct->ctbs.g2h, G2H_RELEASE);
+ return;
+ }
ct->ctbs.g2h.info.space += g2h_len;
if (!--ct->g2h_outstanding)
@@ -554,12 +635,43 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
u32 full_len;
struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
tail * sizeof(u32));
+ u32 desc_status;
full_len = len + GUC_CTB_HDR_LEN;
lockdep_assert_held(&ct->lock);
xe_gt_assert(gt, full_len <= GUC_CTB_MSG_MAX_LEN);
- xe_gt_assert(gt, tail <= h2g->info.size);
+
+ desc_status = desc_read(xe, h2g, status);
+ if (desc_status) {
+ xe_gt_err(gt, "CT write: non-zero status: %u\n", desc_status);
+ goto corrupted;
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ u32 desc_tail = desc_read(xe, h2g, tail);
+ u32 desc_head = desc_read(xe, h2g, head);
+
+ if (tail != desc_tail) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_MISMATCH);
+ xe_gt_err(gt, "CT write: tail was modified %u != %u\n", desc_tail, tail);
+ goto corrupted;
+ }
+
+ if (tail > h2g->info.size) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT write: tail out of range: %u vs %u\n",
+ tail, h2g->info.size);
+ goto corrupted;
+ }
+
+ if (desc_head >= h2g->info.size) {
+ desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+			xe_gt_err(gt, "CT write: invalid head offset %u >= %u\n",
+ desc_head, h2g->info.size);
+ goto corrupted;
+ }
+ }
/* Command will wrap, zero fill (NOPs), return and check credits again */
if (tail + full_len > h2g->info.size) {
@@ -612,6 +724,10 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
desc_read(xe, h2g, head), h2g->info.tail);
return 0;
+
+corrupted:
+ CT_DEAD(ct, &ct->ctbs.h2g, H2G_WRITE);
+ return -EPIPE;
}
/*
@@ -719,7 +835,6 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
{
struct xe_device *xe = ct_to_xe(ct);
struct xe_gt *gt = ct_to_gt(ct);
- struct drm_printer p = xe_gt_info_printer(gt);
unsigned int sleep_period_ms = 1;
int ret;
@@ -772,8 +887,13 @@ try_again:
goto broken;
#undef g2h_avail
- if (dequeue_one_g2h(ct) < 0)
+ ret = dequeue_one_g2h(ct);
+ if (ret < 0) {
+ if (ret != -ECANCELED)
+				xe_gt_err(ct_to_gt(ct), "CTB receive failed (%pe)\n",
+ ERR_PTR(ret));
goto broken;
+ }
goto try_again;
}
@@ -782,8 +902,7 @@ try_again:
broken:
xe_gt_err(gt, "No forward process on H2G, reset required\n");
- xe_guc_ct_print(ct, &p, true);
- ct->ctbs.h2g.info.broken = true;
+ CT_DEAD(ct, &ct->ctbs.h2g, DEADLOCK);
return -EDEADLK;
}
@@ -851,7 +970,7 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret)
#define ct_alive(ct) \
(xe_guc_ct_enabled(ct) && !ct->ctbs.h2g.info.broken && \
!ct->ctbs.g2h.info.broken)
- if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5))
+ if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5))
return false;
#undef ct_alive
@@ -1049,6 +1168,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
else
xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n",
type, fence);
+ CT_DEAD(ct, NULL, PARSE_G2H_RESPONSE);
return -EPROTO;
}
@@ -1056,6 +1176,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
g2h_fence = xa_erase(&ct->fence_lookup, fence);
if (unlikely(!g2h_fence)) {
/* Don't tear down channel, as send could've timed out */
+ /* CT_DEAD(ct, NULL, PARSE_G2H_UNKNOWN); */
xe_gt_warn(gt, "G2H fence (%u) not found!\n", fence);
g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
return 0;
@@ -1100,7 +1221,7 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) {
xe_gt_err(gt, "G2H channel broken on read, origin=%u, reset required\n",
origin);
- ct->ctbs.g2h.info.broken = true;
+ CT_DEAD(ct, &ct->ctbs.g2h, PARSE_G2H_ORIGIN);
return -EPROTO;
}
@@ -1118,7 +1239,7 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
default:
xe_gt_err(gt, "G2H channel broken on read, type=%u, reset required\n",
type);
- ct->ctbs.g2h.info.broken = true;
+ CT_DEAD(ct, &ct->ctbs.g2h, PARSE_G2H_TYPE);
ret = -EOPNOTSUPP;
}
@@ -1195,9 +1316,11 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
}
- if (ret)
+ if (ret) {
xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
action, ERR_PTR(ret));
+ CT_DEAD(ct, NULL, PROCESS_FAILED);
+ }
return 0;
}
@@ -1207,7 +1330,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
struct xe_device *xe = ct_to_xe(ct);
struct xe_gt *gt = ct_to_gt(ct);
struct guc_ctb *g2h = &ct->ctbs.g2h;
- u32 tail, head, len;
+ u32 tail, head, len, desc_status;
s32 avail;
u32 action;
u32 *hxg;
@@ -1226,6 +1349,63 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
xe_gt_assert(gt, xe_guc_ct_enabled(ct));
+ desc_status = desc_read(xe, g2h, status);
+ if (desc_status) {
+ if (desc_status & GUC_CTB_STATUS_DISABLED) {
+ /*
+ * Potentially valid if a CLIENT_RESET request resulted in
+ * contexts/engines being reset. But should never happen as
+ * no contexts should be active when CLIENT_RESET is sent.
+ */
+ xe_gt_err(gt, "CT read: unexpected G2H after GuC has stopped!\n");
+ desc_status &= ~GUC_CTB_STATUS_DISABLED;
+ }
+
+ if (desc_status) {
+ xe_gt_err(gt, "CT read: non-zero status: %u\n", desc_status);
+ goto corrupted;
+ }
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ u32 desc_tail = desc_read(xe, g2h, tail);
+		/*
+		 * info.head and desc_head are updated back-to-back at the end of
+		 * this function and nowhere else. Hence, they cannot be different
+		 * unless two g2h_read calls are running concurrently. Which is not
+		 * possible because it is guarded by ct->fast_lock. And yet, some
+		 * discrete platforms are regularly hitting this error :(.
+		 *
+		 * desc_head rolling backwards shouldn't cause any noticeable
+		 * problems - just a delay in GuC being allowed to proceed past that
+		 * point in the queue. So for now, just disable the error until it
+		 * can be root caused.
+		 *
+		u32 desc_head = desc_read(xe, g2h, head);
+
+		if (g2h->info.head != desc_head) {
+			desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_MISMATCH);
+			xe_gt_err(gt, "CT read: head was modified %u != %u\n",
+				  desc_head, g2h->info.head);
+			goto corrupted;
+		}
+		*/
+
+ if (g2h->info.head > g2h->info.size) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+ xe_gt_err(gt, "CT read: head out of range: %u vs %u\n",
+ g2h->info.head, g2h->info.size);
+ goto corrupted;
+ }
+
+ if (desc_tail >= g2h->info.size) {
+ desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
+			xe_gt_err(gt, "CT read: invalid tail offset %u >= %u\n",
+ desc_tail, g2h->info.size);
+ goto corrupted;
+ }
+ }
+
/* Calculate DW available to read */
tail = desc_read(xe, g2h, tail);
avail = tail - g2h->info.head;
@@ -1242,9 +1422,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
if (len > avail) {
xe_gt_err(gt, "G2H channel broken on read, avail=%d, len=%d, reset required\n",
avail, len);
- g2h->info.broken = true;
-
- return -EPROTO;
+ goto corrupted;
}
head = (g2h->info.head + 1) % g2h->info.size;
@@ -1290,6 +1468,10 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
action, len, g2h->info.head, tail);
return len;
+
+corrupted:
+ CT_DEAD(ct, &ct->ctbs.g2h, G2H_READ);
+ return -EPROTO;
}
static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
@@ -1316,9 +1498,11 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
xe_gt_warn(gt, "NOT_POSSIBLE");
}
- if (ret)
+ if (ret) {
xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
action, ERR_PTR(ret));
+ CT_DEAD(ct, NULL, FAST_G2H);
+ }
}
/**
@@ -1378,7 +1562,6 @@ static int dequeue_one_g2h(struct xe_guc_ct *ct)
static void receive_g2h(struct xe_guc_ct *ct)
{
- struct xe_gt *gt = ct_to_gt(ct);
bool ongoing;
int ret;
@@ -1415,9 +1598,8 @@ static void receive_g2h(struct xe_guc_ct *ct)
mutex_unlock(&ct->lock);
if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) {
- struct drm_printer p = xe_gt_info_printer(gt);
-
- xe_guc_ct_print(ct, &p, false);
+			xe_gt_err(ct_to_gt(ct), "CT dequeue failed: %d\n", ret);
+ CT_DEAD(ct, NULL, G2H_RECV);
kick_reset(ct);
}
} while (ret == 1);
@@ -1445,9 +1627,8 @@ static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32),
atomic ? GFP_ATOMIC : GFP_KERNEL);
-
if (!snapshot->cmds) {
- drm_err(&xe->drm, "Skipping CTB commands snapshot. Only CTB info will be available.\n");
+ drm_err(&xe->drm, "Skipping CTB commands snapshot. Only CT info will be available.\n");
return;
}
@@ -1528,7 +1709,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
atomic ? GFP_ATOMIC : GFP_KERNEL);
if (!snapshot) {
- drm_err(&xe->drm, "Skipping CTB snapshot entirely.\n");
+ xe_gt_err(ct_to_gt(ct), "Skipping CTB snapshot entirely.\n");
return NULL;
}
@@ -1592,16 +1773,119 @@ void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
* xe_guc_ct_print - GuC CT Print.
* @ct: GuC CT.
* @p: drm_printer where it will be printed out.
- * @atomic: Boolean to indicate if this is called from atomic context like
- * reset or CTB handler or from some regular path like debugfs.
*
* This function quickly capture a snapshot and immediately print it out.
*/
-void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic)
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
{
struct xe_guc_ct_snapshot *snapshot;
- snapshot = xe_guc_ct_snapshot_capture(ct, atomic);
+ snapshot = xe_guc_ct_snapshot_capture(ct, false);
xe_guc_ct_snapshot_print(snapshot, p);
xe_guc_ct_snapshot_free(snapshot);
}
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code)
+{
+ struct xe_guc_log_snapshot *snapshot_log;
+ struct xe_guc_ct_snapshot *snapshot_ct;
+ struct xe_guc *guc = ct_to_guc(ct);
+ unsigned long flags;
+ bool have_capture;
+
+ if (ctb)
+ ctb->info.broken = true;
+
+ /* Ignore further errors after the first dump until a reset */
+ if (ct->dead.reported)
+ return;
+
+ spin_lock_irqsave(&ct->dead.lock, flags);
+
+ /* And only capture one dump at a time */
+ have_capture = ct->dead.reason & (1 << CT_DEAD_STATE_CAPTURE);
+ ct->dead.reason |= (1 << reason_code) |
+ (1 << CT_DEAD_STATE_CAPTURE);
+
+ spin_unlock_irqrestore(&ct->dead.lock, flags);
+
+ if (have_capture)
+ return;
+
+ snapshot_log = xe_guc_log_snapshot_capture(&guc->log, true);
+	snapshot_ct = xe_guc_ct_snapshot_capture(ct, true);
+
+ spin_lock_irqsave(&ct->dead.lock, flags);
+
+ if (ct->dead.snapshot_log || ct->dead.snapshot_ct) {
+ xe_gt_err(ct_to_gt(ct), "Got unexpected dead CT capture!\n");
+ xe_guc_log_snapshot_free(snapshot_log);
+ xe_guc_ct_snapshot_free(snapshot_ct);
+ } else {
+ ct->dead.snapshot_log = snapshot_log;
+ ct->dead.snapshot_ct = snapshot_ct;
+ }
+
+ spin_unlock_irqrestore(&ct->dead.lock, flags);
+
+	queue_work(system_unbound_wq, &ct->dead.worker);
+}
+
+static void ct_dead_print(struct xe_dead_ct *dead)
+{
+ struct xe_guc_ct *ct = container_of(dead, struct xe_guc_ct, dead);
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ static int g_count;
+ struct drm_printer ip = xe_gt_info_printer(gt);
+ struct drm_printer lp = drm_line_printer(&ip, "Capture", ++g_count);
+
+ if (!dead->reason) {
+ xe_gt_err(gt, "CTB is dead for no reason!?\n");
+ return;
+ }
+
+ drm_printf(&lp, "CTB is dead - reason=0x%X\n", dead->reason);
+
+ /* Can't generate a genuine core dump at this point, so just do the good bits */
+ drm_puts(&lp, "**** Xe Device Coredump ****\n");
+ xe_device_snapshot_print(xe, &lp);
+
+ drm_printf(&lp, "**** GT #%d ****\n", gt->info.id);
+ drm_printf(&lp, "\tTile: %d\n", gt->tile->id);
+
+ drm_puts(&lp, "**** GuC Log ****\n");
+ xe_guc_log_snapshot_print(dead->snapshot_log, &lp);
+
+ drm_puts(&lp, "**** GuC CT ****\n");
+ xe_guc_ct_snapshot_print(dead->snapshot_ct, &lp);
+
+ drm_puts(&lp, "Done.\n");
+}
+
+static void ct_dead_worker_func(struct work_struct *w)
+{
+ struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, dead.worker);
+
+ if (!ct->dead.reported) {
+ ct->dead.reported = true;
+ ct_dead_print(&ct->dead);
+ }
+
+ spin_lock_irq(&ct->dead.lock);
+
+ xe_guc_log_snapshot_free(ct->dead.snapshot_log);
+ ct->dead.snapshot_log = NULL;
+ xe_guc_ct_snapshot_free(ct->dead.snapshot_ct);
+ ct->dead.snapshot_ct = NULL;
+
+ if (ct->dead.reason & (1 << CT_DEAD_STATE_REARM)) {
+ /* A reset has occurred so re-arm the error reporting */
+ ct->dead.reason = 0;
+ ct->dead.reported = false;
+ }
+
+ spin_unlock_irq(&ct->dead.lock);
+}
+#endif
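
The dead-CT capture above is gated so only the first error in a sequence pays for a snapshot, with the REARM bit re-enabling reporting after a reset. A simplified, single-threaded userspace sketch of the reason-bitmask gating (the kernel version additionally takes dead.lock and defers the dump to a worker):

#include <stdbool.h>
#include <stdio.h>

enum { STATE_REARM, STATE_CAPTURE, DEAD_SETUP, DEAD_H2G_WRITE };

static unsigned int reason;
static bool reported;

static void dead_capture(unsigned int code)
{
	bool have_capture;

	if (reported)		/* ignore further errors until a reset */
		return;

	have_capture = reason & (1u << STATE_CAPTURE);
	reason |= (1u << code) | (1u << STATE_CAPTURE);
	if (have_capture)	/* a capture is already in flight */
		return;

	printf("capturing dump, reason=0x%x\n", reason);
	reported = true;	/* the kernel sets this in the worker */
}

int main(void)
{
	dead_capture(DEAD_H2G_WRITE);	/* triggers the one dump */
	dead_capture(DEAD_SETUP);	/* suppressed */
	return 0;
}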
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index 13e316668e90..c7ac9407b861 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -21,7 +21,7 @@ xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic);
void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
struct drm_printer *p);
void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
-void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic);
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct)
{
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index 761cb9031298..85e127ec91d7 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -86,6 +86,24 @@ enum xe_guc_ct_state {
XE_GUC_CT_STATE_ENABLED,
};
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+/**
+ * struct xe_dead_ct - Information for debugging a dead CT
+ */
+struct xe_dead_ct {
+ /** @lock: protects memory allocation/free operations, and @reason updates */
+ spinlock_t lock;
+ /** @reason: bit mask of CT_DEAD_* reason codes */
+ unsigned int reason;
+ /** @reported: for preventing multiple dumps per error sequence */
+ bool reported;
+ /** @worker: worker used to defer the dump out of interrupt context */
+ struct work_struct worker;
+ /** @snapshot_ct: copy of CT state and CTB content at point of error */
+ struct xe_guc_ct_snapshot *snapshot_ct;
+ /** @snapshot_log: copy of GuC log at point of error */
+ struct xe_guc_log_snapshot *snapshot_log;
+};
+#endif
+
/**
* struct xe_guc_ct - GuC command transport (CT) layer
*
@@ -128,6 +146,11 @@ struct xe_guc_ct {
u32 msg[GUC_CTB_MSG_MAX_LEN];
/** @fast_msg: Message buffer */
u32 fast_msg[GUC_CTB_MSG_MAX_LEN];
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ /** @dead: information for debugging dead CTs */
+ struct xe_dead_ct dead;
+#endif
};
#endif
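
The dead.reason field above is a bitmask indexed by reason code, so all CT_DEAD_* codes must fit below the bit width of unsigned int. A minimal sketch of such a layout (only the two *_STATE_* names appear in this diff; the others are hypothetical placeholders):

enum ct_dead_reason_code {
	CT_DEAD_STATE_REARM,	/* set by the reset path to re-arm reporting */
	CT_DEAD_STATE_CAPTURE,	/* a capture is already in flight */
	CT_DEAD_SETUP,		/* hypothetical: CTB setup failed */
	CT_DEAD_H2G_WRITE,	/* hypothetical: host-to-GuC write failed */
	/* one code per distinct failure site, at most 32 in total */
};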
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index f978da8be35c..af02803c145b 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -6,6 +6,9 @@
#include "xe_guc_pc.h"
#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/ktime.h>
+#include <linux/wait_bit.h>
#include <drm/drm_managed.h>
#include <generated/xe_wa_oob.h>
@@ -47,6 +50,12 @@
#define LNL_MERT_FREQ_CAP 800
#define BMG_MERT_FREQ_CAP 2133
+#define BMG_MIN_FREQ 1200
+#define BMG_MERT_FLUSH_FREQ_CAP 2600
+
+#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */
+#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */
+#define SLPC_ACT_FREQ_TIMEOUT_MS 100
/**
* DOC: GuC Power Conservation (PC)
@@ -133,6 +142,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc,
return -ETIMEDOUT;
}
+static int wait_for_flush_complete(struct xe_guc_pc *pc)
+{
+ const unsigned long timeout = msecs_to_jiffies(30);
+
+ if (!wait_var_event_timeout(&pc->flush_freq_limit,
+ !atomic_read(&pc->flush_freq_limit),
+ timeout))
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq)
+{
+ int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC;
+ int slept, wait = 10;
+
+ for (slept = 0; slept < timeout_us;) {
+ if (xe_guc_pc_get_act_freq(pc) <= freq)
+ return 0;
+
+ usleep_range(wait, wait << 1);
+ slept += wait;
+ wait <<= 1;
+ if (slept + wait > timeout_us)
+ wait = timeout_us - slept;
+ }
+
+ return -ETIMEDOUT;
+}
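
/*
 * Illustrative arithmetic, not part of the patch: with wait doubling from
 * 10us, the loop accounts 10 * (2^n - 1) us of sleep after n polls, so the
 * 100ms SLPC_ACT_FREQ_TIMEOUT_MS budget is exhausted after roughly 14
 * polls; the final wait is clamped so slept never overshoots timeout_us.
 */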
static int pc_action_reset(struct xe_guc_pc *pc)
{
struct xe_guc_ct *ct = pc_to_ct(pc);
@@ -584,6 +623,11 @@ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
{
int ret;
+ if (XE_WA(pc_to_gt(pc), 22019338487)) {
+ if (wait_for_flush_complete(pc) != 0)
+ return -EAGAIN;
+ }
+
mutex_lock(&pc->freq_lock);
if (!pc->freq_ready) {
/* Might be in the middle of a gt reset */
@@ -793,6 +837,106 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
return ret;
}
+static bool needs_flush_freq_limit(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+
+ return XE_WA(gt, 22019338487) &&
+ pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP;
+}
+
+/**
+ * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush
+ * @pc: the xe_guc_pc object
+ *
+ * As per WA 22019338487, reduce the max GT frequency during an L2 cache flush.
+ */
+void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ u32 max_freq;
+ int ret;
+
+ if (!needs_flush_freq_limit(pc))
+ return;
+
+ mutex_lock(&pc->freq_lock);
+
+ if (!pc->freq_ready) {
+ mutex_unlock(&pc->freq_lock);
+ return;
+ }
+
+ ret = pc_action_query_task_state(pc);
+ if (ret) {
+ mutex_unlock(&pc->freq_lock);
+ return;
+ }
+
+ max_freq = pc_get_max_freq(pc);
+ if (max_freq > BMG_MERT_FLUSH_FREQ_CAP) {
+ ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP);
+ if (ret) {
+ xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n",
+ BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
+ mutex_unlock(&pc->freq_lock);
+ return;
+ }
+
+ atomic_set(&pc->flush_freq_limit, 1);
+
+ /*
+ * If user has previously changed max freq, stash that value to
+ * restore later, otherwise use the current max. New user
+ * requests wait on flush.
+ */
+ if (pc->user_requested_max != 0)
+ pc->stashed_max_freq = pc->user_requested_max;
+ else
+ pc->stashed_max_freq = max_freq;
+ }
+
+ mutex_unlock(&pc->freq_lock);
+
+ /*
+ * Wait for actual freq to go below the flush cap: even if the previous
+ * max was below cap, the current one might still be above it
+ */
+ ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP);
+ if (ret)
+ xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n",
+ BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
+}
+
+/**
+ * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes.
+ * @pc: the xe_guc_pc object
+ *
+ * Retrieve the previous GT max frequency value.
+ */
+void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ int ret = 0;
+
+ if (!needs_flush_freq_limit(pc))
+ return;
+
+ if (!atomic_read(&pc->flush_freq_limit))
+ return;
+
+ mutex_lock(&pc->freq_lock);
+
+ ret = pc_set_max_freq(pc, pc->stashed_max_freq);
+ if (ret)
+ xe_gt_err_once(gt, "Failed to restore max freq %u:%d",
+ pc->stashed_max_freq, ret);
+
+ atomic_set(&pc->flush_freq_limit, 0);
+ mutex_unlock(&pc->freq_lock);
+ wake_up_var(&pc->flush_freq_limit);
+}
+
static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
{
int ret = 0;
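
The apply/remove pair above is presumably bracketed around the L2 cache flush itself. A sketch of the expected caller shape (xe_device_l2_flush, the WA number gating the flush, and the flush internals are assumptions from my reading of the driver, not shown in this diff):

void xe_device_l2_flush(struct xe_device *xe)
{
	struct xe_gt *gt = xe_root_mmio_gt(xe);

	if (!XE_WA(gt, 16023588340))
		return;

	xe_guc_pc_apply_flush_freq_limit(&gt->uc.guc.pc);
	/*
	 * ... take force wake, trigger the global L2 invalidation/flush,
	 * poll for completion, release force wake ...
	 */
	xe_guc_pc_remove_flush_freq_limit(&gt->uc.guc.pc);
}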
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index efda432fadfc..7154b3aab0d8 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -34,5 +34,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
void xe_guc_pc_init_early(struct xe_guc_pc *pc);
int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc);
+void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc);
+void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc);
#endif /* _XE_GUC_PC_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
index 13810be015db..5b86d91296cb 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -15,6 +15,8 @@
struct xe_guc_pc {
/** @bo: GGTT buffer object that is shared with GuC PC */
struct xe_bo *bo;
+ /** @flush_freq_limit: 1 when max freq changes are limited by driver */
+ atomic_t flush_freq_limit;
/** @rp0_freq: HW RP0 frequency - The Maximum one */
u32 rp0_freq;
/** @rpe_freq: HW RPe frequency - The Efficient one */
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 5f2c368c35ad..14c3a476597a 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -173,7 +173,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
if (ccs_mask & (BIT(0)|BIT(1)))
xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~dmask);
if (ccs_mask & (BIT(2)|BIT(3)))
- xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK, ~dmask);
+ xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK, ~dmask);
}
if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) {
@@ -504,7 +504,7 @@ static void gt_irq_reset(struct xe_tile *tile)
if (ccs_mask & (BIT(0)|BIT(1)))
xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~0);
if (ccs_mask & (BIT(2)|BIT(3)))
- xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0);
+ xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0);
if ((tile->media_gt &&
xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) ||
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index 8999ac511555..485658f69fba 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -78,6 +78,9 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level
}
lmtt_assert(lmtt, xe_bo_is_vram(bo));
+ lmtt_debug(lmtt, "level=%u addr=%#llx\n", level, (u64)xe_bo_main_addr(bo, XE_PAGE_SIZE));
+
+ xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, bo->size);
pt->level = level;
pt->bo = bo;
@@ -91,6 +94,9 @@ out:
static void lmtt_pt_free(struct xe_lmtt_pt *pt)
{
+ lmtt_debug(&pt->bo->tile->sriov.pf.lmtt, "level=%u addr=%#llx\n",
+ pt->level, (u64)xe_bo_main_addr(pt->bo, XE_PAGE_SIZE));
+
xe_bo_unpin_map_no_vm(pt->bo);
kfree(pt);
}
@@ -226,9 +232,14 @@ static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt,
switch (lmtt->ops->lmtt_pte_size(level)) {
case sizeof(u32):
+ lmtt_assert(lmtt, !overflows_type(pte, u32));
+ lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u32), u32));
+
xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte);
break;
case sizeof(u64):
+ lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u64), u64));
+
xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte);
break;
default:
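
Both new asserts are worth unpacking. overflows_type(pte, u32) (from linux/overflow.h) is true when the 64-bit PTE value cannot be represented in the 32-bit entry, so the assert catches silent truncation before the narrowing xe_map_wr(). The !pte || !iosys_map_rd(...) asserts encode "never overwrite a live PTE"; they only hold because lmtt_pt_alloc() now zeroes every freshly allocated page table via xe_map_memset(). A minimal illustration of the overflow check:

u64 pte = BIT_ULL(32) | 0x1;		/* bits above 31 set */
overflows_type(pte, u32);		/* true: value does not fit in u32 */
overflows_type((u64)0xffffffff, u32);	/* false: exactly representable */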
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 6431697c6169..c2da2691fd2b 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -860,7 +860,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it))
xe_res_next(&src_it, src_L0);
else
- emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs,
+ emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || use_comp_pat,
&src_it, src_L0, src);
if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it))
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 23028afbbe1d..da09c26249f5 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -164,7 +164,6 @@ static const struct xe_graphics_desc graphics_xelpg = {
.has_asid = 1, \
.has_atomic_enable_pte_bit = 1, \
.has_flat_ccs = 1, \
- .has_indirect_ring_state = 1, \
.has_range_tlb_invalidation = 1, \
.has_usm = 1, \
.va_bits = 48, \
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 06f50aa31326..46c73ff10c74 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -682,11 +682,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
}
/**
- * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold
+ * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold
* @xe: xe device instance
- * @threshold: VRAM size in bites for the D3cold threshold
+ * @threshold: VRAM size in MiB for the D3cold threshold
*
- * Returns 0 for success, negative error code otherwise.
+ * Return:
+ * * 0 - success
+ * * -EINVAL - invalid argument
*/
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
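
With the corrected units, callers pass the threshold in MiB directly. A hypothetical call (the 300 MiB figure is my recollection of the driver's default D3Cold threshold, not stated in this diff):

/* permit D3Cold only while VRAM usage is at or below 300 MiB */
ret = xe_pm_set_vram_threshold(xe, 300);
if (ret)
	return ret;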
diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h
index ba0f61e7d2d6..4ff023b5d040 100644
--- a/drivers/gpu/drm/xe/xe_trace_bo.h
+++ b/drivers/gpu/drm/xe/xe_trace_bo.h
@@ -189,7 +189,7 @@ DECLARE_EVENT_CLASS(xe_vm,
),
TP_printk("dev=%s, vm=%p, asid=0x%05x", __get_str(dev),
- __entry->vm, __entry->asid)
+ __entry->vm, __entry->asid)
);
DEFINE_EVENT(xe_vm, xe_vm_kill,