summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2026-06-27 02:41:30 +0300
committer Linus Torvalds <torvalds@linux-foundation.org> 2026-06-27 02:41:30 +0300
commit fa6fe449343c3d97ed93fd01b020860c663f8807 (patch)
tree af95b1cf04e9b9185667f76cb661d51b48f48340
parent 5422e496b313b9b0b2f6df068902d6c79925d5e9 (diff)
parent b41df707b6d7b7ae6188c6fc37ba81859293cb94 (diff)
download linux-fa6fe449343c3d97ed93fd01b020860c663f8807.tar.xz
Merge tag 'drm-next-2026-06-27' of https://gitlab.freedesktop.org/drm/kernel
Pull drm merge window fixes from Dave Airlie:
 "This is the merge window fixes from our next tree, i915/xe and amdgpu
  make up all of it. I've got a separate fixes pull from our fixes
  branch arriving after this.

  i915:
   - Fix corrupted display output on GLK, #16209
   - Add missing Spectre mitigation for parallel submit IOCTL
   - MTL+ fix for DP resume
   - clear CRTC blobs after dropping refs
   - fix sharpness filter on DP MST

  xe:
   - Set TTM beneficial order to 9 in Xe
   - Several error path cleanups
   - Fix TDR for unstarted jobs on kernel queues
   - Several TLB invalidation fixes related to suspending LR queues
   - Some small RAS fixes
   - Multi-queue suspend fix for LR queues
   - Revert inclusion of NVL_S firmware

  amdgpu:
   - devcoredump fixes
   - SMU15 fix
   - Various irq put/get imbalance cleanup fixes
   - 8K panel fix
   - DCN3.5 fix
   - lockdep fix
   - Cleaner shader sysfs IB overflow fix
   - Async flip fixes
   - GET_MAPPING_INFO fix
   - CP_GFX_SHADOW fix
   - Ctx pstate handling fix
   - GTT bo move handling fixes
   - Old UVD BO placement fixes
   - GC9 mode2 reset fix
   - IH6.1 version fix
   - Soft IH ring fix

  amdkfd:
   - Fix doorbell/mmio double unpin on free
   - CRIU fixes
   - SMI event fixes
   - Sysfs teardown fix
   - Various boundary checking fixes
   - Various error checking fixes
   - SVM fix"

* tag 'drm-next-2026-06-27' of https://gitlab.freedesktop.org/drm/kernel: (52 commits)
  drm/i915/cdclk: Fix up CDCLK_FREQ_DECIMAL without a full PLL re-enable
  drm/i915/gem: Add missing nospec on parallel submit slot
  drm/amdgpu: Use system unbound workqueue for soft IH ring
  amdgpu/ih6.1: Fix minor version
  drm/amdkfd: Use exclusive bounds for SVM split alignment checks
  drm/amdgpu/gfx9: Fix Ring and IB test fail after mode2
  drm/amdgpu/uvd: Fix forcing MSG, FB BOs into VCPU segment when it isn't at 0 (v2)
  drm/amdgpu/uvd: Place VCPU BO only in VRAM for UVD 4.x and older
  drm/amdgpu: Fix amdgpu_bo_move() when old_mem and new_mem are both GTT
  drm/amdgpu: Respect placement requirements in amdgpu_gtt_mgr functions
  drm/amdgpu: Fix context pstate override handling
  drm/amdkfd: Use memdup_array_user to copy data from/to user space at kfd ioctls
  drm/amdkfd: check find_first_zero_bit before __set_bit on kfd->doorbell_bitmap
  drm/amdkfd: Let driver decide buffer size at AMDKFD_IOC_GET_DMABUF_INFO ioctl
  drm/amdgpu: fix recursive ww_mutex acquire in amdgpu_devcoredump_format
  drm/amdgpu: convert amdgpu_vm_lock_by_pasid() to drm_exec
  drm/amdgpu: Don't use UTS_RELEASE directly
  drm/amdkfd: Fix NULL deref during sysfs teardown
  drm/amdgpu: validate CP_GFX_SHADOW chunk size in CS pass1
  drm/amdgpu: check amdgpu_vm_bo_find() result in GET_MAPPING_INFO
  ...
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 23
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 16
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 6
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 71
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c 220
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 10
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 30
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c 103
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 18
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 50
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 91
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 39
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/ih_v6_1.c 2
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 56
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 8
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 8
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_process.c 46
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 12
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 99
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h 14
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c 15
-rw-r--r-- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 10
-rw-r--r-- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c 13
-rw-r--r-- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 1
-rw-r--r-- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c 2
-rw-r--r-- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c 2
-rw-r--r-- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c 2
-rw-r--r-- drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_0_ppt.c 7
-rw-r--r-- drivers/gpu/drm/i915/display/intel_atomic.c 6
-rw-r--r-- drivers/gpu/drm/i915/display/intel_cdclk.c 41
-rw-r--r-- drivers/gpu/drm/i915/display/intel_ddi.c 11
-rw-r--r-- drivers/gpu/drm/i915/display/intel_dp_mst.c 4
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_context.c 1
-rw-r--r-- drivers/gpu/drm/radeon/r100.c 13
-rw-r--r-- drivers/gpu/drm/xe/Makefile 4
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_gtt_defs.h 6
-rw-r--r-- drivers/gpu/drm/xe/xe_device.c 3
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_capture.c 10
-rw-r--r-- drivers/gpu/drm/xe/xe_pt.c 133
44 files changed, 766 insertions, 457 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 9783a3cefb04..da325863ad76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -558,7 +558,7 @@ uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
struct amdgpu_device **dmabuf_adev,
- uint64_t *bo_size, void *metadata_buffer,
+ uint64_t *bo_size, void **metadata_buffer,
size_t buffer_size, uint32_t *metadata_size,
uint32_t *flags, int8_t *xcp_id)
{
@@ -593,9 +593,24 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
*dmabuf_adev = adev;
if (bo_size)
*bo_size = amdgpu_bo_size(bo);
- if (metadata_buffer)
- r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
- metadata_size, &metadata_flags);
+ if (metadata_buffer) {
+ /* first get metadata_size by buffer = NULL */
+ r = amdgpu_bo_get_metadata(bo, NULL, 0,
+ metadata_size, NULL);
+
+ /* user buffer_size is at least the bo metadata_size:
+ * allocate a buffer in kernel space and copy the metadata into it */
+ if (*metadata_size <= buffer_size) {
+ *metadata_buffer = kzalloc(*metadata_size, GFP_KERNEL);
+
+ if (!*metadata_buffer)
+ return -ENOMEM;
+
+ r = amdgpu_bo_get_metadata(bo, *metadata_buffer, *metadata_size,
+ NULL, &metadata_flags);
+ } else
+ r = -EINVAL;
+ }
if (flags) {
*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
KFD_IOC_ALLOC_MEM_FLAGS_VRAM
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 5333e052d56d..e443a7277299 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -262,7 +262,7 @@ uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev);
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev);
int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
struct amdgpu_device **dmabuf_adev,
- uint64_t *bo_size, void *metadata_buffer,
+ uint64_t *bo_size, void **metadata_buffer,
size_t buffer_size, uint32_t *metadata_size,
uint32_t *flags, int8_t *xcp_id);
int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d54794e5b18b..35fe2c974699 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1914,13 +1914,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
mutex_lock(&mem->lock);
- /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */
- if (mem->alloc_flags &
- (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
- KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
- amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
- }
-
mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
is_imported = mem->is_imported;
mutex_unlock(&mem->lock);
@@ -1934,6 +1927,15 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
return -EBUSY;
}
+ /* At this point the BO is guaranteed to be freed, so unpin the
+ * MMIO/DOORBELL BOs that were pinned during allocation.
+ */
+ if (mem->alloc_flags &
+ (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+ amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
+ }
+
/* Make sure restore workers don't access the BO any more */
mutex_lock(&process_info->lock);
if (!list_empty(&mem->validate_list))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 115b134b4cd1..c2e6495a28bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -247,13 +247,17 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
goto free_partial_kdata;
break;
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+ if (size < sizeof(struct drm_amdgpu_cs_chunk_cp_gfx_shadow))
+ goto free_partial_kdata;
+ break;
+
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
- case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 0d7f6cd74f79..ce35b415093d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -326,7 +326,6 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
struct drm_file *filp, struct amdgpu_ctx *ctx)
{
struct amdgpu_fpriv *fpriv = filp->driver_priv;
- u32 current_stable_pstate;
int r;
r = amdgpu_ctx_priority_permit(filp, priority);
@@ -344,36 +343,21 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm);
ctx->init_priority = priority;
ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
-
- r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
- if (r)
- return r;
-
- if (mgr->adev->pm.stable_pstate_ctx)
- ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate;
- else
- ctx->stable_pstate = current_stable_pstate;
+ ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
return 0;
}
-static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
- u32 stable_pstate)
+static int __amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
+ u32 stable_pstate)
{
struct amdgpu_device *adev = ctx->mgr->adev;
enum amd_dpm_forced_level level;
+ struct amdgpu_ctx *current_ctx;
u32 current_stable_pstate;
- int r;
+ int r = 0;
- mutex_lock(&adev->pm.stable_pstate_ctx_lock);
- if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
- r = -EBUSY;
- goto done;
- }
-
- r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
- if (r || (stable_pstate == current_stable_pstate))
- goto done;
+ lockdep_assert_held(&adev->pm.stable_pstate_ctx_lock);
switch (stable_pstate) {
case AMDGPU_CTX_STABLE_PSTATE_NONE:
@@ -392,17 +376,41 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
break;
default:
- r = -EINVAL;
- goto done;
+ return -EINVAL;
}
+ current_ctx = adev->pm.stable_pstate_ctx;
+ if (current_ctx && current_ctx != ctx)
+ return -EBUSY;
+
+ r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
+ if (r || current_stable_pstate == stable_pstate)
+ return r;
+
r = amdgpu_dpm_force_performance_level(adev, level);
+ if (r)
+ return r;
- if (level == AMD_DPM_FORCED_LEVEL_AUTO)
- adev->pm.stable_pstate_ctx = NULL;
- else
+ if (!current_ctx) {
adev->pm.stable_pstate_ctx = ctx;
-done:
+ /*
+ * Serialized by context taking ownership for the first time
+ * while holding adev->pm.stable_pstate_ctx_lock.
+ */
+ WRITE_ONCE(ctx->stable_pstate, current_stable_pstate);
+ }
+
+ return 0;
+}
+
+static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
+ u32 stable_pstate)
+{
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ int r;
+
+ mutex_lock(&adev->pm.stable_pstate_ctx_lock);
+ r = __amdgpu_ctx_set_stable_pstate(ctx, stable_pstate);
mutex_unlock(&adev->pm.stable_pstate_ctx_lock);
return r;
@@ -428,7 +436,12 @@ static void amdgpu_ctx_fini(struct kref *ref)
}
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
- amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
+ mutex_lock(&adev->pm.stable_pstate_ctx_lock);
+ if (adev->pm.stable_pstate_ctx == ctx) {
+ __amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
+ adev->pm.stable_pstate_ctx = NULL;
+ }
+ mutex_unlock(&adev->pm.stable_pstate_ctx_lock);
drm_dev_exit(idx);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
index 27830518a230..e77db76b48b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
@@ -22,8 +22,9 @@
*
*/
-#include <generated/utsrelease.h>
#include <linux/devcoredump.h>
+#include <linux/utsname.h>
+#include <drm/drm_exec.h>
#include "amdgpu_dev_coredump.h"
#include "atom.h"
@@ -207,28 +208,143 @@ static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev,
}
}
+static void
+amdgpu_devcoredump_print_ibs(struct drm_printer *p,
+ struct amdgpu_coredump_info *coredump,
+ bool sizing_pass)
+{
+ struct amdgpu_device *adev = coredump->adev;
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_bo *abo;
+ struct drm_exec exec;
+ struct amdgpu_vm *vm;
+ u32 *ib_content;
+ u64 va_start, offset;
+ u8 *kptr;
+ u32 off;
+ int r;
+
+ /*
+ * On the sizing pass there is no VM to look up and no BO to lock; the
+ * size estimate doesn't depend on whether the IB BOs are reachable.
+ * Just emit the per-IB headers (the content is not written anywhere).
+ */
+ if (sizing_pass) {
+ for (int i = 0; i < coredump->num_ibs; i++) {
+ drm_printf(p, "\nIB #%d 0x%llx %d dw\n", i,
+ coredump->ibs[i].gpu_addr,
+ coredump->ibs[i].ib_size_dw);
+ }
+ return;
+ }
+
+ /*
+ * Lock the VM root PD and every IB BO together in a single drm_exec
+ * ticket. Reserving the IB BOs one by one while the root PD is held
+ * would be a recursive reservation_ww_class_mutex acquire without a
+ * ww_acquire_ctx, which trips lockdep and self-deadlocks for IB BOs
+ * that share their dma_resv with the root PD (always-valid BOs).
+ */
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 1 + coredump->num_ibs);
+ drm_exec_until_all_locked(&exec) {
+ vm = amdgpu_vm_lock_by_pasid(adev, coredump->pasid, &exec);
+ if (!vm)
+ goto unlock;
+
+ for (int i = 0; i < coredump->num_ibs; i++) {
+ u64 pfn = (coredump->ibs[i].gpu_addr &
+ AMDGPU_GMC_HOLE_MASK) / AMDGPU_GPU_PAGE_SIZE;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(vm, pfn);
+ if (!mapping)
+ continue;
+
+ abo = mapping->bo_va->base.bo;
+ r = drm_exec_lock_obj(&exec, &abo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (r)
+ goto unlock;
+ }
+ }
+
+ for (int i = 0; i < coredump->num_ibs; i++) {
+ bool emit_content = false;
+
+ ib_content = kvmalloc_array(coredump->ibs[i].ib_size_dw, 4,
+ GFP_KERNEL);
+ if (!ib_content)
+ continue;
+
+ va_start = coredump->ibs[i].gpu_addr & AMDGPU_GMC_HOLE_MASK;
+ mapping = amdgpu_vm_bo_lookup_mapping(vm,
+ va_start / AMDGPU_GPU_PAGE_SIZE);
+ if (!mapping)
+ goto output_ib_content;
+
+ abo = mapping->bo_va->base.bo;
+ offset = va_start - mapping->start * AMDGPU_GPU_PAGE_SIZE;
+
+ if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) {
+ struct amdgpu_res_cursor cursor;
+
+ off = 0;
+
+ if (abo->tbo.resource->mem_type != TTM_PL_VRAM)
+ goto output_ib_content;
+
+ amdgpu_res_first(abo->tbo.resource, offset,
+ coredump->ibs[i].ib_size_dw * 4, &cursor);
+ while (cursor.remaining) {
+ amdgpu_device_mm_access(adev, cursor.start / 4,
+ &ib_content[off], cursor.size / 4,
+ false);
+ off += cursor.size;
+ amdgpu_res_next(&cursor, cursor.size);
+ }
+ emit_content = true;
+ } else {
+ r = ttm_bo_kmap(&abo->tbo, 0, PFN_UP(abo->tbo.base.size),
+ &abo->kmap);
+ if (r)
+ goto output_ib_content;
+
+ kptr = amdgpu_bo_kptr(abo);
+ kptr += offset;
+ memcpy(ib_content, kptr, coredump->ibs[i].ib_size_dw * 4);
+
+ amdgpu_bo_kunmap(abo);
+ emit_content = true;
+ }
+
+output_ib_content:
+ drm_printf(p, "\nIB #%d 0x%llx %d dw\n", i,
+ coredump->ibs[i].gpu_addr, coredump->ibs[i].ib_size_dw);
+ if (emit_content) {
+ for (int j = 0; j < coredump->ibs[i].ib_size_dw; j++)
+ drm_printf(p, "0x%08x\n", ib_content[j]);
+ }
+ kvfree(ib_content);
+ }
+
+unlock:
+ drm_exec_fini(&exec);
+}
+
static ssize_t
amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_info *coredump)
{
- struct amdgpu_device *adev = coredump->adev;
struct drm_printer p;
struct drm_print_iterator iter;
struct amdgpu_vm_fault_info *fault_info;
- struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_ip_block *ip_block;
- struct amdgpu_res_cursor cursor;
- struct amdgpu_bo *abo, *root;
- uint64_t va_start, offset;
struct amdgpu_ring *ring;
- struct amdgpu_vm *vm;
- u32 *ib_content;
- uint8_t *kptr;
- int ver, i, j, r;
+ int ver, i, j;
u32 ring_idx, off;
bool sizing_pass;
sizing_pass = buffer == NULL;
iter.data = buffer;
+ iter.start = 0;
iter.offset = 0;
iter.remain = count;
@@ -236,7 +352,7 @@ amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_inf
drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n");
- drm_printf(&p, "kernel: " UTS_RELEASE "\n");
+ drm_printf(&p, "kernel: %s\n", init_utsname()->release);
drm_printf(&p, "module: " KBUILD_MODNAME "\n");
drm_printf(&p, "time: %ptSp\n", &coredump->reset_time);
@@ -342,86 +458,8 @@ amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_inf
else if (coredump->reset_vram_lost)
drm_printf(&p, "VRAM is lost due to GPU reset!\n");
- if (coredump->num_ibs) {
- /* Don't try to lookup the VM or map the BOs when calculating the
- * size required to store the devcoredump.
- */
- if (sizing_pass)
- vm = NULL;
- else
- vm = amdgpu_vm_lock_by_pasid(adev, &root, coredump->pasid);
-
- for (int i = 0; i < coredump->num_ibs && (sizing_pass || vm); i++) {
- ib_content = kvmalloc_array(coredump->ibs[i].ib_size_dw, 4,
- GFP_KERNEL);
- if (!ib_content)
- continue;
-
- /* vm=NULL can only happen when 'sizing_pass' is true. Skip to the
- * drm_printf() calls (ib_content doesn't need to be initialized
- * as its content won't be written anywhere).
- */
- if (!vm)
- goto output_ib_content;
-
- va_start = coredump->ibs[i].gpu_addr & AMDGPU_GMC_HOLE_MASK;
- mapping = amdgpu_vm_bo_lookup_mapping(vm, va_start / AMDGPU_GPU_PAGE_SIZE);
- if (!mapping)
- goto free_ib_content;
-
- offset = va_start - (mapping->start * AMDGPU_GPU_PAGE_SIZE);
- abo = amdgpu_bo_ref(mapping->bo_va->base.bo);
- r = amdgpu_bo_reserve(abo, false);
- if (r)
- goto free_ib_content;
-
- if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) {
- off = 0;
-
- if (abo->tbo.resource->mem_type != TTM_PL_VRAM)
- goto unreserve_abo;
-
- amdgpu_res_first(abo->tbo.resource, offset,
- coredump->ibs[i].ib_size_dw * 4,
- &cursor);
- while (cursor.remaining) {
- amdgpu_device_mm_access(adev, cursor.start / 4,
- &ib_content[off], cursor.size / 4,
- false);
- off += cursor.size;
- amdgpu_res_next(&cursor, cursor.size);
- }
- } else {
- r = ttm_bo_kmap(&abo->tbo, 0,
- PFN_UP(abo->tbo.base.size),
- &abo->kmap);
- if (r)
- goto unreserve_abo;
-
- kptr = amdgpu_bo_kptr(abo);
- kptr += offset;
- memcpy(ib_content, kptr,
- coredump->ibs[i].ib_size_dw * 4);
-
- amdgpu_bo_kunmap(abo);
- }
-
-output_ib_content:
- drm_printf(&p, "\nIB #%d 0x%llx %d dw\n",
- i, coredump->ibs[i].gpu_addr, coredump->ibs[i].ib_size_dw);
- for (int j = 0; j < coredump->ibs[i].ib_size_dw; j++)
- drm_printf(&p, "0x%08x\n", ib_content[j]);
-unreserve_abo:
- if (vm)
- amdgpu_bo_unreserve(abo);
-free_ib_content:
- kvfree(ib_content);
- }
- if (vm) {
- amdgpu_bo_unreserve(root);
- amdgpu_bo_unref(&root);
- }
- }
+ if (coredump->num_ibs)
+ amdgpu_devcoredump_print_ibs(&p, coredump, sizing_pass);
return count - iter.remain;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 942f0251c748..211d30f03d25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3043,7 +3043,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
- if (!adev->ip_blocks[i].status.valid)
+ if (!adev->ip_blocks[i].status.valid || !adev->ip_blocks[i].status.hw)
continue;
/* displays are handled in phase1 */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
@@ -3771,6 +3771,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->gfx.workload_profile_mutex);
mutex_init(&adev->vcn.workload_profile_mutex);
+ spin_lock_init(&adev->irq.lock);
+
amdgpu_device_init_apu_flags(adev);
r = amdgpu_device_check_arguments(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 212c14d99f6b..76da3f932f24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -1094,6 +1094,11 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
* If that number is larger than the size of the array, the ioctl must
* be retried.
*/
+ if (!bo_va) {
+ r = -ENOENT;
+ goto out_exec;
+ }
+
if (args->num_entries > INT_MAX / sizeof(*vm_entries)) {
r = -EINVAL;
goto out_exec;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 1e190fb54a97..85372af1216d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1664,12 +1664,13 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
struct drm_gpu_scheduler *sched = &ring->sched;
struct drm_sched_entity entity;
+ unsigned int ib_size_dw = 16;
static atomic_t counter;
struct dma_fence *f;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
void *owner;
- int i, r;
+ int r;
/* Initialize the scheduler entity */
r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
@@ -1687,7 +1688,7 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
owner = (void *)(unsigned long)atomic_inc_return(&counter);
r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner,
- 64, 0, &job,
+ ib_size_dw * sizeof(uint32_t), 0, &job,
AMDGPU_KERNEL_JOB_ID_CLEANER_SHADER);
if (r)
goto err;
@@ -1697,9 +1698,8 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
job->run_cleaner_shader = true;
ib = &job->ibs[0];
- for (i = 0; i <= ring->funcs->align_mask; ++i)
- ib->ptr[i] = ring->funcs->nop;
- ib->length_dw = ring->funcs->align_mask + 1;
+ memset32(ib->ptr, ring->funcs->nop, ib_size_dw);
+ ib->length_dw = ib_size_dw;
f = amdgpu_job_submit(job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index d23a91d029aa..0ea32561c4bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -272,7 +272,20 @@ static bool amdgpu_gtt_mgr_intersects(struct ttm_resource_manager *man,
const struct ttm_place *place,
size_t size)
{
- return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res);
+ const struct drm_mm_node *const node = &to_ttm_range_mgr_node(res)->mm_nodes[0];
+ const u32 num_pages = PFN_UP(size);
+
+ if (!place->lpfn)
+ return true;
+
+ if (!amdgpu_gtt_mgr_has_gart_addr(res))
+ return false;
+
+ if (place->fpfn >= (node->start + num_pages) ||
+ (place->lpfn && place->lpfn <= node->start))
+ return false;
+
+ return true;
}
/**
@@ -290,7 +303,20 @@ static bool amdgpu_gtt_mgr_compatible(struct ttm_resource_manager *man,
const struct ttm_place *place,
size_t size)
{
- return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res);
+ const struct drm_mm_node *const node = &to_ttm_range_mgr_node(res)->mm_nodes[0];
+ const u32 num_pages = PFN_UP(size);
+
+ if (!place->lpfn)
+ return true;
+
+ if (!amdgpu_gtt_mgr_has_gart_addr(res))
+ return false;
+
+ if (node->start < place->fpfn ||
+ (place->lpfn && (node->start + num_pages) > place->lpfn))
+ return false;
+
+ return true;
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 254a4e983f40..53be764968e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -309,8 +309,6 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
unsigned int irq, flags;
int r;
- spin_lock_init(&adev->irq.lock);
-
/* Enable MSI if not disabled by module parameter */
adev->irq.msi_enabled = false;
@@ -547,7 +545,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev,
unsigned int num_dw)
{
amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, entry->iv_entry, num_dw);
- schedule_work(&adev->irq.ih_soft_work);
+ queue_work(system_unbound_wq, &adev->irq.ih_soft_work);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c
index d5d71fd7c70d..61450af539a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c
@@ -16,6 +16,17 @@
#ifdef CONFIG_LOCKDEP
+struct amdgpu_lockdep_dummy_locks {
+ struct mutex reset_lock;
+ struct mutex userq_sch_mutex;
+ struct mutex userq_mutex;
+ struct mutex notifier_lock;
+ struct mutex vram_lock;
+ struct mutex srbm_mutex;
+ struct mutex grbm_idx_mutex;
+ spinlock_t mmio_idx_lock;
+};
+
/* Lock class keys for associating with real driver locks */
static struct lock_class_key amdgpu_userq_sch_mutex_key;
static struct lock_class_key amdgpu_userq_mutex_key;
@@ -84,72 +95,65 @@ void amdgpu_lockdep_set_class(struct amdgpu_device *adev)
int amdgpu_lockdep_init(void)
{
struct amdgpu_reset_domain *reset_domain = NULL;
- struct amdgpu_reset_control reset_ctl;
- struct mutex userq_sch_mutex;
- struct mutex userq_mutex;
- struct mutex notifier_lock;
- struct mutex vram_lock;
- struct mutex srbm_mutex;
- struct mutex grbm_idx_mutex;
- spinlock_t mmio_idx_lock;
+ struct amdgpu_lockdep_dummy_locks *locks;
unsigned long flags;
+ locks = kzalloc(sizeof(*locks), GFP_KERNEL);
+ if (!locks)
+ return -ENOMEM;
+
/*
* Initialize dummy reset domain
*/
reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE,
"lockdep_test");
- if (!reset_domain)
+ if (!reset_domain) {
+ kfree(locks);
return -ENOMEM;
-
+ }
/* Initialize dummy locks */
- mutex_init(&userq_sch_mutex);
- mutex_init(&userq_mutex);
- mutex_init(&notifier_lock);
- mutex_init(&vram_lock);
- mutex_init(&reset_ctl.reset_lock);
- mutex_init(&srbm_mutex);
- mutex_init(&grbm_idx_mutex);
- spin_lock_init(&mmio_idx_lock);
+ mutex_init(&locks->userq_sch_mutex);
+ mutex_init(&locks->userq_mutex);
+ mutex_init(&locks->notifier_lock);
+ mutex_init(&locks->vram_lock);
+ mutex_init(&locks->reset_lock);
+ mutex_init(&locks->srbm_mutex);
+ mutex_init(&locks->grbm_idx_mutex);
+ spin_lock_init(&locks->mmio_idx_lock);
/*
* Associate dummy locks with the same class keys used for real
* driver locks. This ensures lockdep connects the ordering learned
* here with the actual locks used at runtime.
*/
- lockdep_set_class(&userq_sch_mutex, &amdgpu_userq_sch_mutex_key);
- lockdep_set_class(&userq_mutex, &amdgpu_userq_mutex_key);
- lockdep_set_class(&notifier_lock, &amdgpu_notifier_lock_key);
- lockdep_set_class(&vram_lock, &amdgpu_vram_lock_key);
+ lockdep_set_class(&locks->userq_sch_mutex, &amdgpu_userq_sch_mutex_key);
+ lockdep_set_class(&locks->userq_mutex, &amdgpu_userq_mutex_key);
+ lockdep_set_class(&locks->notifier_lock, &amdgpu_notifier_lock_key);
+ lockdep_set_class(&locks->vram_lock, &amdgpu_vram_lock_key);
lockdep_set_class(&reset_domain->sem, &amdgpu_reset_sem_key);
- lockdep_set_class(&reset_ctl.reset_lock, &amdgpu_reset_lock_key);
- lockdep_set_class(&srbm_mutex, &amdgpu_srbm_lock_key);
- lockdep_set_class(&grbm_idx_mutex, &amdgpu_grbm_lock_key);
- lockdep_set_class(&mmio_idx_lock, &amdgpu_mmio_lock_key);
-
+ lockdep_set_class(&locks->reset_lock, &amdgpu_reset_lock_key);
+ lockdep_set_class(&locks->srbm_mutex, &amdgpu_srbm_lock_key);
+ lockdep_set_class(&locks->grbm_idx_mutex, &amdgpu_grbm_lock_key);
+ lockdep_set_class(&locks->mmio_idx_lock, &amdgpu_mmio_lock_key);
/*
* Take locks in the correct order to train lockdep.
* This establishes the dependency chain.
*/
/* Level 1: Global userq scheduler mutex (outermost) */
- mutex_lock(&userq_sch_mutex);
+ mutex_lock(&locks->userq_sch_mutex);
/* Level 2: Per-context userq mutex */
- mutex_lock(&userq_mutex);
-
+ mutex_lock(&locks->userq_mutex);
/* Level 3: MMU notifier lock */
- mutex_lock(&notifier_lock);
-
+ mutex_lock(&locks->notifier_lock);
/* Level 4: VRAM allocator lock */
- mutex_lock(&vram_lock);
-
+ mutex_lock(&locks->vram_lock);
/* Level 5: Reset domain semaphore */
down_read(&reset_domain->sem);
/* Level 6: Reset control lock */
- mutex_lock(&reset_ctl.reset_lock);
-
+ mutex_lock(&locks->reset_lock);
/*
* Mark potential memory reclaim boundary.
* GPU operations might trigger memory allocation/reclaim.
@@ -157,36 +161,35 @@ int amdgpu_lockdep_init(void)
fs_reclaim_acquire(GFP_KERNEL);
/* Level 7: SRBM register access */
- mutex_lock(&srbm_mutex);
-
+ mutex_lock(&locks->srbm_mutex);
/* Level 8: GRBM index access */
- mutex_lock(&grbm_idx_mutex);
+ mutex_lock(&locks->grbm_idx_mutex);
/* Level 9: MMIO index access (innermost lock, spinlock) */
- spin_lock_irqsave(&mmio_idx_lock, flags);
-
+ spin_lock_irqsave(&locks->mmio_idx_lock, flags);
/*
* All locks acquired in order.
* Lockdep has now learned the valid dependency chain.
*/
/* Release in reverse order */
- spin_unlock_irqrestore(&mmio_idx_lock, flags);
- mutex_unlock(&grbm_idx_mutex);
- mutex_unlock(&srbm_mutex);
-
+ spin_unlock_irqrestore(&locks->mmio_idx_lock, flags);
+ mutex_unlock(&locks->grbm_idx_mutex);
+ mutex_unlock(&locks->srbm_mutex);
fs_reclaim_release(GFP_KERNEL);
- mutex_unlock(&reset_ctl.reset_lock);
+ mutex_unlock(&locks->reset_lock);
up_read(&reset_domain->sem);
- mutex_unlock(&vram_lock);
- mutex_unlock(&notifier_lock);
- mutex_unlock(&userq_mutex);
- mutex_unlock(&userq_sch_mutex);
+
+ mutex_unlock(&locks->vram_lock);
+ mutex_unlock(&locks->notifier_lock);
+ mutex_unlock(&locks->userq_mutex);
+ mutex_unlock(&locks->userq_sch_mutex);
/* Cleanup */
amdgpu_reset_put_reset_domain(reset_domain);
+ kfree(locks);
pr_info("AMDGPU: Lockdep annotations initialized (9 lock levels)\n");
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 2740de94e93c..16c060badaee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -515,6 +515,15 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
if (new_mem->mem_type == TTM_PL_TT ||
new_mem->mem_type == AMDGPU_PL_PREEMPT) {
+ if (old_mem && (old_mem->mem_type == TTM_PL_TT ||
+ old_mem->mem_type == AMDGPU_PL_PREEMPT)) {
+ r = ttm_bo_wait_ctx(bo, ctx);
+ if (r)
+ return r;
+
+ amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
+ }
+
r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
if (r)
return r;
@@ -549,6 +558,15 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
ttm_bo_assign_mem(bo, new_mem);
return 0;
}
+ if ((old_mem->mem_type == TTM_PL_TT ||
+ old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
+ (new_mem->mem_type == TTM_PL_TT ||
+ new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
+ amdgpu_bo_move_notify(bo, evict, new_mem);
+ ttm_resource_free(bo, &bo->resource);
+ ttm_bo_assign_mem(bo, new_mem);
+ return 0;
+ }
if (old_mem->mem_type == AMDGPU_PL_GDS ||
old_mem->mem_type == AMDGPU_PL_GWS ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 3a3bc0d370fa..480bf88def46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -135,7 +135,7 @@ MODULE_FIRMWARE(FIRMWARE_VEGA12);
MODULE_FIRMWARE(FIRMWARE_VEGA20);
static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
-static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo);
+static void amdgpu_uvd_force_into_vcpu_segment(struct amdgpu_bo *abo);
static int amdgpu_uvd_create_msg_bo_helper(struct amdgpu_device *adev,
uint32_t size,
@@ -158,7 +158,7 @@ static int amdgpu_uvd_create_msg_bo_helper(struct amdgpu_device *adev,
amdgpu_bo_kunmap(bo);
amdgpu_bo_unpin(bo);
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
- amdgpu_uvd_force_into_uvd_segment(bo);
+ amdgpu_uvd_force_into_vcpu_segment(bo);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
goto err;
@@ -188,6 +188,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
const struct common_firmware_header *hdr;
unsigned int family_id;
int i, j, r;
+ u32 vcpu_bo_domain;
INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
@@ -319,12 +320,20 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+ /* UVD 5.0 and newer HW can use 64 bit addressing. */
+ adev->uvd.address_64_bit =
+ !amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0);
+
+ vcpu_bo_domain = AMDGPU_GEM_DOMAIN_VRAM;
+ if (adev->uvd.address_64_bit)
+ vcpu_bo_domain |= AMDGPU_GEM_DOMAIN_GTT;
+
for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
if (adev->uvd.harvest_config & (1 << j))
continue;
+
r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM |
- AMDGPU_GEM_DOMAIN_GTT,
+ vcpu_bo_domain,
&adev->uvd.inst[j].vcpu_bo,
&adev->uvd.inst[j].gpu_addr,
&adev->uvd.inst[j].cpu_addr);
@@ -339,10 +348,6 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
adev->uvd.filp[i] = NULL;
}
- /* from uvd v5.0 HW addressing capacity increased to 64 bits */
- if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
- adev->uvd.address_64_bit = true;
-
r = amdgpu_uvd_create_msg_bo_helper(adev, 128 << 10, &adev->uvd.ib_bo);
if (r)
return r;
@@ -545,6 +550,24 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
}
}
+/*
+ * Restrict @bo to one placement: the 256MB-aligned window that contains the
+ * start of UVD instance 0's VCPU BO, in the memory type of that VCPU BO's
+ * current resource.
+ */
+static void amdgpu_uvd_force_into_vcpu_segment(struct amdgpu_bo *bo)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct amdgpu_bo *vcpu = adev->uvd.inst[0].vcpu_bo;
+	struct amdgpu_res_cursor cursor;
+
+	/*
+	 * Initialise a cursor over the whole VCPU BO (offset 0, full size);
+	 * its start field anchors the window computed below.
+	 */
+	amdgpu_res_first(vcpu->tbo.resource, 0, amdgpu_bo_size(vcpu), &cursor);
+
+	bo->placement.placement = &bo->placements[0];
+	bo->placement.num_placement = 1;
+
+	bo->placements[0].mem_type = vcpu->tbo.resource->mem_type;
+	bo->placements[0].fpfn = ALIGN_DOWN(cursor.start, SZ_256M) >> PAGE_SHIFT;
+	bo->placements[0].lpfn = bo->placements[0].fpfn + (SZ_256M >> PAGE_SHIFT);
+
+	/* flags is only OR-ed into here; bits that were already set are kept. */
+	if (bo->placements[0].mem_type == TTM_PL_VRAM)
+		bo->placements[0].flags |= TTM_PL_FLAG_CONTIGUOUS;
+}
+
static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
{
int i;
@@ -595,13 +618,10 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
if (!ctx->parser->adev->uvd.address_64_bit) {
/* check if it's a message or feedback command */
cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1;
- if (cmd == 0x0 || cmd == 0x3) {
- /* yes, force it into VRAM */
- uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
-
- amdgpu_bo_placement_from_domain(bo, domain);
- }
- amdgpu_uvd_force_into_uvd_segment(bo);
+ if (cmd == 0x0 || cmd == 0x3)
+ amdgpu_uvd_force_into_vcpu_segment(bo);
+ else
+ amdgpu_uvd_force_into_uvd_segment(bo);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &tctx);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7d51880b4860..fee4c94c2585 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2920,47 +2920,56 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
}
/**
- * amdgpu_vm_lock_by_pasid - return an amdgpu_vm and its root bo from a pasid, if possible.
+ * amdgpu_vm_lock_by_pasid - look up a VM by PASID and lock its root PD
* @adev: amdgpu device pointer
- * @root: root BO of the VM
* @pasid: PASID of the VM
- * The caller needs to unreserve and unref the root bo on success.
+ * @exec: drm_exec context to lock the root PD in
+ *
+ * Must be called from within a drm_exec_until_all_locked() loop; the caller
+ * runs drm_exec_retry_on_contention() afterwards. The drm_exec context holds
+ * a reference on the root BO until it is finalised.
+ *
+ * Return: the VM on success, or NULL if the PASID has no VM, the VM is being
+ * torn down, or locking the root PD failed.
*/
struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
- struct amdgpu_bo **root, u32 pasid)
+ u32 pasid, struct drm_exec *exec)
{
unsigned long irqflags;
+ struct amdgpu_bo *root;
struct amdgpu_vm *vm;
int r;
xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
vm = xa_load(&adev->vm_manager.pasids, pasid);
- *root = vm ? amdgpu_bo_ref(vm->root.bo) : NULL;
+ root = vm ? amdgpu_bo_ref(vm->root.bo) : NULL;
xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
- if (!*root)
+ if (!root)
return NULL;
- r = amdgpu_bo_reserve(*root, true);
- if (r)
- goto error_unref;
+ r = drm_exec_lock_obj(exec, &root->tbo.base);
+ if (r) {
+ amdgpu_bo_unref(&root);
+ return NULL;
+ }
/* Double check that the VM still exists */
xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
vm = xa_load(&adev->vm_manager.pasids, pasid);
- if (vm && vm->root.bo != *root)
+ if (vm && vm->root.bo != root)
vm = NULL;
xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
- if (!vm)
- goto error_unlock;
+ if (!vm) {
+ drm_exec_unlock_obj(exec, &root->tbo.base);
+ amdgpu_bo_unref(&root);
+ return NULL;
+ }
- return vm;
-error_unlock:
- amdgpu_bo_unreserve(*root);
+ /* The drm_exec context holds its own reference on the root BO. */
+ amdgpu_bo_unref(&root);
-error_unref:
- amdgpu_bo_unref(root);
- return NULL;
+ return vm;
}
/**
@@ -2982,33 +2991,49 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
uint64_t ts, bool write_fault)
{
bool is_compute_context = false;
- struct amdgpu_bo *root;
+ struct drm_exec exec;
uint64_t value, flags;
struct amdgpu_vm *vm;
int r;
- vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
- if (!vm)
+ drm_exec_init(&exec, 0, 1);
+ drm_exec_until_all_locked(&exec) {
+ vm = amdgpu_vm_lock_by_pasid(adev, pasid, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (!vm)
+ break;
+ }
+ if (!vm) {
+ drm_exec_fini(&exec);
return false;
+ }
is_compute_context = vm->is_compute_context;
if (is_compute_context) {
- /* Unreserve root since svm_range_restore_pages might try to reserve it. */
- /* TODO: rework svm_range_restore_pages so that this isn't necessary. */
- amdgpu_bo_unreserve(root);
+ /* Release the root PD lock since svm_range_restore_pages
+ * might try to take it.
+ * TODO: rework svm_range_restore_pages so that this isn't
+ * necessary.
+ */
+ drm_exec_fini(&exec);
if (!svm_range_restore_pages(adev, pasid, vmid,
- node_id, addr >> PAGE_SHIFT, ts, write_fault)) {
- amdgpu_bo_unref(&root);
+ node_id, addr >> PAGE_SHIFT, ts, write_fault))
return true;
- }
- amdgpu_bo_unref(&root);
/* Re-acquire the VM lock, could be that the VM was freed in between. */
- vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
- if (!vm)
+ drm_exec_init(&exec, 0, 1);
+ drm_exec_until_all_locked(&exec) {
+ vm = amdgpu_vm_lock_by_pasid(adev, pasid, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (!vm)
+ break;
+ }
+ if (!vm) {
+ drm_exec_fini(&exec);
return false;
+ }
}
addr /= AMDGPU_GPU_PAGE_SIZE;
@@ -3032,7 +3057,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
value = 0;
}
- r = dma_resv_reserve_fences(root->tbo.base.resv, 1);
+ r = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1);
if (r) {
pr_debug("failed %d to reserve fence slot\n", r);
goto error_unlock;
@@ -3046,12 +3071,10 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
r = amdgpu_vm_update_pdes(adev, vm, true);
error_unlock:
- amdgpu_bo_unreserve(root);
+ drm_exec_fini(&exec);
if (r < 0)
dev_err(adev->dev, "Can't handle page fault (%d)\n", r);
- amdgpu_bo_unref(&root);
-
return false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 3695299f1a03..b32f51a78cd8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -592,7 +592,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
bool write_fault);
struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
- struct amdgpu_bo **root, u32 pasid);
+ u32 pasid, struct drm_exec *exec);
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 47721d0c3781..81a759a98725 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4071,6 +4071,41 @@ err_priv_inst:
return r;
}
+/*
+ * gfx_v9_0_deactivate_kcq_hqd - deactivate the HQD of every compute ring
+ * @adev: amdgpu device pointer
+ *
+ * Runs with the RLC in safe mode. For each compute ring, under srbm_mutex,
+ * the ring's me/pipe/queue is selected and CP_HQD_ACTIVE is read. An active
+ * queue is sent a dequeue request and polled for up to adev->usec_timeout
+ * iterations of udelay(1). If it is still active after that, CP_HQD_ACTIVE
+ * is cleared by hand. The dequeue request is cleared and the selection is
+ * reset to 0/0/0 before the mutex is dropped.
+ */
+static void gfx_v9_0_deactivate_kcq_hqd(struct amdgpu_device *adev)
+{
+	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+	for (int i = 0; i < adev->gfx.num_compute_rings; i++) {
+		u32 tmp;
+		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+
+		mutex_lock(&adev->srbm_mutex);
+		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
+		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
+		/* disable the queue if it's active */
+		if (tmp & CP_HQD_ACTIVE__ACTIVE_MASK) {
+			int j;
+
+			WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
+			for (j = 0; j < adev->usec_timeout; j++) {
+				tmp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
+				if (!(tmp & CP_HQD_ACTIVE__ACTIVE_MASK))
+					break;
+				udelay(1);
+			}
+			/*
+			 * Compare against the loop bound itself: j only reaches
+			 * adev->usec_timeout when the poll never saw the queue
+			 * go idle. Testing a separate constant would miss the
+			 * timeout whenever the two values differ.
+			 */
+			if (j == adev->usec_timeout) {
+				DRM_DEBUG("comp_%u_%u_%u dequeue request failed.\n",
+					  ring->me, ring->pipe, ring->queue);
+				/* Manual disable if dequeue request times out */
+				WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0);
+			}
+			WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0);
+		}
+		/* Restore the default selection before releasing the mutex. */
+		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);
+	}
+	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+}
+
static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -4095,6 +4130,10 @@ static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
return 0;
}
+ if ((adev->flags & AMD_IS_APU) && amdgpu_in_reset(adev) &&
+ amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_MODE2)
+ gfx_v9_0_deactivate_kcq_hqd(adev);
+
/* Use deinitialize sequence from CAIL when unbinding device from driver,
* otherwise KIQ is hanging when binding back
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
index 95b3f4e55ec3..699c274d357e 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
@@ -790,7 +790,7 @@ static void ih_v6_1_set_interrupt_funcs(struct amdgpu_device *adev)
const struct amdgpu_ip_block_version ih_v6_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_IH,
.major = 6,
- .minor = 0,
+ .minor = 1,
.rev = 0,
.funcs = &ih_v6_1_ip_funcs,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index a2b100d14425..531e20748198 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1299,18 +1299,11 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
return -EINVAL;
}
- devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
- GFP_KERNEL);
- if (!devices_arr)
- return -ENOMEM;
+ devices_arr = memdup_array_user((void *)args->device_ids_array_ptr,
+ args->n_devices, sizeof(*devices_arr));
- err = copy_from_user(devices_arr,
- (void __user *)args->device_ids_array_ptr,
- args->n_devices * sizeof(*devices_arr));
- if (err != 0) {
- err = -EFAULT;
- goto copy_from_user_failed;
- }
+ if (IS_ERR(devices_arr))
+ return PTR_ERR(devices_arr);
mutex_lock(&p->mutex);
pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
@@ -1391,7 +1384,6 @@ get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
sync_memory_failed:
mutex_unlock(&p->mutex);
-copy_from_user_failed:
kfree(devices_arr);
return err;
@@ -1416,18 +1408,11 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
return -EINVAL;
}
- devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
- GFP_KERNEL);
- if (!devices_arr)
- return -ENOMEM;
+ devices_arr = memdup_array_user((void *)args->device_ids_array_ptr,
+ args->n_devices, sizeof(*devices_arr));
- err = copy_from_user(devices_arr,
- (void __user *)args->device_ids_array_ptr,
- args->n_devices * sizeof(*devices_arr));
- if (err != 0) {
- err = -EFAULT;
- goto copy_from_user_failed;
- }
+ if (IS_ERR(devices_arr))
+ return PTR_ERR(devices_arr);
mutex_lock(&p->mutex);
pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
@@ -1493,7 +1478,6 @@ get_mem_obj_from_handle_failed:
unmap_memory_from_gpu_failed:
sync_memory_failed:
mutex_unlock(&p->mutex);
-copy_from_user_failed:
kfree(devices_arr);
return err;
}
@@ -1562,16 +1546,10 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
if (!dev)
return -EINVAL;
- if (args->metadata_ptr) {
- metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
- if (!metadata_buffer)
- return -ENOMEM;
- }
-
/* Get dmabuf info from KGD */
r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd,
&dmabuf_adev, &args->size,
- metadata_buffer, args->metadata_size,
+ &metadata_buffer, args->metadata_size,
&args->metadata_size, &flags, &xcp_id);
if (r)
goto exit;
@@ -1583,7 +1561,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
args->flags = flags;
/* Copy metadata buffer to user mode */
- if (metadata_buffer) {
+ if (metadata_buffer && args->metadata_ptr) {
r = copy_to_user((void __user *)args->metadata_ptr,
metadata_buffer, args->metadata_size);
if (r != 0)
@@ -2359,17 +2337,11 @@ static int criu_restore_devices(struct kfd_process *p,
if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size)
return -EINVAL;
- device_buckets = kmalloc_objs(*device_buckets, args->num_devices);
- if (!device_buckets)
- return -ENOMEM;
+ device_buckets = memdup_array_user((void *)args->devices,
+ args->num_devices, sizeof(*device_buckets));
- ret = copy_from_user(device_buckets, (void __user *)args->devices,
- args->num_devices * sizeof(*device_buckets));
- if (ret) {
- pr_err("Failed to copy devices buckets from user\n");
- ret = -EFAULT;
- goto exit;
- }
+ if (IS_ERR(device_buckets))
+ return PTR_ERR(device_buckets);
for (i = 0; i < args->num_devices; i++) {
struct kfd_node *dev;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 05c74887fd6f..fdcf7f2d1b5b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -153,14 +153,16 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
u32 inx;
mutex_lock(&kfd->doorbell_mutex);
+
inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32));
+ if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
+ mutex_unlock(&kfd->doorbell_mutex);
+ return NULL;
+ }
__set_bit(inx, kfd->doorbell_bitmap);
mutex_unlock(&kfd->doorbell_mutex);
- if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
- return NULL;
-
*doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev,
kfd->doorbells,
inx,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 28dc6886c1ff..226e76ae0be7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -424,7 +424,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
migrate.dst = migrate.src + npages;
scratch = (dma_addr_t *)(migrate.dst + npages);
- kfd_smi_event_migration_start(node, p->lead_thread->pid,
+ kfd_smi_event_migration_start(node, p->lead_thread,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
0, node->id, prange->prefetch_loc,
prange->preferred_loc, trigger);
@@ -462,7 +462,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
out_free:
kvfree(buf);
- kfd_smi_event_migration_end(node, p->lead_thread->pid,
+ kfd_smi_event_migration_end(node, p->lead_thread,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
0, node->id, trigger, r);
out:
@@ -727,7 +727,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
migrate.fault_page = fault_page;
scratch = (dma_addr_t *)(migrate.dst + npages);
- kfd_smi_event_migration_start(node, p->lead_thread->pid,
+ kfd_smi_event_migration_start(node, p->lead_thread,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
node->id, 0, prange->prefetch_loc,
prange->preferred_loc, trigger);
@@ -766,7 +766,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
out_free:
kvfree(buf);
- kfd_smi_event_migration_end(node, p->lead_thread->pid,
+ kfd_smi_event_migration_end(node, p->lead_thread,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
node->id, 0, trigger, r);
out:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 368283d53077..ca71fa726e32 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1175,10 +1175,12 @@ static void kfd_process_remove_sysfs(struct kfd_process *p)
if (!p->kobj)
return;
- sysfs_remove_file(p->kobj, &p->attr_pasid);
- kobject_del(p->kobj_queues);
- kobject_put(p->kobj_queues);
- p->kobj_queues = NULL;
+ if (p->kobj_queues) {
+ sysfs_remove_file(p->kobj, &p->attr_pasid);
+ kobject_del(p->kobj_queues);
+ kobject_put(p->kobj_queues);
+ p->kobj_queues = NULL;
+ }
for (i = 0; i < p->n_pdds; i++) {
pdd = p->pdds[i];
@@ -1186,17 +1188,21 @@ static void kfd_process_remove_sysfs(struct kfd_process *p)
sysfs_remove_file(p->kobj, &pdd->attr_vram);
sysfs_remove_file(p->kobj, &pdd->attr_sdma);
- sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict);
- if (pdd->dev->kfd2kgd->get_cu_occupancy)
- sysfs_remove_file(pdd->kobj_stats,
- &pdd->attr_cu_occupancy);
- kobject_del(pdd->kobj_stats);
- kobject_put(pdd->kobj_stats);
- pdd->kobj_stats = NULL;
+ if (pdd->kobj_stats) {
+ sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict);
+ if (pdd->dev->kfd2kgd->get_cu_occupancy)
+ sysfs_remove_file(pdd->kobj_stats,
+ &pdd->attr_cu_occupancy);
+ kobject_del(pdd->kobj_stats);
+ kobject_put(pdd->kobj_stats);
+ pdd->kobj_stats = NULL;
+ }
}
for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) {
pdd = p->pdds[i];
+ if (!pdd->kobj_counters)
+ continue;
sysfs_remove_file(pdd->kobj_counters, &pdd->attr_faults);
sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_in);
@@ -1254,6 +1260,13 @@ static void kfd_process_wq_release(struct work_struct *work)
kfd_debugfs_remove_process(p);
+ /*
+ * Remove the proc/sysfs entries before destroying PDDs. The removal path
+ * walks the PDD array and sysfs callbacks dereference PDD fields, so the
+ * backing data must remain valid until sysfs removal has completed.
+ */
+ kfd_process_remove_sysfs(p);
+
kfd_process_kunmap_signal_bo(p);
kfd_process_free_outstanding_kfd_bos(p);
svm_range_list_fini(p);
@@ -1267,11 +1280,6 @@ static void kfd_process_wq_release(struct work_struct *work)
put_task_struct(p->lead_thread);
- /* the last step is removing process entries under /sys
- * to indicate the process has been terminated.
- */
- kfd_process_remove_sysfs(p);
-
kfree(p);
}
@@ -1969,7 +1977,7 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
struct kfd_process_device *pdd = p->pdds[i];
struct device *dev = pdd->dev->adev->dev;
- kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
+ kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread,
trigger);
r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
@@ -1999,7 +2007,7 @@ fail:
if (n_evicted == 0)
break;
- kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
+ kfd_smi_event_queue_restore(pdd->dev, p->lead_thread);
if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd))
@@ -2022,7 +2030,7 @@ int kfd_process_restore_queues(struct kfd_process *p)
struct kfd_process_device *pdd = p->pdds[i];
struct device *dev = pdd->dev->adev->dev;
- kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
+ kfd_smi_event_queue_restore(pdd->dev, p->lead_thread);
r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 44e39ce222b7..0ac35789b239 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -962,8 +962,8 @@ static void set_queue_properties_from_criu(struct queue_properties *qp,
qp->priority = q_data->priority;
qp->queue_address = q_data->q_address;
qp->queue_size = q_data->q_size;
- qp->read_ptr = (uint32_t *) q_data->read_ptr_addr;
- qp->write_ptr = (uint32_t *) q_data->write_ptr_addr;
+ qp->read_ptr = (void __user *)q_data->read_ptr_addr;
+ qp->write_ptr = (void __user *)q_data->write_ptr_addr;
qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address;
qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
@@ -1042,10 +1042,18 @@ int kfd_criu_restore_queue(struct kfd_process *p,
memset(&qp, 0, sizeof(qp));
set_queue_properties_from_criu(&qp, q_data, NUM_XCC(pdd->dev->adev->gfx.xcc_mask));
+ ret = kfd_queue_acquire_buffers(pdd, &qp);
+ if (ret) {
+ pr_debug("failed to acquire user queue buffers for CRIU\n");
+ goto exit;
+ }
+
print_queue_properties(&qp);
ret = pqm_create_queue(&p->pqm, pdd->dev, &qp, &queue_id, q_data, mqd, ctl_stack, NULL);
if (ret) {
+ kfd_queue_unref_bo_vas(pdd, &qp);
+ kfd_queue_release_buffers(pdd, &qp);
pr_err("Failed to create new queue err:%d\n", ret);
goto exit;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index dfbde5a571f6..e659cd50eb0b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -195,17 +195,35 @@ static void add_event_to_kfifo(pid_t pid, struct kfd_node *dev,
rcu_read_unlock();
}
+/**
+ * kfd_smi_task_to_pid - translate a task into the PID reported in SMI events
+ * @task: task to translate; may be NULL
+ *
+ * The result is the task's thread-group ID as numbered in the task's own
+ * active PID namespace. For a containerized process that is the
+ * container-local PID (what getpid() returns), not the global host PID.
+ *
+ * Return: the namespace-local TGID, or 0 if @task is NULL.
+ */
+static inline pid_t kfd_smi_task_to_pid(struct task_struct *task)
+{
+	if (!task)
+		return 0;
+
+	return task_tgid_nr_ns(task, task_active_pid_ns(task));
+}
+
__printf(4, 5)
-static void kfd_smi_event_add(pid_t pid, struct kfd_node *dev,
+static void kfd_smi_event_add(struct task_struct *task, struct kfd_node *dev,
unsigned int event, char *fmt, ...)
{
char fifo_in[KFD_SMI_EVENT_MSG_SIZE];
int len;
va_list args;
+ pid_t pid;
if (list_empty(&dev->smi_clients))
return;
+ pid = kfd_smi_task_to_pid(task);
+
len = snprintf(fifo_in, sizeof(fifo_in), "%x ", event);
va_start(args, fmt);
@@ -234,14 +252,15 @@ void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset,
amdgpu_reset_get_desc(reset_context, reset_cause,
sizeof(reset_cause));
- kfd_smi_event_add(0, dev, event, KFD_EVENT_FMT_UPDATE_GPU_RESET(
+ kfd_smi_event_add(NULL, dev, event, KFD_EVENT_FMT_UPDATE_GPU_RESET(
dev->reset_seq_num, reset_cause));
}
void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
uint64_t throttle_bitmask)
{
- kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, KFD_EVENT_FMT_THERMAL_THROTTLING(
+ kfd_smi_event_add(NULL, dev, KFD_SMI_EVENT_THERMAL_THROTTLE,
+ KFD_EVENT_FMT_THERMAL_THROTTLING(
throttle_bitmask,
amdgpu_dpm_get_thermal_throttling_counter(dev->adev)));
}
@@ -254,67 +273,67 @@ void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
if (task_info) {
/* Report VM faults from user applications, not retry from kernel */
if (task_info->task.pid)
- kfd_smi_event_add(task_info->tgid, dev,
- KFD_SMI_EVENT_VMFAULT,
- KFD_EVENT_FMT_VMFAULT(task_info->task.pid,
- task_info->task.comm));
+ kfd_smi_event_add(NULL, dev, KFD_SMI_EVENT_VMFAULT, KFD_EVENT_FMT_VMFAULT(
+ task_info->task.pid, task_info->task.comm));
amdgpu_vm_put_task_info(task_info);
}
}
-void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_page_fault_start(struct kfd_node *node, struct task_struct *task,
unsigned long address, bool write_fault,
ktime_t ts)
{
- kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_START,
- KFD_EVENT_FMT_PAGEFAULT_START(ktime_to_ns(ts), pid,
- address, node->id, write_fault ? 'W' : 'R'));
+ kfd_smi_event_add(task, node, KFD_SMI_EVENT_PAGE_FAULT_START,
+ KFD_EVENT_FMT_PAGEFAULT_START(ktime_to_ns(ts),
+ kfd_smi_task_to_pid(task), address, node->id,
+ write_fault ? 'W' : 'R'));
}
-void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_page_fault_end(struct kfd_node *node, struct task_struct *task,
unsigned long address, bool migration)
{
- kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_END,
+ kfd_smi_event_add(task, node, KFD_SMI_EVENT_PAGE_FAULT_END,
KFD_EVENT_FMT_PAGEFAULT_END(ktime_get_boottime_ns(),
- pid, address, node->id, migration ? 'M' : 'U'));
+ kfd_smi_task_to_pid(task), address, node->id,
+ migration ? 'M' : 'U'));
}
-void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_migration_start(struct kfd_node *node, struct task_struct *task,
unsigned long start, unsigned long end,
uint32_t from, uint32_t to,
uint32_t prefetch_loc, uint32_t preferred_loc,
uint32_t trigger)
{
- kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_START,
- KFD_EVENT_FMT_MIGRATE_START(
- ktime_get_boottime_ns(), pid, start, end - start,
- from, to, prefetch_loc, preferred_loc, trigger));
+ kfd_smi_event_add(task, node, KFD_SMI_EVENT_MIGRATE_START,
+ KFD_EVENT_FMT_MIGRATE_START(ktime_get_boottime_ns(),
+ kfd_smi_task_to_pid(task), start, end - start, from,
+ to, prefetch_loc, preferred_loc, trigger));
}
-void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_migration_end(struct kfd_node *node, struct task_struct *task,
unsigned long start, unsigned long end,
uint32_t from, uint32_t to, uint32_t trigger,
int error_code)
{
- kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_END,
- KFD_EVENT_FMT_MIGRATE_END(
- ktime_get_boottime_ns(), pid, start, end - start,
- from, to, trigger, error_code));
+ kfd_smi_event_add(task, node, KFD_SMI_EVENT_MIGRATE_END,
+ KFD_EVENT_FMT_MIGRATE_END(ktime_get_boottime_ns(),
+ kfd_smi_task_to_pid(task), start, end - start, from,
+ to, trigger, error_code));
}
-void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_queue_eviction(struct kfd_node *node, struct task_struct *task,
uint32_t trigger)
{
- kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_EVICTION,
- KFD_EVENT_FMT_QUEUE_EVICTION(ktime_get_boottime_ns(), pid,
- node->id, trigger));
+ kfd_smi_event_add(task, node, KFD_SMI_EVENT_QUEUE_EVICTION,
+ KFD_EVENT_FMT_QUEUE_EVICTION(ktime_get_boottime_ns(),
+ kfd_smi_task_to_pid(task), node->id, trigger));
}
-void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid)
+void kfd_smi_event_queue_restore(struct kfd_node *node, struct task_struct *task)
{
- kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_RESTORE,
- KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), pid,
- node->id, '0'));
+ kfd_smi_event_add(task, node, KFD_SMI_EVENT_QUEUE_RESTORE,
+ KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(),
+ kfd_smi_task_to_pid(task), node->id, '0'));
}
void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
@@ -329,21 +348,23 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
- kfd_smi_event_add(p->lead_thread->pid, pdd->dev,
+ kfd_smi_event_add(p->lead_thread, pdd->dev,
KFD_SMI_EVENT_QUEUE_RESTORE,
KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(),
- p->lead_thread->pid, pdd->dev->id, 'R'));
+ kfd_smi_task_to_pid(p->lead_thread),
+ pdd->dev->id, 'R'));
}
kfd_unref_process(p);
}
-void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, struct task_struct *task,
unsigned long address, unsigned long last,
uint32_t trigger)
{
- kfd_smi_event_add(pid, node, KFD_SMI_EVENT_UNMAP_FROM_GPU,
+ kfd_smi_event_add(task, node, KFD_SMI_EVENT_UNMAP_FROM_GPU,
KFD_EVENT_FMT_UNMAP_FROM_GPU(ktime_get_boottime_ns(),
- pid, address, last - address + 1, node->id, trigger));
+ kfd_smi_task_to_pid(task), address,
+ last - address + 1, node->id, trigger));
}
void kfd_smi_event_process(struct kfd_process_device *pdd, bool start)
@@ -358,7 +379,7 @@ void kfd_smi_event_process(struct kfd_process_device *pdd, bool start)
task_info = amdgpu_vm_get_task_info_vm(avm);
if (task_info) {
- kfd_smi_event_add(task_info->tgid, pdd->dev,
+ kfd_smi_event_add(NULL, pdd->dev,
start ? KFD_SMI_EVENT_PROCESS_START :
KFD_SMI_EVENT_PROCESS_END,
KFD_EVENT_FMT_PROCESS(task_info->task.pid,
@@ -387,7 +408,7 @@ int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd)
spin_lock_init(&client->lock);
client->events = 0;
client->dev = dev;
- client->pid = current->tgid;
+ client->pid = kfd_smi_task_to_pid(current);
client->suser = capable(CAP_SYS_ADMIN);
spin_lock(&dev->smi_lock);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index bb4d72b57387..afa93d7cfa7f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -32,25 +32,25 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
uint64_t throttle_bitmask);
void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset,
struct amdgpu_reset_context *reset_context);
-void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_page_fault_start(struct kfd_node *node, struct task_struct *task,
unsigned long address, bool write_fault,
ktime_t ts);
-void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_page_fault_end(struct kfd_node *node, struct task_struct *task,
unsigned long address, bool migration);
-void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_migration_start(struct kfd_node *node, struct task_struct *task,
unsigned long start, unsigned long end,
uint32_t from, uint32_t to,
uint32_t prefetch_loc, uint32_t preferred_loc,
uint32_t trigger);
-void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_migration_end(struct kfd_node *node, struct task_struct *task,
unsigned long start, unsigned long end,
uint32_t from, uint32_t to, uint32_t trigger,
int error_code);
-void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_queue_eviction(struct kfd_node *node, struct task_struct *task,
uint32_t trigger);
-void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid);
+void kfd_smi_event_queue_restore(struct kfd_node *node, struct task_struct *task);
void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm);
-void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, struct task_struct *task,
unsigned long address, unsigned long last,
uint32_t trigger);
void kfd_smi_event_process(struct kfd_process_device *pdd, bool start);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 3841943da5ec..0900bb23349e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1144,7 +1144,7 @@ static int
svm_range_split_tail(struct svm_range *prange, uint64_t new_last,
struct list_head *insert_list, struct list_head *remap_list)
{
- unsigned long last_align_down = ALIGN_DOWN(prange->last, 512);
+ unsigned long last_align_down = ALIGN_DOWN(prange->last + 1, 512);
unsigned long start_align = ALIGN(prange->start, 512);
bool huge_page_mapping = last_align_down > start_align;
struct svm_range *tail = NULL;
@@ -1168,7 +1168,7 @@ static int
svm_range_split_head(struct svm_range *prange, uint64_t new_start,
struct list_head *insert_list, struct list_head *remap_list)
{
- unsigned long last_align_down = ALIGN_DOWN(prange->last, 512);
+ unsigned long last_align_down = ALIGN_DOWN(prange->last + 1, 512);
unsigned long start_align = ALIGN(prange->start, 512);
bool huge_page_mapping = last_align_down > start_align;
struct svm_range *head = NULL;
@@ -1181,8 +1181,8 @@ svm_range_split_head(struct svm_range *prange, uint64_t new_start,
list_add(&head->list, insert_list);
- if (huge_page_mapping && head->last + 1 > start_align &&
- head->last + 1 < last_align_down && (!IS_ALIGNED(head->last, 512)))
+ if (huge_page_mapping && new_start > start_align &&
+ new_start < last_align_down && !IS_ALIGNED(new_start, 512))
list_add(&head->update_list, remap_list);
return 0;
@@ -1408,7 +1408,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
return -EINVAL;
}
- kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid,
+ kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread,
start, last, trigger);
r = svm_range_unmap_from_gpu(pdd->dev->adev,
@@ -3205,7 +3205,7 @@ retry_write_locked:
svms, prange->start, prange->last, best_loc,
prange->actual_loc);
- kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
+ kfd_smi_event_page_fault_start(node, p->lead_thread, addr,
write_fault, timestamp);
/* Align migration range start and size to granularity size */
@@ -3248,7 +3248,7 @@ retry_write_locked:
r, svms, start, last);
out_migrate_fail:
- kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
+ kfd_smi_event_page_fault_end(node, p->lead_thread, addr,
migration);
out_unlock_range:
@@ -4115,6 +4115,7 @@ exit:
list_for_each_entry_safe(criu_svm_md, next, &svms->criu_svm_metadata_list, list) {
pr_debug("freeing criu_svm_md[]\n\tstart: 0x%llx\n",
criu_svm_md->data.start_addr);
+ list_del(&criu_svm_md->list);
kfree(criu_svm_md);
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 97ab1e83b318..d3a8d681227a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -12942,13 +12942,11 @@ static bool amdgpu_dm_crtc_mem_type_changed(struct drm_device *dev,
struct drm_plane_state *new_plane_state, *old_plane_state;
drm_for_each_plane_mask(plane, dev, crtc_state->plane_mask) {
- new_plane_state = drm_atomic_get_plane_state(state, plane);
- old_plane_state = drm_atomic_get_plane_state(state, plane);
+ new_plane_state = drm_atomic_get_new_plane_state(state, plane);
+ old_plane_state = drm_atomic_get_old_plane_state(state, plane);
- if (IS_ERR(new_plane_state) || IS_ERR(old_plane_state)) {
- drm_err(dev, "Failed to get plane state for plane %s\n", plane->name);
- return false;
- }
+ if (!old_plane_state || !new_plane_state)
+ continue;
if (old_plane_state->fb && new_plane_state->fb &&
get_mem_type(old_plane_state->fb) != get_mem_type(new_plane_state->fb))
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index f257ea91a34d..c6f94eb71ffa 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -95,8 +95,11 @@ static u32 edid_extract_panel_id(struct edid *edid)
(u32)EDID_PRODUCT_ID(edid);
}
-static void apply_edid_quirks(struct drm_device *dev, struct edid *edid, struct dc_edid_caps *edid_caps)
+static void apply_edid_quirks(struct dc_link *link, struct edid *edid,
+ struct dc_edid_caps *edid_caps)
{
+ struct amdgpu_dm_connector *aconnector = link->priv;
+ struct drm_device *dev = aconnector->base.dev;
uint32_t panel_id = edid_extract_panel_id(edid);
switch (panel_id) {
@@ -126,6 +129,11 @@ static void apply_edid_quirks(struct drm_device *dev, struct edid *edid, struct
drm_dbg_driver(dev, "Disabling VSC on monitor with panel id %X\n", panel_id);
edid_caps->panel_patch.disable_colorimetry = true;
break;
+ /* Workaround for monitors that get corrupted by the PHY SSC reduction */
+ case drm_edid_encode_panel_id('D', 'E', 'L', 0x4147):
+ drm_dbg_driver(dev, "Skip PHY SSC reduction on panel id %X\n", panel_id);
+ link->wa_flags.skip_phy_ssc_reduction = true;
+ break;
default:
return;
}
@@ -147,7 +155,6 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
{
struct amdgpu_dm_connector *aconnector = link->priv;
struct drm_connector *connector = &aconnector->base;
- struct drm_device *dev = connector->dev;
struct edid *edid_buf = edid ? (struct edid *) edid->raw_edid : NULL;
struct cea_sad *sads;
int sad_count = -1;
@@ -188,7 +195,7 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
edid_caps->frl_dsc_max_frl_rate, edid_caps->frl_dsc_total_chunk_kbytes);
}
- apply_edid_quirks(dev, edid_buf, edid_caps);
+ apply_edid_quirks(link, edid_buf, edid_caps);
sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads);
if (sad_count <= 0)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index e957657b06c7..c7f8e08feaf4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1859,6 +1859,7 @@ static const struct drm_plane_funcs dm_plane_funcs = {
.atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state,
.atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state,
.format_mod_supported = amdgpu_dm_plane_format_mod_supported,
+ .format_mod_supported_async = amdgpu_dm_plane_format_mod_supported,
#ifdef AMD_PRIVATE_COLOR
.atomic_set_property = dm_atomic_plane_set_property,
.atomic_get_property = dm_atomic_plane_get_property,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
index 00c4be7c3aa4..ff47af3854b6 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
@@ -158,7 +158,6 @@ void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
if (new_clocks->zstate_support != DCN_ZSTATE_SUPPORT_DISALLOW &&
new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
dcn31_smu_set_zstate_support(clk_mgr, new_clocks->zstate_support);
- dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, true);
clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
}
@@ -184,7 +183,6 @@ void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW &&
new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
dcn31_smu_set_zstate_support(clk_mgr, DCN_ZSTATE_SUPPORT_DISALLOW);
- dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, false);
clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
index dd6f11ecb9c9..24f6304011ae 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
@@ -230,7 +230,6 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
if (new_clocks->zstate_support != DCN_ZSTATE_SUPPORT_DISALLOW &&
new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
dcn314_smu_set_zstate_support(clk_mgr, new_clocks->zstate_support);
- dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, true);
clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
}
@@ -255,7 +254,6 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW &&
new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
dcn314_smu_set_zstate_support(clk_mgr, DCN_ZSTATE_SUPPORT_DISALLOW);
- dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, false);
clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 103013e2a0de..a69824e1eb26 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -419,6 +419,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
if (new_clocks->zstate_support != DCN_ZSTATE_SUPPORT_DISALLOW &&
new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
dcn35_smu_set_zstate_support(clk_mgr, new_clocks->zstate_support);
+ dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, true);
clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
}
@@ -438,6 +439,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW &&
new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
dcn35_smu_set_zstate_support(clk_mgr, DCN_ZSTATE_SUPPORT_DISALLOW);
+ dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, false);
clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_0_ppt.c
index fb1145691410..a214ddbd4c86 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_0_ppt.c
@@ -227,9 +227,14 @@ static int smu_v15_0_0_system_features_control(struct smu_context *smu, bool en)
struct amdgpu_device *adev = smu->adev;
int ret = 0;
- if (!en && !adev->in_s0ix)
+ if (!en && !adev->in_s0ix) {
ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
+ /* SMU resets BIF_FB_EN to zero, re-enable MC access on APUs with SMU V15 */
+ if (!ret && adev->nbio.funcs && adev->nbio.funcs->mc_access_enable)
+ adev->nbio.funcs->mc_access_enable(adev, true);
+ }
+
return ret;
}
diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c
index 0e4f0678c53c..9d0d47c79dd1 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic.c
@@ -288,6 +288,12 @@ static void intel_crtc_put_color_blobs(struct intel_crtc_state *crtc_state)
drm_property_blob_put(crtc_state->pre_csc_lut);
drm_property_blob_put(crtc_state->post_csc_lut);
+
+ crtc_state->hw.degamma_lut = NULL;
+ crtc_state->hw.gamma_lut = NULL;
+ crtc_state->hw.ctm = NULL;
+ crtc_state->pre_csc_lut = NULL;
+ crtc_state->post_csc_lut = NULL;
}
void intel_crtc_free_hw_state(struct intel_crtc_state *crtc_state)
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 189ae2d3cfc9..7bc9b956554b 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -1256,9 +1256,22 @@ static void skl_sanitize_cdclk(struct intel_display *display)
cdctl = intel_de_read(display, CDCLK_CTL);
expected = (cdctl & CDCLK_FREQ_SEL_MASK) |
skl_cdclk_decimal(display->cdclk.hw.cdclk);
- if (cdctl == expected)
- /* All well; nothing to sanitize */
- return;
+
+ if (cdctl != expected) {
+ cdctl &= ~CDCLK_FREQ_DECIMAL_MASK;
+ cdctl |= expected & CDCLK_FREQ_DECIMAL_MASK;
+
+ if (cdctl != expected)
+ goto sanitize;
+
+ drm_dbg_kms(display->drm, "Sanitizing CDCLK decimal divider (CDCLK_CTL 0x%x, expected 0x%x)\n",
+ intel_de_read(display, CDCLK_CTL), expected);
+
+ intel_de_write(display, CDCLK_CTL, expected);
+ }
+
+ /* All well; nothing to sanitize */
+ return;
sanitize:
drm_dbg_kms(display->drm, "Sanitizing cdclk programmed by pre-os\n");
@@ -2354,11 +2367,25 @@ static void bxt_sanitize_cdclk(struct intel_display *display)
* (PIPE_NONE).
*/
cdctl &= ~bxt_cdclk_cd2x_pipe(display, INVALID_PIPE);
- expected &= ~bxt_cdclk_cd2x_pipe(display, INVALID_PIPE);
+ cdctl |= bxt_cdclk_cd2x_pipe(display, INVALID_PIPE);
- if (cdctl == expected)
- /* All well; nothing to sanitize */
- return;
+ if (cdctl != expected) {
+ if (DISPLAY_VER(display) < 20) {
+ cdctl &= ~CDCLK_FREQ_DECIMAL_MASK;
+ cdctl |= expected & CDCLK_FREQ_DECIMAL_MASK;
+ }
+
+ if (cdctl != expected)
+ goto sanitize;
+
+ drm_dbg_kms(display->drm, "Sanitizing CDCLK decimal divider (CDCLK_CTL 0x%x, expected 0x%x)\n",
+ intel_de_read(display, CDCLK_CTL), expected);
+
+ intel_de_write(display, CDCLK_CTL, expected);
+ }
+
+ /* All well; nothing to sanitize */
+ return;
sanitize:
drm_dbg_kms(display->drm, "Sanitizing cdclk programmed by pre-os\n");
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 205978c9feb6..6296635c4e79 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -2652,9 +2652,6 @@ static void mtl_ddi_pre_enable_dp(struct intel_atomic_state *state,
/* 3. Select Thunderbolt */
mtl_port_buf_ctl_io_selection(encoder);
- /* 4. Enable Panel Power if PPS is required */
- intel_pps_on(intel_dp);
-
/* 5. Enable the port PLL */
intel_ddi_enable_clock(encoder, crtc_state);
@@ -3708,6 +3705,14 @@ intel_ddi_pre_pll_enable(struct intel_atomic_state *state,
else if (display->platform.geminilake || display->platform.broxton)
bxt_dpio_phy_set_lane_optim_mask(encoder,
crtc_state->lane_lat_optim_mask);
+
+ /*
+ * There is no direct connection between the PLL and PPS, however
+ * enabling PPS before PLL is required to avoid PLL/DDI BUF timeouts
+ * during system resume. Do that matching the Bspec order as well.
+ */
+ if (DISPLAY_VER(display) >= 14)
+ intel_pps_on(&dig_port->dp);
}
static void adlp_tbt_to_dp_alt_switch_wa(struct intel_encoder *encoder)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index bcdc50491347..0aa3e6b4c781 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -726,6 +726,10 @@ static int mst_stream_compute_config(struct intel_encoder *encoder,
if (ret)
return ret;
+ ret = intel_pfit_compute_config(pipe_config, conn_state);
+ if (ret)
+ return ret;
+
for_each_joiner_candidate(connector, adjusted_mode, num_joined_pipes) {
if (num_joined_pipes > 1)
pipe_config->joiner_pipes = GENMASK(crtc->pipe + num_joined_pipes - 1,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 6ac0f23570f3..aeafe1742d30 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -613,6 +613,7 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
return -EINVAL;
}
+ slot = array_index_nospec(slot, set->num_engines);
if (set->engines[slot].type != I915_GEM_ENGINE_TYPE_INVALID) {
drm_dbg(&i915->drm,
"Invalid placement[%d], already occupied\n", slot);
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 3ac1a79b6f13..533215d6e9cb 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -906,6 +906,7 @@ struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
{
struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
struct radeon_fence *fence;
+ uint64_t cur_src_offset, cur_dst_offset;
uint32_t cur_pages;
uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
uint32_t pitch;
@@ -934,6 +935,10 @@ struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
cur_pages = 8191;
}
num_gpu_pages -= cur_pages;
+ cur_src_offset = src_offset +
+ (uint64_t)num_gpu_pages * RADEON_GPU_PAGE_SIZE;
+ cur_dst_offset = dst_offset +
+ (uint64_t)num_gpu_pages * RADEON_GPU_PAGE_SIZE;
/* pages are in Y direction - height
page width in X direction - width */
@@ -950,13 +955,13 @@ struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
RADEON_DP_SRC_SOURCE_MEMORY |
RADEON_GMC_CLR_CMP_CNTL_DIS |
RADEON_GMC_WR_MSK_DIS);
- radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
- radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
+ radeon_ring_write(ring, (pitch << 22) | (cur_src_offset >> 10));
+ radeon_ring_write(ring, (pitch << 22) | (cur_dst_offset >> 10));
radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
radeon_ring_write(ring, 0);
radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
- radeon_ring_write(ring, num_gpu_pages);
- radeon_ring_write(ring, num_gpu_pages);
+ radeon_ring_write(ring, 0);
+ radeon_ring_write(ring, 0);
radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
}
radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 09661f079d03..8e7b146880f4 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -16,14 +16,14 @@ subdir-ccflags-y += -I$(obj) -I$(src)
hostprogs := xe_gen_wa_oob
generated_oob := $(obj)/generated/xe_wa_oob.c $(obj)/generated/xe_wa_oob.h
quiet_cmd_wa_oob = GEN $(notdir $(generated_oob))
- cmd_wa_oob = mkdir -p $(@D); $^ $(generated_oob)
+ cmd_wa_oob = mkdir -p $(@D); $(obj)/xe_gen_wa_oob $(src)/xe_wa_oob.rules $(generated_oob)
$(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
$(src)/xe_wa_oob.rules
$(call cmd,wa_oob)
generated_device_oob := $(obj)/generated/xe_device_wa_oob.c $(obj)/generated/xe_device_wa_oob.h
quiet_cmd_device_wa_oob = GEN $(notdir $(generated_device_oob))
- cmd_device_wa_oob = mkdir -p $(@D); $^ $(generated_device_oob)
+ cmd_device_wa_oob = mkdir -p $(@D); $(obj)/xe_gen_wa_oob $(src)/xe_device_wa_oob.rules $(generated_device_oob)
$(obj)/generated/%_device_wa_oob.c $(obj)/generated/%_device_wa_oob.h: $(obj)/xe_gen_wa_oob \
$(src)/xe_device_wa_oob.rules
$(call cmd,device_wa_oob)
diff --git a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h
index 4d83461e538b..d6bc19ef277b 100644
--- a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h
@@ -9,7 +9,11 @@
#define XELPG_GGTT_PTE_PAT0 BIT_ULL(52)
#define XELPG_GGTT_PTE_PAT1 BIT_ULL(53)
-#define XE_PTE_ADDR_MASK GENMASK_ULL(51, 12)
+/*
+ * Mask for PTE address bits [51:shift].
+ * shift is the page shift (log2 of the page size), i.e. the lowest address bit kept.
+ */
+#define XE_PAGE_ADDR_MASK(shift) GENMASK_ULL(51, (shift))
#define GGTT_PTE_VFID GENMASK_ULL(11, 2)
#define GUC_GGTT_TOP 0xFEE00000
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index d224861b6f6f..abe25aedeead 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -526,7 +526,8 @@ int xe_device_init_early(struct xe_device *xe)
err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
xe->drm.anon_inode->i_mapping,
- xe->drm.vma_offset_manager, 0);
+ xe->drm.vma_offset_manager,
+ TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M)));
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
index 21f7caf9ea08..1a019137ddf4 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture.c
+++ b/drivers/gpu/drm/xe/xe_guc_capture.c
@@ -461,8 +461,14 @@ static void guc_capture_alloc_steered_lists(struct xe_guc *guc)
if (!list || guc->capture->extlists)
return;
- total = bitmap_weight(gt->fuse_topo.g_dss_mask, sizeof(gt->fuse_topo.g_dss_mask) * 8) *
- guc_capture_get_steer_reg_num(guc_to_xe(guc));
+ {
+ xe_dss_mask_t all_dss;
+
+ total = bitmap_weighted_or(all_dss, gt->fuse_topo.g_dss_mask,
+ gt->fuse_topo.c_dss_mask,
+ XE_MAX_DSS_FUSE_BITS) *
+ guc_capture_get_steer_reg_num(guc_to_xe(guc));
+ }
if (!total)
return;
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 2669ff5ee747..18a98667c0e6 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1602,23 +1602,21 @@ static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level,
return false;
}
-/* page_size = 2^(reclamation_size + XE_PTE_SHIFT) */
-#define COMPUTE_RECLAIM_ADDRESS_MASK(page_size) \
-({ \
- BUILD_BUG_ON(!__builtin_constant_p(page_size)); \
- ilog2(page_size) - XE_PTE_SHIFT; \
-})
-
static int generate_reclaim_entry(struct xe_tile *tile,
struct xe_page_reclaim_list *prl,
u64 pte, struct xe_pt *xe_child)
{
struct xe_gt *gt = tile->primary_gt;
struct xe_guc_page_reclaim_entry *reclaim_entries = prl->entries;
- u64 phys_addr = pte & XE_PTE_ADDR_MASK;
+ bool is_2m = xe_child->level == 1 && (pte & XE_PDE_PS_2M);
+ bool is_64k = xe_child->level == 0 && ((pte & XE_PTE_PS64) || xe_child->is_compact);
+ u32 page_shift = is_2m ? ilog2(SZ_2M) : is_64k ? ilog2(SZ_64K) : ilog2(SZ_4K);
+ /* Physical address bits start at page shift: 2M->[51:21], 64K->[51:16], 4K->[51:12] */
+ u64 phys_addr = pte & XE_PAGE_ADDR_MASK(page_shift);
+ /* Page address is relative to 4K page regardless of entry level */
u64 phys_page = phys_addr >> XE_PTE_SHIFT;
int num_entries = prl->num_entries;
- u32 reclamation_size;
+ u32 reclamation_size = page_shift - XE_PTE_SHIFT;
xe_tile_assert(tile, xe_child->level <= MAX_HUGEPTE_LEVEL);
xe_tile_assert(tile, reclaim_entries);
@@ -1633,18 +1631,12 @@ static int generate_reclaim_entry(struct xe_tile *tile,
* Page size is computed as 2^(reclamation_size + XE_PTE_SHIFT) bytes.
* Only 4K, 64K (level 0), and 2M pages are supported by hardware for page reclaim
*/
- if (xe_child->level == 0 && !(pte & XE_PTE_PS64)) {
- xe_gt_stats_incr(gt, XE_GT_STATS_ID_PRL_4K_ENTRY_COUNT, 1);
- reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_4K); /* reclamation_size = 0 */
- xe_tile_assert(tile, phys_addr % SZ_4K == 0);
- } else if (xe_child->level == 0) {
- xe_gt_stats_incr(gt, XE_GT_STATS_ID_PRL_64K_ENTRY_COUNT, 1);
- reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_64K); /* reclamation_size = 4 */
- xe_tile_assert(tile, phys_addr % SZ_64K == 0);
- } else if (xe_child->level == 1 && pte & XE_PDE_PS_2M) {
+ if (is_2m) {
xe_gt_stats_incr(gt, XE_GT_STATS_ID_PRL_2M_ENTRY_COUNT, 1);
- reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_2M); /* reclamation_size = 9 */
- xe_tile_assert(tile, phys_addr % SZ_2M == 0);
+ } else if (is_64k) {
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_PRL_64K_ENTRY_COUNT, 1);
+ } else if (xe_child->level == 0) {
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_PRL_4K_ENTRY_COUNT, 1);
} else {
xe_page_reclaim_list_abort(tile->primary_gt, prl,
"unsupported PTE level=%u pte=%#llx",
@@ -1665,6 +1657,48 @@ static int generate_reclaim_entry(struct xe_tile *tile,
return 0;
}
+/*
+ * add_pte_to_prl() - validate one PTE and append it to the page reclaim list.
+ *
+ * Aborts the PRL and returns -EINVAL when @pte is zero; aborts the PRL and
+ * returns -ENOSPC when the list is full (one slot is kept back for the
+ * terminating entry). Otherwise returns whatever generate_reclaim_entry()
+ * returns. @addr is only used in the zero-PTE abort message.
+ */
+static int add_pte_to_prl(struct xe_tile *tile, struct xe_page_reclaim_list *prl,
+ struct xe_pt *xe_child, u64 pte, u64 addr)
+{
+ /*
+ * In rare scenarios, the pte may not have been written yet due to a race.
+ * In such cases, invalidate the PRL and fall back to full PPC invalidation.
+ */
+ if (!pte) {
+ xe_page_reclaim_list_abort(tile->primary_gt, prl,
+ "found zero pte at addr=%#llx", addr);
+ return -EINVAL;
+ }
+
+ /* Ensure it is a defined page: level 0, or a 2M/1G huge-page bit is set */
+ xe_tile_assert(tile, xe_child->level == 0 ||
+ (pte & (XE_PDE_PS_2M | XE_PDPE_PS_1G)));
+
+ /* Account for the NULL-terminated entry at the end (-1) */
+ if (prl->num_entries >= XE_PAGE_RECLAIM_MAX_ENTRIES - 1) {
+ xe_page_reclaim_list_abort(tile->primary_gt, prl,
+ "overflow while adding pte=%#llx", pte);
+ return -ENOSPC;
+ }
+
+ return generate_reclaim_entry(tile, prl, pte, xe_child);
+}
+
+/*
+ * add_compact_pt_prl() - add every entry of a compact page table to the PRL.
+ *
+ * Reads SZ_2M / SZ_64K consecutive u64 PTEs from @compact_pt's BO mapping and
+ * hands each one to add_pte_to_prl(), advancing the reported address by 64K
+ * per entry starting from @addr. Iteration stops at the first entry that
+ * add_pte_to_prl() rejects or as soon as the PRL is no longer valid.
+ *
+ * Returns true if the PRL is still valid afterwards, false otherwise.
+ */
+static bool add_compact_pt_prl(struct xe_tile *tile, struct xe_page_reclaim_list *prl,
+ struct xe_device *xe, struct xe_pt *compact_pt, u64 addr)
+{
+ struct iosys_map *map = &compact_pt->bo->vmap;
+
+ for (pgoff_t i = 0; i < SZ_2M / SZ_64K && xe_page_reclaim_list_valid(prl); i++) {
+ /* Entry i sits at byte offset i * sizeof(u64) in the mapping */
+ u64 pte = xe_map_rd(xe, map, i * sizeof(u64), u64);
+
+ if (add_pte_to_prl(tile, prl, compact_pt, pte, addr + i * SZ_64K))
+ break;
+ }
+
+ return xe_page_reclaim_list_valid(prl);
+}
+
static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
unsigned int level, u64 addr, u64 next,
struct xe_ptw **child,
@@ -1674,21 +1708,22 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
struct xe_pt_stage_unbind_walk *xe_walk =
container_of(walk, typeof(*xe_walk), base);
- struct xe_device *xe = tile_to_xe(xe_walk->tile);
+ struct xe_page_reclaim_list *prl = xe_walk->prl;
+ struct xe_tile *tile = xe_walk->tile;
+ struct xe_device *xe = tile_to_xe(tile);
pgoff_t first = xe_pt_offset(addr, xe_child->level, walk);
bool killed;
XE_WARN_ON(!*child);
XE_WARN_ON(!level);
/* Check for leaf node */
- if (xe_walk->prl && xe_page_reclaim_list_valid(xe_walk->prl) &&
+ if (prl && xe_page_reclaim_list_valid(prl) &&
xe_child->level <= MAX_HUGEPTE_LEVEL) {
struct iosys_map *leaf_map = &xe_child->bo->vmap;
pgoff_t count = xe_pt_num_entries(addr, next, xe_child->level, walk);
for (pgoff_t i = 0; i < count; i++) {
u64 pte;
- int ret;
/*
* If not a leaf pt, skip unless non-leaf pt is interleaved between
@@ -1698,10 +1733,23 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
u64 pt_size = 1ULL << walk->shifts[xe_child->level];
bool edge_pt = (i == 0 && !IS_ALIGNED(addr, pt_size)) ||
(i == count - 1 && !IS_ALIGNED(next, pt_size));
-
- if (!edge_pt) {
- xe_page_reclaim_list_abort(xe_walk->tile->primary_gt,
- xe_walk->prl,
+ struct xe_pt *child_pt =
+ container_of(xe_child->base.children[first + i],
+ struct xe_pt, base);
+
+ /* Compact PTs always fill a full 2M-aligned slot, never an edge. */
+ XE_WARN_ON(child_pt->is_compact && edge_pt);
+ if (edge_pt)
+ continue;
+
+ /* Walker never descends into compact PTs, descend now */
+ if (child_pt->is_compact) {
+ if (!add_compact_pt_prl(tile, prl, xe, child_pt,
+ addr + (u64)i * pt_size))
+ break;
+ } else {
+ xe_page_reclaim_list_abort(tile->primary_gt,
+ prl,
"PT is skipped by walk at level=%u offset=%lu",
xe_child->level, first + i);
break;
@@ -1711,37 +1759,12 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
pte = xe_map_rd(xe, leaf_map, (first + i) * sizeof(u64), u64);
- /*
- * In rare scenarios, pte may not be written yet due to racy conditions.
- * In such cases, invalidate the PRL and fallback to full PPC invalidation.
- */
- if (!pte) {
- xe_page_reclaim_list_abort(xe_walk->tile->primary_gt, xe_walk->prl,
- "found zero pte at addr=%#llx", addr);
+ if (add_pte_to_prl(tile, prl, xe_child, pte, addr))
break;
- }
-
- /* Ensure it is a defined page */
- xe_tile_assert(xe_walk->tile, xe_child->level == 0 ||
- (pte & (XE_PDE_PS_2M | XE_PDPE_PS_1G)));
/* An entry should be added for 64KB but contigious 4K have XE_PTE_PS64 */
if (pte & XE_PTE_PS64)
i += 15; /* Skip other 15 consecutive 4K pages in the 64K page */
-
- /* Account for NULL terminated entry on end (-1) */
- if (xe_walk->prl->num_entries < XE_PAGE_RECLAIM_MAX_ENTRIES - 1) {
- ret = generate_reclaim_entry(xe_walk->tile, xe_walk->prl,
- pte, xe_child);
- if (ret)
- break;
- } else {
- /* overflow, mark as invalid */
- xe_page_reclaim_list_abort(xe_walk->tile->primary_gt, xe_walk->prl,
- "overflow while adding pte=%#llx",
- pte);
- break;
- }
}
}
@@ -1751,7 +1774,7 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
* Verify if any PTE are potentially dropped at non-leaf levels, either from being
* killed or the page walk covers the region.
*/
- if (xe_walk->prl && xe_page_reclaim_list_valid(xe_walk->prl) &&
+ if (prl && xe_page_reclaim_list_valid(prl) &&
xe_child->level > MAX_HUGEPTE_LEVEL && xe_child->num_live) {
bool covered = xe_pt_covers(addr, next, xe_child->level, &xe_walk->base);
@@ -1760,7 +1783,7 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
* we need to invalidate the PRL.
*/
if (killed || covered)
- xe_page_reclaim_list_abort(xe_walk->tile->primary_gt, xe_walk->prl,
+ xe_page_reclaim_list_abort(tile->primary_gt, prl,
"kill at level=%u addr=%#llx next=%#llx num_live=%u",
level, addr, next, xe_child->num_live);
}