diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-27 02:41:30 +0300 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-27 02:41:30 +0300 |
| commit | fa6fe449343c3d97ed93fd01b020860c663f8807 (patch) | |
| tree | af95b1cf04e9b9185667f76cb661d51b48f48340 | |
| parent | 5422e496b313b9b0b2f6df068902d6c79925d5e9 (diff) | |
| parent | b41df707b6d7b7ae6188c6fc37ba81859293cb94 (diff) | |
| download | linux-fa6fe449343c3d97ed93fd01b020860c663f8807.tar.xz | |
Merge tag 'drm-next-2026-06-27' of https://gitlab.freedesktop.org/drm/kernel
Pull drm merge window fixes from Dave Airlie:
"This is the merge window fixes from our next tree, i915/xe and amdgpu
make up all of it.
I've got a separate fixes pull from our fixes branch arriving after
this.
i915:
- Fix corrupted display output on GLK, #16209
- Add missing Spectre mitigation for parallel submit IOCTL
- MTL+ fix for DP resume
- clear CRTC blobs after dropping refs
- fix sharpness filter on DP MST
xe:
- Set TTM beneficial order to 9 in Xe
- Several error path cleanups
- Fix TDR for unstarted jobs on kernel queues
- Several TLB invalidation fixes related to suspending LR queues
- Some small RAS fixes
- Multi-queue suspend fix for LR queues
- Revert inclusion of NVL_S firmware
amdgpu:
- devcoredump fixes
- SMU15 fix
- Various irq put/get imbalance cleanup fixes
- 8K panel fix
- DCN3.5 fix
- lockdep fix
- Cleaner shader sysfs IB overflow fix
- Async flip fixes
- GET_MAPPING_INFO fix
- CP_GFX_SHADOW fix
- Ctx pstate handling fix
- GTT bo move handling fixes
- Old UVD BO placement fixes
- GC9 mode2 reset fix
- IH6.1 version fix
- Soft IH ring fix
amdkfd:
- Fix doorbell/mmio double unpin on free
- CRIU fixes
- SMI event fixes
- Sysfs teardown fix
- Various boundary checking fixes
- Various error checking fixes
- SVM fix"
* tag 'drm-next-2026-06-27' of https://gitlab.freedesktop.org/drm/kernel: (52 commits)
drm/i915/cdclk: Fix up CDCLK_FREQ_DECIMAL without a full PLL re-enable
drm/i915/gem: Add missing nospec on parallel submit slot
drm/amdgpu: Use system unbound workqueue for soft IH ring
amdgpu/ih6.1: Fix minor version
drm/amdkfd: Use exclusive bounds for SVM split alignment checks
drm/amdgpu/gfx9: Fix Ring and IB test fail after mode2
drm/amdgpu/uvd: Fix forcing MSG, FB BOs into VCPU segment when it isn't at 0 (v2)
drm/amdgpu/uvd: Place VCPU BO only in VRAM for UVD 4.x and older
drm/amdgpu: Fix amdgpu_bo_move() when old_mem and new_mem are both GTT
drm/amdgpu: Respect placement requirements in amdgpu_gtt_mgr functions
drm/amdgpu: Fix context pstate override handling
drm/amdkfd: Use memdup_array_user to copy data from/to user space at kfd ioctls
drm/amdkfd: check find_first_zero_bit before __set_bit on kfd->doorbell_bitmap
drm/amdkfd: Let driver decide buffer size at AMDKFD_IOC_GET_DMABUF_INFO ioctl
drm/amdgpu: fix recursive ww_mutex acquire in amdgpu_devcoredump_format
drm/amdgpu: convert amdgpu_vm_lock_by_pasid() to drm_exec
drm/amdgpu: Don't use UTS_RELEASE directly
drm/amdkfd: Fix NULL deref during sysfs teardown
drm/amdgpu: validate CP_GFX_SHADOW chunk size in CS pass1
drm/amdgpu: check amdgpu_vm_bo_find() result in GET_MAPPING_INFO
...
44 files changed, 766 insertions, 457 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 9783a3cefb04..da325863ad76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -558,7 +558,7 @@ uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev) int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, struct amdgpu_device **dmabuf_adev, - uint64_t *bo_size, void *metadata_buffer, + uint64_t *bo_size, void **metadata_buffer, size_t buffer_size, uint32_t *metadata_size, uint32_t *flags, int8_t *xcp_id) { @@ -593,9 +593,24 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, *dmabuf_adev = adev; if (bo_size) *bo_size = amdgpu_bo_size(bo); - if (metadata_buffer) - r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size, - metadata_size, &metadata_flags); + if (metadata_buffer) { + /* first get metadata_size by buffer = NULL */ + r = amdgpu_bo_get_metadata(bo, NULL, 0, + metadata_size, NULL); + + /* user buf_size is bigger than bo metadata_size + * allocate a buf at kernel space and copy */ + if (*metadata_size <= buffer_size) { + *metadata_buffer = kzalloc(*metadata_size, GFP_KERNEL); + + if (!*metadata_buffer) + return -ENOMEM; + + r = amdgpu_bo_get_metadata(bo, *metadata_buffer, *metadata_size, + NULL, &metadata_flags); + } else + r = -EINVAL; + } if (flags) { *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? 
KFD_IOC_ALLOC_MEM_FLAGS_VRAM diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 5333e052d56d..e443a7277299 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -262,7 +262,7 @@ uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev); uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev); int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, struct amdgpu_device **dmabuf_adev, - uint64_t *bo_size, void *metadata_buffer, + uint64_t *bo_size, void **metadata_buffer, size_t buffer_size, uint32_t *metadata_size, uint32_t *flags, int8_t *xcp_id); int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d54794e5b18b..35fe2c974699 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1914,13 +1914,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( mutex_lock(&mem->lock); - /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */ - if (mem->alloc_flags & - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | - KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo); - } - mapped_to_gpu_memory = mem->mapped_to_gpu_memory; is_imported = mem->is_imported; mutex_unlock(&mem->lock); @@ -1934,6 +1927,15 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( return -EBUSY; } + /* At this point the BO is guaranteed to be freed, so unpin the + * MMIO/DOORBELL BOs that were pinned during allocation. 
+ */ + if (mem->alloc_flags & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { + amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo); + } + /* Make sure restore workers don't access the BO any more */ mutex_lock(&process_info->lock); if (!list_empty(&mem->validate_list)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 115b134b4cd1..c2e6495a28bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -247,13 +247,17 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, goto free_partial_kdata; break; + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW: + if (size < sizeof(struct drm_amdgpu_cs_chunk_cp_gfx_shadow)) + goto free_partial_kdata; + break; + case AMDGPU_CHUNK_ID_DEPENDENCIES: case AMDGPU_CHUNK_ID_SYNCOBJ_IN: case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: - case AMDGPU_CHUNK_ID_CP_GFX_SHADOW: break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 0d7f6cd74f79..ce35b415093d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -326,7 +326,6 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, struct drm_file *filp, struct amdgpu_ctx *ctx) { struct amdgpu_fpriv *fpriv = filp->driver_priv; - u32 current_stable_pstate; int r; r = amdgpu_ctx_priority_permit(filp, priority); @@ -344,36 +343,21 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm); ctx->init_priority = priority; ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; - - r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate); - if (r) - return r; - - if (mgr->adev->pm.stable_pstate_ctx) - ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate; - else - 
ctx->stable_pstate = current_stable_pstate; + ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; return 0; } -static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, - u32 stable_pstate) +static int __amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, + u32 stable_pstate) { struct amdgpu_device *adev = ctx->mgr->adev; enum amd_dpm_forced_level level; + struct amdgpu_ctx *current_ctx; u32 current_stable_pstate; - int r; + int r = 0; - mutex_lock(&adev->pm.stable_pstate_ctx_lock); - if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) { - r = -EBUSY; - goto done; - } - - r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate); - if (r || (stable_pstate == current_stable_pstate)) - goto done; + lockdep_assert_held(&adev->pm.stable_pstate_ctx_lock); switch (stable_pstate) { case AMDGPU_CTX_STABLE_PSTATE_NONE: @@ -392,17 +376,41 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; break; default: - r = -EINVAL; - goto done; + return -EINVAL; } + current_ctx = adev->pm.stable_pstate_ctx; + if (current_ctx && current_ctx != ctx) + return -EBUSY; + + r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate); + if (r || current_stable_pstate == stable_pstate) + return r; + r = amdgpu_dpm_force_performance_level(adev, level); + if (r) + return r; - if (level == AMD_DPM_FORCED_LEVEL_AUTO) - adev->pm.stable_pstate_ctx = NULL; - else + if (!current_ctx) { adev->pm.stable_pstate_ctx = ctx; -done: + /* + * Serialized by context taking ownership for the first time + * while holding adev->pm.stable_pstate_ctx_lock). 
+ */ + WRITE_ONCE(ctx->stable_pstate, current_stable_pstate); + } + + return 0; +} + +static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, + u32 stable_pstate) +{ + struct amdgpu_device *adev = ctx->mgr->adev; + int r; + + mutex_lock(&adev->pm.stable_pstate_ctx_lock); + r = __amdgpu_ctx_set_stable_pstate(ctx, stable_pstate); mutex_unlock(&adev->pm.stable_pstate_ctx_lock); return r; @@ -428,7 +436,12 @@ static void amdgpu_ctx_fini(struct kref *ref) } if (drm_dev_enter(adev_to_drm(adev), &idx)) { - amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate); + mutex_lock(&adev->pm.stable_pstate_ctx_lock); + if (adev->pm.stable_pstate_ctx == ctx) { + __amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate); + adev->pm.stable_pstate_ctx = NULL; + } + mutex_unlock(&adev->pm.stable_pstate_ctx_lock); drm_dev_exit(idx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index 27830518a230..e77db76b48b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -22,8 +22,9 @@ * */ -#include <generated/utsrelease.h> #include <linux/devcoredump.h> +#include <linux/utsname.h> +#include <drm/drm_exec.h> #include "amdgpu_dev_coredump.h" #include "atom.h" @@ -207,28 +208,143 @@ static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev, } } +static void +amdgpu_devcoredump_print_ibs(struct drm_printer *p, + struct amdgpu_coredump_info *coredump, + bool sizing_pass) +{ + struct amdgpu_device *adev = coredump->adev; + struct amdgpu_bo_va_mapping *mapping; + struct amdgpu_bo *abo; + struct drm_exec exec; + struct amdgpu_vm *vm; + u32 *ib_content; + u64 va_start, offset; + u8 *kptr; + u32 off; + int r; + + /* + * On the sizing pass there is no VM to look up and no BO to lock; the + * size estimate doesn't depend on whether the IB BOs are reachable. + * Just emit the per-IB headers (the content is not written anywhere). 
+ */ + if (sizing_pass) { + for (int i = 0; i < coredump->num_ibs; i++) { + drm_printf(p, "\nIB #%d 0x%llx %d dw\n", i, + coredump->ibs[i].gpu_addr, + coredump->ibs[i].ib_size_dw); + } + return; + } + + /* + * Lock the VM root PD and every IB BO together in a single drm_exec + * ticket. Reserving the IB BOs one by one while the root PD is held + * would be a recursive reservation_ww_class_mutex acquire without a + * ww_acquire_ctx, which trips lockdep and self-deadlocks for IB BOs + * that share their dma_resv with the root PD (always-valid BOs). + */ + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 1 + coredump->num_ibs); + drm_exec_until_all_locked(&exec) { + vm = amdgpu_vm_lock_by_pasid(adev, coredump->pasid, &exec); + if (!vm) + goto unlock; + + for (int i = 0; i < coredump->num_ibs; i++) { + u64 pfn = (coredump->ibs[i].gpu_addr & + AMDGPU_GMC_HOLE_MASK) / AMDGPU_GPU_PAGE_SIZE; + + mapping = amdgpu_vm_bo_lookup_mapping(vm, pfn); + if (!mapping) + continue; + + abo = mapping->bo_va->base.bo; + r = drm_exec_lock_obj(&exec, &abo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (r) + goto unlock; + } + } + + for (int i = 0; i < coredump->num_ibs; i++) { + bool emit_content = false; + + ib_content = kvmalloc_array(coredump->ibs[i].ib_size_dw, 4, + GFP_KERNEL); + if (!ib_content) + continue; + + va_start = coredump->ibs[i].gpu_addr & AMDGPU_GMC_HOLE_MASK; + mapping = amdgpu_vm_bo_lookup_mapping(vm, + va_start / AMDGPU_GPU_PAGE_SIZE); + if (!mapping) + goto output_ib_content; + + abo = mapping->bo_va->base.bo; + offset = va_start - mapping->start * AMDGPU_GPU_PAGE_SIZE; + + if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) { + struct amdgpu_res_cursor cursor; + + off = 0; + + if (abo->tbo.resource->mem_type != TTM_PL_VRAM) + goto output_ib_content; + + amdgpu_res_first(abo->tbo.resource, offset, + coredump->ibs[i].ib_size_dw * 4, &cursor); + while (cursor.remaining) { + amdgpu_device_mm_access(adev, cursor.start / 4, + &ib_content[off], cursor.size / 4, + 
false); + off += cursor.size; + amdgpu_res_next(&cursor, cursor.size); + } + emit_content = true; + } else { + r = ttm_bo_kmap(&abo->tbo, 0, PFN_UP(abo->tbo.base.size), + &abo->kmap); + if (r) + goto output_ib_content; + + kptr = amdgpu_bo_kptr(abo); + kptr += offset; + memcpy(ib_content, kptr, coredump->ibs[i].ib_size_dw * 4); + + amdgpu_bo_kunmap(abo); + emit_content = true; + } + +output_ib_content: + drm_printf(p, "\nIB #%d 0x%llx %d dw\n", i, + coredump->ibs[i].gpu_addr, coredump->ibs[i].ib_size_dw); + if (emit_content) { + for (int j = 0; j < coredump->ibs[i].ib_size_dw; j++) + drm_printf(p, "0x%08x\n", ib_content[j]); + } + kvfree(ib_content); + } + +unlock: + drm_exec_fini(&exec); +} + static ssize_t amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_info *coredump) { - struct amdgpu_device *adev = coredump->adev; struct drm_printer p; struct drm_print_iterator iter; struct amdgpu_vm_fault_info *fault_info; - struct amdgpu_bo_va_mapping *mapping; struct amdgpu_ip_block *ip_block; - struct amdgpu_res_cursor cursor; - struct amdgpu_bo *abo, *root; - uint64_t va_start, offset; struct amdgpu_ring *ring; - struct amdgpu_vm *vm; - u32 *ib_content; - uint8_t *kptr; - int ver, i, j, r; + int ver, i, j; u32 ring_idx, off; bool sizing_pass; sizing_pass = buffer == NULL; iter.data = buffer; + iter.start = 0; iter.offset = 0; iter.remain = count; @@ -236,7 +352,7 @@ amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_inf drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n"); - drm_printf(&p, "kernel: " UTS_RELEASE "\n"); + drm_printf(&p, "kernel: %s\n", init_utsname()->release); drm_printf(&p, "module: " KBUILD_MODNAME "\n"); drm_printf(&p, "time: %ptSp\n", &coredump->reset_time); @@ -342,86 +458,8 @@ amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_inf else if (coredump->reset_vram_lost) drm_printf(&p, "VRAM is lost due to GPU 
reset!\n"); - if (coredump->num_ibs) { - /* Don't try to lookup the VM or map the BOs when calculating the - * size required to store the devcoredump. - */ - if (sizing_pass) - vm = NULL; - else - vm = amdgpu_vm_lock_by_pasid(adev, &root, coredump->pasid); - - for (int i = 0; i < coredump->num_ibs && (sizing_pass || vm); i++) { - ib_content = kvmalloc_array(coredump->ibs[i].ib_size_dw, 4, - GFP_KERNEL); - if (!ib_content) - continue; - - /* vm=NULL can only happen when 'sizing_pass' is true. Skip to the - * drm_printf() calls (ib_content doesn't need to be initialized - * as its content won't be written anywhere). - */ - if (!vm) - goto output_ib_content; - - va_start = coredump->ibs[i].gpu_addr & AMDGPU_GMC_HOLE_MASK; - mapping = amdgpu_vm_bo_lookup_mapping(vm, va_start / AMDGPU_GPU_PAGE_SIZE); - if (!mapping) - goto free_ib_content; - - offset = va_start - (mapping->start * AMDGPU_GPU_PAGE_SIZE); - abo = amdgpu_bo_ref(mapping->bo_va->base.bo); - r = amdgpu_bo_reserve(abo, false); - if (r) - goto free_ib_content; - - if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) { - off = 0; - - if (abo->tbo.resource->mem_type != TTM_PL_VRAM) - goto unreserve_abo; - - amdgpu_res_first(abo->tbo.resource, offset, - coredump->ibs[i].ib_size_dw * 4, - &cursor); - while (cursor.remaining) { - amdgpu_device_mm_access(adev, cursor.start / 4, - &ib_content[off], cursor.size / 4, - false); - off += cursor.size; - amdgpu_res_next(&cursor, cursor.size); - } - } else { - r = ttm_bo_kmap(&abo->tbo, 0, - PFN_UP(abo->tbo.base.size), - &abo->kmap); - if (r) - goto unreserve_abo; - - kptr = amdgpu_bo_kptr(abo); - kptr += offset; - memcpy(ib_content, kptr, - coredump->ibs[i].ib_size_dw * 4); - - amdgpu_bo_kunmap(abo); - } - -output_ib_content: - drm_printf(&p, "\nIB #%d 0x%llx %d dw\n", - i, coredump->ibs[i].gpu_addr, coredump->ibs[i].ib_size_dw); - for (int j = 0; j < coredump->ibs[i].ib_size_dw; j++) - drm_printf(&p, "0x%08x\n", ib_content[j]); -unreserve_abo: - if (vm) - 
amdgpu_bo_unreserve(abo); -free_ib_content: - kvfree(ib_content); - } - if (vm) { - amdgpu_bo_unreserve(root); - amdgpu_bo_unref(&root); - } - } + if (coredump->num_ibs) + amdgpu_devcoredump_print_ibs(&p, coredump, sizing_pass); return count - iter.remain; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 942f0251c748..211d30f03d25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3043,7 +3043,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { - if (!adev->ip_blocks[i].status.valid) + if (!adev->ip_blocks[i].status.valid || !adev->ip_blocks[i].status.hw) continue; /* displays are handled in phase1 */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) @@ -3771,6 +3771,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->gfx.workload_profile_mutex); mutex_init(&adev->vcn.workload_profile_mutex); + spin_lock_init(&adev->irq.lock); + amdgpu_device_init_apu_flags(adev); r = amdgpu_device_check_arguments(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 212c14d99f6b..76da3f932f24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -1094,6 +1094,11 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, * If that number is larger than the size of the array, the ioctl must * be retried. 
*/ + if (!bo_va) { + r = -ENOENT; + goto out_exec; + } + if (args->num_entries > INT_MAX / sizeof(*vm_entries)) { r = -EINVAL; goto out_exec; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 1e190fb54a97..85372af1216d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1664,12 +1664,13 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; struct drm_gpu_scheduler *sched = &ring->sched; struct drm_sched_entity entity; + unsigned int ib_size_dw = 16; static atomic_t counter; struct dma_fence *f; struct amdgpu_job *job; struct amdgpu_ib *ib; void *owner; - int i, r; + int r; /* Initialize the scheduler entity */ r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL, @@ -1687,7 +1688,7 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) owner = (void *)(unsigned long)atomic_inc_return(&counter); r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner, - 64, 0, &job, + ib_size_dw * sizeof(uint32_t), 0, &job, AMDGPU_KERNEL_JOB_ID_CLEANER_SHADER); if (r) goto err; @@ -1697,9 +1698,8 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) job->run_cleaner_shader = true; ib = &job->ibs[0]; - for (i = 0; i <= ring->funcs->align_mask; ++i) - ib->ptr[i] = ring->funcs->nop; - ib->length_dw = ring->funcs->align_mask + 1; + memset32(ib->ptr, ring->funcs->nop, ib_size_dw); + ib->length_dw = ib_size_dw; f = amdgpu_job_submit(job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index d23a91d029aa..0ea32561c4bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -272,7 +272,20 @@ static bool amdgpu_gtt_mgr_intersects(struct ttm_resource_manager *man, const struct ttm_place *place, size_t size) { - return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res); + const 
struct drm_mm_node *const node = &to_ttm_range_mgr_node(res)->mm_nodes[0]; + const u32 num_pages = PFN_UP(size); + + if (!place->lpfn) + return true; + + if (!amdgpu_gtt_mgr_has_gart_addr(res)) + return false; + + if (place->fpfn >= (node->start + num_pages) || + (place->lpfn && place->lpfn <= node->start)) + return false; + + return true; } /** @@ -290,7 +303,20 @@ static bool amdgpu_gtt_mgr_compatible(struct ttm_resource_manager *man, const struct ttm_place *place, size_t size) { - return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res); + const struct drm_mm_node *const node = &to_ttm_range_mgr_node(res)->mm_nodes[0]; + const u32 num_pages = PFN_UP(size); + + if (!place->lpfn) + return true; + + if (!amdgpu_gtt_mgr_has_gart_addr(res)) + return false; + + if (node->start < place->fpfn || + (place->lpfn && (node->start + num_pages) > place->lpfn)) + return false; + + return true; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 254a4e983f40..53be764968e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -309,8 +309,6 @@ int amdgpu_irq_init(struct amdgpu_device *adev) unsigned int irq, flags; int r; - spin_lock_init(&adev->irq.lock); - /* Enable MSI if not disabled by module parameter */ adev->irq.msi_enabled = false; @@ -547,7 +545,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev, unsigned int num_dw) { amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, entry->iv_entry, num_dw); - schedule_work(&adev->irq.ih_soft_work); + queue_work(system_unbound_wq, &adev->irq.ih_soft_work); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c index d5d71fd7c70d..61450af539a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c @@ -16,6 +16,17 @@ #ifdef CONFIG_LOCKDEP +struct amdgpu_lockdep_dummy_locks { + struct mutex reset_lock; + struct mutex 
userq_sch_mutex; + struct mutex userq_mutex; + struct mutex notifier_lock; + struct mutex vram_lock; + struct mutex srbm_mutex; + struct mutex grbm_idx_mutex; + spinlock_t mmio_idx_lock; +}; + /* Lock class keys for associating with real driver locks */ static struct lock_class_key amdgpu_userq_sch_mutex_key; static struct lock_class_key amdgpu_userq_mutex_key; @@ -84,72 +95,65 @@ void amdgpu_lockdep_set_class(struct amdgpu_device *adev) int amdgpu_lockdep_init(void) { struct amdgpu_reset_domain *reset_domain = NULL; - struct amdgpu_reset_control reset_ctl; - struct mutex userq_sch_mutex; - struct mutex userq_mutex; - struct mutex notifier_lock; - struct mutex vram_lock; - struct mutex srbm_mutex; - struct mutex grbm_idx_mutex; - spinlock_t mmio_idx_lock; + struct amdgpu_lockdep_dummy_locks *locks; unsigned long flags; + locks = kzalloc(sizeof(*locks), GFP_KERNEL); + if (!locks) + return -ENOMEM; + /* * Initialize dummy reset domain */ reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "lockdep_test"); - if (!reset_domain) + if (!reset_domain) { + kfree(locks); return -ENOMEM; - + } /* Initialize dummy locks */ - mutex_init(&userq_sch_mutex); - mutex_init(&userq_mutex); - mutex_init(&notifier_lock); - mutex_init(&vram_lock); - mutex_init(&reset_ctl.reset_lock); - mutex_init(&srbm_mutex); - mutex_init(&grbm_idx_mutex); - spin_lock_init(&mmio_idx_lock); + mutex_init(&locks->userq_sch_mutex); + mutex_init(&locks->userq_mutex); + mutex_init(&locks->notifier_lock); + mutex_init(&locks->vram_lock); + mutex_init(&locks->reset_lock); + mutex_init(&locks->srbm_mutex); + mutex_init(&locks->grbm_idx_mutex); + spin_lock_init(&locks->mmio_idx_lock); /* * Associate dummy locks with the same class keys used for real * driver locks. This ensures lockdep connects the ordering learned * here with the actual locks used at runtime. 
*/ - lockdep_set_class(&userq_sch_mutex, &amdgpu_userq_sch_mutex_key); - lockdep_set_class(&userq_mutex, &amdgpu_userq_mutex_key); - lockdep_set_class(&notifier_lock, &amdgpu_notifier_lock_key); - lockdep_set_class(&vram_lock, &amdgpu_vram_lock_key); + lockdep_set_class(&locks->userq_sch_mutex, &amdgpu_userq_sch_mutex_key); + lockdep_set_class(&locks->userq_mutex, &amdgpu_userq_mutex_key); + lockdep_set_class(&locks->notifier_lock, &amdgpu_notifier_lock_key); + lockdep_set_class(&locks->vram_lock, &amdgpu_vram_lock_key); lockdep_set_class(&reset_domain->sem, &amdgpu_reset_sem_key); - lockdep_set_class(&reset_ctl.reset_lock, &amdgpu_reset_lock_key); - lockdep_set_class(&srbm_mutex, &amdgpu_srbm_lock_key); - lockdep_set_class(&grbm_idx_mutex, &amdgpu_grbm_lock_key); - lockdep_set_class(&mmio_idx_lock, &amdgpu_mmio_lock_key); - + lockdep_set_class(&locks->reset_lock, &amdgpu_reset_lock_key); + lockdep_set_class(&locks->srbm_mutex, &amdgpu_srbm_lock_key); + lockdep_set_class(&locks->grbm_idx_mutex, &amdgpu_grbm_lock_key); + lockdep_set_class(&locks->mmio_idx_lock, &amdgpu_mmio_lock_key); /* * Take locks in the correct order to train lockdep. * This establishes the dependency chain. */ /* Level 1: Global userq scheduler mutex (outermost) */ - mutex_lock(&userq_sch_mutex); + mutex_lock(&locks->userq_sch_mutex); /* Level 2: Per-context userq mutex */ - mutex_lock(&userq_mutex); - + mutex_lock(&locks->userq_mutex); /* Level 3: MMU notifier lock */ - mutex_lock(&notifier_lock); - + mutex_lock(&locks->notifier_lock); /* Level 4: VRAM allocator lock */ - mutex_lock(&vram_lock); - + mutex_lock(&locks->vram_lock); /* Level 5: Reset domain semaphore */ down_read(&reset_domain->sem); /* Level 6: Reset control lock */ - mutex_lock(&reset_ctl.reset_lock); - + mutex_lock(&locks->reset_lock); /* * Mark potential memory reclaim boundary. * GPU operations might trigger memory allocation/reclaim. 
@@ -157,36 +161,35 @@ int amdgpu_lockdep_init(void) fs_reclaim_acquire(GFP_KERNEL); /* Level 7: SRBM register access */ - mutex_lock(&srbm_mutex); - + mutex_lock(&locks->srbm_mutex); /* Level 8: GRBM index access */ - mutex_lock(&grbm_idx_mutex); + mutex_lock(&locks->grbm_idx_mutex); /* Level 9: MMIO index access (innermost lock, spinlock) */ - spin_lock_irqsave(&mmio_idx_lock, flags); - + spin_lock_irqsave(&locks->mmio_idx_lock, flags); /* * All locks acquired in order. * Lockdep has now learned the valid dependency chain. */ /* Release in reverse order */ - spin_unlock_irqrestore(&mmio_idx_lock, flags); - mutex_unlock(&grbm_idx_mutex); - mutex_unlock(&srbm_mutex); - + spin_unlock_irqrestore(&locks->mmio_idx_lock, flags); + mutex_unlock(&locks->grbm_idx_mutex); + mutex_unlock(&locks->srbm_mutex); fs_reclaim_release(GFP_KERNEL); - mutex_unlock(&reset_ctl.reset_lock); + mutex_unlock(&locks->reset_lock); up_read(&reset_domain->sem); - mutex_unlock(&vram_lock); - mutex_unlock(&notifier_lock); - mutex_unlock(&userq_mutex); - mutex_unlock(&userq_sch_mutex); + + mutex_unlock(&locks->vram_lock); + mutex_unlock(&locks->notifier_lock); + mutex_unlock(&locks->userq_mutex); + mutex_unlock(&locks->userq_sch_mutex); /* Cleanup */ amdgpu_reset_put_reset_domain(reset_domain); + kfree(locks); pr_info("AMDGPU: Lockdep annotations initialized (9 lock levels)\n"); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 2740de94e93c..16c060badaee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -515,6 +515,15 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (new_mem->mem_type == TTM_PL_TT || new_mem->mem_type == AMDGPU_PL_PREEMPT) { + if (old_mem && (old_mem->mem_type == TTM_PL_TT || + old_mem->mem_type == AMDGPU_PL_PREEMPT)) { + r = ttm_bo_wait_ctx(bo, ctx); + if (r) + return r; + + amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm); + } + r = 
amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem); if (r) return r; @@ -549,6 +558,15 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, ttm_bo_assign_mem(bo, new_mem); return 0; } + if ((old_mem->mem_type == TTM_PL_TT || + old_mem->mem_type == AMDGPU_PL_PREEMPT) && + (new_mem->mem_type == TTM_PL_TT || + new_mem->mem_type == AMDGPU_PL_PREEMPT)) { + amdgpu_bo_move_notify(bo, evict, new_mem); + ttm_resource_free(bo, &bo->resource); + ttm_bo_assign_mem(bo, new_mem); + return 0; + } if (old_mem->mem_type == AMDGPU_PL_GDS || old_mem->mem_type == AMDGPU_PL_GWS || diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 3a3bc0d370fa..480bf88def46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -135,7 +135,7 @@ MODULE_FIRMWARE(FIRMWARE_VEGA12); MODULE_FIRMWARE(FIRMWARE_VEGA20); static void amdgpu_uvd_idle_work_handler(struct work_struct *work); -static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo); +static void amdgpu_uvd_force_into_vcpu_segment(struct amdgpu_bo *abo); static int amdgpu_uvd_create_msg_bo_helper(struct amdgpu_device *adev, uint32_t size, @@ -158,7 +158,7 @@ static int amdgpu_uvd_create_msg_bo_helper(struct amdgpu_device *adev, amdgpu_bo_kunmap(bo); amdgpu_bo_unpin(bo); amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM); - amdgpu_uvd_force_into_uvd_segment(bo); + amdgpu_uvd_force_into_vcpu_segment(bo); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) goto err; @@ -188,6 +188,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) const struct common_firmware_header *hdr; unsigned int family_id; int i, j, r; + u32 vcpu_bo_domain; INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler); @@ -319,12 +320,20 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + /* UVD 
5.0 and newer HW can use 64 bit addressing. */ + adev->uvd.address_64_bit = + !amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0); + + vcpu_bo_domain = AMDGPU_GEM_DOMAIN_VRAM; + if (adev->uvd.address_64_bit) + vcpu_bo_domain |= AMDGPU_GEM_DOMAIN_GTT; + for (j = 0; j < adev->uvd.num_uvd_inst; j++) { if (adev->uvd.harvest_config & (1 << j)) continue; + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT, + vcpu_bo_domain, &adev->uvd.inst[j].vcpu_bo, &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr); @@ -339,10 +348,6 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) adev->uvd.filp[i] = NULL; } - /* from uvd v5.0 HW addressing capacity increased to 64 bits */ - if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) - adev->uvd.address_64_bit = true; - r = amdgpu_uvd_create_msg_bo_helper(adev, 128 << 10, &adev->uvd.ib_bo); if (r) return r; @@ -545,6 +550,24 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp) } } +static void amdgpu_uvd_force_into_vcpu_segment(struct amdgpu_bo *bo) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct amdgpu_bo *vcpu_bo = adev->uvd.inst[0].vcpu_bo; + struct amdgpu_res_cursor vcpu_cur; + + amdgpu_res_first(vcpu_bo->tbo.resource, 0, + amdgpu_bo_size(vcpu_bo), &vcpu_cur); + + bo->placement.num_placement = 1; + bo->placement.placement = &bo->placements[0]; + bo->placements[0].fpfn = ALIGN_DOWN(vcpu_cur.start, SZ_256M) >> PAGE_SHIFT; + bo->placements[0].lpfn = bo->placements[0].fpfn + (SZ_256M >> PAGE_SHIFT); + bo->placements[0].mem_type = vcpu_bo->tbo.resource->mem_type; + if (bo->placements[0].mem_type == TTM_PL_VRAM) + bo->placements[0].flags |= TTM_PL_FLAG_CONTIGUOUS; +} + static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo) { int i; @@ -595,13 +618,10 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx) if (!ctx->parser->adev->uvd.address_64_bit) { /* 
check if it's a message or feedback command */ cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1; - if (cmd == 0x0 || cmd == 0x3) { - /* yes, force it into VRAM */ - uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; - - amdgpu_bo_placement_from_domain(bo, domain); - } - amdgpu_uvd_force_into_uvd_segment(bo); + if (cmd == 0x0 || cmd == 0x3) + amdgpu_uvd_force_into_vcpu_segment(bo); + else + amdgpu_uvd_force_into_uvd_segment(bo); r = ttm_bo_validate(&bo->tbo, &bo->placement, &tctx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 7d51880b4860..fee4c94c2585 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2920,47 +2920,56 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } /** - * amdgpu_vm_lock_by_pasid - return an amdgpu_vm and its root bo from a pasid, if possible. + * amdgpu_vm_lock_by_pasid - look up a VM by PASID and lock its root PD * @adev: amdgpu device pointer - * @root: root BO of the VM * @pasid: PASID of the VM - * The caller needs to unreserve and unref the root bo on success. + * @exec: drm_exec context to lock the root PD in + * + * Must be called from within a drm_exec_until_all_locked() loop; the caller + * runs drm_exec_retry_on_contention() afterwards. The drm_exec context holds + * a reference on the root BO until it is finalised. + * + * Return: the VM on success, or NULL if the PASID has no VM, the VM is being + * torn down, or locking the root PD failed. */ struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev, - struct amdgpu_bo **root, u32 pasid) + u32 pasid, struct drm_exec *exec) { unsigned long irqflags; + struct amdgpu_bo *root; struct amdgpu_vm *vm; int r; xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); vm = xa_load(&adev->vm_manager.pasids, pasid); - *root = vm ? amdgpu_bo_ref(vm->root.bo) : NULL; + root = vm ? 
amdgpu_bo_ref(vm->root.bo) : NULL; xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); - if (!*root) + if (!root) return NULL; - r = amdgpu_bo_reserve(*root, true); - if (r) - goto error_unref; + r = drm_exec_lock_obj(exec, &root->tbo.base); + if (r) { + amdgpu_bo_unref(&root); + return NULL; + } /* Double check that the VM still exists */ xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); vm = xa_load(&adev->vm_manager.pasids, pasid); - if (vm && vm->root.bo != *root) + if (vm && vm->root.bo != root) vm = NULL; xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); - if (!vm) - goto error_unlock; + if (!vm) { + drm_exec_unlock_obj(exec, &root->tbo.base); + amdgpu_bo_unref(&root); + return NULL; + } - return vm; -error_unlock: - amdgpu_bo_unreserve(*root); + /* The drm_exec context holds its own reference on the root BO. */ + amdgpu_bo_unref(&root); -error_unref: - amdgpu_bo_unref(root); - return NULL; + return vm; } /** @@ -2982,33 +2991,49 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, uint64_t ts, bool write_fault) { bool is_compute_context = false; - struct amdgpu_bo *root; + struct drm_exec exec; uint64_t value, flags; struct amdgpu_vm *vm; int r; - vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid); - if (!vm) + drm_exec_init(&exec, 0, 1); + drm_exec_until_all_locked(&exec) { + vm = amdgpu_vm_lock_by_pasid(adev, pasid, &exec); + drm_exec_retry_on_contention(&exec); + if (!vm) + break; + } + if (!vm) { + drm_exec_fini(&exec); return false; + } is_compute_context = vm->is_compute_context; if (is_compute_context) { - /* Unreserve root since svm_range_restore_pages might try to reserve it. */ - /* TODO: rework svm_range_restore_pages so that this isn't necessary. */ - amdgpu_bo_unreserve(root); + /* Release the root PD lock since svm_range_restore_pages + * might try to take it. + * TODO: rework svm_range_restore_pages so that this isn't + * necessary. 
+ */ + drm_exec_fini(&exec); if (!svm_range_restore_pages(adev, pasid, vmid, - node_id, addr >> PAGE_SHIFT, ts, write_fault)) { - amdgpu_bo_unref(&root); + node_id, addr >> PAGE_SHIFT, ts, write_fault)) return true; - } - amdgpu_bo_unref(&root); /* Re-acquire the VM lock, could be that the VM was freed in between. */ - vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid); - if (!vm) + drm_exec_init(&exec, 0, 1); + drm_exec_until_all_locked(&exec) { + vm = amdgpu_vm_lock_by_pasid(adev, pasid, &exec); + drm_exec_retry_on_contention(&exec); + if (!vm) + break; + } + if (!vm) { + drm_exec_fini(&exec); return false; + } } addr /= AMDGPU_GPU_PAGE_SIZE; @@ -3032,7 +3057,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, value = 0; } - r = dma_resv_reserve_fences(root->tbo.base.resv, 1); + r = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1); if (r) { pr_debug("failed %d to reserve fence slot\n", r); goto error_unlock; @@ -3046,12 +3071,10 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, r = amdgpu_vm_update_pdes(adev, vm, true); error_unlock: - amdgpu_bo_unreserve(root); + drm_exec_fini(&exec); if (r < 0) dev_err(adev->dev, "Can't handle page fault (%d)\n", r); - amdgpu_bo_unref(&root); - return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 3695299f1a03..b32f51a78cd8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -592,7 +592,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, bool write_fault); struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev, - struct amdgpu_bo **root, u32 pasid); + u32 pasid, struct drm_exec *exec); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 47721d0c3781..81a759a98725 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4071,6 +4071,41 @@ err_priv_inst: return r; } +static void gfx_v9_0_deactivate_kcq_hqd(struct amdgpu_device *adev) +{ + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + for (int i = 0; i < adev->gfx.num_compute_rings; i++) { + u32 tmp; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); + /* disable the queue if it's active */ + if (tmp & CP_HQD_ACTIVE__ACTIVE_MASK) { + int j; + + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); + for (j = 0; j < adev->usec_timeout; j++) { + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); + if (!(tmp & CP_HQD_ACTIVE__ACTIVE_MASK)) + break; + udelay(1); + } + if (j == AMDGPU_MAX_USEC_TIMEOUT) { + DRM_DEBUG("comp_%u_%u_%u dequeue request failed.\n", + ring->me, ring->pipe, ring->queue); + /* Manual disable if dequeue request times out */ + WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0); + } + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0); + } + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); +} + static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -4095,6 +4130,10 @@ static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block) return 0; } + if ((adev->flags & AMD_IS_APU) && amdgpu_in_reset(adev) && + amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_MODE2) + gfx_v9_0_deactivate_kcq_hqd(adev); + /* Use deinitialize sequence from CAIL when unbinding device from driver, * otherwise KIQ is hanging when binding back */ diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index 95b3f4e55ec3..699c274d357e 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -790,7 +790,7 @@ static void ih_v6_1_set_interrupt_funcs(struct amdgpu_device *adev) const 
struct amdgpu_ip_block_version ih_v6_1_ip_block = { .type = AMD_IP_BLOCK_TYPE_IH, .major = 6, - .minor = 0, + .minor = 1, .rev = 0, .funcs = &ih_v6_1_ip_funcs, }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index a2b100d14425..531e20748198 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1299,18 +1299,11 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, return -EINVAL; } - devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), - GFP_KERNEL); - if (!devices_arr) - return -ENOMEM; + devices_arr = memdup_array_user((void *)args->device_ids_array_ptr, + args->n_devices, sizeof(*devices_arr)); - err = copy_from_user(devices_arr, - (void __user *)args->device_ids_array_ptr, - args->n_devices * sizeof(*devices_arr)); - if (err != 0) { - err = -EFAULT; - goto copy_from_user_failed; - } + if (IS_ERR(devices_arr)) + return PTR_ERR(devices_arr); mutex_lock(&p->mutex); pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle)); @@ -1391,7 +1384,6 @@ get_mem_obj_from_handle_failed: map_memory_to_gpu_failed: sync_memory_failed: mutex_unlock(&p->mutex); -copy_from_user_failed: kfree(devices_arr); return err; @@ -1416,18 +1408,11 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, return -EINVAL; } - devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), - GFP_KERNEL); - if (!devices_arr) - return -ENOMEM; + devices_arr = memdup_array_user((void *)args->device_ids_array_ptr, + args->n_devices, sizeof(*devices_arr)); - err = copy_from_user(devices_arr, - (void __user *)args->device_ids_array_ptr, - args->n_devices * sizeof(*devices_arr)); - if (err != 0) { - err = -EFAULT; - goto copy_from_user_failed; - } + if (IS_ERR(devices_arr)) + return PTR_ERR(devices_arr); mutex_lock(&p->mutex); pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle)); @@ -1493,7 +1478,6 @@ get_mem_obj_from_handle_failed: 
unmap_memory_from_gpu_failed: sync_memory_failed: mutex_unlock(&p->mutex); -copy_from_user_failed: kfree(devices_arr); return err; } @@ -1562,16 +1546,10 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, if (!dev) return -EINVAL; - if (args->metadata_ptr) { - metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL); - if (!metadata_buffer) - return -ENOMEM; - } - /* Get dmabuf info from KGD */ r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd, &dmabuf_adev, &args->size, - metadata_buffer, args->metadata_size, + &metadata_buffer, args->metadata_size, &args->metadata_size, &flags, &xcp_id); if (r) goto exit; @@ -1583,7 +1561,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, args->flags = flags; /* Copy metadata buffer to user mode */ - if (metadata_buffer) { + if (metadata_buffer && args->metadata_ptr) { r = copy_to_user((void __user *)args->metadata_ptr, metadata_buffer, args->metadata_size); if (r != 0) @@ -2359,17 +2337,11 @@ static int criu_restore_devices(struct kfd_process *p, if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size) return -EINVAL; - device_buckets = kmalloc_objs(*device_buckets, args->num_devices); - if (!device_buckets) - return -ENOMEM; + device_buckets = memdup_array_user((void *)args->devices, + args->num_devices, sizeof(*device_buckets)); - ret = copy_from_user(device_buckets, (void __user *)args->devices, - args->num_devices * sizeof(*device_buckets)); - if (ret) { - pr_err("Failed to copy devices buckets from user\n"); - ret = -EFAULT; - goto exit; - } + if (IS_ERR(device_buckets)) + return PTR_ERR(device_buckets); for (i = 0; i < args->num_devices; i++) { struct kfd_node *dev; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index 05c74887fd6f..fdcf7f2d1b5b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -153,14 +153,16 @@ void __iomem *kfd_get_kernel_doorbell(struct 
kfd_dev *kfd, u32 inx; mutex_lock(&kfd->doorbell_mutex); + inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32)); + if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { + mutex_unlock(&kfd->doorbell_mutex); + return NULL; + } __set_bit(inx, kfd->doorbell_bitmap); mutex_unlock(&kfd->doorbell_mutex); - if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) - return NULL; - *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 28dc6886c1ff..226e76ae0be7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -424,7 +424,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, migrate.dst = migrate.src + npages; scratch = (dma_addr_t *)(migrate.dst + npages); - kfd_smi_event_migration_start(node, p->lead_thread->pid, + kfd_smi_event_migration_start(node, p->lead_thread, start >> PAGE_SHIFT, end >> PAGE_SHIFT, 0, node->id, prange->prefetch_loc, prange->preferred_loc, trigger); @@ -462,7 +462,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, out_free: kvfree(buf); - kfd_smi_event_migration_end(node, p->lead_thread->pid, + kfd_smi_event_migration_end(node, p->lead_thread, start >> PAGE_SHIFT, end >> PAGE_SHIFT, 0, node->id, trigger, r); out: @@ -727,7 +727,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, migrate.fault_page = fault_page; scratch = (dma_addr_t *)(migrate.dst + npages); - kfd_smi_event_migration_start(node, p->lead_thread->pid, + kfd_smi_event_migration_start(node, p->lead_thread, start >> PAGE_SHIFT, end >> PAGE_SHIFT, node->id, 0, prange->prefetch_loc, prange->preferred_loc, trigger); @@ -766,7 +766,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, out_free: kvfree(buf); - kfd_smi_event_migration_end(node, p->lead_thread->pid, + kfd_smi_event_migration_end(node, p->lead_thread, 
start >> PAGE_SHIFT, end >> PAGE_SHIFT, node->id, 0, trigger, r); out: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 368283d53077..ca71fa726e32 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1175,10 +1175,12 @@ static void kfd_process_remove_sysfs(struct kfd_process *p) if (!p->kobj) return; - sysfs_remove_file(p->kobj, &p->attr_pasid); - kobject_del(p->kobj_queues); - kobject_put(p->kobj_queues); - p->kobj_queues = NULL; + if (p->kobj_queues) { + sysfs_remove_file(p->kobj, &p->attr_pasid); + kobject_del(p->kobj_queues); + kobject_put(p->kobj_queues); + p->kobj_queues = NULL; + } for (i = 0; i < p->n_pdds; i++) { pdd = p->pdds[i]; @@ -1186,17 +1188,21 @@ static void kfd_process_remove_sysfs(struct kfd_process *p) sysfs_remove_file(p->kobj, &pdd->attr_vram); sysfs_remove_file(p->kobj, &pdd->attr_sdma); - sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict); - if (pdd->dev->kfd2kgd->get_cu_occupancy) - sysfs_remove_file(pdd->kobj_stats, - &pdd->attr_cu_occupancy); - kobject_del(pdd->kobj_stats); - kobject_put(pdd->kobj_stats); - pdd->kobj_stats = NULL; + if (pdd->kobj_stats) { + sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict); + if (pdd->dev->kfd2kgd->get_cu_occupancy) + sysfs_remove_file(pdd->kobj_stats, + &pdd->attr_cu_occupancy); + kobject_del(pdd->kobj_stats); + kobject_put(pdd->kobj_stats); + pdd->kobj_stats = NULL; + } } for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) { pdd = p->pdds[i]; + if (!pdd->kobj_counters) + continue; sysfs_remove_file(pdd->kobj_counters, &pdd->attr_faults); sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_in); @@ -1254,6 +1260,13 @@ static void kfd_process_wq_release(struct work_struct *work) kfd_debugfs_remove_process(p); + /* + * Remove the proc/sysfs entries before destroying PDDs. 
The removal path + * walks the PDD array and sysfs callbacks dereference PDD fields, so the + * backing data must remain valid until sysfs removal has completed. + */ + kfd_process_remove_sysfs(p); + kfd_process_kunmap_signal_bo(p); kfd_process_free_outstanding_kfd_bos(p); svm_range_list_fini(p); @@ -1267,11 +1280,6 @@ static void kfd_process_wq_release(struct work_struct *work) put_task_struct(p->lead_thread); - /* the last step is removing process entries under /sys - * to indicate the process has been terminated. - */ - kfd_process_remove_sysfs(p); - kfree(p); } @@ -1969,7 +1977,7 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger) struct kfd_process_device *pdd = p->pdds[i]; struct device *dev = pdd->dev->adev->dev; - kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid, + kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread, trigger); r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm, @@ -1999,7 +2007,7 @@ fail: if (n_evicted == 0) break; - kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); + kfd_smi_event_queue_restore(pdd->dev, p->lead_thread); if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, &pdd->qpd)) @@ -2022,7 +2030,7 @@ int kfd_process_restore_queues(struct kfd_process *p) struct kfd_process_device *pdd = p->pdds[i]; struct device *dev = pdd->dev->adev->dev; - kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); + kfd_smi_event_queue_restore(pdd->dev, p->lead_thread); r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, &pdd->qpd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 44e39ce222b7..0ac35789b239 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -962,8 +962,8 @@ static void set_queue_properties_from_criu(struct queue_properties *qp, qp->priority = q_data->priority; qp->queue_address = q_data->q_address; 
qp->queue_size = q_data->q_size; - qp->read_ptr = (uint32_t *) q_data->read_ptr_addr; - qp->write_ptr = (uint32_t *) q_data->write_ptr_addr; + qp->read_ptr = (void __user *)q_data->read_ptr_addr; + qp->write_ptr = (void __user *)q_data->write_ptr_addr; qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address; qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size; qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address; @@ -1042,10 +1042,18 @@ int kfd_criu_restore_queue(struct kfd_process *p, memset(&qp, 0, sizeof(qp)); set_queue_properties_from_criu(&qp, q_data, NUM_XCC(pdd->dev->adev->gfx.xcc_mask)); + ret = kfd_queue_acquire_buffers(pdd, &qp); + if (ret) { + pr_debug("failed to acquire user queue buffers for CRIU\n"); + goto exit; + } + print_queue_properties(&qp); ret = pqm_create_queue(&p->pqm, pdd->dev, &qp, &queue_id, q_data, mqd, ctl_stack, NULL); if (ret) { + kfd_queue_unref_bo_vas(pdd, &qp); + kfd_queue_release_buffers(pdd, &qp); pr_err("Failed to create new queue err:%d\n", ret); goto exit; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index dfbde5a571f6..e659cd50eb0b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -195,17 +195,35 @@ static void add_event_to_kfifo(pid_t pid, struct kfd_node *dev, rcu_read_unlock(); } +/** + * kfd_smi_task_to_pid - Convert task to namespace-aware PID + * @task: task_struct pointer (typically p->lead_thread) + * + * Returns the PID as it appears in the task's own PID namespace. + * For containerized processes, this returns the container-local PID + * (what getpid() returns), not the global host PID. + * + * Returns 0 if task is NULL. + */ +static inline pid_t kfd_smi_task_to_pid(struct task_struct *task) +{ + return task ? 
task_tgid_nr_ns(task, task_active_pid_ns(task)) : 0; +} + __printf(4, 5) -static void kfd_smi_event_add(pid_t pid, struct kfd_node *dev, +static void kfd_smi_event_add(struct task_struct *task, struct kfd_node *dev, unsigned int event, char *fmt, ...) { char fifo_in[KFD_SMI_EVENT_MSG_SIZE]; int len; va_list args; + pid_t pid; if (list_empty(&dev->smi_clients)) return; + pid = kfd_smi_task_to_pid(task); + len = snprintf(fifo_in, sizeof(fifo_in), "%x ", event); va_start(args, fmt); @@ -234,14 +252,15 @@ void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset, amdgpu_reset_get_desc(reset_context, reset_cause, sizeof(reset_cause)); - kfd_smi_event_add(0, dev, event, KFD_EVENT_FMT_UPDATE_GPU_RESET( + kfd_smi_event_add(NULL, dev, event, KFD_EVENT_FMT_UPDATE_GPU_RESET( dev->reset_seq_num, reset_cause)); } void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev, uint64_t throttle_bitmask) { - kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, KFD_EVENT_FMT_THERMAL_THROTTLING( + kfd_smi_event_add(NULL, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, + KFD_EVENT_FMT_THERMAL_THROTTLING( throttle_bitmask, amdgpu_dpm_get_thermal_throttling_counter(dev->adev))); } @@ -254,67 +273,67 @@ void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid) if (task_info) { /* Report VM faults from user applications, not retry from kernel */ if (task_info->task.pid) - kfd_smi_event_add(task_info->tgid, dev, - KFD_SMI_EVENT_VMFAULT, - KFD_EVENT_FMT_VMFAULT(task_info->task.pid, - task_info->task.comm)); + kfd_smi_event_add(NULL, dev, KFD_SMI_EVENT_VMFAULT, KFD_EVENT_FMT_VMFAULT( + task_info->task.pid, task_info->task.comm)); amdgpu_vm_put_task_info(task_info); } } -void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid, +void kfd_smi_event_page_fault_start(struct kfd_node *node, struct task_struct *task, unsigned long address, bool write_fault, ktime_t ts) { - kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_START, - 
KFD_EVENT_FMT_PAGEFAULT_START(ktime_to_ns(ts), pid, - address, node->id, write_fault ? 'W' : 'R')); + kfd_smi_event_add(task, node, KFD_SMI_EVENT_PAGE_FAULT_START, + KFD_EVENT_FMT_PAGEFAULT_START(ktime_to_ns(ts), + kfd_smi_task_to_pid(task), address, node->id, + write_fault ? 'W' : 'R')); } -void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid, +void kfd_smi_event_page_fault_end(struct kfd_node *node, struct task_struct *task, unsigned long address, bool migration) { - kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_END, + kfd_smi_event_add(task, node, KFD_SMI_EVENT_PAGE_FAULT_END, KFD_EVENT_FMT_PAGEFAULT_END(ktime_get_boottime_ns(), - pid, address, node->id, migration ? 'M' : 'U')); + kfd_smi_task_to_pid(task), address, node->id, + migration ? 'M' : 'U')); } -void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid, +void kfd_smi_event_migration_start(struct kfd_node *node, struct task_struct *task, unsigned long start, unsigned long end, uint32_t from, uint32_t to, uint32_t prefetch_loc, uint32_t preferred_loc, uint32_t trigger) { - kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_START, - KFD_EVENT_FMT_MIGRATE_START( - ktime_get_boottime_ns(), pid, start, end - start, - from, to, prefetch_loc, preferred_loc, trigger)); + kfd_smi_event_add(task, node, KFD_SMI_EVENT_MIGRATE_START, + KFD_EVENT_FMT_MIGRATE_START(ktime_get_boottime_ns(), + kfd_smi_task_to_pid(task), start, end - start, from, + to, prefetch_loc, preferred_loc, trigger)); } -void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid, +void kfd_smi_event_migration_end(struct kfd_node *node, struct task_struct *task, unsigned long start, unsigned long end, uint32_t from, uint32_t to, uint32_t trigger, int error_code) { - kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_END, - KFD_EVENT_FMT_MIGRATE_END( - ktime_get_boottime_ns(), pid, start, end - start, - from, to, trigger, error_code)); + kfd_smi_event_add(task, node, KFD_SMI_EVENT_MIGRATE_END, + 
KFD_EVENT_FMT_MIGRATE_END(ktime_get_boottime_ns(), + kfd_smi_task_to_pid(task), start, end - start, from, + to, trigger, error_code)); } -void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid, +void kfd_smi_event_queue_eviction(struct kfd_node *node, struct task_struct *task, uint32_t trigger) { - kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_EVICTION, - KFD_EVENT_FMT_QUEUE_EVICTION(ktime_get_boottime_ns(), pid, - node->id, trigger)); + kfd_smi_event_add(task, node, KFD_SMI_EVENT_QUEUE_EVICTION, + KFD_EVENT_FMT_QUEUE_EVICTION(ktime_get_boottime_ns(), + kfd_smi_task_to_pid(task), node->id, trigger)); } -void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid) +void kfd_smi_event_queue_restore(struct kfd_node *node, struct task_struct *task) { - kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_RESTORE, - KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), pid, - node->id, '0')); + kfd_smi_event_add(task, node, KFD_SMI_EVENT_QUEUE_RESTORE, + KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), + kfd_smi_task_to_pid(task), node->id, '0')); } void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm) @@ -329,21 +348,23 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm) for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; - kfd_smi_event_add(p->lead_thread->pid, pdd->dev, + kfd_smi_event_add(p->lead_thread, pdd->dev, KFD_SMI_EVENT_QUEUE_RESTORE, KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), - p->lead_thread->pid, pdd->dev->id, 'R')); + kfd_smi_task_to_pid(p->lead_thread), + pdd->dev->id, 'R')); } kfd_unref_process(p); } -void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid, +void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, struct task_struct *task, unsigned long address, unsigned long last, uint32_t trigger) { - kfd_smi_event_add(pid, node, KFD_SMI_EVENT_UNMAP_FROM_GPU, + kfd_smi_event_add(task, node, KFD_SMI_EVENT_UNMAP_FROM_GPU, 
KFD_EVENT_FMT_UNMAP_FROM_GPU(ktime_get_boottime_ns(), - pid, address, last - address + 1, node->id, trigger)); + kfd_smi_task_to_pid(task), address, + last - address + 1, node->id, trigger)); } void kfd_smi_event_process(struct kfd_process_device *pdd, bool start) @@ -358,7 +379,7 @@ void kfd_smi_event_process(struct kfd_process_device *pdd, bool start) task_info = amdgpu_vm_get_task_info_vm(avm); if (task_info) { - kfd_smi_event_add(task_info->tgid, pdd->dev, + kfd_smi_event_add(NULL, pdd->dev, start ? KFD_SMI_EVENT_PROCESS_START : KFD_SMI_EVENT_PROCESS_END, KFD_EVENT_FMT_PROCESS(task_info->task.pid, @@ -387,7 +408,7 @@ int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd) spin_lock_init(&client->lock); client->events = 0; client->dev = dev; - client->pid = current->tgid; + client->pid = kfd_smi_task_to_pid(current); client->suser = capable(CAP_SYS_ADMIN); spin_lock(&dev->smi_lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h index bb4d72b57387..afa93d7cfa7f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h @@ -32,25 +32,25 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev, uint64_t throttle_bitmask); void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset, struct amdgpu_reset_context *reset_context); -void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid, +void kfd_smi_event_page_fault_start(struct kfd_node *node, struct task_struct *task, unsigned long address, bool write_fault, ktime_t ts); -void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid, +void kfd_smi_event_page_fault_end(struct kfd_node *node, struct task_struct *task, unsigned long address, bool migration); -void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid, +void kfd_smi_event_migration_start(struct kfd_node *node, struct task_struct *task, unsigned long start, unsigned long end, uint32_t from, 
uint32_t to, uint32_t prefetch_loc, uint32_t preferred_loc, uint32_t trigger); -void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid, +void kfd_smi_event_migration_end(struct kfd_node *node, struct task_struct *task, unsigned long start, unsigned long end, uint32_t from, uint32_t to, uint32_t trigger, int error_code); -void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid, +void kfd_smi_event_queue_eviction(struct kfd_node *node, struct task_struct *task, uint32_t trigger); -void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid); +void kfd_smi_event_queue_restore(struct kfd_node *node, struct task_struct *task); void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm); -void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid, +void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, struct task_struct *task, unsigned long address, unsigned long last, uint32_t trigger); void kfd_smi_event_process(struct kfd_process_device *pdd, bool start); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 3841943da5ec..0900bb23349e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1144,7 +1144,7 @@ static int svm_range_split_tail(struct svm_range *prange, uint64_t new_last, struct list_head *insert_list, struct list_head *remap_list) { - unsigned long last_align_down = ALIGN_DOWN(prange->last, 512); + unsigned long last_align_down = ALIGN_DOWN(prange->last + 1, 512); unsigned long start_align = ALIGN(prange->start, 512); bool huge_page_mapping = last_align_down > start_align; struct svm_range *tail = NULL; @@ -1168,7 +1168,7 @@ static int svm_range_split_head(struct svm_range *prange, uint64_t new_start, struct list_head *insert_list, struct list_head *remap_list) { - unsigned long last_align_down = ALIGN_DOWN(prange->last, 512); + unsigned long last_align_down = ALIGN_DOWN(prange->last + 1, 512); unsigned long start_align = 
ALIGN(prange->start, 512); bool huge_page_mapping = last_align_down > start_align; struct svm_range *head = NULL; @@ -1181,8 +1181,8 @@ svm_range_split_head(struct svm_range *prange, uint64_t new_start, list_add(&head->list, insert_list); - if (huge_page_mapping && head->last + 1 > start_align && - head->last + 1 < last_align_down && (!IS_ALIGNED(head->last, 512))) + if (huge_page_mapping && new_start > start_align && + new_start < last_align_down && !IS_ALIGNED(new_start, 512)) list_add(&head->update_list, remap_list); return 0; @@ -1408,7 +1408,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, return -EINVAL; } - kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid, + kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread, start, last, trigger); r = svm_range_unmap_from_gpu(pdd->dev->adev, @@ -3205,7 +3205,7 @@ retry_write_locked: svms, prange->start, prange->last, best_loc, prange->actual_loc); - kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr, + kfd_smi_event_page_fault_start(node, p->lead_thread, addr, write_fault, timestamp); /* Align migration range start and size to granularity size */ @@ -3248,7 +3248,7 @@ retry_write_locked: r, svms, start, last); out_migrate_fail: - kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr, + kfd_smi_event_page_fault_end(node, p->lead_thread, addr, migration); out_unlock_range: @@ -4115,6 +4115,7 @@ exit: list_for_each_entry_safe(criu_svm_md, next, &svms->criu_svm_metadata_list, list) { pr_debug("freeing criu_svm_md[]\n\tstart: 0x%llx\n", criu_svm_md->data.start_addr); + list_del(&criu_svm_md->list); kfree(criu_svm_md); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 97ab1e83b318..d3a8d681227a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12942,13 +12942,11 @@ static bool amdgpu_dm_crtc_mem_type_changed(struct drm_device 
*dev, struct drm_plane_state *new_plane_state, *old_plane_state; drm_for_each_plane_mask(plane, dev, crtc_state->plane_mask) { - new_plane_state = drm_atomic_get_plane_state(state, plane); - old_plane_state = drm_atomic_get_plane_state(state, plane); + new_plane_state = drm_atomic_get_new_plane_state(state, plane); + old_plane_state = drm_atomic_get_old_plane_state(state, plane); - if (IS_ERR(new_plane_state) || IS_ERR(old_plane_state)) { - drm_err(dev, "Failed to get plane state for plane %s\n", plane->name); - return false; - } + if (!old_plane_state || !new_plane_state) + continue; if (old_plane_state->fb && new_plane_state->fb && get_mem_type(old_plane_state->fb) != get_mem_type(new_plane_state->fb)) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index f257ea91a34d..c6f94eb71ffa 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -95,8 +95,11 @@ static u32 edid_extract_panel_id(struct edid *edid) (u32)EDID_PRODUCT_ID(edid); } -static void apply_edid_quirks(struct drm_device *dev, struct edid *edid, struct dc_edid_caps *edid_caps) +static void apply_edid_quirks(struct dc_link *link, struct edid *edid, + struct dc_edid_caps *edid_caps) { + struct amdgpu_dm_connector *aconnector = link->priv; + struct drm_device *dev = aconnector->base.dev; uint32_t panel_id = edid_extract_panel_id(edid); switch (panel_id) { @@ -126,6 +129,11 @@ static void apply_edid_quirks(struct drm_device *dev, struct edid *edid, struct drm_dbg_driver(dev, "Disabling VSC on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.disable_colorimetry = true; break; + /* Workaround for monitors that get corrupted by the PHY SSC reduction */ + case drm_edid_encode_panel_id('D', 'E', 'L', 0x4147): + drm_dbg_driver(dev, "Skip PHY SSC reduction on panel id %X\n", panel_id); + link->wa_flags.skip_phy_ssc_reduction = true; + 
break; default: return; } @@ -147,7 +155,6 @@ enum dc_edid_status dm_helpers_parse_edid_caps( { struct amdgpu_dm_connector *aconnector = link->priv; struct drm_connector *connector = &aconnector->base; - struct drm_device *dev = connector->dev; struct edid *edid_buf = edid ? (struct edid *) edid->raw_edid : NULL; struct cea_sad *sads; int sad_count = -1; @@ -188,7 +195,7 @@ enum dc_edid_status dm_helpers_parse_edid_caps( edid_caps->frl_dsc_max_frl_rate, edid_caps->frl_dsc_total_chunk_kbytes); } - apply_edid_quirks(dev, edid_buf, edid_caps); + apply_edid_quirks(link, edid_buf, edid_caps); sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads); if (sad_count <= 0) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index e957657b06c7..c7f8e08feaf4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1859,6 +1859,7 @@ static const struct drm_plane_funcs dm_plane_funcs = { .atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state, .atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state, .format_mod_supported = amdgpu_dm_plane_format_mod_supported, + .format_mod_supported_async = amdgpu_dm_plane_format_mod_supported, #ifdef AMD_PRIVATE_COLOR .atomic_set_property = dm_atomic_plane_set_property, .atomic_get_property = dm_atomic_plane_get_property, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index 00c4be7c3aa4..ff47af3854b6 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -158,7 +158,6 @@ void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, if (new_clocks->zstate_support != DCN_ZSTATE_SUPPORT_DISALLOW && new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) { 
dcn31_smu_set_zstate_support(clk_mgr, new_clocks->zstate_support); - dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, true); clk_mgr_base->clks.zstate_support = new_clocks->zstate_support; } @@ -184,7 +183,6 @@ void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW && new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) { dcn31_smu_set_zstate_support(clk_mgr, DCN_ZSTATE_SUPPORT_DISALLOW); - dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, false); clk_mgr_base->clks.zstate_support = new_clocks->zstate_support; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index dd6f11ecb9c9..24f6304011ae 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -230,7 +230,6 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, if (new_clocks->zstate_support != DCN_ZSTATE_SUPPORT_DISALLOW && new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) { dcn314_smu_set_zstate_support(clk_mgr, new_clocks->zstate_support); - dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, true); clk_mgr_base->clks.zstate_support = new_clocks->zstate_support; } @@ -255,7 +254,6 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW && new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) { dcn314_smu_set_zstate_support(clk_mgr, DCN_ZSTATE_SUPPORT_DISALLOW); - dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, false); clk_mgr_base->clks.zstate_support = new_clocks->zstate_support; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 103013e2a0de..a69824e1eb26 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -419,6 +419,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (new_clocks->zstate_support != DCN_ZSTATE_SUPPORT_DISALLOW && new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) { dcn35_smu_set_zstate_support(clk_mgr, new_clocks->zstate_support); + dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, true); clk_mgr_base->clks.zstate_support = new_clocks->zstate_support; } @@ -438,6 +439,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW && new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) { dcn35_smu_set_zstate_support(clk_mgr, DCN_ZSTATE_SUPPORT_DISALLOW); + dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, false); clk_mgr_base->clks.zstate_support = new_clocks->zstate_support; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_0_ppt.c index fb1145691410..a214ddbd4c86 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_0_ppt.c @@ -227,9 +227,14 @@ static int smu_v15_0_0_system_features_control(struct smu_context *smu, bool en) struct amdgpu_device *adev = smu->adev; int ret = 0; - if (!en && !adev->in_s0ix) + if (!en && !adev->in_s0ix) { ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL); + /* SMU resets BIF_FB_EN to zero, re-enable MC access on APUs with SMU V15 */ + if (!ret && adev->nbio.funcs && adev->nbio.funcs->mc_access_enable) + adev->nbio.funcs->mc_access_enable(adev, true); + } + return ret; } diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 0e4f0678c53c..9d0d47c79dd1 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -288,6 +288,12 @@ static void intel_crtc_put_color_blobs(struct intel_crtc_state *crtc_state) 
drm_property_blob_put(crtc_state->pre_csc_lut); drm_property_blob_put(crtc_state->post_csc_lut); + + crtc_state->hw.degamma_lut = NULL; + crtc_state->hw.gamma_lut = NULL; + crtc_state->hw.ctm = NULL; + crtc_state->pre_csc_lut = NULL; + crtc_state->post_csc_lut = NULL; } void intel_crtc_free_hw_state(struct intel_crtc_state *crtc_state) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 189ae2d3cfc9..7bc9b956554b 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1256,9 +1256,22 @@ static void skl_sanitize_cdclk(struct intel_display *display) cdctl = intel_de_read(display, CDCLK_CTL); expected = (cdctl & CDCLK_FREQ_SEL_MASK) | skl_cdclk_decimal(display->cdclk.hw.cdclk); - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; + + if (cdctl != expected) { + cdctl &= ~CDCLK_FREQ_DECIMAL_MASK; + cdctl |= expected & CDCLK_FREQ_DECIMAL_MASK; + + if (cdctl != expected) + goto sanitize; + + drm_dbg_kms(display->drm, "Sanitizing CDCLK decimal divider (CDCLK_CTL 0x%x, expected 0x%x)\n", + intel_de_read(display, CDCLK_CTL), expected); + + intel_de_write(display, CDCLK_CTL, expected); + } + + /* All well; nothing to sanitize */ + return; sanitize: drm_dbg_kms(display->drm, "Sanitizing cdclk programmed by pre-os\n"); @@ -2354,11 +2367,25 @@ static void bxt_sanitize_cdclk(struct intel_display *display) * (PIPE_NONE). 
*/ cdctl &= ~bxt_cdclk_cd2x_pipe(display, INVALID_PIPE); - expected &= ~bxt_cdclk_cd2x_pipe(display, INVALID_PIPE); + cdctl |= bxt_cdclk_cd2x_pipe(display, INVALID_PIPE); - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; + if (cdctl != expected) { + if (DISPLAY_VER(display) < 20) { + cdctl &= ~CDCLK_FREQ_DECIMAL_MASK; + cdctl |= expected & CDCLK_FREQ_DECIMAL_MASK; + } + + if (cdctl != expected) + goto sanitize; + + drm_dbg_kms(display->drm, "Sanitizing CDCLK decimal divider (CDCLK_CTL 0x%x, expected 0x%x)\n", + intel_de_read(display, CDCLK_CTL), expected); + + intel_de_write(display, CDCLK_CTL, expected); + } + + /* All well; nothing to sanitize */ + return; sanitize: drm_dbg_kms(display->drm, "Sanitizing cdclk programmed by pre-os\n"); diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 205978c9feb6..6296635c4e79 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2652,9 +2652,6 @@ static void mtl_ddi_pre_enable_dp(struct intel_atomic_state *state, /* 3. Select Thunderbolt */ mtl_port_buf_ctl_io_selection(encoder); - /* 4. Enable Panel Power if PPS is required */ - intel_pps_on(intel_dp); - /* 5. Enable the port PLL */ intel_ddi_enable_clock(encoder, crtc_state); @@ -3708,6 +3705,14 @@ intel_ddi_pre_pll_enable(struct intel_atomic_state *state, else if (display->platform.geminilake || display->platform.broxton) bxt_dpio_phy_set_lane_optim_mask(encoder, crtc_state->lane_lat_optim_mask); + + /* + * There is no direct connection between the PLL and PPS, however + * enabling PPS before PLL is required to avoid PLL/DDI BUF timeouts + * during system resume. Do that matching the Bspec order as well. 
+ */ + if (DISPLAY_VER(display) >= 14) + intel_pps_on(&dig_port->dp); } static void adlp_tbt_to_dp_alt_switch_wa(struct intel_encoder *encoder) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index bcdc50491347..0aa3e6b4c781 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -726,6 +726,10 @@ static int mst_stream_compute_config(struct intel_encoder *encoder, if (ret) return ret; + ret = intel_pfit_compute_config(pipe_config, conn_state); + if (ret) + return ret; + for_each_joiner_candidate(connector, adjusted_mode, num_joined_pipes) { if (num_joined_pipes > 1) pipe_config->joiner_pipes = GENMASK(crtc->pipe + num_joined_pipes - 1, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 6ac0f23570f3..aeafe1742d30 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -613,6 +613,7 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, return -EINVAL; } + slot = array_index_nospec(slot, set->num_engines); if (set->engines[slot].type != I915_GEM_ENGINE_TYPE_INVALID) { drm_dbg(&i915->drm, "Invalid placement[%d], already occupied\n", slot); diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 3ac1a79b6f13..533215d6e9cb 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -906,6 +906,7 @@ struct radeon_fence *r100_copy_blit(struct radeon_device *rdev, { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; struct radeon_fence *fence; + uint64_t cur_src_offset, cur_dst_offset; uint32_t cur_pages; uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE; uint32_t pitch; @@ -934,6 +935,10 @@ struct radeon_fence *r100_copy_blit(struct radeon_device *rdev, cur_pages = 8191; } num_gpu_pages -= cur_pages; + cur_src_offset = src_offset + + (uint64_t)num_gpu_pages * 
RADEON_GPU_PAGE_SIZE; + cur_dst_offset = dst_offset + + (uint64_t)num_gpu_pages * RADEON_GPU_PAGE_SIZE; /* pages are in Y direction - height page width in X direction - width */ @@ -950,13 +955,13 @@ struct radeon_fence *r100_copy_blit(struct radeon_device *rdev, RADEON_DP_SRC_SOURCE_MEMORY | RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS); - radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10)); - radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10)); + radeon_ring_write(ring, (pitch << 22) | (cur_src_offset >> 10)); + radeon_ring_write(ring, (pitch << 22) | (cur_dst_offset >> 10)); radeon_ring_write(ring, (0x1fff) | (0x1fff << 16)); radeon_ring_write(ring, 0); radeon_ring_write(ring, (0x1fff) | (0x1fff << 16)); - radeon_ring_write(ring, num_gpu_pages); - radeon_ring_write(ring, num_gpu_pages); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); radeon_ring_write(ring, cur_pages | (stride_pixels << 16)); } radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0)); diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 09661f079d03..8e7b146880f4 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -16,14 +16,14 @@ subdir-ccflags-y += -I$(obj) -I$(src) hostprogs := xe_gen_wa_oob generated_oob := $(obj)/generated/xe_wa_oob.c $(obj)/generated/xe_wa_oob.h quiet_cmd_wa_oob = GEN $(notdir $(generated_oob)) - cmd_wa_oob = mkdir -p $(@D); $^ $(generated_oob) + cmd_wa_oob = mkdir -p $(@D); $(obj)/xe_gen_wa_oob $(src)/xe_wa_oob.rules $(generated_oob) $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ $(src)/xe_wa_oob.rules $(call cmd,wa_oob) generated_device_oob := $(obj)/generated/xe_device_wa_oob.c $(obj)/generated/xe_device_wa_oob.h quiet_cmd_device_wa_oob = GEN $(notdir $(generated_device_oob)) - cmd_device_wa_oob = mkdir -p $(@D); $^ $(generated_device_oob) + cmd_device_wa_oob = mkdir -p $(@D); $(obj)/xe_gen_wa_oob $(src)/xe_device_wa_oob.rules $(generated_device_oob) 
$(obj)/generated/%_device_wa_oob.c $(obj)/generated/%_device_wa_oob.h: $(obj)/xe_gen_wa_oob \ $(src)/xe_device_wa_oob.rules $(call cmd,device_wa_oob) diff --git a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h index 4d83461e538b..d6bc19ef277b 100644 --- a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h @@ -9,7 +9,11 @@ #define XELPG_GGTT_PTE_PAT0 BIT_ULL(52) #define XELPG_GGTT_PTE_PAT1 BIT_ULL(53) -#define XE_PTE_ADDR_MASK GENMASK_ULL(51, 12) +/* + * Mask for PTE address bits [51:shift]. + * shift is the lower address boundary of page. + */ +#define XE_PAGE_ADDR_MASK(shift) GENMASK_ULL(51, (shift)) #define GGTT_PTE_VFID GENMASK_ULL(11, 2) #define GUC_GGTT_TOP 0xFEE00000 diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index d224861b6f6f..abe25aedeead 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -526,7 +526,8 @@ int xe_device_init_early(struct xe_device *xe) err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev, xe->drm.anon_inode->i_mapping, - xe->drm.vma_offset_manager, 0); + xe->drm.vma_offset_manager, + TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M))); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index 21f7caf9ea08..1a019137ddf4 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -461,8 +461,14 @@ static void guc_capture_alloc_steered_lists(struct xe_guc *guc) if (!list || guc->capture->extlists) return; - total = bitmap_weight(gt->fuse_topo.g_dss_mask, sizeof(gt->fuse_topo.g_dss_mask) * 8) * - guc_capture_get_steer_reg_num(guc_to_xe(guc)); + { + xe_dss_mask_t all_dss; + + total = bitmap_weighted_or(all_dss, gt->fuse_topo.g_dss_mask, + gt->fuse_topo.c_dss_mask, + XE_MAX_DSS_FUSE_BITS) * + guc_capture_get_steer_reg_num(guc_to_xe(guc)); + } if (!total) return; diff --git a/drivers/gpu/drm/xe/xe_pt.c 
b/drivers/gpu/drm/xe/xe_pt.c index 2669ff5ee747..18a98667c0e6 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1602,23 +1602,21 @@ static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, return false; } -/* page_size = 2^(reclamation_size + XE_PTE_SHIFT) */ -#define COMPUTE_RECLAIM_ADDRESS_MASK(page_size) \ -({ \ - BUILD_BUG_ON(!__builtin_constant_p(page_size)); \ - ilog2(page_size) - XE_PTE_SHIFT; \ -}) - static int generate_reclaim_entry(struct xe_tile *tile, struct xe_page_reclaim_list *prl, u64 pte, struct xe_pt *xe_child) { struct xe_gt *gt = tile->primary_gt; struct xe_guc_page_reclaim_entry *reclaim_entries = prl->entries; - u64 phys_addr = pte & XE_PTE_ADDR_MASK; + bool is_2m = xe_child->level == 1 && (pte & XE_PDE_PS_2M); + bool is_64k = xe_child->level == 0 && ((pte & XE_PTE_PS64) || xe_child->is_compact); + u32 page_shift = is_2m ? ilog2(SZ_2M) : is_64k ? ilog2(SZ_64K) : ilog2(SZ_4K); + /* Physical address bits start at page shift: 2M->[51:21], 64K->[51:16], 4K->[51:12] */ + u64 phys_addr = pte & XE_PAGE_ADDR_MASK(page_shift); + /* Page address is relative to 4K page regardless of entry level */ u64 phys_page = phys_addr >> XE_PTE_SHIFT; int num_entries = prl->num_entries; - u32 reclamation_size; + u32 reclamation_size = page_shift - XE_PTE_SHIFT; xe_tile_assert(tile, xe_child->level <= MAX_HUGEPTE_LEVEL); xe_tile_assert(tile, reclaim_entries); @@ -1633,18 +1631,12 @@ static int generate_reclaim_entry(struct xe_tile *tile, * Page size is computed as 2^(reclamation_size + XE_PTE_SHIFT) bytes. 
* Only 4K, 64K (level 0), and 2M pages are supported by hardware for page reclaim */ - if (xe_child->level == 0 && !(pte & XE_PTE_PS64)) { - xe_gt_stats_incr(gt, XE_GT_STATS_ID_PRL_4K_ENTRY_COUNT, 1); - reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_4K); /* reclamation_size = 0 */ - xe_tile_assert(tile, phys_addr % SZ_4K == 0); - } else if (xe_child->level == 0) { - xe_gt_stats_incr(gt, XE_GT_STATS_ID_PRL_64K_ENTRY_COUNT, 1); - reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_64K); /* reclamation_size = 4 */ - xe_tile_assert(tile, phys_addr % SZ_64K == 0); - } else if (xe_child->level == 1 && pte & XE_PDE_PS_2M) { + if (is_2m) { xe_gt_stats_incr(gt, XE_GT_STATS_ID_PRL_2M_ENTRY_COUNT, 1); - reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_2M); /* reclamation_size = 9 */ - xe_tile_assert(tile, phys_addr % SZ_2M == 0); + } else if (is_64k) { + xe_gt_stats_incr(gt, XE_GT_STATS_ID_PRL_64K_ENTRY_COUNT, 1); + } else if (xe_child->level == 0) { + xe_gt_stats_incr(gt, XE_GT_STATS_ID_PRL_4K_ENTRY_COUNT, 1); } else { xe_page_reclaim_list_abort(tile->primary_gt, prl, "unsupported PTE level=%u pte=%#llx", @@ -1665,6 +1657,48 @@ static int generate_reclaim_entry(struct xe_tile *tile, return 0; } +static int add_pte_to_prl(struct xe_tile *tile, struct xe_page_reclaim_list *prl, + struct xe_pt *xe_child, u64 pte, u64 addr) +{ + /* + * In rare scenarios, pte may not be written yet due to racy conditions. + * In such cases, invalidate the PRL and fallback to full PPC invalidation. 
+ */ + if (!pte) { + xe_page_reclaim_list_abort(tile->primary_gt, prl, + "found zero pte at addr=%#llx", addr); + return -EINVAL; + } + + /* Ensure it is a defined page */ + xe_tile_assert(tile, xe_child->level == 0 || + (pte & (XE_PDE_PS_2M | XE_PDPE_PS_1G))); + + /* Account for NULL terminated entry on end (-1) */ + if (prl->num_entries >= XE_PAGE_RECLAIM_MAX_ENTRIES - 1) { + xe_page_reclaim_list_abort(tile->primary_gt, prl, + "overflow while adding pte=%#llx", pte); + return -ENOSPC; + } + + return generate_reclaim_entry(tile, prl, pte, xe_child); +} + +static bool add_compact_pt_prl(struct xe_tile *tile, struct xe_page_reclaim_list *prl, + struct xe_device *xe, struct xe_pt *compact_pt, u64 addr) +{ + struct iosys_map *map = &compact_pt->bo->vmap; + + for (pgoff_t i = 0; i < SZ_2M / SZ_64K && xe_page_reclaim_list_valid(prl); i++) { + u64 pte = xe_map_rd(xe, map, i * sizeof(u64), u64); + + if (add_pte_to_prl(tile, prl, compact_pt, pte, addr + i * SZ_64K)) + break; + } + + return xe_page_reclaim_list_valid(prl); +} + static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, unsigned int level, u64 addr, u64 next, struct xe_ptw **child, @@ -1674,21 +1708,22 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); struct xe_pt_stage_unbind_walk *xe_walk = container_of(walk, typeof(*xe_walk), base); - struct xe_device *xe = tile_to_xe(xe_walk->tile); + struct xe_page_reclaim_list *prl = xe_walk->prl; + struct xe_tile *tile = xe_walk->tile; + struct xe_device *xe = tile_to_xe(tile); pgoff_t first = xe_pt_offset(addr, xe_child->level, walk); bool killed; XE_WARN_ON(!*child); XE_WARN_ON(!level); /* Check for leaf node */ - if (xe_walk->prl && xe_page_reclaim_list_valid(xe_walk->prl) && + if (prl && xe_page_reclaim_list_valid(prl) && xe_child->level <= MAX_HUGEPTE_LEVEL) { struct iosys_map *leaf_map = &xe_child->bo->vmap; pgoff_t count = xe_pt_num_entries(addr, 
next, xe_child->level, walk); for (pgoff_t i = 0; i < count; i++) { u64 pte; - int ret; /* * If not a leaf pt, skip unless non-leaf pt is interleaved between @@ -1698,10 +1733,23 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, u64 pt_size = 1ULL << walk->shifts[xe_child->level]; bool edge_pt = (i == 0 && !IS_ALIGNED(addr, pt_size)) || (i == count - 1 && !IS_ALIGNED(next, pt_size)); - - if (!edge_pt) { - xe_page_reclaim_list_abort(xe_walk->tile->primary_gt, - xe_walk->prl, + struct xe_pt *child_pt = + container_of(xe_child->base.children[first + i], + struct xe_pt, base); + + /* Compact PTs always fill a full 2M-aligned slot, never an edge. */ + XE_WARN_ON(child_pt->is_compact && edge_pt); + if (edge_pt) + continue; + + /* Walker never descends into compact PTs, descend now */ + if (child_pt->is_compact) { + if (!add_compact_pt_prl(tile, prl, xe, child_pt, + addr + (u64)i * pt_size)) + break; + } else { + xe_page_reclaim_list_abort(tile->primary_gt, + prl, "PT is skipped by walk at level=%u offset=%lu", xe_child->level, first + i); break; @@ -1711,37 +1759,12 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, pte = xe_map_rd(xe, leaf_map, (first + i) * sizeof(u64), u64); - /* - * In rare scenarios, pte may not be written yet due to racy conditions. - * In such cases, invalidate the PRL and fallback to full PPC invalidation. 
- */ - if (!pte) { - xe_page_reclaim_list_abort(xe_walk->tile->primary_gt, xe_walk->prl, - "found zero pte at addr=%#llx", addr); + if (add_pte_to_prl(tile, prl, xe_child, pte, addr)) break; - } - - /* Ensure it is a defined page */ - xe_tile_assert(xe_walk->tile, xe_child->level == 0 || - (pte & (XE_PDE_PS_2M | XE_PDPE_PS_1G))); /* An entry should be added for 64KB but contigious 4K have XE_PTE_PS64 */ if (pte & XE_PTE_PS64) i += 15; /* Skip other 15 consecutive 4K pages in the 64K page */ - - /* Account for NULL terminated entry on end (-1) */ - if (xe_walk->prl->num_entries < XE_PAGE_RECLAIM_MAX_ENTRIES - 1) { - ret = generate_reclaim_entry(xe_walk->tile, xe_walk->prl, - pte, xe_child); - if (ret) - break; - } else { - /* overflow, mark as invalid */ - xe_page_reclaim_list_abort(xe_walk->tile->primary_gt, xe_walk->prl, - "overflow while adding pte=%#llx", - pte); - break; - } } } @@ -1751,7 +1774,7 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, * Verify if any PTE are potentially dropped at non-leaf levels, either from being * killed or the page walk covers the region. */ - if (xe_walk->prl && xe_page_reclaim_list_valid(xe_walk->prl) && + if (prl && xe_page_reclaim_list_valid(prl) && xe_child->level > MAX_HUGEPTE_LEVEL && xe_child->num_live) { bool covered = xe_pt_covers(addr, next, xe_child->level, &xe_walk->base); @@ -1760,7 +1783,7 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, * we need to invalidate the PRL. */ if (killed || covered) - xe_page_reclaim_list_abort(xe_walk->tile->primary_gt, xe_walk->prl, + xe_page_reclaim_list_abort(tile->primary_gt, prl, "kill at level=%u addr=%#llx next=%#llx num_live=%u", level, addr, next, xe_child->num_live); } |
