summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2024-06-11 07:01:55 +0300
committerDave Airlie <airlied@redhat.com>2024-06-11 07:01:55 +0300
commit1ddaaa244021aba8496536a6627b4ad2bc0f936a (patch)
tree2b37ec6170094757daaa0c7445670eebf3b996d9 /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
parent7957066ca614b63aa6687e825ccbc215fa4584ea (diff)
parentb95fa494d6b74c30eeb4a50481aa1041c631754e (diff)
downloadlinux-1ddaaa244021aba8496536a6627b4ad2bc0f936a.tar.xz
Merge tag 'amd-drm-next-6.11-2024-06-07' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.11-2024-06-07: amdgpu: - DCN 4.0.x support - DCN 3.5 updates - GC 12.0 support - DP MST fixes - Cursor fixes - MES11 updates - MMHUB 4.1 support - DML2 Updates - DCN 3.1.5 fixes - IPS fixes - Various code cleanups - GMC 12.0 support - SDMA 7.0 support - SMU 13 updates - SR-IOV fixes - VCN 5.x fixes - MES12 support - SMU 14.x updates - Devcoredump improvements - Fixes for HDP flush on platforms with >4k pages - GC 9.4.3 fixes - RAS ACA updates - Silence UBSAN flex array warnings - MMHUB 3.3 updates amdkfd: - Contiguous VRAM allocations - GC 12.0 support - SDMA 7.0 support - SR-IOV fixes radeon: - Backlight workaround for iMac - Silence UBSAN flex array warnings UAPI: - GFX12 modifier and DCC support Proposed Mesa changes: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29510 - KFD GFX ALU exceptions Proposed ROCdebugger changes: https://github.com/ROCm/ROCdbgapi/commit/08c760622b6601abf906f75abbc5e21d9fd425df https://github.com/ROCm/ROCgdb/commit/944fe1c1414a68700414e86e32273b6bfa62ba6f - KFD Contiguous VRAM allocation flag Proposed ROCr/HIP changes: https://github.com/ROCm/ROCT-Thunk-Interface/commit/f7b4a269914a3ab4f1e2453c2879adb97b5cc9e5 https://github.com/ROCm/ROCR-Runtime/pull/214/commits/26e8530d05a775872cb06dde6693db72be0c454a https://github.com/ROCm/clr/commit/1d48f2a1ab38b632919c4b7274899b3faf4279ff Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240607195900.902537-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c46
1 files changed, 38 insertions, 8 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 48ad0c04aa72..11672bfe4fad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -172,6 +172,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
size_t system_mem_needed, ttm_mem_needed, vram_needed;
int ret = 0;
uint64_t vram_size = 0;
@@ -220,7 +222,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
kfd_mem_limit.max_ttm_mem_limit) ||
(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
- vram_size - reserved_for_pt - atomic64_read(&adev->vram_pin_size))) {
+ vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size))) {
ret = -ENOMEM;
goto release;
}
@@ -415,6 +417,10 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
"Called with userptr BO"))
return -EINVAL;
+ /* bo has been pinned, not need validate it */
+ if (bo->tbo.pin_count)
+ return 0;
+
amdgpu_bo_placement_from_domain(bo, domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -1088,7 +1094,10 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range);
if (ret) {
- pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
+ if (ret == -EAGAIN)
+ pr_debug("Failed to get user pages, try again\n");
+ else
+ pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
goto unregister_out;
}
@@ -1472,13 +1481,30 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
if (unlikely(ret))
return ret;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
+ /*
+ * If bo is not contiguous on VRAM, move to system memory first to ensure
+ * we can get contiguous VRAM space after evicting other BOs.
+ */
+ if (!(bo->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
+ struct ttm_operation_ctx ctx = { true, false };
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (unlikely(ret)) {
+ pr_debug("validate bo 0x%p to GTT failed %d\n", &bo->tbo, ret);
+ goto out;
+ }
+ }
+ }
+
ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
if (ret)
pr_err("Error in Pinning BO to domain: %d\n", domain);
amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+out:
amdgpu_bo_unreserve(bo);
-
return ret;
}
@@ -1649,6 +1675,8 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
ssize_t available;
uint64_t vram_available, system_mem_available, ttm_mem_available;
@@ -1656,7 +1684,8 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
- adev->kfd.vram_used_aligned[xcp_id]
- atomic64_read(&adev->vram_pin_size)
- - reserved_for_pt;
+ - reserved_for_pt
+ - reserved_for_ras;
if (adev->flags & AMD_IS_APU) {
system_mem_available = no_system_mem_limit ?
@@ -1714,6 +1743,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
+
+ /* For contiguous VRAM allocation */
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS)
+ alloc_flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
}
xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ?
0 : fpriv->xcp_id;
@@ -2712,7 +2745,7 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
/* keep mem without hmm range at userptr_inval_list */
if (!mem->range)
- continue;
+ continue;
/* Only check mem with hmm range associated */
valid = amdgpu_ttm_tt_get_user_pages_done(
@@ -2957,9 +2990,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
if (!attachment->is_mapped)
continue;
- if (attachment->bo_va->base.bo->tbo.pin_count)
- continue;
-
kfd_mem_dmaunmap_attachment(mem, attachment);
ret = update_gpuvm_pte(mem, attachment, &sync_obj);
if (ret) {