summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu.h
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2025-01-15 15:44:26 +0300
committerAlex Deucher <alexander.deucher@amd.com>2025-03-21 19:16:34 +0300
commitbd22e44ad415ac22e3a4f9a983d2a085f6cb4427 (patch)
treefb16198f66751ca2ad807bf57f4b31f010585121 /drivers/gpu/drm/amd/amdgpu/amdgpu.h
parent7f11c59e0700721c849b81e565bf56a7d8ceaa2d (diff)
downloadlinux-bd22e44ad415ac22e3a4f9a983d2a085f6cb4427.tar.xz
drm/amdgpu: rework how isolation is enforced v2
Limiting the number of available VMIDs to enforce isolation causes some issues with gang submit and applying certain HW workarounds which require multiple VMIDs to work correctly. So instead start to track all submissions to the relevant engines in a per partition data structure and use the dma_fences of the submissions to enforce isolation similar to what a VMID limit does. v2: use ~0l for jobs without isolation to distinct it from kernel submissions which uses NULL for the owner. Add some warning when we are OOM. Signed-off-by: Christian König <christian.koenig@amd.com> Acked-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h13
1 files changed, 11 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2a9a41f4e748..6d83ccfa42ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1194,9 +1194,15 @@ struct amdgpu_device {
bool debug_exp_resets;
bool debug_disable_gpu_ring_reset;
- bool enforce_isolation[MAX_XCP];
- /* Added this mutex for cleaner shader isolation between GFX and compute processes */
+ /* Protection for the following isolation structure */
struct mutex enforce_isolation_mutex;
+ bool enforce_isolation[MAX_XCP];
+ struct amdgpu_isolation {
+ void *owner;
+ struct dma_fence *spearhead;
+ struct amdgpu_sync active;
+ struct amdgpu_sync prev;
+ } isolation[MAX_XCP];
struct amdgpu_init_level *init_lvl;
@@ -1482,6 +1488,9 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
struct dma_fence *gang);
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job);
bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);