diff options
author | Maxime Ripard <mripard@kernel.org> | 2024-04-23 09:48:56 +0300 |
---|---|---|
committer | Maxime Ripard <mripard@kernel.org> | 2024-04-23 09:48:56 +0300 |
commit | c058e7a8f8af355e4a441c89400a6e95a16320e5 (patch) | |
tree | ed96dcd06e69f508a3f8a112a29fd228289429a6 /drivers/gpu/drm/xe | |
parent | 6221deb716b9d5397c09ba6567f7ae61d8cbeb98 (diff) | |
parent | 0208ca55aa9c9b997da1f5bc45c4e98916323f08 (diff) | |
download | linux-c058e7a8f8af355e4a441c89400a6e95a16320e5.tar.xz |
Merge drm/drm-next into drm-misc-next
Maíra needs a backmerge to apply v3d patches, and Danilo for some
nouveau patches.
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Diffstat (limited to 'drivers/gpu/drm/xe')
28 files changed, 170 insertions, 281 deletions
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index c29a850859ad..6885c13214ee 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -172,9 +172,6 @@ subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \ -Ddrm_i915_gem_object=xe_bo \ -Ddrm_i915_private=xe_device -CFLAGS_i915-display/intel_fbdev.o = -Wno-override-init -CFLAGS_i915-display/intel_display_device.o = -Wno-override-init - # Rule to build SOC code shared with i915 $(obj)/i915-soc/%.o: $(srctree)/drivers/gpu/drm/i915/soc/%.c FORCE $(call cmd,force_checksrc) @@ -278,6 +275,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_vdsc.o \ i915-display/intel_vga.o \ i915-display/intel_vrr.o \ + i915-display/intel_dmc_wl.o \ i915-display/intel_wm.o \ i915-display/skl_scaler.o \ i915-display/skl_universal_plane.o \ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 420eba0e4be0..ea6b8e0f1f35 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -19,14 +19,12 @@ #include "xe_bo.h" #include "xe_pm.h" #include "xe_step.h" -#include "i915_gem.h" #include "i915_gem_stolen.h" #include "i915_gpu_error.h" #include "i915_reg_defs.h" #include "i915_utils.h" #include "intel_gt_types.h" #include "intel_step.h" -#include "intel_uc_fw.h" #include "intel_uncore.h" #include "intel_runtime_pm.h" #include <linux/pm_runtime.h> @@ -41,12 +39,8 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) return dev_get_drvdata(kdev); } - -#define INTEL_JASPERLAKE 0 -#define INTEL_ELKHARTLAKE 0 #define IS_PLATFORM(xe, x) ((xe)->info.platform == x) #define INTEL_INFO(dev_priv) (&((dev_priv)->info)) -#define INTEL_DEVID(dev_priv) ((dev_priv)->info.devid) #define IS_I830(dev_priv) (dev_priv && 0) #define IS_I845G(dev_priv) (dev_priv && 0) #define IS_I85X(dev_priv) (dev_priv && 0) @@ -85,9 +79,7 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, XE_DG1) #define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S) #define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) -#define IS_XEHPSDV(dev_priv) (dev_priv && 0) #define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, XE_DG2) -#define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, XE_PVC) #define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE) #define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE) @@ -97,19 +89,12 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define IP_VER(ver, rel) ((ver) << 8 | (rel)) -#define INTEL_DISPLAY_ENABLED(xe) (HAS_DISPLAY((xe)) && !intel_opregion_headless_sku((xe))) - -#define IS_GRAPHICS_VER(xe, first, last) \ - ((xe)->info.graphics_verx100 >= first * 100 && \ - (xe)->info.graphics_verx100 <= (last*100 + 99)) #define IS_MOBILE(xe) (xe && 0) -#define HAS_LLC(xe) (!IS_DGFX((xe))) #define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270 /* Workarounds not handled yet */ #define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step <= last; }) -#define IS_GRAPHICS_STEP(xe, first, last) ({u8 __step = (xe)->info.step.graphics; first <= __step && __step <= last; }) #define IS_LP(xe) (0) #define IS_GEN9_LP(xe) (0) @@ -126,27 +111,6 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define IS_KABYLAKE_ULT(xe) (xe && 0) #define IS_SKYLAKE_ULT(xe) (xe && 0) -#define IS_DG1_GRAPHICS_STEP(xe, first, last) (IS_DG1(xe) && IS_GRAPHICS_STEP(xe, first, last)) -#define IS_DG2_GRAPHICS_STEP(xe, variant, first, last) \ - ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_ ## variant && \ - IS_GRAPHICS_STEP(xe, first, last)) -#define IS_XEHPSDV_GRAPHICS_STEP(xe, first, last) (IS_XEHPSDV(xe) && IS_GRAPHICS_STEP(xe, first, last)) - -/* XXX: No basedie stepping support yet */ -#define IS_PVC_BD_STEP(xe, first, last) (!WARN_ON(1) && IS_PONTEVECCHIO(xe)) - -#define IS_TIGERLAKE_DISPLAY_STEP(xe, first, last) (IS_TIGERLAKE(xe) && IS_DISPLAY_STEP(xe, first, last)) -#define IS_ROCKETLAKE_DISPLAY_STEP(xe, first, last) (IS_ROCKETLAKE(xe) && IS_DISPLAY_STEP(xe, first, last)) -#define IS_DG1_DISPLAY_STEP(xe, first, last) (IS_DG1(xe) && IS_DISPLAY_STEP(xe, first, last)) -#define IS_DG2_DISPLAY_STEP(xe, first, last) (IS_DG2(xe) && IS_DISPLAY_STEP(xe, first, last)) -#define IS_ADLP_DISPLAY_STEP(xe, first, last) (IS_ALDERLAKE_P(xe) && IS_DISPLAY_STEP(xe, first, last)) -#define IS_ADLS_DISPLAY_STEP(xe, first, last) (IS_ALDERLAKE_S(xe) && IS_DISPLAY_STEP(xe, first, last)) -#define IS_JSL_EHL_DISPLAY_STEP(xe, first, last) (IS_JSL_EHL(xe) && IS_DISPLAY_STEP(xe, first, last)) -#define IS_MTL_DISPLAY_STEP(xe, first, last) (IS_METEORLAKE(xe) && IS_DISPLAY_STEP(xe, first, last)) - -/* FIXME: Add subplatform here */ -#define IS_MTL_GRAPHICS_STEP(xe, sub, first, last) (IS_METEORLAKE(xe) && IS_DISPLAY_STEP(xe, first, last)) - #define IS_DG2_G10(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G10) #define IS_DG2_G11(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G11) #define IS_DG2_G12(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G12) @@ -154,12 +118,9 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define IS_ICL_WITH_PORT_F(xe) (xe && 0) #define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe)) #define to_intel_bo(x) gem_to_xe_bo((x)) -#define mkwrite_device_info(xe) (INTEL_INFO(xe)) #define HAS_128_BYTE_Y_TILING(xe) (xe || 1) -#define intel_has_gpu_reset(a) (a && 0) - #include "intel_wakeref.h" static inline intel_wakeref_t intel_runtime_pm_get(struct xe_runtime_pm *pm) @@ -217,7 +178,6 @@ struct i915_sched_attr { #define RUNTIME_INFO(xe) (&(xe)->info.i915_runtime) #define FORCEWAKE_ALL XE_FORCEWAKE_ALL -#define HPD_STORM_DEFAULT_THRESHOLD 50 #ifdef CONFIG_ARM64 /* diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h deleted file mode 100644 index 12c671fd5235..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h +++ /dev/null @@ -1,6 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#include "../../i915/i915_fixed.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h deleted file mode 100644 index 06b723a479c5..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef __I915_GEM_H__ -#define __I915_GEM_H__ -#define GEM_BUG_ON -#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h index 80b024d435dc..4931c7198f13 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h @@ -9,36 +9,10 @@ #include <linux/types.h> struct drm_i915_private; -struct i915_ggtt; -static inline void intel_vgpu_detect(struct drm_i915_private *i915) -{ -} static inline bool intel_vgpu_active(struct drm_i915_private *i915) { return false; } -static inline void intel_vgpu_register(struct drm_i915_private *i915) -{ -} -static inline bool intel_vgpu_has_full_ppgtt(struct drm_i915_private *i915) -{ - return false; -} -static inline bool intel_vgpu_has_hwsp_emulation(struct drm_i915_private *i915) -{ - return false; -} -static inline bool intel_vgpu_has_huge_gtt(struct drm_i915_private *i915) -{ - return false; -} -static inline int intel_vgt_balloon(struct i915_ggtt *ggtt) -{ - return 0; -} -static inline void intel_vgt_deballoon(struct i915_ggtt *ggtt) -{ -} #endif /* _I915_VGPU_H_ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h deleted file mode 100644 index 009745328992..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _INTEL_UC_FW_H_ -#define _INTEL_UC_FW_H_ - -#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git" - -#endif diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c index b21da7b745a5..a9c1f9885c6b 100644 --- a/drivers/gpu/drm/xe/display/intel_fb_bo.c +++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c @@ -31,7 +31,7 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, ret = ttm_bo_reserve(&bo->ttm, true, false, NULL); if (ret) - return ret; + goto err; if (!(bo->flags & XE_BO_SCANOUT_BIT)) { /* @@ -42,12 +42,16 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, */ if (XE_IOCTL_DBG(i915, !list_empty(&bo->ttm.base.gpuva.list))) { ttm_bo_unreserve(&bo->ttm); - return -EINVAL; + ret = -EINVAL; + goto err; } bo->flags |= XE_BO_SCANOUT_BIT; } ttm_bo_unreserve(&bo->ttm); + return 0; +err: + xe_bo_put(bo); return ret; } diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index e4db069f0db3..6ec375c1c4b6 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -108,11 +108,6 @@ int xe_display_create(struct xe_device *xe) xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0); drmm_mutex_init(&xe->drm, &xe->sb_lock); - drmm_mutex_init(&xe->drm, &xe->display.backlight.lock); - drmm_mutex_init(&xe->drm, &xe->display.audio.mutex); - drmm_mutex_init(&xe->drm, &xe->display.wm.wm_mutex); - drmm_mutex_init(&xe->drm, &xe->display.pps.mutex); - drmm_mutex_init(&xe->drm, &xe->display.hdcp.hdcp_mutex); xe->enabled_irq_mask = ~0; err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 0b1266c88a6a..deddc8be48c0 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -125,7 +125,7 @@ #define RING_EXECLIST_STATUS_LO(base) XE_REG((base) + 0x234) #define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4) -#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244) +#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index ca85e81fdb44..d32ff3857e65 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -193,6 +193,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy) { struct xe_device *xe = to_xe_device(dev); + if (xe->preempt_fence_wq) + destroy_workqueue(xe->preempt_fence_wq); + if (xe->ordered_wq) destroy_workqueue(xe->ordered_wq); @@ -258,9 +261,15 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, INIT_LIST_HEAD(&xe->pinned.external_vram); INIT_LIST_HEAD(&xe->pinned.evicted); + xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0); xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); - if (!xe->ordered_wq || !xe->unordered_wq) { + if (!xe->ordered_wq || !xe->unordered_wq || + !xe->preempt_fence_wq) { + /* + * Cleanup done in xe_device_destroy via + * drmm_add_action_or_reset register above + */ drm_err(&xe->drm, "Failed to allocate xe workqueues\n"); err = -ENOMEM; goto err; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 9785eef2e5a4..faa32407efa5 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -363,6 +363,9 @@ struct xe_device { /** @ufence_wq: user fence wait queue */ wait_queue_head_t ufence_wq; + /** @preempt_fence_wq: used to serialize preempt fences */ + struct workqueue_struct *preempt_fence_wq; + /** @ordered_wq: used to serialize compute mode resume */ struct workqueue_struct *ordered_wq; @@ -497,20 +500,9 @@ struct xe_device { /* For pcode */ struct mutex sb_lock; - /* Should be in struct intel_display */ - u32 skl_preferred_vco_freq, max_dotclk_freq, hti_state; - u8 snps_phy_failed_calibration; - struct drm_atomic_state *modeset_restore_state; - struct list_head global_obj_list; - - union { - /* only to allow build, not used functionally */ - u32 irq_mask; - u32 de_irq_mask[I915_MAX_PIPES]; - }; - u32 pipestat_irq_mask[I915_MAX_PIPES]; + /* only to allow build, not used functionally */ + u32 irq_mask; - bool display_irqs_enabled; u32 enabled_irq_mask; struct intel_uncore { @@ -522,7 +514,6 @@ struct xe_device { unsigned int hpll_freq; unsigned int czclk_freq; unsigned int fsb_freq, mem_freq, is_ddr3; - u8 vblank_enabled; }; struct { const char *dmc_firmware_path; diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 826c8b389672..cc5e0f75de3c 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -94,48 +94,16 @@ * Unlock all */ +/* + * Add validation and rebinding to the drm_exec locking loop, since both can + * trigger eviction which may require sleeping dma_resv locks. + */ static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec) { struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm); - struct drm_gem_object *obj; - unsigned long index; - int num_fences; - int ret; - - ret = drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec); - if (ret) - return ret; - - /* - * 1 fence slot for the final submit, and 1 more for every per-tile for - * GPU bind and 1 extra for CPU bind. Note that there are potentially - * many vma per object/dma-resv, however the fence slot will just be - * re-used, since they are largely the same timeline and the seqno - * should be in order. In the case of CPU bind there is dummy fence used - * for all CPU binds, so no need to have a per-tile slot for that. - */ - num_fences = 1 + 1 + vm->xe->info.tile_count; - /* - * We don't know upfront exactly how many fence slots we will need at - * the start of the exec, since the TTM bo_validate above can consume - * numerous fence slots. Also due to how the dma_resv_reserve_fences() - * works it only ensures that at least that many fence slots are - * available i.e if there are already 10 slots available and we reserve - * two more, it can just noop without reserving anything. With this it - * is quite possible that TTM steals some of the fence slots and then - * when it comes time to do the vma binding and final exec stage we are - * lacking enough fence slots, leading to some nasty BUG_ON() when - * adding the fences. Hence just add our own fences here, after the - * validate stage. - */ - drm_exec_for_each_locked_object(&vm_exec->exec, index, obj) { - ret = dma_resv_reserve_fences(obj->resv, num_fences); - if (ret) - return ret; - } - - return 0; + /* The fence slot added here is intended for the exec sched job. */ + return xe_vm_validate_rebind(vm, &vm_exec->exec, 1); } int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -152,7 +120,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct drm_exec *exec = &vm_exec.exec; u32 i, num_syncs = 0, num_ufence = 0; struct xe_sched_job *job; - struct dma_fence *rebind_fence; struct xe_vm *vm; bool write_locked, skip_retry = false; ktime_t end = 0; @@ -290,39 +257,7 @@ retry: goto err_exec; } - /* - * Rebind any invalidated userptr or evicted BOs in the VM, non-compute - * VM mode only. - */ - rebind_fence = xe_vm_rebind(vm, false); - if (IS_ERR(rebind_fence)) { - err = PTR_ERR(rebind_fence); - goto err_put_job; - } - - /* - * We store the rebind_fence in the VM so subsequent execs don't get - * scheduled before the rebinds of userptrs / evicted BOs is complete. - */ - if (rebind_fence) { - dma_fence_put(vm->rebind_fence); - vm->rebind_fence = rebind_fence; - } - if (vm->rebind_fence) { - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &vm->rebind_fence->flags)) { - dma_fence_put(vm->rebind_fence); - vm->rebind_fence = NULL; - } else { - dma_fence_get(vm->rebind_fence); - err = drm_sched_job_add_dependency(&job->drm, - vm->rebind_fence); - if (err) - goto err_put_job; - } - } - - /* Wait behind munmap style rebinds */ + /* Wait behind rebinds */ if (!xe_vm_in_lr_mode(vm)) { err = drm_sched_job_add_resv_dependencies(&job->drm, xe_vm_resv(vm), diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 62b3d9d1d7cd..462b33195032 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -148,6 +148,11 @@ struct xe_exec_queue { const struct xe_ring_ops *ring_ops; /** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */ struct drm_sched_entity *entity; + /** + * @tlb_flush_seqno: The seqno of the last rebind tlb flush performed + * Protected by @vm's resv. Unused if @vm == NULL. + */ + u64 tlb_flush_seqno; /** @lrc: logical ring context for this exec queue */ struct xe_lrc lrc[]; }; diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 241c294270d9..fa9e9853c53b 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -100,10 +100,9 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); - unsigned int num_shared = 2; /* slots for bind + move */ int err; - err = xe_vm_prepare_vma(exec, vma, num_shared); + err = xe_vm_lock_vma(exec, vma); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index f03e077f81a0..e598a4363d01 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -61,7 +61,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) INIT_LIST_HEAD(>->tlb_invalidation.pending_fences); spin_lock_init(>->tlb_invalidation.pending_lock); spin_lock_init(>->tlb_invalidation.lock); - gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1); INIT_DELAYED_WORK(>->tlb_invalidation.fence_tdr, xe_gt_tlb_fence_timeout); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 70c615dd1498..07b2f724ec45 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -177,13 +177,6 @@ struct xe_gt { * xe_gt_tlb_fence_timeout after the timeut interval is over. */ struct delayed_work fence_tdr; - /** @tlb_invalidation.fence_context: context for TLB invalidation fences */ - u64 fence_context; - /** - * @tlb_invalidation.fence_seqno: seqno to TLB invalidation fences, protected by - * tlb_invalidation.lock - */ - u32 fence_seqno; /** @tlb_invalidation.lock: protects TLB invalidation fences */ spinlock_t lock; } tlb_invalidation; diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index b82233a41606..9ac7fbe201b3 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -290,7 +290,7 @@ xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *a * As y can be < 2, we compute tau4 = (4 | x) << y * and then add 2 when doing the final right shift to account for units */ - tau4 = ((1 << x_w) | x) << y; + tau4 = (u64)((1 << x_w) | x) << y; /* val in hwmon interface units (millisec) */ out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); @@ -330,7 +330,7 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute * r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT); x = REG_FIELD_GET(PKG_MAX_WIN_X, r); y = REG_FIELD_GET(PKG_MAX_WIN_Y, r); - tau4 = ((1 << x_w) | x) << y; + tau4 = (u64)((1 << x_w) | x) << y; max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); if (val > max_win) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 1426febe86eb..57066faf575e 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -525,9 +525,8 @@ static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class) static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) { - regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) | - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; + regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | + CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); /* TODO: Timestamp */ } diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index ee1bb938c493..2ba4fb9511f6 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -227,7 +227,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (vm->flags & XE_VM_FLAG_64K && level == 1) flags = XE_PDE_64K; - entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) * + entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (u64)(level - 1) * XE_PAGE_SIZE, pat_index); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64, entry | flags); @@ -235,7 +235,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Write PDE's that point to our BO. */ for (i = 0; i < num_entries - num_level; i++) { - entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE, + entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE, pat_index); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE + @@ -291,7 +291,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, #define VM_SA_UPDATE_UNIT_SIZE (XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE) #define NUM_VMUSA_WRITES_PER_UNIT (VM_SA_UPDATE_UNIT_SIZE / sizeof(u64)) drm_suballoc_manager_init(&m->vm_update_sa, - (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) * + (size_t)(map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) * NUM_VMUSA_UNIT_PER_PAGE, 0); m->pt_bo = bo; @@ -490,7 +490,7 @@ static void emit_pte(struct xe_migrate *m, struct xe_vm *vm = m->q->vm; u16 pat_index; u32 ptes; - u64 ofs = at_pt * XE_PAGE_SIZE; + u64 ofs = (u64)at_pt * XE_PAGE_SIZE; u64 cur_ofs; /* Indirect access needs compression enabled uncached PAT index */ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 557f2d88a8c1..75a8817b2631 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -333,6 +333,7 @@ static const struct xe_device_desc mtl_desc = { static const struct xe_device_desc lnl_desc = { PLATFORM(XE_LUNARLAKE), + .has_display = true, .require_force_probe = true, }; diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 7bce2a332603..7d50c6e89d8e 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -49,7 +49,7 @@ static bool preempt_fence_enable_signaling(struct dma_fence *fence) struct xe_exec_queue *q = pfence->q; pfence->error = q->ops->suspend(q); - queue_work(system_unbound_wq, &pfence->preempt_work); + queue_work(q->vm->xe->preempt_fence_wq, &pfence->preempt_work); return true; } diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 7f54bc3e389d..4efc8c1a3d7a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1135,8 +1135,7 @@ static int invalidation_fence_init(struct xe_gt *gt, spin_lock_irq(>->tlb_invalidation.lock); dma_fence_init(&ifence->base.base, &invalidation_fence_ops, >->tlb_invalidation.lock, - gt->tlb_invalidation.fence_context, - ++gt->tlb_invalidation.fence_seqno); + dma_fence_context_alloc(1), 1); spin_unlock_irq(>->tlb_invalidation.lock); INIT_LIST_HEAD(&ifence->base.link); @@ -1236,6 +1235,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue err = xe_pt_prepare_bind(tile, vma, entries, &num_entries); if (err) goto err; + + err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); + if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); + if (err) + goto err; + xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); @@ -1254,11 +1260,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue * non-faulting LR, in particular on user-space batch buffer chaining, * it needs to be done here. */ - if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) || - (!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { + if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); if (!ifence) return ERR_PTR(-ENOMEM); + } else if (rebind && !xe_vm_in_lr_mode(vm)) { + /* We bump also if batch_invalidate_tlb is true */ + vm->tlb_flush_seqno++; } rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); @@ -1297,7 +1305,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue } /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind && + dma_resv_add_fence(xe_vm_resv(vm), fence, rebind || last_munmap_rebind ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); @@ -1576,6 +1584,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu struct dma_fence *fence = NULL; struct invalidation_fence *ifence; struct xe_range_fence *rfence; + int err; LLIST_HEAD(deferred); @@ -1593,6 +1602,12 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, num_entries); + err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); + if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); + if (err) + return ERR_PTR(err); + ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); if (!ifence) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index c4edffcd4a32..5b2b37b59813 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -219,10 +219,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc { u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); - struct xe_vm *vm = job->q->vm; struct xe_gt *gt = job->q->gt; - if (vm && vm->batch_invalidate_tlb) { + if (job->ring_ops_flush_tlb) { dw[i++] = preparser_disable(true); i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, true, dw, i); @@ -270,7 +269,6 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, struct xe_gt *gt = job->q->gt; struct xe_device *xe = gt_to_xe(gt); bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE; - struct xe_vm *vm = job->q->vm; dw[i++] = preparser_disable(true); @@ -282,13 +280,13 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i); } - if (vm && vm->batch_invalidate_tlb) + if (job->ring_ops_flush_tlb) i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, true, dw, i); dw[i++] = preparser_disable(false); - if (!vm || !vm->batch_invalidate_tlb) + if (!job->ring_ops_flush_tlb) i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, dw, i); @@ -317,7 +315,6 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, struct xe_gt *gt = job->q->gt; struct xe_device *xe = gt_to_xe(gt); bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); - struct xe_vm *vm = job->q->vm; u32 mask_flags = 0; dw[i++] = preparser_disable(true); @@ -327,7 +324,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; /* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */ - i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i); + i = emit_pipe_invalidate(mask_flags, job->ring_ops_flush_tlb, dw, i); /* hsdes: 1809175790 */ if (has_aux_ccs(xe)) diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 8151ddafb940..b0c7fa4693cf 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -250,6 +250,16 @@ bool xe_sched_job_completed(struct xe_sched_job *job) void xe_sched_job_arm(struct xe_sched_job *job) { + struct xe_exec_queue *q = job->q; + struct xe_vm *vm = q->vm; + + if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) && + (vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) { + xe_vm_assert_held(vm); + q->tlb_flush_seqno = vm->tlb_flush_seqno; + job->ring_ops_flush_tlb = true; + } + drm_sched_job_arm(&job->drm); } diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index b1d83da50a53..5e12724219fd 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -39,6 +39,8 @@ struct xe_sched_job { } user_fence; /** @migrate_flush_flags: Additional flush flags for migration jobs */ u32 migrate_flush_flags; + /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */ + bool ring_ops_flush_tlb; /** @batch_addr: batch buffer address of job */ u64 batch_addr[]; }; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f88faef4142b..3d4c8f342e21 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -482,17 +482,53 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) return 0; } +/** + * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas + * @vm: The vm for which we are rebinding. + * @exec: The struct drm_exec with the locked GEM objects. + * @num_fences: The number of fences to reserve for the operation, not + * including rebinds and validations. + * + * Validates all evicted gem objects and rebinds their vmas. Note that + * rebindings may cause evictions and hence the validation-rebind + * sequence is rerun until there are no more objects to validate. + * + * Return: 0 on success, negative error code on error. In particular, + * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if + * the drm_exec transaction needs to be restarted. + */ +int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, + unsigned int num_fences) +{ + struct drm_gem_object *obj; + unsigned long index; + int ret; + + do { + ret = drm_gpuvm_validate(&vm->gpuvm, exec); + if (ret) + return ret; + + ret = xe_vm_rebind(vm, false); + if (ret) + return ret; + } while (!list_empty(&vm->gpuvm.evict.list)); + + drm_exec_for_each_locked_object(exec, index, obj) { + ret = dma_resv_reserve_fences(obj->resv, num_fences); + if (ret) + return ret; + } + + return 0; +} + static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, bool *done) { int err; - /* - * 1 fence for each preempt fence plus a fence for each tile from a - * possible rebind - */ - err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues + - vm->xe->info.tile_count); + err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0); if (err) return err; @@ -507,7 +543,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, return 0; } - err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues); + err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0); if (err) return err; @@ -515,14 +551,19 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, if (err) return err; - return drm_gpuvm_validate(&vm->gpuvm, exec); + /* + * Add validation and rebinding to the locking loop since both can + * cause evictions which may require blocing dma_resv locks. + * The fence reservation here is intended for the new preempt fences + * we attach at the end of the rebind work. + */ + return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); } static void preempt_rebind_work_func(struct work_struct *w) { struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); struct drm_exec exec; - struct dma_fence *rebind_fence; unsigned int fence_count = 0; LIST_HEAD(preempt_fences); ktime_t end = 0; @@ -568,18 +609,11 @@ retry: if (err) goto out_unlock; - rebind_fence = xe_vm_rebind(vm, true); - if (IS_ERR(rebind_fence)) { - err = PTR_ERR(rebind_fence); + err = xe_vm_rebind(vm, true); + if (err) goto out_unlock; - } - - if (rebind_fence) { - dma_fence_wait(rebind_fence, false); - dma_fence_put(rebind_fence); - } - /* Wait on munmap style VM unbinds */ + /* Wait on rebinds and munmap style VM unbinds */ wait = dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_KERNEL, false, MAX_SCHEDULE_TIMEOUT); @@ -773,14 +807,14 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool first_op, bool last_op); -struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) +int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) { - struct dma_fence *fence = NULL; + struct dma_fence *fence; struct xe_vma *vma, *next; lockdep_assert_held(&vm->lock); if (xe_vm_in_lr_mode(vm) && !rebind_worker) - return NULL; + return 0; xe_vm_assert_held(vm); list_for_each_entry_safe(vma, next, &vm->rebind_list, @@ -788,17 +822,17 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) xe_assert(vm->xe, vma->tile_present); list_del_init(&vma->combined_links.rebind); - dma_fence_put(fence); if (rebind_worker) trace_xe_vma_rebind_worker(vma); else trace_xe_vma_rebind_exec(vma); fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false); if (IS_ERR(fence)) - return fence; + return PTR_ERR(fence); + dma_fence_put(fence); } - return fence; + return 0; } static void xe_vma_free(struct xe_vma *vma) @@ -1004,35 +1038,26 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) } /** - * xe_vm_prepare_vma() - drm_exec utility to lock a vma + * xe_vm_lock_vma() - drm_exec utility to lock a vma * @exec: The drm_exec object we're currently locking for. * @vma: The vma for witch we want to lock the vm resv and any attached * object's resv. - * @num_shared: The number of dma-fence slots to pre-allocate in the - * objects' reservation objects. * * Return: 0 on success, negative error code on error. In particular * may return -EDEADLK on WW transaction contention and -EINTR if * an interruptible wait is terminated by a signal. */ -int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, - unsigned int num_shared) +int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) { struct xe_vm *vm = xe_vma_vm(vma); struct xe_bo *bo = xe_vma_bo(vma); int err; XE_WARN_ON(!vm); - if (num_shared) - err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); - else - err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); - if (!err && bo && !bo->vm) { - if (num_shared) - err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared); - else - err = drm_exec_lock_obj(exec, &bo->ttm.base); - } + + err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); + if (!err && bo && !bo->vm) + err = drm_exec_lock_obj(exec, &bo->ttm.base); return err; } @@ -1044,7 +1069,7 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma) drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { - err = xe_vm_prepare_vma(&exec, vma, 0); + err = xe_vm_lock_vma(&exec, vma); drm_exec_retry_on_contention(&exec); if (XE_WARN_ON(err)) break; @@ -1552,6 +1577,16 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe->usm.num_vm_in_fault_mode--; else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) xe->usm.num_vm_in_non_fault_mode--; + + if (vm->usm.asid) { + void *lookup; + + xe_assert(xe, xe->info.has_asid); + xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); + + lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); + xe_assert(xe, lookup == vm); + } mutex_unlock(&xe->usm.lock); for_each_tile(tile, xe, id) @@ -1567,29 +1602,19 @@ static void vm_destroy_work_func(struct work_struct *w) struct xe_device *xe = vm->xe; struct xe_tile *tile; u8 id; - void *lookup; /* xe_vm_close_and_put was not called? */ xe_assert(xe, !vm->size); mutex_destroy(&vm->snap_mutex); - if (!(vm->flags & XE_VM_FLAG_MIGRATION)) { + if (!(vm->flags & XE_VM_FLAG_MIGRATION)) xe_device_mem_access_put(xe); - if (xe->info.has_asid && vm->usm.asid) { - mutex_lock(&xe->usm.lock); - lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); - xe_assert(xe, lookup == vm); - mutex_unlock(&xe->usm.lock); - } - } - for_each_tile(tile, xe, id) XE_WARN_ON(vm->pt_root[id]); trace_xe_vm_free(vm); - dma_fence_put(vm->rebind_fence); kfree(vm); } @@ -2512,7 +2537,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, lockdep_assert_held_write(&vm->lock); - err = xe_vm_prepare_vma(exec, vma, 1); + err = xe_vm_lock_vma(exec, vma); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 6df1f1c7f85d..306cd0934a19 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -207,7 +207,7 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm); int xe_vm_userptr_check_repin(struct xe_vm *vm); -struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); +int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); int xe_vm_invalidate_vma(struct xe_vma *vma); @@ -242,8 +242,10 @@ bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end); int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id); -int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, - unsigned int num_shared); +int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma); + +int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, + unsigned int num_fences); /** * xe_vm_resv() - Return's the vm's reservation object diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index ae5fb565f6bf..badf3945083d 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -177,9 +177,6 @@ struct xe_vm { */ struct list_head rebind_list; - /** @rebind_fence: rebind fence from execbuf */ - struct dma_fence *rebind_fence; - /** * @destroy_work: worker to destroy VM, needed as a dma_fence signaling * from an irq context can be last put and the destroy needs to be able @@ -264,6 +261,11 @@ struct xe_vm { bool capture_once; } error_capture; + /** + * @tlb_flush_seqno: Required TLB flush seqno for the next exec. + * protected by the vm resv. + */ + u64 tlb_flush_seqno; /** @batch_invalidate_tlb: Always invalidate TLB before batch start */ bool batch_invalidate_tlb; /** @xef: XE file handle for tracking this VM's drm client */ |