Diffstat (limited to 'drivers/gpu/drm/xe')
101 files changed, 3015 insertions, 1291 deletions
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 5c2f459a2925..9bce047901b2 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -39,7 +39,6 @@ config DRM_XE select DRM_TTM_HELPER select DRM_EXEC select DRM_GPUVM - select DRM_GPUSVM if !UML && DEVICE_PRIVATE select DRM_SCHED select MMU_NOTIFIER select WANT_DEV_COREDUMP @@ -74,9 +73,22 @@ config DRM_XE_DP_TUNNEL If in doubt say "Y". +config DRM_XE_GPUSVM + bool "Enable CPU to GPU address mirroring" + depends on DRM_XE + depends on !UML + depends on DEVICE_PRIVATE + default y + select DRM_GPUSVM + help + Enable this option if you want support for CPU to GPU address + mirroring. + + If in doubt say "Y". + config DRM_XE_DEVMEM_MIRROR bool "Enable device memory mirror" - depends on DRM_XE + depends on DRM_XE_GPUSVM select GET_FREE_REGION default y help diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 9699b08585f7..e4bf484d4121 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -125,12 +125,13 @@ xe-y += xe_bb.o \ xe_wopcm.o xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o -xe-$(CONFIG_DRM_GPUSVM) += xe_svm.o +xe-$(CONFIG_DRM_XE_GPUSVM) += xe_svm.o # graphics hardware monitoring (HWMON) support xe-$(CONFIG_HWMON) += xe_hwmon.o xe-$(CONFIG_PERF_EVENTS) += xe_pmu.o +xe-$(CONFIG_CONFIGFS_FS) += xe_configfs.o # graphics virtualization (SR-IOV) support xe-y += \ @@ -185,7 +186,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ display/intel_fbdev_fb.o \ display/xe_display.o \ display/xe_display_misc.o \ - display/xe_display_rps.o \ + display/xe_display_rpm.o \ display/xe_display_wa.o \ display/xe_dsb_buffer.o \ display/xe_fb_pin.o \ @@ -196,7 +197,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ # SOC code shared with i915 xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-soc/intel_dram.o \ - i915-soc/intel_pch.o \ i915-soc/intel_rom.o # Display code shared with i915 @@ -271,6 +271,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_panel.o \ i915-display/intel_pfit.o \ i915-display/intel_pmdemand.o \ + i915-display/intel_pch.o \ i915-display/intel_pps.o \ i915-display/intel_psr.o \ i915-display/intel_qp_tables.o \ diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index ec516e838ee8..448afb86e05c 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -141,6 +141,7 @@ enum xe_guc_action { XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507, XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C, + XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER = 0x550D, XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000, XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002, XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003, diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index d633f1c739e4..7de8f827281f 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -367,6 +367,7 @@ enum xe_guc_klv_ids { GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008, GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009, GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO = 0x900a, + GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH = 0x900b, }; #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h b/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h deleted file mode 100644 index 21fec9cc837c..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h +++ /dev/null @@ -1,11 +0,0
@@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef __INTEL_RPS_H__ -#define __INTEL_RPS_H__ - -#define gen5_rps_irq_handler(x) ({}) - -#endif /* __INTEL_RPS_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index dfec5108d2c3..9b7572e06f34 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -13,7 +13,6 @@ #include <drm/drm_drv.h> #include "i915_utils.h" -#include "intel_runtime_pm.h" #include "xe_device.h" /* for xe_device_has_flat_ccs() */ #include "xe_device_types.h" @@ -22,28 +21,12 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev) return container_of(dev, struct drm_i915_private, drm); } +/* compat platform checks only for soc/ usage */ #define IS_PLATFORM(xe, x) ((xe)->info.platform == x) -#define INTEL_INFO(dev_priv) (&((dev_priv)->info)) -#define IS_I830(dev_priv) (dev_priv && 0) -#define IS_I845G(dev_priv) (dev_priv && 0) -#define IS_I85X(dev_priv) (dev_priv && 0) -#define IS_I865G(dev_priv) (dev_priv && 0) #define IS_I915G(dev_priv) (dev_priv && 0) #define IS_I915GM(dev_priv) (dev_priv && 0) -#define IS_I945G(dev_priv) (dev_priv && 0) -#define IS_I945GM(dev_priv) (dev_priv && 0) -#define IS_I965G(dev_priv) (dev_priv && 0) -#define IS_I965GM(dev_priv) (dev_priv && 0) -#define IS_G45(dev_priv) (dev_priv && 0) -#define IS_GM45(dev_priv) (dev_priv && 0) -#define IS_G4X(dev_priv) (dev_priv && 0) #define IS_PINEVIEW(dev_priv) (dev_priv && 0) -#define IS_G33(dev_priv) (dev_priv && 0) -#define IS_IRONLAKE(dev_priv) (dev_priv && 0) -#define IS_IRONLAKE_M(dev_priv) (dev_priv && 0) -#define IS_SANDYBRIDGE(dev_priv) (dev_priv && 0) #define IS_IVYBRIDGE(dev_priv) (dev_priv && 0) -#define IS_IVB_GT1(dev_priv) (dev_priv && 0) #define IS_VALLEYVIEW(dev_priv) (dev_priv && 0) #define IS_CHERRYVIEW(dev_priv) (dev_priv && 0) #define IS_HASWELL(dev_priv) (dev_priv && 0) @@ -71,39 +54,10 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev) #define IS_HASWELL_ULT(dev_priv) (dev_priv && 0) #define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0) -#define IS_BROADWELL_ULX(dev_priv) (dev_priv && 0) #define IS_MOBILE(xe) (xe && 0) -#define IS_TIGERLAKE_UY(xe) (xe && 0) -#define IS_COMETLAKE_ULX(xe) (xe && 0) -#define IS_COFFEELAKE_ULX(xe) (xe && 0) -#define IS_KABYLAKE_ULX(xe) (xe && 0) -#define IS_SKYLAKE_ULX(xe) (xe && 0) -#define IS_HASWELL_ULX(xe) (xe && 0) -#define IS_COMETLAKE_ULT(xe) (xe && 0) -#define IS_COFFEELAKE_ULT(xe) (xe && 0) -#define IS_KABYLAKE_ULT(xe) (xe && 0) -#define IS_SKYLAKE_ULT(xe) (xe && 0) - -#define IS_DG2_G10(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G10) -#define IS_DG2_G11(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G11) -#define IS_DG2_G12(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G12) -#define IS_RAPTORLAKE_U(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU) -#define IS_ICL_WITH_PORT_F(xe) (xe && 0) #define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe)) - #define HAS_128_BYTE_Y_TILING(xe) (xe || 1) -#ifdef CONFIG_ARM64 -/* - * arm64 indirectly includes linux/rtc.h, - * which defines a irq_lock, so include it - * here before #define-ing it - */ -#include <linux/rtc.h> -#endif - -#define irq_lock irq.lock - #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h deleted file mode 100644 index 274042bff1be..000000000000 --- 
a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h +++ /dev/null @@ -1,76 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef __INTEL_RUNTIME_PM_H__ -#define __INTEL_RUNTIME_PM_H__ - -#include "intel_wakeref.h" -#include "xe_device_types.h" -#include "xe_pm.h" - -#define intel_runtime_pm xe_runtime_pm - -static inline void disable_rpm_wakeref_asserts(void *rpm) -{ -} - -static inline void enable_rpm_wakeref_asserts(void *rpm) -{ -} - -static inline bool -intel_runtime_pm_suspended(struct xe_runtime_pm *pm) -{ - struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); - - return pm_runtime_suspended(xe->drm.dev); -} - -static inline intel_wakeref_t intel_runtime_pm_get(struct xe_runtime_pm *pm) -{ - struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); - - return xe_pm_runtime_resume_and_get(xe) ? INTEL_WAKEREF_DEF : NULL; -} - -static inline intel_wakeref_t intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm) -{ - struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); - - return xe_pm_runtime_get_if_in_use(xe) ? INTEL_WAKEREF_DEF : NULL; -} - -static inline intel_wakeref_t intel_runtime_pm_get_noresume(struct xe_runtime_pm *pm) -{ - struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); - - xe_pm_runtime_get_noresume(xe); - - return INTEL_WAKEREF_DEF; -} - -static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm) -{ - struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); - - xe_pm_runtime_put(xe); -} - -static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, intel_wakeref_t wakeref) -{ - if (wakeref) - intel_runtime_pm_put_unchecked(pm); -} - -#define intel_runtime_pm_get_raw intel_runtime_pm_get -#define intel_runtime_pm_put_raw intel_runtime_pm_put -#define assert_rpm_wakelock_held(x) do { } while (0) -#define assert_rpm_raw_wakeref_held(x) do { } while (0) - -#define with_intel_runtime_pm(rpm, wf) \ - for ((wf) = intel_runtime_pm_get(rpm); (wf); \ - intel_runtime_pm_put((rpm), (wf)), (wf) = NULL) - -#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h deleted file mode 100644 index 9c46556d33a4..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h +++ /dev/null @@ -1,6 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#include "../../../i915/soc/intel_pch.h" diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index 3a1e505ff182..e8191562d122 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -45,7 +45,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, NULL, size, ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | XE_BO_FLAG_STOLEN | - XE_BO_FLAG_GGTT | XE_BO_FLAG_PINNED); + XE_BO_FLAG_GGTT); if (!IS_ERR(obj)) drm_info(&xe->drm, "Allocated fbdev into stolen\n"); else @@ -56,7 +56,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, size, ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_FLAG_GGTT | XE_BO_FLAG_PINNED); + XE_BO_FLAG_GGTT); } if (IS_ERR(obj)) { @@ -79,11 +79,11 @@ err: return ERR_CAST(fb); } -int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info, - struct 
drm_gem_object *_obj, struct i915_vma *vma) +int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, + struct drm_gem_object *_obj, struct i915_vma *vma) { struct xe_bo *obj = gem_to_xe_bo(_obj); - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + struct pci_dev *pdev = to_pci_dev(display->drm->dev); if (!(obj->flags & XE_BO_FLAG_SYSTEM)) { if (obj->flags & XE_BO_FLAG_STOLEN) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 0b0aca7a25af..68f064f33d4b 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -133,9 +133,6 @@ int xe_display_init_early(struct xe_device *xe) /* Fake uncore lock */ spin_lock_init(&xe->uncore.lock); - /* This must be called before any calls to HAS_PCH_* */ - intel_detect_pch(xe); - intel_display_driver_early_probe(display); /* Early display init.. */ @@ -147,7 +144,7 @@ int xe_display_init_early(struct xe_device *xe) */ intel_dram_detect(xe); - intel_bw_init_hw(xe); + intel_bw_init_hw(display); intel_display_device_info_runtime_init(display); @@ -173,7 +170,7 @@ static void xe_display_fini(void *arg) struct xe_device *xe = arg; struct intel_display *display = &xe->display; - intel_hpd_poll_fini(xe); + intel_hpd_poll_fini(display); intel_hdcp_component_fini(display); intel_audio_deinit(display); intel_display_driver_remove(display); @@ -220,11 +217,13 @@ void xe_display_unregister(struct xe_device *xe) void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) { + struct intel_display *display = &xe->display; + if (!xe->info.probe_display) return; if (master_ctl & DISPLAY_IRQ) - gen11_display_irq_handler(xe); + gen11_display_irq_handler(display); } void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) @@ -240,19 +239,23 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) void xe_display_irq_reset(struct xe_device *xe) { + struct intel_display *display = &xe->display; + if (!xe->info.probe_display) return; - gen11_display_irq_reset(xe); + gen11_display_irq_reset(display); } void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) { + struct intel_display *display = &xe->display; + if (!xe->info.probe_display) return; if (gt->info.id == XE_GT0) - gen11_de_irq_postinstall(xe); + gen11_de_irq_postinstall(display); } static bool suspend_to_idle(void) @@ -305,7 +308,7 @@ static void xe_display_enable_d3cold(struct xe_device *xe) intel_dmc_suspend(display); if (has_display(xe)) - intel_hpd_poll_enable(xe); + intel_hpd_poll_enable(display); } static void xe_display_disable_d3cold(struct xe_device *xe) @@ -322,10 +325,10 @@ static void xe_display_disable_d3cold(struct xe_device *xe) intel_display_driver_init_hw(display); - intel_hpd_init(xe); + intel_hpd_init(display); if (has_display(xe)) - intel_hpd_poll_disable(xe); + intel_hpd_poll_disable(display); intel_opregion_resume(display); @@ -355,7 +358,7 @@ void xe_display_pm_suspend(struct xe_device *xe) xe_display_flush_cleanup_work(xe); - intel_hpd_cancel_work(xe); + intel_hpd_cancel_work(display); if (has_display(xe)) { intel_display_driver_suspend_access(display); @@ -385,7 +388,7 @@ void xe_display_pm_shutdown(struct xe_device *xe) xe_display_flush_cleanup_work(xe); intel_dp_mst_suspend(display); - intel_hpd_cancel_work(xe); + intel_hpd_cancel_work(display); if (has_display(xe)) intel_display_driver_suspend_access(display); @@ -400,6 +403,8 @@ void xe_display_pm_shutdown(struct xe_device *xe) void xe_display_pm_runtime_suspend(struct xe_device 
*xe) { + struct intel_display *display = &xe->display; + if (!xe->info.probe_display) return; @@ -408,7 +413,7 @@ void xe_display_pm_runtime_suspend(struct xe_device *xe) return; } - intel_hpd_poll_enable(xe); + intel_hpd_poll_enable(display); } void xe_display_pm_suspend_late(struct xe_device *xe) @@ -482,7 +487,7 @@ void xe_display_pm_resume(struct xe_device *xe) if (has_display(xe)) intel_display_driver_resume_access(display); - intel_hpd_init(xe); + intel_hpd_init(display); if (has_display(xe)) { intel_display_driver_resume(display); @@ -491,7 +496,7 @@ void xe_display_pm_resume(struct xe_device *xe) } if (has_display(xe)) - intel_hpd_poll_disable(xe); + intel_hpd_poll_disable(display); intel_opregion_resume(display); @@ -502,6 +507,8 @@ void xe_display_pm_resume(struct xe_device *xe) void xe_display_pm_runtime_resume(struct xe_device *xe) { + struct intel_display *display = &xe->display; + if (!xe->info.probe_display) return; @@ -510,9 +517,9 @@ void xe_display_pm_runtime_resume(struct xe_device *xe) return; } - intel_hpd_init(xe); - intel_hpd_poll_disable(xe); - skl_watermark_ipc_update(xe); + intel_hpd_init(display); + intel_hpd_poll_disable(display); + skl_watermark_ipc_update(display); } diff --git a/drivers/gpu/drm/xe/display/xe_display_rpm.c b/drivers/gpu/drm/xe/display/xe_display_rpm.c new file mode 100644 index 000000000000..1955153aadba --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_display_rpm.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include "intel_display_rpm.h" +#include "xe_device_types.h" +#include "xe_pm.h" + +static struct xe_device *display_to_xe(struct intel_display *display) +{ + return container_of(display, struct xe_device, display); +} + +struct ref_tracker *intel_display_rpm_get_raw(struct intel_display *display) +{ + return intel_display_rpm_get(display); +} + +void intel_display_rpm_put_raw(struct intel_display *display, struct ref_tracker *wakeref) +{ + intel_display_rpm_put(display, wakeref); +} + +struct ref_tracker *intel_display_rpm_get(struct intel_display *display) +{ + return xe_pm_runtime_resume_and_get(display_to_xe(display)) ? INTEL_WAKEREF_DEF : NULL; +} + +struct ref_tracker *intel_display_rpm_get_if_in_use(struct intel_display *display) +{ + return xe_pm_runtime_get_if_in_use(display_to_xe(display)) ? 
INTEL_WAKEREF_DEF : NULL; +} + +struct ref_tracker *intel_display_rpm_get_noresume(struct intel_display *display) +{ + xe_pm_runtime_get_noresume(display_to_xe(display)); + + return INTEL_WAKEREF_DEF; +} + +void intel_display_rpm_put(struct intel_display *display, struct ref_tracker *wakeref) +{ + if (wakeref) + xe_pm_runtime_put(display_to_xe(display)); +} + +void intel_display_rpm_put_unchecked(struct intel_display *display) +{ + xe_pm_runtime_put(display_to_xe(display)); +} + +bool intel_display_rpm_suspended(struct intel_display *display) +{ + struct xe_device *xe = display_to_xe(display); + + return pm_runtime_suspended(xe->drm.dev); +} + +void assert_display_rpm_held(struct intel_display *display) +{ + /* FIXME */ +} + +void intel_display_rpm_assert_block(struct intel_display *display) +{ + /* FIXME */ +} + +void intel_display_rpm_assert_unblock(struct intel_display *display) +{ + /* FIXME */ +} diff --git a/drivers/gpu/drm/xe/display/xe_display_rps.c b/drivers/gpu/drm/xe/display/xe_display_rps.c deleted file mode 100644 index fa616f9688a5..000000000000 --- a/drivers/gpu/drm/xe/display/xe_display_rps.c +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2023 Intel Corporation - */ - -#include "intel_display_rps.h" - -void intel_display_rps_boost_after_vblank(struct drm_crtc *crtc, - struct dma_fence *fence) -{ -} - -void intel_display_rps_mark_interactive(struct intel_display *display, - struct intel_atomic_state *state, - bool interactive) -{ -} diff --git a/drivers/gpu/drm/xe/display/xe_display_wa.c b/drivers/gpu/drm/xe/display/xe_display_wa.c index 68e3d1959ad6..2933ca97d673 100644 --- a/drivers/gpu/drm/xe/display/xe_display_wa.c +++ b/drivers/gpu/drm/xe/display/xe_display_wa.c @@ -10,7 +10,9 @@ #include <generated/xe_wa_oob.h> -bool intel_display_needs_wa_16023588340(struct drm_i915_private *i915) +bool intel_display_needs_wa_16023588340(struct intel_display *display) { - return XE_WA(xe_root_mmio_gt(i915), 16023588340); + struct xe_device *xe = to_xe_device(display->drm); + + return XE_WA(xe_root_mmio_gt(xe), 16023588340); } diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 7c02323e9531..b35a6f201d4a 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -9,7 +9,6 @@ #include "abi/gsc_command_header_abi.h" #include "intel_hdcp_gsc.h" -#include "intel_hdcp_gsc_message.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_device_types.h" @@ -22,7 +21,8 @@ #define HECI_MEADDRESS_HDCP 18 -struct intel_hdcp_gsc_message { +struct intel_hdcp_gsc_context { + struct xe_device *xe; struct xe_bo *hdcp_bo; u64 hdcp_cmd_in; u64 hdcp_cmd_out; @@ -30,14 +30,9 @@ struct intel_hdcp_gsc_message { #define HDCP_GSC_HEADER_SIZE sizeof(struct intel_gsc_mtl_header) -bool intel_hdcp_gsc_cs_required(struct intel_display *display) +bool intel_hdcp_gsc_check_status(struct drm_device *drm) { - return DISPLAY_VER(display) >= 14; -} - -bool intel_hdcp_gsc_check_status(struct intel_display *display) -{ - struct xe_device *xe = to_xe_device(display->drm); + struct xe_device *xe = to_xe_device(drm); struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_gt *gt = tile->media_gt; struct xe_gsc *gsc = >->uc.gsc; @@ -69,10 +64,9 @@ out: } /*This function helps allocate memory for the command that we will send to gsc cs */ -static int intel_hdcp_gsc_initialize_message(struct intel_display *display, - struct intel_hdcp_gsc_message *hdcp_message) +static int 
intel_hdcp_gsc_initialize_message(struct xe_device *xe, + struct intel_hdcp_gsc_context *gsc_context) { - struct xe_device *xe = to_xe_device(display->drm); struct xe_bo *bo = NULL; u64 cmd_in, cmd_out; int ret = 0; @@ -84,7 +78,7 @@ static int intel_hdcp_gsc_initialize_message(struct intel_display *display, XE_BO_FLAG_GGTT); if (IS_ERR(bo)) { - drm_err(display->drm, "Failed to allocate bo for HDCP streaming command!\n"); + drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n"); ret = PTR_ERR(bo); goto out; } @@ -93,104 +87,60 @@ static int intel_hdcp_gsc_initialize_message(struct intel_display *display, cmd_out = cmd_in + PAGE_SIZE; xe_map_memset(xe, &bo->vmap, 0, 0, bo->size); - hdcp_message->hdcp_bo = bo; - hdcp_message->hdcp_cmd_in = cmd_in; - hdcp_message->hdcp_cmd_out = cmd_out; + gsc_context->hdcp_bo = bo; + gsc_context->hdcp_cmd_in = cmd_in; + gsc_context->hdcp_cmd_out = cmd_out; + gsc_context->xe = xe; + out: return ret; } -static int intel_hdcp_gsc_hdcp2_init(struct intel_display *display) +struct intel_hdcp_gsc_context *intel_hdcp_gsc_context_alloc(struct drm_device *drm) { - struct intel_hdcp_gsc_message *hdcp_message; + struct xe_device *xe = to_xe_device(drm); + struct intel_hdcp_gsc_context *gsc_context; int ret; - hdcp_message = kzalloc(sizeof(*hdcp_message), GFP_KERNEL); - - if (!hdcp_message) - return -ENOMEM; + gsc_context = kzalloc(sizeof(*gsc_context), GFP_KERNEL); + if (!gsc_context) + return ERR_PTR(-ENOMEM); /* * NOTE: No need to lock the comp mutex here as it is already * going to be taken before this function called */ - ret = intel_hdcp_gsc_initialize_message(display, hdcp_message); + ret = intel_hdcp_gsc_initialize_message(xe, gsc_context); if (ret) { - drm_err(display->drm, "Could not initialize hdcp_message\n"); - kfree(hdcp_message); - return ret; + drm_err(&xe->drm, "Could not initialize gsc_context\n"); + kfree(gsc_context); + gsc_context = ERR_PTR(ret); } - display->hdcp.hdcp_message = hdcp_message; - return ret; -} - -static const struct i915_hdcp_ops gsc_hdcp_ops = { - .initiate_hdcp2_session = intel_hdcp_gsc_initiate_session, - .verify_receiver_cert_prepare_km = - intel_hdcp_gsc_verify_receiver_cert_prepare_km, - .verify_hprime = intel_hdcp_gsc_verify_hprime, - .store_pairing_info = intel_hdcp_gsc_store_pairing_info, - .initiate_locality_check = intel_hdcp_gsc_initiate_locality_check, - .verify_lprime = intel_hdcp_gsc_verify_lprime, - .get_session_key = intel_hdcp_gsc_get_session_key, - .repeater_check_flow_prepare_ack = - intel_hdcp_gsc_repeater_check_flow_prepare_ack, - .verify_mprime = intel_hdcp_gsc_verify_mprime, - .enable_hdcp_authentication = intel_hdcp_gsc_enable_authentication, - .close_hdcp_session = intel_hdcp_gsc_close_session, -}; - -int intel_hdcp_gsc_init(struct intel_display *display) -{ - struct i915_hdcp_arbiter *data; - int ret; - - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - - mutex_lock(&display->hdcp.hdcp_mutex); - display->hdcp.arbiter = data; - display->hdcp.arbiter->hdcp_dev = display->drm->dev; - display->hdcp.arbiter->ops = &gsc_hdcp_ops; - ret = intel_hdcp_gsc_hdcp2_init(display); - if (ret) - kfree(data); - - mutex_unlock(&display->hdcp.hdcp_mutex); - - return ret; + return gsc_context; } -void intel_hdcp_gsc_fini(struct intel_display *display) +void intel_hdcp_gsc_context_free(struct intel_hdcp_gsc_context *gsc_context) { - struct intel_hdcp_gsc_message *hdcp_message = - display->hdcp.hdcp_message; - struct i915_hdcp_arbiter *arb = display->hdcp.arbiter; - - if 
(hdcp_message) { - xe_bo_unpin_map_no_vm(hdcp_message->hdcp_bo); - kfree(hdcp_message); - display->hdcp.hdcp_message = NULL; - } + if (!gsc_context) + return; - kfree(arb); - display->hdcp.arbiter = NULL; + xe_bo_unpin_map_no_vm(gsc_context->hdcp_bo); + kfree(gsc_context); } static int xe_gsc_send_sync(struct xe_device *xe, - struct intel_hdcp_gsc_message *hdcp_message, + struct intel_hdcp_gsc_context *gsc_context, u32 msg_size_in, u32 msg_size_out, u32 addr_out_off) { - struct xe_gt *gt = hdcp_message->hdcp_bo->tile->media_gt; - struct iosys_map *map = &hdcp_message->hdcp_bo->vmap; + struct xe_gt *gt = gsc_context->hdcp_bo->tile->media_gt; + struct iosys_map *map = &gsc_context->hdcp_bo->vmap; struct xe_gsc *gsc = >->uc.gsc; int ret; - ret = xe_gsc_pkt_submit_kernel(gsc, hdcp_message->hdcp_cmd_in, msg_size_in, - hdcp_message->hdcp_cmd_out, msg_size_out); + ret = xe_gsc_pkt_submit_kernel(gsc, gsc_context->hdcp_cmd_in, msg_size_in, + gsc_context->hdcp_cmd_out, msg_size_out); if (ret) { drm_err(&xe->drm, "failed to send gsc HDCP msg (%d)\n", ret); return ret; @@ -205,12 +155,12 @@ static int xe_gsc_send_sync(struct xe_device *xe, return ret; } -ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, - size_t msg_in_len, u8 *msg_out, - size_t msg_out_len) +ssize_t intel_hdcp_gsc_msg_send(struct intel_hdcp_gsc_context *gsc_context, + void *msg_in, size_t msg_in_len, + void *msg_out, size_t msg_out_len) { + struct xe_device *xe = gsc_context->xe; const size_t max_msg_size = PAGE_SIZE - HDCP_GSC_HEADER_SIZE; - struct intel_hdcp_gsc_message *hdcp_message; u64 host_session_id; u32 msg_size_in, msg_size_out; u32 addr_out_off, addr_in_wr_off = 0; @@ -223,15 +173,14 @@ ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, msg_size_in = msg_in_len + HDCP_GSC_HEADER_SIZE; msg_size_out = msg_out_len + HDCP_GSC_HEADER_SIZE; - hdcp_message = xe->display.hdcp.hdcp_message; addr_out_off = PAGE_SIZE; host_session_id = xe_gsc_create_host_session_id(); xe_pm_runtime_get_noresume(xe); - addr_in_wr_off = xe_gsc_emit_header(xe, &hdcp_message->hdcp_bo->vmap, + addr_in_wr_off = xe_gsc_emit_header(xe, &gsc_context->hdcp_bo->vmap, addr_in_wr_off, HECI_MEADDRESS_HDCP, host_session_id, msg_in_len); - xe_map_memcpy_to(xe, &hdcp_message->hdcp_bo->vmap, addr_in_wr_off, + xe_map_memcpy_to(xe, &gsc_context->hdcp_bo->vmap, addr_in_wr_off, msg_in, msg_in_len); /* * Keep sending request in case the pending bit is set no need to add @@ -240,7 +189,7 @@ ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, * 20 times each message 50 ms apart */ do { - ret = xe_gsc_send_sync(xe, hdcp_message, msg_size_in, msg_size_out, + ret = xe_gsc_send_sync(xe, gsc_context, msg_size_in, msg_size_out, addr_out_off); /* Only try again if gsc says so */ @@ -254,7 +203,7 @@ ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, if (ret) goto out; - xe_map_memcpy_from(xe, msg_out, &hdcp_message->hdcp_bo->vmap, + xe_map_memcpy_from(xe, msg_out, &gsc_context->hdcp_bo->vmap, addr_out_off + HDCP_GSC_HEADER_SIZE, msg_out_len); diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index 4ca0cb571194..6502b8274173 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -83,7 +83,7 @@ initial_plane_bo(struct xe_device *xe, if (plane_config->size == 0) return NULL; - flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT; + flags = XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT; base = 
round_down(plane_config->base, page_size); if (IS_DGFX(xe)) { diff --git a/drivers/gpu/drm/xe/instructions/xe_alu_commands.h b/drivers/gpu/drm/xe/instructions/xe_alu_commands.h new file mode 100644 index 000000000000..2987b10d3e16 --- /dev/null +++ b/drivers/gpu/drm/xe/instructions/xe_alu_commands.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_ALU_COMMANDS_H_ +#define _XE_ALU_COMMANDS_H_ + +#include "instructions/xe_instr_defs.h" + +/* Instruction Opcodes */ +#define CS_ALU_OPCODE_NOOP 0x000 +#define CS_ALU_OPCODE_FENCE_RD 0x001 +#define CS_ALU_OPCODE_FENCE_WR 0x002 +#define CS_ALU_OPCODE_LOAD 0x080 +#define CS_ALU_OPCODE_LOADINV 0x480 +#define CS_ALU_OPCODE_LOAD0 0x081 +#define CS_ALU_OPCODE_LOAD1 0x481 +#define CS_ALU_OPCODE_LOADIND 0x082 +#define CS_ALU_OPCODE_ADD 0x100 +#define CS_ALU_OPCODE_SUB 0x101 +#define CS_ALU_OPCODE_AND 0x102 +#define CS_ALU_OPCODE_OR 0x103 +#define CS_ALU_OPCODE_XOR 0x104 +#define CS_ALU_OPCODE_SHL 0x105 +#define CS_ALU_OPCODE_SHR 0x106 +#define CS_ALU_OPCODE_SAR 0x107 +#define CS_ALU_OPCODE_STORE 0x180 +#define CS_ALU_OPCODE_STOREINV 0x580 +#define CS_ALU_OPCODE_STOREIND 0x181 + +/* Instruction Operands */ +#define CS_ALU_OPERAND_REG(n) REG_FIELD_PREP(GENMASK(3, 0), (n)) +#define CS_ALU_OPERAND_REG0 0x0 +#define CS_ALU_OPERAND_REG1 0x1 +#define CS_ALU_OPERAND_REG2 0x2 +#define CS_ALU_OPERAND_REG3 0x3 +#define CS_ALU_OPERAND_REG4 0x4 +#define CS_ALU_OPERAND_REG5 0x5 +#define CS_ALU_OPERAND_REG6 0x6 +#define CS_ALU_OPERAND_REG7 0x7 +#define CS_ALU_OPERAND_REG8 0x8 +#define CS_ALU_OPERAND_REG9 0x9 +#define CS_ALU_OPERAND_REG10 0xa +#define CS_ALU_OPERAND_REG11 0xb +#define CS_ALU_OPERAND_REG12 0xc +#define CS_ALU_OPERAND_REG13 0xd +#define CS_ALU_OPERAND_REG14 0xe +#define CS_ALU_OPERAND_REG15 0xf +#define CS_ALU_OPERAND_SRCA 0x20 +#define CS_ALU_OPERAND_SRCB 0x21 +#define CS_ALU_OPERAND_ACCU 0x31 +#define CS_ALU_OPERAND_ZF 0x32 +#define CS_ALU_OPERAND_CF 0x33 +#define CS_ALU_OPERAND_NA 0 /* N/A operand */ + +/* Command Streamer ALU Instructions */ +#define CS_ALU_INSTR(opcode, op1, op2) (REG_FIELD_PREP(GENMASK(31, 20), (opcode)) | \ + REG_FIELD_PREP(GENMASK(19, 10), (op1)) | \ + REG_FIELD_PREP(GENMASK(9, 0), (op2))) + +#define __CS_ALU_INSTR(opcode, op1, op2) CS_ALU_INSTR(CS_ALU_OPCODE_##opcode, \ + CS_ALU_OPERAND_##op1, \ + CS_ALU_OPERAND_##op2) + +#define CS_ALU_INSTR_NOOP __CS_ALU_INSTR(NOOP, NA, NA) +#define CS_ALU_INSTR_LOAD(op1, op2) __CS_ALU_INSTR(LOAD, op1, op2) +#define CS_ALU_INSTR_LOADINV(op1, op2) __CS_ALU_INSTR(LOADINV, op1, op2) +#define CS_ALU_INSTR_LOAD0(op1) __CS_ALU_INSTR(LOAD0, op1, NA) +#define CS_ALU_INSTR_LOAD1(op1) __CS_ALU_INSTR(LOAD1, op1, NA) +#define CS_ALU_INSTR_ADD __CS_ALU_INSTR(ADD, NA, NA) +#define CS_ALU_INSTR_SUB __CS_ALU_INSTR(SUB, NA, NA) +#define CS_ALU_INSTR_AND __CS_ALU_INSTR(AND, NA, NA) +#define CS_ALU_INSTR_OR __CS_ALU_INSTR(OR, NA, NA) +#define CS_ALU_INSTR_XOR __CS_ALU_INSTR(XOR, NA, NA) +#define CS_ALU_INSTR_STORE(op1, op2) __CS_ALU_INSTR(STORE, op1, op2) +#define CS_ALU_INSTR_STOREINV(op1, op2) __CS_ALU_INSTR(STOREINV, op1, op2) + +#endif diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h index 31d28a67ef6a..457881af8af9 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h @@ -137,6 +137,7 @@ #define CMD_3DSTATE_CLIP_MESH GFXPIPE_3D_CMD(0x0, 0x81) #define CMD_3DSTATE_SBE_MESH GFXPIPE_3D_CMD(0x0, 0x82) 
#define CMD_3DSTATE_CPSIZE_CONTROL_BUFFER GFXPIPE_3D_CMD(0x0, 0x83) +#define CMD_3DSTATE_COARSE_PIXEL GFXPIPE_3D_CMD(0x0, 0x89) #define CMD_3DSTATE_DRAWING_RECTANGLE GFXPIPE_3D_CMD(0x1, 0x0) #define CMD_3DSTATE_CHROMA_KEY GFXPIPE_3D_CMD(0x1, 0x4) diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index 5a47991b4b81..e3f5e8bb3ebc 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -32,6 +32,7 @@ #define MI_BATCH_BUFFER_END __MI_INSTR(0xA) #define MI_TOPOLOGY_FILTER __MI_INSTR(0xD) #define MI_FORCE_WAKEUP __MI_INSTR(0x1D) +#define MI_MATH(n) (__MI_INSTR(0x1A) | XE_INSTR_NUM_DW((n) + 1)) #define MI_STORE_DATA_IMM __MI_INSTR(0x20) #define MI_SDI_GGTT REG_BIT(22) @@ -65,6 +66,10 @@ #define MI_LOAD_REGISTER_MEM (__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4)) #define MI_LRM_USE_GGTT REG_BIT(22) +#define MI_LOAD_REGISTER_REG (__MI_INSTR(0x2a) | XE_INSTR_NUM_DW(3)) +#define MI_LRR_DST_CS_MMIO REG_BIT(19) +#define MI_LRR_SRC_CS_MMIO REG_BIT(18) + #define MI_COPY_MEM_MEM (__MI_INSTR(0x2e) | XE_INSTR_NUM_DW(5)) #define MI_COPY_MEM_MEM_SRC_GGTT REG_BIT(22) #define MI_COPY_MEM_MEM_DST_GGTT REG_BIT(21) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 891f928d80ce..7ade41e2b7b3 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -193,6 +193,10 @@ #define PREEMPT_GPGPU_LEVEL_MASK PREEMPT_GPGPU_LEVEL(1, 1) #define PREEMPT_3D_OBJECT_LEVEL REG_BIT(0) +#define CS_GPR_DATA(base, n) XE_REG((base) + 0x600 + (n) * 4) +#define CS_GPR_REG(base, n) CS_GPR_DATA((base), (n) * 2) +#define CS_GPR_REG_UDW(base, n) CS_GPR_DATA((base), (n) * 2 + 1) + #define VDBOX_CGCTL3F08(base) XE_REG((base) + 0x3f08) #define CG3DDISHRS_CLKGATE_DIS REG_BIT(5) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 181913967ac9..5cd5ab8529c5 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -62,7 +62,6 @@ #define LE_SSE_MASK REG_GENMASK(18, 17) #define LE_SSE(value) REG_FIELD_PREP(LE_SSE_MASK, value) #define LE_COS_MASK REG_GENMASK(16, 15) -#define LE_COS(value) REG_FIELD_PREP(LE_COS_MASK) #define LE_SCF_MASK REG_BIT(14) #define LE_SCF(value) REG_FIELD_PREP(LE_SCF_MASK, value) #define LE_PFM_MASK REG_GENMASK(13, 11) @@ -393,6 +392,18 @@ #define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4) #define XEHP_LNESPARE REG_BIT(19) +#define LSN_VC_REG2 XE_REG_MCR(0xb0c8) +#define LSN_LNI_WGT_MASK REG_GENMASK(31, 28) +#define LSN_LNI_WGT(value) REG_FIELD_PREP(LSN_LNI_WGT_MASK, value) +#define LSN_LNE_WGT_MASK REG_GENMASK(27, 24) +#define LSN_LNE_WGT(value) REG_FIELD_PREP(LSN_LNE_WGT_MASK, value) +#define LSN_DIM_X_WGT_MASK REG_GENMASK(23, 20) +#define LSN_DIM_X_WGT(value) REG_FIELD_PREP(LSN_DIM_X_WGT_MASK, value) +#define LSN_DIM_Y_WGT_MASK REG_GENMASK(19, 16) +#define LSN_DIM_Y_WGT(value) REG_FIELD_PREP(LSN_DIM_Y_WGT_MASK, value) +#define LSN_DIM_Z_WGT_MASK REG_GENMASK(15, 12) +#define LSN_DIM_Z_WGT(value) REG_FIELD_PREP(LSN_DIM_Z_WGT_MASK, value) + #define L3SQCREG2 XE_REG_MCR(0xb104) #define COMPMEMRD256BOVRFETCHEN REG_BIT(20) diff --git a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h index 8846eb9ce2a4..c7d5d782e3f9 100644 --- a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h @@ -21,6 +21,9 @@ #define BMG_PACKAGE_POWER_SKU XE_REG(0x138098) #define BMG_PACKAGE_POWER_SKU_UNIT 
XE_REG(0x1380dc) #define BMG_PACKAGE_ENERGY_STATUS XE_REG(0x138120) +#define BMG_FAN_1_SPEED XE_REG(0x138140) +#define BMG_FAN_2_SPEED XE_REG(0x138170) +#define BMG_FAN_3_SPEED XE_REG(0x1381a0) #define BMG_VRAM_TEMPERATURE XE_REG(0x1382c0) #define BMG_PACKAGE_TEMPERATURE XE_REG(0x138434) #define BMG_PACKAGE_RAPL_LIMIT XE_REG(0x138440) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 9fde67ca989f..378dcd0fb414 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -60,7 +60,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, } /* Evict to system. CCS data should be copied. */ - ret = xe_bo_evict(bo, true); + ret = xe_bo_evict(bo); if (ret) { KUNIT_FAIL(test, "Failed to evict bo.\n"); return ret; @@ -252,7 +252,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc for_each_gt(__gt, xe, id) xe_gt_sanitize(__gt); - err = xe_bo_restore_kernel(xe); + err = xe_bo_restore_early(xe); /* * Snapshotting the CTB and copying back a potentially old * version seems risky, depending on what might have been @@ -273,7 +273,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc goto cleanup_all; } - err = xe_bo_restore_user(xe); + err = xe_bo_restore_late(xe); if (err) { KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err)); goto cleanup_all; diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index cedd3e88a6fb..c53f67ce4b0a 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -65,7 +65,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, * the exporter and the importer should be the same bo. */ swap(exported->ttm.base.dma_buf, dmabuf); - ret = xe_bo_evict(exported, true); + ret = xe_bo_evict(exported); swap(exported->ttm.base.dma_buf, dmabuf); if (ret) { if (ret != -EINTR && ret != -ERESTARTSYS) diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index d5fe0ea889ad..4a65e3103f77 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -202,8 +202,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_VRAM_IF_DGFX(tile)); if (IS_ERR(big)) { KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big)); goto vunmap; @@ -211,8 +210,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_VRAM_IF_DGFX(tile)); if (IS_ERR(pt)) { KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", PTR_ERR(pt)); @@ -222,8 +220,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) tiny = xe_bo_create_pin_map(xe, tile, m->q->vm, 2 * SZ_4K, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_VRAM_IF_DGFX(tile)); if (IS_ERR(tiny)) { KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n", PTR_ERR(tiny)); @@ -512,7 +509,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, dma_fence_put(fence); kunit_info(test, "Evict vram buffer object\n"); - ret = xe_bo_evict(vram_bo, true); + ret = xe_bo_evict(vram_bo); if (ret) { KUNIT_FAIL(test, "Failed to evict bo.\n"); return; 
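As an aside before the xe_bo.c changes: the CS_ALU_INSTR_* helpers in the new xe_alu_commands.h and the MI_MATH() opcode added to xe_mi_commands.h are designed to compose. A minimal sketch of a command-streamer packet that computes GPR2 = GPR0 + GPR1 follows; the cs dword-emission pointer is a hypothetical stand-in for the driver's real batch-buffer emission code, only the macro usage follows the definitions in this diff:

static u32 *emit_gpr2_add(u32 *cs)
{
	*cs++ = MI_MATH(4);			/* header: 4 ALU instruction dwords follow */
	*cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0);	/* SRCA <- GPR0 */
	*cs++ = CS_ALU_INSTR_LOAD(SRCB, REG1);	/* SRCB <- GPR1 */
	*cs++ = CS_ALU_INSTR_ADD;		/* ACCU <- SRCA + SRCB */
	*cs++ = CS_ALU_INSTR_STORE(REG2, ACCU);	/* GPR2 <- ACCU */
	return cs;
}

The result lands in the per-engine GPR file addressed by the new CS_GPR_REG(base, n) macros in xe_engine_regs.h, and values can be shuffled between registers with the new MI_LOAD_REGISTER_REG instruction.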
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 64f9c936eea0..d99d91fe8aa9 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -55,6 +55,8 @@ static struct ttm_placement sys_placement = { .placement = &sys_placement_flags, }; +static struct ttm_placement purge_placement; + static const struct ttm_place tt_placement_flags[] = { { .fpfn = 0, @@ -189,11 +191,18 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo, static bool force_contiguous(u32 bo_flags) { + if (bo_flags & XE_BO_FLAG_STOLEN) + return true; /* users expect this */ + else if (bo_flags & XE_BO_FLAG_PINNED && + !(bo_flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) + return true; /* needs vmap */ + /* * For eviction / restore on suspend / resume objects pinned in VRAM * must be contiguous, also only contiguous BOs support xe_bo_vmap. */ - return bo_flags & (XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT); + return bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS && + bo_flags & XE_BO_FLAG_PINNED; } static void add_vram(struct xe_device *xe, struct xe_bo *bo, @@ -281,6 +290,8 @@ int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, static void xe_evict_flags(struct ttm_buffer_object *tbo, struct ttm_placement *placement) { + struct xe_device *xe = container_of(tbo->bdev, typeof(*xe), ttm); + bool device_unplugged = drm_dev_is_unplugged(&xe->drm); struct xe_bo *bo; if (!xe_bo_is_xe_bo(tbo)) { @@ -290,7 +301,7 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, return; } - *placement = sys_placement; + *placement = device_unplugged ? purge_placement : sys_placement; return; } @@ -300,6 +311,11 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, return; } + if (device_unplugged && !tbo->base.dma_buf) { + *placement = purge_placement; + return; + } + /* * For xe, sg bos that are evicted to system just triggers a * rebind of the sg list upon subsequent validation to XE_PL_TT. @@ -657,11 +673,20 @@ static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo, struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm); struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + bool device_unplugged = drm_dev_is_unplugged(&xe->drm); struct sg_table *sg; xe_assert(xe, attach); xe_assert(xe, ttm_bo->ttm); + if (device_unplugged && new_res->mem_type == XE_PL_SYSTEM && + ttm_bo->sg) { + dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL); + ttm_bo->sg = NULL; + } + if (new_res->mem_type == XE_PL_SYSTEM) goto out; @@ -898,79 +923,44 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, xe_pm_runtime_get_noresume(xe); } - if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { - /* - * Kernel memory that is pinned should only be moved on suspend - * / resume, some of the pinned memory is required for the - * device to resume / use the GPU to move other evicted memory - * (user memory) around. This likely could be optimized a bit - * further where we find the minimum set of pinned memory - * required for resume but for simplity doing a memcpy for all - * pinned memory. 
- */ - ret = xe_bo_vmap(bo); - if (!ret) { - ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem); - - /* Create a new VMAP once kernel BO back in VRAM */ - if (!ret && resource_is_vram(new_mem)) { - struct xe_vram_region *vram = res_to_mem_region(new_mem); - void __iomem *new_addr = vram->mapping + - (new_mem->start << PAGE_SHIFT); + if (move_lacks_source) { + u32 flags = 0; - if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) { - ret = -EINVAL; - xe_pm_runtime_put(xe); - goto out; - } + if (mem_type_is_vram(new_mem->mem_type)) + flags |= XE_MIGRATE_CLEAR_FLAG_FULL; + else if (handle_system_ccs) + flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA; - xe_assert(xe, new_mem->start == - bo->placements->fpfn); - - iosys_map_set_vaddr_iomem(&bo->vmap, new_addr); - } - } + fence = xe_migrate_clear(migrate, bo, new_mem, flags); } else { - if (move_lacks_source) { - u32 flags = 0; - - if (mem_type_is_vram(new_mem->mem_type)) - flags |= XE_MIGRATE_CLEAR_FLAG_FULL; - else if (handle_system_ccs) - flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA; - - fence = xe_migrate_clear(migrate, bo, new_mem, flags); - } - else - fence = xe_migrate_copy(migrate, bo, bo, old_mem, - new_mem, handle_system_ccs); - if (IS_ERR(fence)) { - ret = PTR_ERR(fence); - xe_pm_runtime_put(xe); - goto out; - } - if (!move_lacks_source) { - ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, - true, new_mem); - if (ret) { - dma_fence_wait(fence, false); - ttm_bo_move_null(ttm_bo, new_mem); - ret = 0; - } - } else { - /* - * ttm_bo_move_accel_cleanup() may blow up if - * bo->resource == NULL, so just attach the - * fence and set the new resource. - */ - dma_resv_add_fence(ttm_bo->base.resv, fence, - DMA_RESV_USAGE_KERNEL); + fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem, + handle_system_ccs); + } + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + xe_pm_runtime_put(xe); + goto out; + } + if (!move_lacks_source) { + ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true, + new_mem); + if (ret) { + dma_fence_wait(fence, false); ttm_bo_move_null(ttm_bo, new_mem); + ret = 0; } - - dma_fence_put(fence); + } else { + /* + * ttm_bo_move_accel_cleanup() may blow up if + * bo->resource == NULL, so just attach the + * fence and set the new resource. + */ + dma_resv_add_fence(ttm_bo->base.resv, fence, + DMA_RESV_USAGE_KERNEL); + ttm_bo_move_null(ttm_bo, new_mem); } + dma_fence_put(fence); xe_pm_runtime_put(xe); out: @@ -1095,6 +1085,80 @@ out_unref: } /** + * xe_bo_notifier_prepare_pinned() - Prepare a pinned VRAM object to be backed + * up in system memory. + * @bo: The buffer object to prepare. + * + * On successful completion, the object backup pages are allocated. Expectation + * is that this is called from the PM notifier, prior to suspend/hibernation. + * + * Return: 0 on success. Negative error code on failure. + */ +int xe_bo_notifier_prepare_pinned(struct xe_bo *bo) +{ + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); + struct xe_bo *backup; + int ret = 0; + + xe_bo_lock(bo, false); + + xe_assert(xe, !bo->backup_obj); + + /* + * Since this is called from the PM notifier we might have raced with + * someone unpinning this after we dropped the pinned list lock and + * grabbing the above bo lock. 
+ */ + if (!xe_bo_is_pinned(bo)) + goto out_unlock_bo; + + if (!xe_bo_is_vram(bo)) + goto out_unlock_bo; + + if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) + goto out_unlock_bo; + + backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, + DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED); + if (IS_ERR(backup)) { + ret = PTR_ERR(backup); + goto out_unlock_bo; + } + + backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ + ttm_bo_pin(&backup->ttm); + bo->backup_obj = backup; + +out_unlock_bo: + xe_bo_unlock(bo); + return ret; +} + +/** + * xe_bo_notifier_unprepare_pinned() - Undo the previous prepare operation. + * @bo: The buffer object to undo the prepare for. + * + * Always returns 0. The backup object is removed, if still present. Expectation + * is that this is called from the PM notifier when undoing the prepare step. + * + * Return: Always returns 0. + */ +int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo) +{ + xe_bo_lock(bo, false); + if (bo->backup_obj) { + ttm_bo_unpin(&bo->backup_obj->ttm); + xe_bo_put(bo->backup_obj); + bo->backup_obj = NULL; + } + xe_bo_unlock(bo); + + return 0; +} + +/** * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory * @bo: The buffer object to move. * @@ -1107,59 +1171,99 @@ */ int xe_bo_evict_pinned(struct xe_bo *bo) { - struct ttm_place place = { - .mem_type = XE_PL_TT, - }; - struct ttm_placement placement = { - .placement = &place, - .num_placement = 1, - }; - struct ttm_operation_ctx ctx = { - .interruptible = false, - .gfp_retry_mayfail = true, - }; - struct ttm_resource *new_mem; - int ret; + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); + struct xe_bo *backup = bo->backup_obj; + bool backup_created = false; + bool unmap = false; + int ret = 0; - xe_bo_assert_held(bo); + xe_bo_lock(bo, false); - if (WARN_ON(!bo->ttm.resource)) - return -EINVAL; + if (WARN_ON(!bo->ttm.resource)) { + ret = -EINVAL; + goto out_unlock_bo; + } - if (WARN_ON(!xe_bo_is_pinned(bo))) - return -EINVAL; + if (WARN_ON(!xe_bo_is_pinned(bo))) { + ret = -EINVAL; + goto out_unlock_bo; + } if (!xe_bo_is_vram(bo)) - return 0; + goto out_unlock_bo; + + if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) + goto out_unlock_bo; + + if (!backup) { + backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, + DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED); + if (IS_ERR(backup)) { + ret = PTR_ERR(backup); + goto out_unlock_bo; + } + backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ + backup_created = true; + } - ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx); - if (ret) - return ret; + if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) { + struct xe_migrate *migrate; + struct dma_fence *fence; + + if (bo->tile) + migrate = bo->tile->migrate; + else + migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type); + + ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1); + if (ret) + goto out_backup; + + ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1); + if (ret) + goto out_backup; - if (!bo->ttm.ttm) { - bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0); - if (!bo->ttm.ttm) { - ret = -ENOMEM; - goto err_res_free; + fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource, + backup->ttm.resource, false); + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + goto out_backup; } - } - ret =
ttm_bo_populate(&bo->ttm, &ctx); - if (ret) - goto err_res_free; + dma_resv_add_fence(bo->ttm.base.resv, fence, + DMA_RESV_USAGE_KERNEL); + dma_resv_add_fence(backup->ttm.base.resv, fence, + DMA_RESV_USAGE_KERNEL); + dma_fence_put(fence); + } else { + ret = xe_bo_vmap(backup); + if (ret) + goto out_backup; - ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1); - if (ret) - goto err_res_free; + if (iosys_map_is_null(&bo->vmap)) { + ret = xe_bo_vmap(bo); + if (ret) + goto out_backup; + unmap = true; + } - ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL); - if (ret) - goto err_res_free; + xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0, + bo->size); + } - return 0; + if (!bo->backup_obj) + bo->backup_obj = backup; -err_res_free: - ttm_resource_free(&bo->ttm, &new_mem); +out_backup: + xe_bo_vunmap(backup); + if (ret && backup_created) + xe_bo_put(backup); +out_unlock_bo: + if (unmap) + xe_bo_vunmap(bo); + xe_bo_unlock(bo); return ret; } @@ -1180,50 +1284,109 @@ int xe_bo_restore_pinned(struct xe_bo *bo) .interruptible = false, .gfp_retry_mayfail = false, }; - struct ttm_resource *new_mem; - struct ttm_place *place = &bo->placements[0]; + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); + struct xe_bo *backup = bo->backup_obj; + bool unmap = false; int ret; - xe_bo_assert_held(bo); + if (!backup) + return 0; - if (WARN_ON(!bo->ttm.resource)) - return -EINVAL; + xe_bo_lock(bo, false); - if (WARN_ON(!xe_bo_is_pinned(bo))) - return -EINVAL; + if (!xe_bo_is_pinned(backup)) { + ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx); + if (ret) + goto out_unlock_bo; + } - if (WARN_ON(xe_bo_is_vram(bo))) - return -EINVAL; + if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) { + struct xe_migrate *migrate; + struct dma_fence *fence; - if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo))) - return -EINVAL; + if (bo->tile) + migrate = bo->tile->migrate; + else + migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type); - if (!mem_type_is_vram(place->mem_type)) - return 0; + ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1); + if (ret) + goto out_unlock_bo; - ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx); - if (ret) - return ret; + ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1); + if (ret) + goto out_unlock_bo; - ret = ttm_bo_populate(&bo->ttm, &ctx); - if (ret) - goto err_res_free; + fence = xe_migrate_copy(migrate, backup, bo, + backup->ttm.resource, bo->ttm.resource, + false); + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + goto out_unlock_bo; + } - ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1); - if (ret) - goto err_res_free; + dma_resv_add_fence(bo->ttm.base.resv, fence, + DMA_RESV_USAGE_KERNEL); + dma_resv_add_fence(backup->ttm.base.resv, fence, + DMA_RESV_USAGE_KERNEL); + dma_fence_put(fence); + } else { + ret = xe_bo_vmap(backup); + if (ret) + goto out_unlock_bo; - ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL); - if (ret) - goto err_res_free; + if (iosys_map_is_null(&bo->vmap)) { + ret = xe_bo_vmap(bo); + if (ret) + goto out_backup; + unmap = true; + } - return 0; + xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr, + bo->size); + } + + bo->backup_obj = NULL; -err_res_free: - ttm_resource_free(&bo->ttm, &new_mem); +out_backup: + xe_bo_vunmap(backup); + if (!bo->backup_obj) { + if (xe_bo_is_pinned(backup)) + ttm_bo_unpin(&backup->ttm); + xe_bo_put(backup); + } +out_unlock_bo: + if (unmap) + xe_bo_vunmap(bo); + xe_bo_unlock(bo); return ret; } +int xe_bo_dma_unmap_pinned(struct xe_bo *bo) +{ + struct 
ttm_buffer_object *ttm_bo = &bo->ttm; + struct ttm_tt *tt = ttm_bo->ttm; + + if (tt) { + struct xe_ttm_tt *xe_tt = container_of(tt, typeof(*xe_tt), ttm); + + if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) { + dma_buf_unmap_attachment(ttm_bo->base.import_attach, + ttm_bo->sg, + DMA_BIDIRECTIONAL); + ttm_bo->sg = NULL; + xe_tt->sg = NULL; + } else if (xe_tt->sg) { + dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, + DMA_BIDIRECTIONAL, 0); + sg_free_table(xe_tt->sg); + xe_tt->sg = NULL; + } + } + + return 0; +} + static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo, unsigned long page_offset) { @@ -1371,6 +1534,7 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, struct xe_res_cursor cursor; struct xe_vram_region *vram; int bytes_left = len; + int err = 0; xe_bo_assert_held(bo); xe_device_assert_mem_access(xe); @@ -1378,9 +1542,14 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, if (!mem_type_is_vram(ttm_bo->resource->mem_type)) return -EIO; - /* FIXME: Use GPU for non-visible VRAM */ - if (!xe_ttm_resource_visible(ttm_bo->resource)) - return -EIO; + if (!xe_ttm_resource_visible(ttm_bo->resource) || len >= SZ_16K) { + struct xe_migrate *migrate = + mem_type_to_migrate(xe, ttm_bo->resource->mem_type); + + err = xe_migrate_access_memory(migrate, bo, offset, buf, len, + write); + goto out; + } vram = res_to_mem_region(ttm_bo->resource); xe_res_first(ttm_bo->resource, offset & PAGE_MASK, @@ -1404,7 +1573,8 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, xe_res_next(&cursor, PAGE_SIZE); } while (bytes_left); - return len; +out: + return err ?: len; } const struct ttm_device_funcs xe_ttm_funcs = { @@ -1448,6 +1618,9 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) if (bo->vm && xe_bo_is_user(bo)) xe_vm_put(bo->vm); + if (bo->parent_obj) + xe_bo_put(bo->parent_obj); + mutex_lock(&xe->mem_access.vram_userfault.lock); if (!list_empty(&bo->vram_userfault_link)) list_del(&bo->vram_userfault_link); @@ -1947,7 +2120,7 @@ struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, flags |= XE_BO_FLAG_GGTT; bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type, - flags | XE_BO_FLAG_NEEDS_CPU_ACCESS, + flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED, alignment); if (IS_ERR(bo)) return bo; @@ -2049,7 +2222,8 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str struct xe_bo *bo; u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT; - dst_flags |= (*src)->flags & XE_BO_FLAG_GGTT_INVALIDATE; + dst_flags |= (*src)->flags & (XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); xe_assert(xe, IS_DGFX(xe)); xe_assert(xe, !(*src)->vmap.is_iomem); @@ -2073,10 +2247,16 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res) { struct xe_device *xe = ttm_to_xe_device(res->bo->bdev); - if (res->mem_type == XE_PL_STOLEN) + switch (res->mem_type) { + case XE_PL_STOLEN: return xe_ttm_stolen_gpu_offset(xe); - - return res_to_mem_region(res)->dpa_base; + case XE_PL_TT: + case XE_PL_SYSTEM: + return 0; + default: + return res_to_mem_region(res)->dpa_base; + } + return 0; } /** @@ -2102,12 +2282,9 @@ int xe_bo_pin_external(struct xe_bo *bo) if (err) return err; - if (xe_bo_is_vram(bo)) { - spin_lock(&xe->pinned.lock); - list_add_tail(&bo->pinned_link, - &xe->pinned.external_vram); - spin_unlock(&xe->pinned.lock); - } + spin_lock(&xe->pinned.lock); + list_add_tail(&bo->pinned_link, &xe->pinned.late.external); + spin_unlock(&xe->pinned.lock); } 
ttm_bo_pin(&bo->ttm); @@ -2149,25 +2326,12 @@ int xe_bo_pin(struct xe_bo *bo) if (err) return err; - /* - * For pinned objects in on DGFX, which are also in vram, we expect - * these to be in contiguous VRAM memory. Required eviction / restore - * during suspend / resume (force restore to same physical address). - */ - if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && - bo->flags & XE_BO_FLAG_INTERNAL_TEST)) { - if (mem_type_is_vram(place->mem_type)) { - xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS); - - place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) - - vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT; - place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT); - } - } - if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) { spin_lock(&xe->pinned.lock); - list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); + if (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) + list_add_tail(&bo->pinned_link, &xe->pinned.late.kernel_bo_present); + else + list_add_tail(&bo->pinned_link, &xe->pinned.early.kernel_bo_present); spin_unlock(&xe->pinned.lock); } @@ -2231,6 +2395,13 @@ void xe_bo_unpin(struct xe_bo *bo) xe_assert(xe, !list_empty(&bo->pinned_link)); list_del_init(&bo->pinned_link); spin_unlock(&xe->pinned.lock); + + if (bo->backup_obj) { + if (xe_bo_is_pinned(bo->backup_obj)) + ttm_bo_unpin(&bo->backup_obj->ttm); + xe_bo_put(bo->backup_obj); + bo->backup_obj = NULL; + } } ttm_bo_unpin(&bo->ttm); if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) @@ -2398,7 +2569,7 @@ static int gem_create_user_ext_set_property(struct xe_device *xe, int err; u32 idx; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; @@ -2435,7 +2606,7 @@ static int gem_create_user_extensions(struct xe_device *xe, struct xe_bo *bo, if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; @@ -2759,19 +2930,17 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) /** * xe_bo_evict - Evict an object to evict placement * @bo: The buffer object to migrate. - * @force_alloc: Set force_alloc in ttm_operation_ctx * * On successful completion, the object memory will be moved to evict * placement. This function blocks until the object has been fully moved. * * Return: 0 on success. Negative error code on failure. 
*/ -int xe_bo_evict(struct xe_bo *bo, bool force_alloc) +int xe_bo_evict(struct xe_bo *bo) { struct ttm_operation_ctx ctx = { .interruptible = false, .no_wait_gpu = false, - .force_alloc = force_alloc, .gfp_retry_mayfail = true, }; struct ttm_placement placement; diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index ec3e4446d027..02ada1fb8a23 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -39,20 +39,23 @@ #define XE_BO_FLAG_NEEDS_64K BIT(15) #define XE_BO_FLAG_NEEDS_2M BIT(16) #define XE_BO_FLAG_GGTT_INVALIDATE BIT(17) -#define XE_BO_FLAG_GGTT0 BIT(18) -#define XE_BO_FLAG_GGTT1 BIT(19) -#define XE_BO_FLAG_GGTT2 BIT(20) -#define XE_BO_FLAG_GGTT3 BIT(21) -#define XE_BO_FLAG_GGTT_ALL (XE_BO_FLAG_GGTT0 | \ - XE_BO_FLAG_GGTT1 | \ - XE_BO_FLAG_GGTT2 | \ - XE_BO_FLAG_GGTT3) -#define XE_BO_FLAG_CPU_ADDR_MIRROR BIT(22) +#define XE_BO_FLAG_PINNED_NORESTORE BIT(18) +#define XE_BO_FLAG_PINNED_LATE_RESTORE BIT(19) +#define XE_BO_FLAG_GGTT0 BIT(20) +#define XE_BO_FLAG_GGTT1 BIT(21) +#define XE_BO_FLAG_GGTT2 BIT(22) +#define XE_BO_FLAG_GGTT3 BIT(23) +#define XE_BO_FLAG_CPU_ADDR_MIRROR BIT(24) /* this one is trigger internally only */ #define XE_BO_FLAG_INTERNAL_TEST BIT(30) #define XE_BO_FLAG_INTERNAL_64K BIT(31) +#define XE_BO_FLAG_GGTT_ALL (XE_BO_FLAG_GGTT0 | \ + XE_BO_FLAG_GGTT1 | \ + XE_BO_FLAG_GGTT2 | \ + XE_BO_FLAG_GGTT3) + #define XE_BO_FLAG_GGTTx(tile) \ (XE_BO_FLAG_GGTT0 << (tile)->id) @@ -271,11 +274,15 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res); bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); int xe_bo_migrate(struct xe_bo *bo, u32 mem_type); -int xe_bo_evict(struct xe_bo *bo, bool force_alloc); +int xe_bo_evict(struct xe_bo *bo); int xe_bo_evict_pinned(struct xe_bo *bo); +int xe_bo_notifier_prepare_pinned(struct xe_bo *bo); +int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo); int xe_bo_restore_pinned(struct xe_bo *bo); +int xe_bo_dma_unmap_pinned(struct xe_bo *bo); + extern const struct ttm_device_funcs xe_ttm_funcs; extern const char *const xe_mem_type_to_name[]; diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 6a40eedd9db1..ed3746d32b27 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -10,28 +10,103 @@ #include "xe_ggtt.h" #include "xe_tile.h" +typedef int (*xe_pinned_fn)(struct xe_bo *bo); + +static int xe_bo_apply_to_pinned(struct xe_device *xe, + struct list_head *pinned_list, + struct list_head *new_list, + const xe_pinned_fn pinned_fn) +{ + LIST_HEAD(still_in_list); + struct xe_bo *bo; + int ret = 0; + + spin_lock(&xe->pinned.lock); + while (!ret) { + bo = list_first_entry_or_null(pinned_list, typeof(*bo), + pinned_link); + if (!bo) + break; + xe_bo_get(bo); + list_move_tail(&bo->pinned_link, &still_in_list); + spin_unlock(&xe->pinned.lock); + + ret = pinned_fn(bo); + if (ret && pinned_list != new_list) { + spin_lock(&xe->pinned.lock); + /* + * We might no longer be pinned, since PM notifier can + * call this. If the pinned link is now empty, keep it + * that way. + */ + if (!list_empty(&bo->pinned_link)) + list_move(&bo->pinned_link, pinned_list); + spin_unlock(&xe->pinned.lock); + } + xe_bo_put(bo); + spin_lock(&xe->pinned.lock); + } + list_splice_tail(&still_in_list, new_list); + spin_unlock(&xe->pinned.lock); + + return ret; +} + /** - * xe_bo_evict_all - evict all BOs from VRAM + * xe_bo_notifier_prepare_all_pinned() - Pre-allocate the backing pages for all + * pinned VRAM objects which need to be saved. 
+ * @xe: xe device + * + * Should be called from PM notifier when preparing for s3/s4. * + * Return: 0 on success, negative error code on error. + */ +int xe_bo_notifier_prepare_all_pinned(struct xe_device *xe) +{ + int ret; + + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present, + &xe->pinned.early.kernel_bo_present, + xe_bo_notifier_prepare_pinned); + if (!ret) + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, + &xe->pinned.late.kernel_bo_present, + xe_bo_notifier_prepare_pinned); + + return ret; +} + +/** + * xe_bo_notifier_unprepare_all_pinned() - Remove the backing pages for all + * pinned VRAM objects which have been restored. * @xe: xe device * - * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next - * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU. - * All eviction magic done via TTM calls. + * Should be called from PM notifier after exiting s3/s4 (either on success or + * failure). + */ +void xe_bo_notifier_unprepare_all_pinned(struct xe_device *xe) +{ + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present, + &xe->pinned.early.kernel_bo_present, + xe_bo_notifier_unprepare_pinned); + + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, + &xe->pinned.late.kernel_bo_present, + xe_bo_notifier_unprepare_pinned); +} + +/** + * xe_bo_evict_all_user - evict all non-pinned user BOs from VRAM + * @xe: xe device * - * Evict == move VRAM BOs to temporary (typically system) memory. + * Evict non-pinned user BOs (via GPU). * - * This function should be called before the device goes into a suspend state - * where the VRAM loses power. + * Evict == move VRAM BOs to temporary (typically system) memory. */ -int xe_bo_evict_all(struct xe_device *xe) +int xe_bo_evict_all_user(struct xe_device *xe) { struct ttm_device *bdev = &xe->ttm; - struct xe_bo *bo; - struct xe_tile *tile; - struct list_head still_in_list; u32 mem_type; - u8 id; int ret; /* User memory */ @@ -57,34 +132,38 @@ int xe_bo_evict_all(struct xe_device *xe) } } - /* Pinned user memory in VRAM */ - INIT_LIST_HEAD(&still_in_list); - spin_lock(&xe->pinned.lock); - for (;;) { - bo = list_first_entry_or_null(&xe->pinned.external_vram, - typeof(*bo), pinned_link); - if (!bo) - break; - xe_bo_get(bo); - list_move_tail(&bo->pinned_link, &still_in_list); - spin_unlock(&xe->pinned.lock); + return 0; +} - xe_bo_lock(bo, false); - ret = xe_bo_evict_pinned(bo); - xe_bo_unlock(bo); - xe_bo_put(bo); - if (ret) { - spin_lock(&xe->pinned.lock); - list_splice_tail(&still_in_list, - &xe->pinned.external_vram); - spin_unlock(&xe->pinned.lock); - return ret; - } +/** + * xe_bo_evict_all - evict all BOs from VRAM + * @xe: xe device + * + * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next + * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU. + * All eviction magic done via TTM calls. + * + * Evict == move VRAM BOs to temporary (typically system) memory. + * + * This function should be called before the device goes into a suspend state + * where the VRAM loses power. 
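+ *
+ * Pairs with xe_bo_restore_early() and xe_bo_restore_late() on the resume
+ * side, which drain the early and late evicted lists filled here.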
+ */ +int xe_bo_evict_all(struct xe_device *xe) +{ + struct xe_tile *tile; + u8 id; + int ret; - ret = xe_bo_evict_all_user(xe); + if (ret) + return ret; + + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, + &xe->pinned.late.evicted, xe_bo_evict_pinned); + + if (!ret) + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external, + &xe->pinned.late.external, xe_bo_evict_pinned); /* * Wait for all user BO to be evicted as those evictions depend on the @@ -93,32 +172,49 @@ int xe_bo_evict_all(struct xe_device *xe) for_each_tile(tile, xe, id) xe_tile_migrate_wait(tile); - spin_lock(&xe->pinned.lock); - for (;;) { - bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present, - typeof(*bo), pinned_link); - if (!bo) - break; - xe_bo_get(bo); - list_move_tail(&bo->pinned_link, &xe->pinned.evicted); - spin_unlock(&xe->pinned.lock); + if (ret) + return ret; - xe_bo_lock(bo, false); - ret = xe_bo_evict_pinned(bo); - xe_bo_unlock(bo); - xe_bo_put(bo); - if (ret) - return ret; + return xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present, + &xe->pinned.early.evicted, + xe_bo_evict_pinned); +} - spin_lock(&xe->pinned.lock); +static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + int ret; + + ret = xe_bo_restore_pinned(bo); + if (ret) + return ret; + + if (bo->flags & XE_BO_FLAG_GGTT) { + struct xe_tile *tile; + u8 id; + + for_each_tile(tile, xe_bo_device(bo), id) { + if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile))) + continue; + + mutex_lock(&tile->mem.ggtt->lock); + xe_ggtt_map_bo(tile->mem.ggtt, bo); + mutex_unlock(&tile->mem.ggtt->lock); + } } - spin_unlock(&xe->pinned.lock); + + /* + * We expect validate to trigger a move VRAM and our move code + * should setup the iosys map. + */ + xe_assert(xe, !(bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) || + !iosys_map_is_null(&bo->vmap)); return 0; } /** - * xe_bo_restore_kernel - restore kernel BOs to VRAM + * xe_bo_restore_early - restore early phase kernel BOs to VRAM * * @xe: xe device * @@ -128,111 +224,130 @@ int xe_bo_evict_all(struct xe_device *xe) * This function should be called early, before trying to init the GT, on device * resume. */ -int xe_bo_restore_kernel(struct xe_device *xe) +int xe_bo_restore_early(struct xe_device *xe) { - struct xe_bo *bo; - int ret; + return xe_bo_apply_to_pinned(xe, &xe->pinned.early.evicted, + &xe->pinned.early.kernel_bo_present, + xe_bo_restore_and_map_ggtt); +} - spin_lock(&xe->pinned.lock); - for (;;) { - bo = list_first_entry_or_null(&xe->pinned.evicted, - typeof(*bo), pinned_link); - if (!bo) - break; - xe_bo_get(bo); - list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); - spin_unlock(&xe->pinned.lock); +/** + * xe_bo_restore_late - restore pinned late phase BOs + * + * @xe: xe device + * + * Move pinned user and kernel BOs which can use blitter from temporary + * (typically system) memory to VRAM. All moves done via TTM calls. + * + * This function should be called late, after GT init, on device resume.
+ */ +int xe_bo_restore_late(struct xe_device *xe) +{ + struct xe_tile *tile; + int ret, id; - xe_bo_lock(bo, false); - ret = xe_bo_restore_pinned(bo); - xe_bo_unlock(bo); - if (ret) { - xe_bo_put(bo); - return ret; - } + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.evicted, + &xe->pinned.late.kernel_bo_present, + xe_bo_restore_and_map_ggtt); - if (bo->flags & XE_BO_FLAG_GGTT) { - struct xe_tile *tile; - u8 id; + for_each_tile(tile, xe, id) + xe_tile_migrate_wait(tile); - for_each_tile(tile, xe, id) { - if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile))) - continue; + if (ret) + return ret; - mutex_lock(&tile->mem.ggtt->lock); - xe_ggtt_map_bo(tile->mem.ggtt, bo); - mutex_unlock(&tile->mem.ggtt->lock); - } - } + if (!IS_DGFX(xe)) + return 0; - /* - * We expect validate to trigger a move VRAM and our move code - * should setup the iosys map. - */ - xe_assert(xe, !iosys_map_is_null(&bo->vmap)); + /* Pinned user memory in VRAM should be validated on resume */ + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external, + &xe->pinned.late.external, + xe_bo_restore_pinned); - xe_bo_put(bo); + /* Wait for restore to complete */ + for_each_tile(tile, xe, id) + xe_tile_migrate_wait(tile); - spin_lock(&xe->pinned.lock); - } - spin_unlock(&xe->pinned.lock); + return ret; +} - return 0; +static void xe_bo_pci_dev_remove_pinned(struct xe_device *xe) +{ + struct xe_tile *tile; + unsigned int id; + + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.external, + &xe->pinned.late.external, + xe_bo_dma_unmap_pinned); + for_each_tile(tile, xe, id) + xe_tile_migrate_wait(tile); } /** - * xe_bo_restore_user - restore pinned user BOs to VRAM - * - * @xe: xe device + * xe_bo_pci_dev_remove_all() - Handle bos when the pci_device is about to be removed + * @xe: The xe device. * - * Move pinned user BOs from temporary (typically system) memory to VRAM via - * CPU. All moves done via TTM calls. + * On pci_device removal we need to drop all dma mappings and move + * the data of exported bos out to system. This includes SVM bos and + * exported dma-buf bos. This is done by evicting all bos, but + * the evict placement in xe_evict_flags() is chosen such that all + * bos except those mentioned are purged, and thus their memory + * is released. * - * This function should be called late, after GT init, on device resume. + * For pinned bos, we're unmapping dma. */ -int xe_bo_restore_user(struct xe_device *xe) +void xe_bo_pci_dev_remove_all(struct xe_device *xe) { - struct xe_bo *bo; - struct xe_tile *tile; - struct list_head still_in_list; - u8 id; - int ret; + unsigned int mem_type; - if (!IS_DGFX(xe)) - return 0; + /* + * Move pagemap bos and exported dma-buf to system, and + * purge everything else. 
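+	 *
+	 * Managers are walked from XE_PL_VRAM1 down to XE_PL_TT, so bos
+	 * moved out of VRAM on the first passes are processed again when
+	 * the TT manager is drained.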
+ */ + for (mem_type = XE_PL_VRAM1; mem_type >= XE_PL_TT; --mem_type) { + struct ttm_resource_manager *man = + ttm_manager_type(&xe->ttm, mem_type); - /* Pinned user memory in VRAM should be validated on resume */ - INIT_LIST_HEAD(&still_in_list); - spin_lock(&xe->pinned.lock); - for (;;) { - bo = list_first_entry_or_null(&xe->pinned.external_vram, - typeof(*bo), pinned_link); - if (!bo) - break; - list_move_tail(&bo->pinned_link, &still_in_list); - xe_bo_get(bo); - spin_unlock(&xe->pinned.lock); + if (man) { + int ret = ttm_resource_manager_evict_all(&xe->ttm, man); - xe_bo_lock(bo, false); - ret = xe_bo_restore_pinned(bo); - xe_bo_unlock(bo); - xe_bo_put(bo); - if (ret) { - spin_lock(&xe->pinned.lock); - list_splice_tail(&still_in_list, - &xe->pinned.external_vram); - spin_unlock(&xe->pinned.lock); - return ret; + drm_WARN_ON(&xe->drm, ret); } - - spin_lock(&xe->pinned.lock); } - list_splice_tail(&still_in_list, &xe->pinned.external_vram); - spin_unlock(&xe->pinned.lock); - /* Wait for restore to complete */ - for_each_tile(tile, xe, id) - xe_tile_migrate_wait(tile); + xe_bo_pci_dev_remove_pinned(xe); +} - return 0; +static void xe_bo_pinned_fini(void *arg) +{ + struct xe_device *xe = arg; + + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, + &xe->pinned.late.kernel_bo_present, + xe_bo_dma_unmap_pinned); + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present, + &xe->pinned.early.kernel_bo_present, + xe_bo_dma_unmap_pinned); +} + +/** + * xe_bo_pinned_init() - Initialize pinned bo tracking + * @xe: The xe device. + * + * Initializes the lists and locks required for pinned bo + * tracking and registers a callback to dma-unmap + * any remaining pinned bos on pci device removal. + * + * Return: %0 on success, negative error code on error. 
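+ *
+ * A minimal call-site sketch, matching the hook-up in xe_device_create()::
+ *
+ *	err = xe_bo_pinned_init(xe);
+ *	if (err)
+ *		goto err;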
+ */ +int xe_bo_pinned_init(struct xe_device *xe) +{ + spin_lock_init(&xe->pinned.lock); + INIT_LIST_HEAD(&xe->pinned.early.kernel_bo_present); + INIT_LIST_HEAD(&xe->pinned.early.evicted); + INIT_LIST_HEAD(&xe->pinned.late.kernel_bo_present); + INIT_LIST_HEAD(&xe->pinned.late.evicted); + INIT_LIST_HEAD(&xe->pinned.late.external); + + return devm_add_action_or_reset(xe->drm.dev, xe_bo_pinned_fini, xe); } diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h index 746894798852..e8385cb7f5e9 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.h +++ b/drivers/gpu/drm/xe/xe_bo_evict.h @@ -9,7 +9,13 @@ struct xe_device; int xe_bo_evict_all(struct xe_device *xe); -int xe_bo_restore_kernel(struct xe_device *xe); -int xe_bo_restore_user(struct xe_device *xe); +int xe_bo_evict_all_user(struct xe_device *xe); +int xe_bo_notifier_prepare_all_pinned(struct xe_device *xe); +void xe_bo_notifier_unprepare_all_pinned(struct xe_device *xe); +int xe_bo_restore_early(struct xe_device *xe); +int xe_bo_restore_late(struct xe_device *xe); +void xe_bo_pci_dev_remove_all(struct xe_device *xe); + +int xe_bo_pinned_init(struct xe_device *xe); #endif diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 15a92e3d4898..eb5e83c5f233 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -28,6 +28,10 @@ struct xe_vm; struct xe_bo { /** @ttm: TTM base buffer object */ struct ttm_buffer_object ttm; + /** @backup_obj: The backup object when pinned and suspended (vram only) */ + struct xe_bo *backup_obj; + /** @parent_obj: Ref to parent bo if this is a backup_obj */ + struct xe_bo *parent_obj; /** @size: Size of this buffer object */ size_t size; /** @flags: flags for this buffer object */ diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c new file mode 100644 index 000000000000..cb9f175c89a1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/configfs.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include "xe_configfs.h" +#include "xe_module.h" + +/** + * DOC: Xe Configfs + * + * Overview + * ========= + * + * Configfs is a filesystem-based manager of kernel objects. XE KMD registers a + * configfs subsystem called ``'xe'`` that creates a directory in the mounted configfs directory. + * The user can create devices under this directory and configure them as necessary. + * See Documentation/filesystems/configfs.rst for more information about how configfs works. + * + * Create devices + * =============== + * + * In order to create a device, the user has to create a directory inside ``'xe'``:: + * + * mkdir /sys/kernel/config/xe/0000:03:00.0/ + * + * Every device created is populated by the driver with entries that can be + * used to configure it:: + * + * /sys/kernel/config/xe/ + * .. 0000:03:00.0/ + * ... survivability_mode + * + * Configure Attributes + * ==================== + * + * Survivability mode: + * ------------------- + * + * Enable survivability mode on supported cards. This setting only takes + * effect when probing the device.
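+ * The attribute therefore has to be written before the device is bound to
+ * the driver.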
Example to enable it:: + * + * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode + * # echo 0000:03:00.0 > /sys/bus/pci/drivers/xe/bind (Enters survivability mode if supported) + * + * Remove devices + * ============== + * + * The created device directories can be removed using ``rmdir``:: + * + * rmdir /sys/kernel/config/xe/0000:03:00.0/ + */ + +struct xe_config_device { + struct config_group group; + + bool survivability_mode; + + /* protects attributes */ + struct mutex lock; +}; + +static struct xe_config_device *to_xe_config_device(struct config_item *item) +{ + return container_of(to_config_group(item), struct xe_config_device, group); +} + +static ssize_t survivability_mode_show(struct config_item *item, char *page) +{ + struct xe_config_device *dev = to_xe_config_device(item); + + return sprintf(page, "%d\n", dev->survivability_mode); +} + +static ssize_t survivability_mode_store(struct config_item *item, const char *page, size_t len) +{ + struct xe_config_device *dev = to_xe_config_device(item); + bool survivability_mode; + int ret; + + ret = kstrtobool(page, &survivability_mode); + if (ret) + return ret; + + mutex_lock(&dev->lock); + dev->survivability_mode = survivability_mode; + mutex_unlock(&dev->lock); + + return len; +} + +CONFIGFS_ATTR(, survivability_mode); + +static struct configfs_attribute *xe_config_device_attrs[] = { + &attr_survivability_mode, + NULL, +}; + +static void xe_config_device_release(struct config_item *item) +{ + struct xe_config_device *dev = to_xe_config_device(item); + + mutex_destroy(&dev->lock); + kfree(dev); +} + +static struct configfs_item_operations xe_config_device_ops = { + .release = xe_config_device_release, +}; + +static const struct config_item_type xe_config_device_type = { + .ct_item_ops = &xe_config_device_ops, + .ct_attrs = xe_config_device_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct config_group *xe_config_make_device_group(struct config_group *group, + const char *name) +{ + unsigned int domain, bus, slot, function; + struct xe_config_device *dev; + struct pci_dev *pdev; + int ret; + + ret = sscanf(name, "%04x:%02x:%02x.%x", &domain, &bus, &slot, &function); + if (ret != 4) + return ERR_PTR(-EINVAL); + + pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function)); + if (!pdev) + return ERR_PTR(-EINVAL); + + /* pdev is only used to check the device exists; drop the reference */ + pci_dev_put(pdev); + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return ERR_PTR(-ENOMEM); + + config_group_init_type_name(&dev->group, name, &xe_config_device_type); + + mutex_init(&dev->lock); + + return &dev->group; +} + +static struct configfs_group_operations xe_config_device_group_ops = { + .make_group = xe_config_make_device_group, +}; + +static const struct config_item_type xe_configfs_type = { + .ct_group_ops = &xe_config_device_group_ops, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem xe_configfs = { + .su_group = { + .cg_item = { + .ci_namebuf = "xe", + .ci_type = &xe_configfs_type, + }, + }, +}; + +static struct xe_config_device *configfs_find_group(struct pci_dev *pdev) +{ + struct config_item *item; + char name[64]; + + snprintf(name, sizeof(name), "%04x:%02x:%02x.%x", pci_domain_nr(pdev->bus), + pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + + mutex_lock(&xe_configfs.su_mutex); + item = config_group_find_item(&xe_configfs.su_group, name); + mutex_unlock(&xe_configfs.su_mutex); + + if (!item) + return NULL; + + return to_xe_config_device(item); +} + +/** + * xe_configfs_get_survivability_mode - get configfs survivability mode attribute + * @pdev:
pci device + * + * Find the configfs group that belongs to the pci device and return + * the survivability mode attribute. + * + * Return: survivability mode if config group is found, false otherwise + */ +bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) +{ + struct xe_config_device *dev = configfs_find_group(pdev); + bool mode; + + if (!dev) + return false; + + mode = dev->survivability_mode; + config_item_put(&dev->group.cg_item); + + return mode; +} + +/** + * xe_configfs_clear_survivability_mode - clear configfs survivability mode attribute + * @pdev: pci device + * + * Find the configfs group that belongs to the pci device and clear the + * survivability mode attribute. + */ +void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) +{ + struct xe_config_device *dev = configfs_find_group(pdev); + + if (!dev) + return; + + mutex_lock(&dev->lock); + dev->survivability_mode = 0; + mutex_unlock(&dev->lock); + + config_item_put(&dev->group.cg_item); +} + +int __init xe_configfs_init(void) +{ + struct config_group *root = &xe_configfs.su_group; + int ret; + + config_group_init(root); + mutex_init(&xe_configfs.su_mutex); + ret = configfs_register_subsystem(&xe_configfs); + if (ret) { + pr_err("Error %d while registering %s subsystem\n", + ret, root->cg_item.ci_namebuf); + return ret; + } + + return 0; +} + +void __exit xe_configfs_exit(void) +{ + configfs_unregister_subsystem(&xe_configfs); +} diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h new file mode 100644 index 000000000000..d7d041ec2611 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_configfs.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ +#ifndef _XE_CONFIGFS_H_ +#define _XE_CONFIGFS_H_ + +#include <linux/types.h> + +struct pci_dev; + +#if IS_ENABLED(CONFIG_CONFIGFS_FS) +int xe_configfs_init(void); +void xe_configfs_exit(void); +bool xe_configfs_get_survivability_mode(struct pci_dev *pdev); +void xe_configfs_clear_survivability_mode(struct pci_dev *pdev); +#else +static inline int xe_configfs_init(void) { return 0; } +static inline void xe_configfs_exit(void) { } +static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; } +static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) { } +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 81b9d9bb3f57..7a8af2311318 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -80,7 +80,8 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q) return &q->gt->uc.guc; } -static ssize_t __xe_devcoredump_read(char *buffer, size_t count, +static ssize_t __xe_devcoredump_read(char *buffer, ssize_t count, + ssize_t start, struct xe_devcoredump *coredump) { struct xe_device *xe; @@ -94,7 +95,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count, ss = &coredump->snapshot; iter.data = buffer; - iter.start = 0; + iter.start = start; iter.remain = count; p = drm_coredump_printer(&iter); @@ -168,12 +169,16 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) ss->vm = NULL; } +#define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G) + static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { struct xe_devcoredump *coredump = data; struct xe_devcoredump_snapshot *ss; ssize_t byte_copied; + u32 chunk_offset; + ssize_t new_chunk_position; if (!coredump) return
-ENODEV; @@ -183,6 +188,9 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, /* Ensure delayed work is captured before continuing */ flush_work(&ss->work); + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + xe_pm_runtime_get(gt_to_xe(ss->gt)); + mutex_lock(&coredump->lock); if (!ss->read.buffer) { @@ -195,12 +203,29 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, return 0; } + new_chunk_position = div_u64_rem(offset, + XE_DEVCOREDUMP_CHUNK_MAX, + &chunk_offset); + + if (offset >= ss->read.chunk_position + XE_DEVCOREDUMP_CHUNK_MAX || + offset < ss->read.chunk_position) { + ss->read.chunk_position = new_chunk_position * + XE_DEVCOREDUMP_CHUNK_MAX; + + __xe_devcoredump_read(ss->read.buffer, + XE_DEVCOREDUMP_CHUNK_MAX, + ss->read.chunk_position, coredump); + } + byte_copied = count < ss->read.size - offset ? count : ss->read.size - offset; - memcpy(buffer, ss->read.buffer + offset, byte_copied); + memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied); mutex_unlock(&coredump->lock); + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + xe_pm_runtime_put(gt_to_xe(ss->gt)); + return byte_copied; } @@ -254,17 +279,32 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); - xe_pm_runtime_put(xe); + ss->read.chunk_position = 0; /* Calculate devcoredump size */ - ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); - - ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); - if (!ss->read.buffer) - return; + ss->read.size = __xe_devcoredump_read(NULL, LONG_MAX, 0, coredump); + + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) { + ss->read.buffer = kvmalloc(XE_DEVCOREDUMP_CHUNK_MAX, + GFP_USER); + if (!ss->read.buffer) + goto put_pm; + + __xe_devcoredump_read(ss->read.buffer, + XE_DEVCOREDUMP_CHUNK_MAX, + 0, coredump); + } else { + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); + if (!ss->read.buffer) + goto put_pm; + + __xe_devcoredump_read(ss->read.buffer, ss->read.size, 0, + coredump); + xe_devcoredump_snapshot_free(ss); + } - __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); - xe_devcoredump_snapshot_free(ss); +put_pm: + xe_pm_runtime_put(xe); } static void devcoredump_snapshot(struct xe_devcoredump *coredump, @@ -425,7 +465,7 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffi if (offset & 3) drm_printf(p, "Offset not word aligned: %zu", offset); - line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_KERNEL); + line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_ATOMIC); if (!line_buff) { drm_printf(p, "Failed to allocate line buffer\n"); return; diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 1a1d16a96b2d..a174385a6d83 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -66,6 +66,8 @@ struct xe_devcoredump_snapshot { struct { /** @read.size: size of devcoredump in human readable format */ ssize_t size; + /** @read.chunk_position: position of devcoredump chunk */ + ssize_t chunk_position; /** @read.buffer: buffer of devcoredump in human readable format */ char *buffer; } read; diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 00191227bc95..c02c4c4e9412 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -23,8 +23,10 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_bo.h" +#include "xe_bo_evict.h" #include 
"xe_debugfs.h" #include "xe_devcoredump.h" +#include "xe_device_sysfs.h" #include "xe_dma_buf.h" #include "xe_drm_client.h" #include "xe_drv.h" @@ -467,10 +469,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xa_erase(&xe->usm.asid_to_vm, asid); } - spin_lock_init(&xe->pinned.lock); - INIT_LIST_HEAD(&xe->pinned.kernel_bo_present); - INIT_LIST_HEAD(&xe->pinned.external_vram); - INIT_LIST_HEAD(&xe->pinned.evicted); + err = xe_bo_pinned_init(xe); + if (err) + goto err; xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", WQ_MEM_RECLAIM); @@ -505,7 +506,15 @@ ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */ static bool xe_driver_flr_disabled(struct xe_device *xe) { - return xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS; + if (IS_SRIOV_VF(xe)) + return true; + + if (xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) { + drm_info(&xe->drm, "Driver-FLR disabled by BIOS\n"); + return true; + } + + return false; } /* @@ -523,7 +532,7 @@ static bool xe_driver_flr_disabled(struct xe_device *xe) */ static void __xe_driver_flr(struct xe_device *xe) { - const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */ + const unsigned int flr_timeout = 3 * USEC_PER_SEC; /* specs recommend a 3s wait */ struct xe_mmio *mmio = xe_root_tile_mmio(xe); int ret; @@ -569,10 +578,8 @@ static void __xe_driver_flr(struct xe_device *xe) static void xe_driver_flr(struct xe_device *xe) { - if (xe_driver_flr_disabled(xe)) { - drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n"); + if (xe_driver_flr_disabled(xe)) return; - } __xe_driver_flr(xe); } @@ -706,7 +713,7 @@ int xe_device_probe_early(struct xe_device *xe) sriov_update_device_info(xe); err = xe_pcode_probe_early(xe); - if (err) { + if (err || xe_survivability_mode_is_requested(xe)) { int save_err = err; /* @@ -729,6 +736,7 @@ int xe_device_probe_early(struct xe_device *xe) return 0; } +ALLOW_ERROR_INJECTION(xe_device_probe_early, ERRNO); /* See xe_pci_probe() */ static int probe_has_flat_ccs(struct xe_device *xe) { @@ -908,6 +916,10 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err_unregister_display; + err = xe_device_sysfs_init(xe); + if (err) + goto err_unregister_display; + xe_debugfs_register(xe); err = xe_hwmon_register(xe); @@ -932,6 +944,8 @@ void xe_device_remove(struct xe_device *xe) xe_display_unregister(xe); drm_dev_unplug(&xe->drm); + + xe_bo_pci_dev_remove_all(xe); } void xe_device_shutdown(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index 7efbd4c52791..2e657692e5b5 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -3,14 +3,16 @@ * Copyright © 2023 Intel Corporation */ +#include <linux/device.h> #include <linux/kobject.h> #include <linux/pci.h> #include <linux/sysfs.h> -#include <drm/drm_managed.h> - #include "xe_device.h" #include "xe_device_sysfs.h" +#include "xe_mmio.h" +#include "xe_pcode_api.h" +#include "xe_pcode.h" #include "xe_pm.h" /** @@ -63,11 +65,94 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR_RW(vram_d3cold_threshold); +/** + * DOC: PCIe Gen5 Limitations + * + * Default link speed of discrete GPUs is determined by configuration parameters + * stored in their flash memory, which are subject to override through user + * initiated firmware updates. 
It has been observed that devices configured with + * PCIe Gen5 as their default link speed can encounter link quality issues due + * to host or motherboard limitations and may have to auto-downgrade their link + * to PCIe Gen4 speed when faced with an unstable link at Gen5, which makes + * firmware updates rather risky on such setups. It is required to ensure that + * the device is capable of auto-downgrading its link to PCIe Gen4 speed before + * pushing the firmware image with PCIe Gen5 as default configuration. This can + * be done by reading the ``auto_link_downgrade_capable`` sysfs entry, which will + * denote if the device is capable of auto-downgrading its link to PCIe Gen4 + * speed with a boolean output value of ``0`` or ``1``, meaning `incapable` or + * `capable` respectively. + * + * .. code-block:: shell + * + * $ cat /sys/bus/pci/devices/<bdf>/auto_link_downgrade_capable + * + * Pushing the firmware image with PCIe Gen5 as default configuration on an auto + * link downgrade incapable device and facing link instability due to host or + * motherboard limitations can result in the driver failing to bind to the device, + * making further firmware updates impossible, with RMA being the last + * resort. + * + * Link downgrade status of auto link downgrade capable devices is available + * through the ``auto_link_downgrade_status`` sysfs entry with a boolean output value + * of ``0`` or ``1``, where ``0`` means no auto-downgrading was required during + * link training (which is the optimal scenario) and ``1`` means the device has + * auto-downgraded its link to PCIe Gen4 speed due to an unstable Gen5 link. + * + * .. code-block:: shell + * + * $ cat /sys/bus/pci/devices/<bdf>/auto_link_downgrade_status + */ + +static ssize_t +auto_link_downgrade_capable_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + u32 cap, val; + + xe_pm_runtime_get(xe); + val = xe_mmio_read32(xe_root_tile_mmio(xe), BMG_PCIE_CAP); + xe_pm_runtime_put(xe); + + cap = REG_FIELD_GET(LINK_DOWNGRADE, val); + return sysfs_emit(buf, "%u\n", cap == DOWNGRADE_CAPABLE ?
true : false); +} +static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_capable); + +static ssize_t +auto_link_downgrade_status_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + /* default the auto_link_downgrade status to 0 */ + u32 val = 0; + int ret; + + xe_pm_runtime_get(xe); + ret = xe_pcode_read(xe_device_get_root_tile(xe), + PCODE_MBOX(DGFX_PCODE_STATUS, DGFX_GET_INIT_STATUS, 0), + &val, NULL); + xe_pm_runtime_put(xe); + + return ret ?: sysfs_emit(buf, "%u\n", REG_FIELD_GET(DGFX_LINK_DOWNGRADE_STATUS, val)); +} +static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_status); + +static const struct attribute *auto_link_downgrade_attrs[] = { + &dev_attr_auto_link_downgrade_capable.attr, + &dev_attr_auto_link_downgrade_status.attr, + NULL +}; + static void xe_device_sysfs_fini(void *arg) { struct xe_device *xe = arg; - sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); + if (xe->d3cold.capable) + sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); + + if (xe->info.platform == XE_BATTLEMAGE) + sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs); } int xe_device_sysfs_init(struct xe_device *xe) @@ -75,9 +160,17 @@ int xe_device_sysfs_init(struct xe_device *xe) struct device *dev = xe->drm.dev; int ret; - ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr); - if (ret) - return ret; + if (xe->d3cold.capable) { + ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr); + if (ret) + return ret; + } + + if (xe->info.platform == XE_BATTLEMAGE) { + ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs); + if (ret) + return ret; + } return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe); } diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 0482f26aa480..c8fa2c011666 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -31,7 +31,6 @@ #endif #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) -#include "soc/intel_pch.h" #include "intel_display_core.h" #include "intel_display_device.h" #endif @@ -107,6 +106,9 @@ struct xe_vram_region { resource_size_t actual_physical_size; /** @mapping: pointer to VRAM mappable space */ void __iomem *mapping; + /** @ttm: VRAM TTM manager */ + struct xe_ttm_vram_mgr ttm; +#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) /** @pagemap: Used to remap device memory as ZONE_DEVICE */ struct dev_pagemap pagemap; /** @@ -120,8 +122,7 @@ struct xe_vram_region { * This is generated when remap device memory as ZONE_DEVICE */ resource_size_t hpa_base; - /** @ttm: VRAM TTM manager */ - struct xe_ttm_vram_mgr ttm; +#endif }; /** @@ -314,6 +315,8 @@ struct xe_device { u8 has_atomic_enable_pte_bit:1; /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ u8 has_device_atomics_on_smem:1; + /** @info.has_fan_control: Device supports fan control */ + u8 has_fan_control:1; /** @info.has_flat_ccs: Whether flat CCS metadata is used */ u8 has_flat_ccs:1; /** @info.has_heci_cscfi: device has heci cscfi */ @@ -334,6 +337,8 @@ struct xe_device { u8 has_64bit_timestamp:1; /** @info.is_dgfx: is discrete device */ u8 is_dgfx:1; + /** @info.needs_scratch: needs scratch page for oob prefetch to work */ + u8 needs_scratch:1; /** * @info.probe_display: Probe display hardware. 
If set to * false, the driver will behave as if there is no display @@ -420,12 +425,22 @@ struct xe_device { struct { /** @pinned.lock: protected pinned BO list state */ spinlock_t lock; - /** @pinned.kernel_bo_present: pinned kernel BO that are present */ - struct list_head kernel_bo_present; - /** @pinned.evicted: pinned BO that have been evicted */ - struct list_head evicted; - /** @pinned.external_vram: pinned external BO in vram*/ - struct list_head external_vram; + /** @pinned.early: early pinned lists */ + struct { + /** @pinned.early.kernel_bo_present: pinned kernel BO that are present */ + struct list_head kernel_bo_present; + /** @pinned.early.evicted: pinned BO that have been evicted */ + struct list_head evicted; + } early; + /** @pinned.late: late pinned lists */ + struct { + /** @pinned.late.kernel_bo_present: pinned kernel BO that are present */ + struct list_head kernel_bo_present; + /** @pinned.late.evicted: pinned BO that have been evicted */ + struct list_head evicted; + /** @pinned.external: pinned external and dma-buf. */ + struct list_head external; + } late; } pinned; /** @ufence_wq: user fence wait queue */ @@ -508,6 +523,9 @@ struct xe_device { struct mutex lock; } d3cold; + /** @pm_notifier: Our PM notifier to perform actions in response to various PM events. */ + struct notifier_block pm_notifier; + /** @pmt: Support the PMT driver callback interface */ struct { /** @pmt.lock: protect access for telemetry data */ @@ -572,7 +590,6 @@ struct xe_device { * migrating to the right sub-structs */ struct intel_display display; - enum intel_pch pch_type; struct dram_info { bool wm_lv_0_adjust_needed; @@ -588,6 +605,7 @@ struct xe_device { INTEL_DRAM_LPDDR5, INTEL_DRAM_GDDR, INTEL_DRAM_GDDR_ECC, + __INTEL_DRAM_TYPE_MAX, } type; u8 num_qgv_points; u8 num_psf_gv_points; diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index f7a20264ea33..346f857f3837 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -233,7 +233,7 @@ static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach) struct drm_gem_object *obj = attach->importer_priv; struct xe_bo *bo = gem_to_xe_bo(obj); - XE_WARN_ON(xe_bo_evict(bo, false)); + XE_WARN_ON(xe_bo_evict(bo)); } static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = { diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index e2bb156c71fb..96732613b4b7 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -283,7 +283,7 @@ static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension int err; u32 idx; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; @@ -313,7 +313,7 @@ static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension, if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index b75adfc99fb7..44364c042ad7 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -176,8 +176,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } if (xe_exec_queue_is_parallel(q)) { - err = __copy_from_user(addresses, addresses_user, sizeof(u64) * - q->width); + err = copy_from_user(addresses, 
addresses_user, sizeof(u64) * + q->width); if (err) { err = -EFAULT; goto err_syncs; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index cd9b1c32f30f..ce78cee5dec6 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -479,7 +479,7 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, int err; u32 idx; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; @@ -518,7 +518,7 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; @@ -618,9 +618,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE)) return -EINVAL; - err = __copy_from_user(eci, user_eci, - sizeof(struct drm_xe_engine_class_instance) * - len); + err = copy_from_user(eci, user_eci, + sizeof(struct drm_xe_engine_class_instance) * len); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 4f6784e5abf8..8a5cba22b586 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -49,9 +49,6 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) fw->gt = gt; spin_lock_init(&fw->lock); - /* Assuming gen11+ so assert this assumption is correct */ - xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11); - if (xe->info.graphics_verx100 >= 1270) { init_domain(fw, XE_FW_DOMAIN_ID_GT, FORCEWAKE_GT, @@ -67,9 +64,6 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) { int i, j; - /* Assuming gen11+ so assert this assumption is correct */ - xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11); - if (!xe_gt_is_media_type(gt)) init_domain(fw, XE_FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER, diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 5fcb2b4c2c13..7062115909f2 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -365,7 +365,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) * scratch entries, rather keep the scratch page in system memory on * platforms where 64K pages are needed for VRAM. 
*/ - flags = XE_BO_FLAG_PINNED; + flags = 0; if (ggtt->flags & XE_GGTT_FLAGS_64K) flags |= XE_BO_FLAG_SYSTEM; else diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 66198cf2662c..0e5d243c9451 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -12,8 +12,10 @@ #include <generated/xe_wa_oob.h> +#include "instructions/xe_alu_commands.h" #include "instructions/xe_gfxpipe_commands.h" #include "instructions/xe_mi_commands.h" +#include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_bb.h" @@ -176,15 +178,6 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) return 0; } -/* - * Convert back from encoded value to type-safe, only to be used when reg.mcr - * is true - */ -static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg) -{ - return (const struct xe_reg_mcr){.__reg.raw = reg.raw }; -} - static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) { struct xe_reg_sr *sr = &q->hwe->reg_lrc; @@ -194,6 +187,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) struct xe_bb *bb; struct dma_fence *fence; long timeout; + int count_rmw = 0; int count = 0; if (q->hwe->class == XE_ENGINE_CLASS_RENDER) @@ -206,30 +200,32 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) if (IS_ERR(bb)) return PTR_ERR(bb); - xa_for_each(&sr->xa, idx, entry) - ++count; + /* count RMW registers as those will be handled separately */ + xa_for_each(&sr->xa, idx, entry) { + if (entry->reg.masked || entry->clr_bits == ~0) + ++count; + else + ++count_rmw; + } - if (count) { + if (count || count_rmw) xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name); + if (count) { + /* emit single LRI with all non RMW regs */ + bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); xa_for_each(&sr->xa, idx, entry) { struct xe_reg reg = entry->reg; - struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg); u32 val; - /* - * Skip reading the register if it's not really needed - */ if (reg.masked) val = entry->clr_bits << 16; - else if (entry->clr_bits + 1) - val = (reg.mcr ? 
- xe_gt_mcr_unicast_read_any(gt, reg_mcr) : - xe_mmio_read32(>->mmio, reg)) & (~entry->clr_bits); - else + else if (entry->clr_bits == ~0) val = 0; + else + continue; val |= entry->set_bits; @@ -239,6 +235,52 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) } } + if (count_rmw) { + /* emit MI_MATH for each RMW reg */ + + xa_for_each(&sr->xa, idx, entry) { + if (entry->reg.masked || entry->clr_bits == ~0) + continue; + + bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; + bb->cs[bb->len++] = entry->reg.addr; + bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; + + bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | + MI_LRI_LRM_CS_MMIO; + bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr; + bb->cs[bb->len++] = entry->clr_bits; + bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr; + bb->cs[bb->len++] = entry->set_bits; + + bb->cs[bb->len++] = MI_MATH(8); + bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0); + bb->cs[bb->len++] = CS_ALU_INSTR_LOADINV(SRCB, REG1); + bb->cs[bb->len++] = CS_ALU_INSTR_AND; + bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU); + bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0); + bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCB, REG2); + bb->cs[bb->len++] = CS_ALU_INSTR_OR; + bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU); + + bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO; + bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; + bb->cs[bb->len++] = entry->reg.addr; + + xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n", + entry->reg.addr, entry->clr_bits, entry->set_bits); + } + + /* reset used GPR */ + bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | MI_LRI_LRM_CS_MMIO; + bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; + bb->cs[bb->len++] = 0; + bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr; + bb->cs[bb->len++] = 0; + bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr; + bb->cs[bb->len++] = 0; + } + xe_lrc_emit_hwe_state_instructions(q, bb); job = xe_bb_create_job(q, bb); diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index f7005a3643e6..119a55bb7580 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -300,20 +300,20 @@ static int hwconfig(struct xe_gt *gt, struct drm_printer *p) return 0; } -static const struct drm_info_list debugfs_list[] = { - {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines}, +/* + * only for GT debugfs files which can be safely used on the VF as well: + * - without access to the GT privileged registers + * - without access to the PF specific data + */ +static const struct drm_info_list vf_safe_debugfs_list[] = { {"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset}, {"force_reset_sync", .show = xe_gt_debugfs_simple_show, .data = force_reset_sync}, {"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info}, {"topology", .show = xe_gt_debugfs_simple_show, .data = topology}, - {"steering", .show = xe_gt_debugfs_simple_show, .data = steering}, {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt}, - {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info}, {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds}, {"tunings", .show = xe_gt_debugfs_simple_show, .data = tunings}, - {"pat", .show = xe_gt_debugfs_simple_show, .data = pat}, - {"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs}, {"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc}, 
{"default_lrc_ccs", .show = xe_gt_debugfs_simple_show, .data = ccs_default_lrc}, {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, @@ -323,6 +323,15 @@ static const struct drm_info_list debugfs_list[] = { {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig}, }; +/* everything else should be added here */ +static const struct drm_info_list pf_only_debugfs_list[] = { + {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines}, + {"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs}, + {"pat", .show = xe_gt_debugfs_simple_show, .data = pat}, + {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info}, + {"steering", .show = xe_gt_debugfs_simple_show, .data = steering}, +}; + void xe_gt_debugfs_register(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -346,10 +355,15 @@ void xe_gt_debugfs_register(struct xe_gt *gt) */ root->d_inode->i_private = gt; - drm_debugfs_create_files(debugfs_list, - ARRAY_SIZE(debugfs_list), + drm_debugfs_create_files(vf_safe_debugfs_list, + ARRAY_SIZE(vf_safe_debugfs_list), root, minor); + if (!IS_SRIOV_VF(xe)) + drm_debugfs_create_files(pf_only_debugfs_list, + ARRAY_SIZE(pf_only_debugfs_list), + root, minor); + xe_uc_debugfs_register(>->uc, root); if (IS_SRIOV_PF(xe)) diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 604bdc7c8173..868a5d2c1a52 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -56,9 +56,10 @@ dev_to_xe(struct device *dev) return gt_to_xe(kobj_to_gt(dev->kobj.parent)); } -static ssize_t act_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t act_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; @@ -68,11 +69,12 @@ static ssize_t act_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static DEVICE_ATTR_RO(act_freq); +static struct kobj_attribute attr_act_freq = __ATTR_RO(act_freq); -static ssize_t cur_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t cur_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; ssize_t ret; @@ -85,11 +87,12 @@ static ssize_t cur_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static DEVICE_ATTR_RO(cur_freq); +static struct kobj_attribute attr_cur_freq = __ATTR_RO(cur_freq); -static ssize_t rp0_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t rp0_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; @@ -99,11 +102,12 @@ static ssize_t rp0_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static DEVICE_ATTR_RO(rp0_freq); +static struct kobj_attribute attr_rp0_freq = __ATTR_RO(rp0_freq); -static ssize_t rpe_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t rpe_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; @@ -113,11 +117,12 @@ static ssize_t rpe_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static DEVICE_ATTR_RO(rpe_freq); +static struct kobj_attribute attr_rpe_freq = 
__ATTR_RO(rpe_freq); -static ssize_t rpa_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t rpa_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; @@ -127,20 +132,22 @@ static ssize_t rpa_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static DEVICE_ATTR_RO(rpa_freq); +static struct kobj_attribute attr_rpa_freq = __ATTR_RO(rpa_freq); -static ssize_t rpn_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t rpn_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpn_freq(pc)); } -static DEVICE_ATTR_RO(rpn_freq); +static struct kobj_attribute attr_rpn_freq = __ATTR_RO(rpn_freq); -static ssize_t min_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t min_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; ssize_t ret; @@ -154,9 +161,10 @@ static ssize_t min_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, - const char *buff, size_t count) +static ssize_t min_freq_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buff, size_t count) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; ssize_t ret; @@ -173,11 +181,12 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, return count; } -static DEVICE_ATTR_RW(min_freq); +static struct kobj_attribute attr_min_freq = __ATTR_RW(min_freq); -static ssize_t max_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t max_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; ssize_t ret; @@ -191,9 +200,10 @@ static ssize_t max_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, - const char *buff, size_t count) +static ssize_t max_freq_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buff, size_t count) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; ssize_t ret; @@ -210,17 +220,17 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, return count; } -static DEVICE_ATTR_RW(max_freq); +static struct kobj_attribute attr_max_freq = __ATTR_RW(max_freq); static const struct attribute *freq_attrs[] = { - &dev_attr_act_freq.attr, - &dev_attr_cur_freq.attr, - &dev_attr_rp0_freq.attr, - &dev_attr_rpa_freq.attr, - &dev_attr_rpe_freq.attr, - &dev_attr_rpn_freq.attr, - &dev_attr_min_freq.attr, - &dev_attr_max_freq.attr, + &attr_act_freq.attr, + &attr_cur_freq.attr, + &attr_rp0_freq.attr, + &attr_rpa_freq.attr, + &attr_rpe_freq.attr, + &attr_rpn_freq.attr, + &attr_min_freq.attr, + &attr_max_freq.attr, NULL }; diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index fbbace7b0b12..c11206410a4d 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -249,9 +249,10 @@ int 
xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) return 0; } -static ssize_t name_show(struct device *dev, - struct device_attribute *attr, char *buff) +static ssize_t name_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt_idle *gtidle = dev_to_gtidle(dev); struct xe_guc_pc *pc = gtidle_to_pc(gtidle); ssize_t ret; @@ -262,11 +263,12 @@ static ssize_t name_show(struct device *dev, return ret; } -static DEVICE_ATTR_RO(name); +static struct kobj_attribute name_attr = __ATTR_RO(name); -static ssize_t idle_status_show(struct device *dev, - struct device_attribute *attr, char *buff) +static ssize_t idle_status_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt_idle *gtidle = dev_to_gtidle(dev); struct xe_guc_pc *pc = gtidle_to_pc(gtidle); enum xe_gt_idle_state state; @@ -277,6 +279,7 @@ static ssize_t idle_status_show(struct device *dev, return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state)); } +static struct kobj_attribute idle_status_attr = __ATTR_RO(idle_status); u64 xe_gt_idle_residency_msec(struct xe_gt_idle *gtidle) { @@ -291,10 +294,11 @@ u64 xe_gt_idle_residency_msec(struct xe_gt_idle *gtidle) return residency; } -static DEVICE_ATTR_RO(idle_status); -static ssize_t idle_residency_ms_show(struct device *dev, - struct device_attribute *attr, char *buff) + +static ssize_t idle_residency_ms_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt_idle *gtidle = dev_to_gtidle(dev); struct xe_guc_pc *pc = gtidle_to_pc(gtidle); u64 residency; @@ -305,12 +309,12 @@ static ssize_t idle_residency_ms_show(struct device *dev, return sysfs_emit(buff, "%llu\n", residency); } -static DEVICE_ATTR_RO(idle_residency_ms); +static struct kobj_attribute idle_residency_attr = __ATTR_RO(idle_residency_ms); static const struct attribute *gt_idle_attrs[] = { - &dev_attr_name.attr, - &dev_attr_idle_status.attr, - &dev_attr_idle_residency_ms.attr, + &name_attr.attr, + &idle_status_attr.attr, + &idle_residency_attr.attr, NULL, }; diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 605aad3554e7..d4d9730f0d2c 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -345,7 +345,8 @@ fallback: * Some older platforms don't have tables or don't have complete tables. * Newer platforms should always have the required info. 
*/ - if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 2000) + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 2000 && + !gt_to_xe(gt)->info.force_execlist) xe_gt_err(gt, "Slice/Subslice counts missing from hwconfig table; using typical fallback values\n"); if (gt_to_xe(gt)->info.platform == XE_PVC) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 0c22b3a36655..10622ca471a2 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -240,7 +240,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) atomic = access_is_atomic(pf->access_type); if (xe_vma_is_cpu_addr_mirror(vma)) - err = xe_svm_handle_pagefault(vm, vma, gt_to_tile(gt), + err = xe_svm_handle_pagefault(vm, vma, gt, pf->page_addr, atomic); else err = handle_vma_pagefault(gt, vma, atomic); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 10be109bf357..2420a548cacc 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1444,15 +1444,23 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) return 0; xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M); - bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_NEEDS_2M | - XE_BO_FLAG_PINNED); + bo = xe_bo_create_locked(xe, tile, NULL, + ALIGN(size, PAGE_SIZE), + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_NEEDS_2M | + XE_BO_FLAG_PINNED | + XE_BO_FLAG_PINNED_LATE_RESTORE); if (IS_ERR(bo)) return PTR_ERR(bo); + err = xe_bo_pin(bo); + xe_bo_unlock(bo); + if (unlikely(err)) { + xe_bo_put(bo); + return err; + } + config->lmem_obj = bo; if (xe_device_has_lmtt(xe)) { diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index b2521dd6ec42..0fe47f41b63c 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -51,27 +51,18 @@ static unsigned int extract_vfid(struct dentry *d) * /sys/kernel/debug/dri/0/ * ├── gt0 * │ ├── pf - * │ │ ├── ggtt_available - * │ │ ├── ggtt_provisioned * │ │ ├── contexts_provisioned * │ │ ├── doorbells_provisioned * │ │ ├── runtime_registers * │ │ ├── negotiated_versions * │ │ ├── adverse_events + * ├── gt1 + * │ ├── pf + * │ │ ├── ... 
*/ static const struct drm_info_list pf_info[] = { { - "ggtt_available", - .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_config_print_available_ggtt, - }, - { - "ggtt_provisioned", - .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_config_print_ggtt, - }, - { "contexts_provisioned", .show = xe_gt_debugfs_simple_show, .data = xe_gt_sriov_pf_config_print_ctxs, @@ -82,11 +73,6 @@ static const struct drm_info_list pf_info[] = { .data = xe_gt_sriov_pf_config_print_dbs, }, { - "lmem_provisioned", - .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_config_print_lmem, - }, - { "runtime_registers", .show = xe_gt_debugfs_simple_show, .data = xe_gt_sriov_pf_service_print_runtime, @@ -107,6 +93,42 @@ static const struct drm_info_list pf_info[] = { * /sys/kernel/debug/dri/0/ * ├── gt0 * │ ├── pf + * │ │ ├── ggtt_available + * │ │ ├── ggtt_provisioned + */ + +static const struct drm_info_list pf_ggtt_info[] = { + { + "ggtt_available", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_available_ggtt, + }, + { + "ggtt_provisioned", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_ggtt, + }, +}; + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │ ├── pf + * │ │ ├── lmem_provisioned + */ + +static const struct drm_info_list pf_lmem_info[] = { + { + "lmem_provisioned", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_lmem, + }, +}; + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │ ├── pf * │ │ ├── reset_engine * │ │ ├── sample_period * │ │ ├── sched_if_idle @@ -532,6 +554,16 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) pfdentry->d_inode->i_private = gt; drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor); + if (!xe_gt_is_media_type(gt)) { + drm_debugfs_create_files(pf_ggtt_info, + ARRAY_SIZE(pf_ggtt_info), + pfdentry, minor); + if (IS_DGFX(gt_to_xe(gt))) + drm_debugfs_create_files(pf_lmem_info, + ARRAY_SIZE(pf_lmem_info), + pfdentry, minor); + } + pf_add_policy_attrs(gt, pfdentry); pf_add_config_attrs(gt, pfdentry, PFID); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index 4efde5f46b43..821cfcc34e6b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -112,7 +112,6 @@ static const struct xe_reg tgl_runtime_regs[] = { XELP_GT_SLICE_ENABLE, /* _MMIO(0x9138) */ XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ - CTC_MODE, /* _MMIO(0xa26c) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ }; @@ -124,7 +123,6 @@ static const struct xe_reg ats_m_runtime_regs[] = { XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ - CTC_MODE, /* _MMIO(0xa26c) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ }; @@ -136,7 +134,6 @@ static const struct xe_reg pvc_runtime_regs[] = { GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ - CTC_MODE, /* _MMIO(0xA26C) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ }; @@ -150,7 +147,6 @@ static const struct xe_reg ver_1270_runtime_regs[] = { GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ - CTC_MODE, /* _MMIO(0xa26c) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ }; @@ -167,7 +163,6 @@ static const struct 
xe_reg ver_2000_runtime_regs[] = { XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */ XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */ XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */ - CTC_MODE, /* _MMIO(0xa26c) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ }; @@ -185,7 +180,6 @@ static const struct xe_reg ver_3000_runtime_regs[] = { XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */ XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */ XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */ - CTC_MODE, /* _MMIO(0xa26c) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ }; diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index 6155ea354432..30f942671c2b 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -27,6 +27,7 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) } static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { + "svm_pagefault_count", "tlb_inval_count", "vma_pagefault_count", "vma_pagefault_kb", diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h index d556771f99d6..be3244d7133c 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats_types.h +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h @@ -7,6 +7,7 @@ #define _XE_GT_STATS_TYPES_H_ enum xe_gt_stats_id { + XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, XE_GT_STATS_ID_TLB_INVAL, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c index 8db78d616b6f..aa962c783cdf 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle.c +++ b/drivers/gpu/drm/xe/xe_gt_throttle.c @@ -114,115 +114,115 @@ static u32 read_reason_vr_tdc(struct xe_gt *gt) return tdc; } -static ssize_t status_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t status_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool status = !!read_status(gt); return sysfs_emit(buff, "%u\n", status); } -static DEVICE_ATTR_RO(status); +static struct kobj_attribute attr_status = __ATTR_RO(status); -static ssize_t reason_pl1_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_pl1_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool pl1 = !!read_reason_pl1(gt); return sysfs_emit(buff, "%u\n", pl1); } -static DEVICE_ATTR_RO(reason_pl1); +static struct kobj_attribute attr_reason_pl1 = __ATTR_RO(reason_pl1); -static ssize_t reason_pl2_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_pl2_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool pl2 = !!read_reason_pl2(gt); return sysfs_emit(buff, "%u\n", pl2); } -static DEVICE_ATTR_RO(reason_pl2); +static struct kobj_attribute attr_reason_pl2 = __ATTR_RO(reason_pl2); -static ssize_t reason_pl4_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_pl4_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool pl4 = !!read_reason_pl4(gt); return sysfs_emit(buff, "%u\n", pl4); } -static DEVICE_ATTR_RO(reason_pl4); +static struct kobj_attribute attr_reason_pl4 = __ATTR_RO(reason_pl4); -static ssize_t reason_thermal_show(struct device 
*dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_thermal_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool thermal = !!read_reason_thermal(gt); return sysfs_emit(buff, "%u\n", thermal); } -static DEVICE_ATTR_RO(reason_thermal); +static struct kobj_attribute attr_reason_thermal = __ATTR_RO(reason_thermal); -static ssize_t reason_prochot_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_prochot_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool prochot = !!read_reason_prochot(gt); return sysfs_emit(buff, "%u\n", prochot); } -static DEVICE_ATTR_RO(reason_prochot); +static struct kobj_attribute attr_reason_prochot = __ATTR_RO(reason_prochot); -static ssize_t reason_ratl_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_ratl_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool ratl = !!read_reason_ratl(gt); return sysfs_emit(buff, "%u\n", ratl); } -static DEVICE_ATTR_RO(reason_ratl); +static struct kobj_attribute attr_reason_ratl = __ATTR_RO(reason_ratl); -static ssize_t reason_vr_thermalert_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_vr_thermalert_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool thermalert = !!read_reason_vr_thermalert(gt); return sysfs_emit(buff, "%u\n", thermalert); } -static DEVICE_ATTR_RO(reason_vr_thermalert); +static struct kobj_attribute attr_reason_vr_thermalert = __ATTR_RO(reason_vr_thermalert); -static ssize_t reason_vr_tdc_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_vr_tdc_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool tdc = !!read_reason_vr_tdc(gt); return sysfs_emit(buff, "%u\n", tdc); } -static DEVICE_ATTR_RO(reason_vr_tdc); +static struct kobj_attribute attr_reason_vr_tdc = __ATTR_RO(reason_vr_tdc); static struct attribute *throttle_attrs[] = { - &dev_attr_status.attr, - &dev_attr_reason_pl1.attr, - &dev_attr_reason_pl2.attr, - &dev_attr_reason_pl4.attr, - &dev_attr_reason_thermal.attr, - &dev_attr_reason_prochot.attr, - &dev_attr_reason_ratl.attr, - &dev_attr_reason_vr_thermalert.attr, - &dev_attr_reason_vr_tdc.attr, + &attr_status.attr, + &attr_reason_pl1.attr, + &attr_reason_pl2.attr, + &attr_reason_pl4.attr, + &attr_reason_thermal.attr, + &attr_reason_prochot.attr, + &attr_reason_ratl.attr, + &attr_reason_vr_thermalert.attr, + &attr_reason_vr_tdc.attr, NULL }; diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index bc5714a5b36b..bac5471a1a78 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -483,7 +483,8 @@ static int guc_g2g_alloc(struct xe_guc *guc) XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_ALL | - XE_BO_FLAG_GGTT_INVALIDATE); + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -1393,6 +1394,7 @@ proto: /* Use data from the GuC response as our return value */ return FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, 
header); } +ALLOW_ERROR_INJECTION(xe_guc_mmio_send_recv, ERRNO); int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len) { @@ -1508,30 +1510,32 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) xe_uc_fw_print(&guc->fw, p); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) - return; + if (!IS_SRIOV_VF(gt_to_xe(gt))) { + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return; + + status = xe_mmio_read32(&gt->mmio, GUC_STATUS); + + drm_printf(p, "\nGuC status 0x%08x:\n", status); + drm_printf(p, "\tBootrom status = 0x%x\n", + REG_FIELD_GET(GS_BOOTROM_MASK, status)); + drm_printf(p, "\tuKernel status = 0x%x\n", + REG_FIELD_GET(GS_UKERNEL_MASK, status)); + drm_printf(p, "\tMIA Core status = 0x%x\n", + REG_FIELD_GET(GS_MIA_MASK, status)); + drm_printf(p, "\tLog level = %d\n", + xe_guc_log_get_level(&guc->log)); + + drm_puts(p, "\nScratch registers:\n"); + for (i = 0; i < SOFT_SCRATCH_COUNT; i++) { + drm_printf(p, "\t%2d: \t0x%x\n", + i, xe_mmio_read32(&gt->mmio, SOFT_SCRATCH(i))); + } - status = xe_mmio_read32(&gt->mmio, GUC_STATUS); - - drm_printf(p, "\nGuC status 0x%08x:\n", status); - drm_printf(p, "\tBootrom status = 0x%x\n", - REG_FIELD_GET(GS_BOOTROM_MASK, status)); - drm_printf(p, "\tuKernel status = 0x%x\n", - REG_FIELD_GET(GS_UKERNEL_MASK, status)); - drm_printf(p, "\tMIA Core status = 0x%x\n", - REG_FIELD_GET(GS_MIA_MASK, status)); - drm_printf(p, "\tLog level = %d\n", - xe_guc_log_get_level(&guc->log)); - - drm_puts(p, "\nScratch registers:\n"); - for (i = 0; i < SOFT_SCRATCH_COUNT; i++) { - drm_printf(p, "\t%2d: \t0x%x\n", - i, xe_mmio_read32(&gt->mmio, SOFT_SCRATCH(i))); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } - xe_force_wake_put(gt_to_fw(gt), fw_ref); - drm_puts(p, "\n"); xe_guc_ct_print(&guc->ct, p, false); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 7031542a70ce..44c1fa2fe7c8 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -376,6 +376,11 @@ static void guc_waklv_init(struct xe_guc_ads *ads) GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET, &offset, &remain); + if (GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_WA(gt, 16026508708)) + guc_waklv_enable_simple(ads, + GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH, + &offset, &remain); + size = guc_ads_waklv_size(ads) - remain; if (!size) return; @@ -414,7 +419,8 @@ int xe_guc_ads_init(struct xe_guc_ads *ads) bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -710,8 +716,8 @@ static int guc_capture_prep_lists(struct xe_guc_ads *ads) } if (ads->capture_size != PAGE_ALIGN(total_size)) - xe_gt_dbg(gt, "ADS capture alloc size changed from %d to %d\n", - ads->capture_size, PAGE_ALIGN(total_size)); + xe_gt_dbg(gt, "Updated ADS capture size %d (was %d)\n", + PAGE_ALIGN(total_size), ads->capture_size); return PAGE_ALIGN(total_size); } diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index 9095618648bc..859a3ba91be5 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -105,49 +105,49 @@ struct __guc_capture_parsed_output { * 3. Incorrect order will trigger XE_WARN.
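 * For example (an illustrative pair; these exact entries appear in the
 * macros below), a 64-bit register is described by its low dword first,
 * with the printable name carried only by the high-dword entry:
 *
 *	{ RING_ACTHD(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \
 *	{ RING_ACTHD_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "ACTHD"}, \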
*/ #define COMMON_XELP_BASE_GLOBAL \ - { FORCEWAKE_GT, REG_32BIT, 0, 0, "FORCEWAKE_GT"} + { FORCEWAKE_GT, REG_32BIT, 0, 0, 0, "FORCEWAKE_GT"} #define COMMON_BASE_ENGINE_INSTANCE \ - { RING_HWSTAM(0), REG_32BIT, 0, 0, "HWSTAM"}, \ - { RING_HWS_PGA(0), REG_32BIT, 0, 0, "RING_HWS_PGA"}, \ - { RING_HEAD(0), REG_32BIT, 0, 0, "RING_HEAD"}, \ - { RING_TAIL(0), REG_32BIT, 0, 0, "RING_TAIL"}, \ - { RING_CTL(0), REG_32BIT, 0, 0, "RING_CTL"}, \ - { RING_MI_MODE(0), REG_32BIT, 0, 0, "RING_MI_MODE"}, \ - { RING_MODE(0), REG_32BIT, 0, 0, "RING_MODE"}, \ - { RING_ESR(0), REG_32BIT, 0, 0, "RING_ESR"}, \ - { RING_EMR(0), REG_32BIT, 0, 0, "RING_EMR"}, \ - { RING_EIR(0), REG_32BIT, 0, 0, "RING_EIR"}, \ - { RING_IMR(0), REG_32BIT, 0, 0, "RING_IMR"}, \ - { RING_IPEHR(0), REG_32BIT, 0, 0, "IPEHR"}, \ - { RING_INSTDONE(0), REG_32BIT, 0, 0, "RING_INSTDONE"}, \ - { INDIRECT_RING_STATE(0), REG_32BIT, 0, 0, "INDIRECT_RING_STATE"}, \ - { RING_ACTHD(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_ACTHD_UDW(0), REG_64BIT_HI_DW, 0, 0, "ACTHD"}, \ - { RING_BBADDR(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_BBADDR_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_BBADDR"}, \ - { RING_START(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_START_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_START"}, \ - { RING_DMA_FADD(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_DMA_FADD_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_DMA_FADD"}, \ - { RING_EXECLIST_STATUS_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_EXECLIST_STATUS_HI(0), REG_64BIT_HI_DW, 0, 0, "RING_EXECLIST_STATUS"}, \ - { RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0, 0, "RING_EXECLIST_SQ_CONTENTS"} + { RING_HWSTAM(0), REG_32BIT, 0, 0, 0, "HWSTAM"}, \ + { RING_HWS_PGA(0), REG_32BIT, 0, 0, 0, "RING_HWS_PGA"}, \ + { RING_HEAD(0), REG_32BIT, 0, 0, 0, "RING_HEAD"}, \ + { RING_TAIL(0), REG_32BIT, 0, 0, 0, "RING_TAIL"}, \ + { RING_CTL(0), REG_32BIT, 0, 0, 0, "RING_CTL"}, \ + { RING_MI_MODE(0), REG_32BIT, 0, 0, 0, "RING_MI_MODE"}, \ + { RING_MODE(0), REG_32BIT, 0, 0, 0, "RING_MODE"}, \ + { RING_ESR(0), REG_32BIT, 0, 0, 0, "RING_ESR"}, \ + { RING_EMR(0), REG_32BIT, 0, 0, 0, "RING_EMR"}, \ + { RING_EIR(0), REG_32BIT, 0, 0, 0, "RING_EIR"}, \ + { RING_IMR(0), REG_32BIT, 0, 0, 0, "RING_IMR"}, \ + { RING_IPEHR(0), REG_32BIT, 0, 0, 0, "IPEHR"}, \ + { RING_INSTDONE(0), REG_32BIT, 0, 0, 0, "RING_INSTDONE"}, \ + { INDIRECT_RING_STATE(0), REG_32BIT, 0, 0, 0, "INDIRECT_RING_STATE"}, \ + { RING_ACTHD(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_ACTHD_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "ACTHD"}, \ + { RING_BBADDR(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_BBADDR_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_BBADDR"}, \ + { RING_START(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_START_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_START"}, \ + { RING_DMA_FADD(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_DMA_FADD_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_DMA_FADD"}, \ + { RING_EXECLIST_STATUS_LO(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_EXECLIST_STATUS_HI(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_EXECLIST_STATUS"}, \ + { RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_EXECLIST_SQ_CONTENTS"} #define COMMON_XELP_RC_CLASS \ - { RCU_MODE, REG_32BIT, 0, 0, "RCU_MODE"} + { RCU_MODE, REG_32BIT, 0, 0, 0, "RCU_MODE"} #define COMMON_XELP_RC_CLASS_INSTDONE \ - { SC_INSTDONE, REG_32BIT, 0, 0, "SC_INSTDONE"}, \ - { SC_INSTDONE_EXTRA, REG_32BIT, 0, 0, 
"SC_INSTDONE_EXTRA"}, \ - { SC_INSTDONE_EXTRA2, REG_32BIT, 0, 0, "SC_INSTDONE_EXTRA2"} + { SC_INSTDONE, REG_32BIT, 0, 0, 0, "SC_INSTDONE"}, \ + { SC_INSTDONE_EXTRA, REG_32BIT, 0, 0, 0, "SC_INSTDONE_EXTRA"}, \ + { SC_INSTDONE_EXTRA2, REG_32BIT, 0, 0, 0, "SC_INSTDONE_EXTRA2"} #define XELP_VEC_CLASS_REGS \ - { SFC_DONE(0), 0, 0, 0, "SFC_DONE[0]"}, \ - { SFC_DONE(1), 0, 0, 0, "SFC_DONE[1]"}, \ - { SFC_DONE(2), 0, 0, 0, "SFC_DONE[2]"}, \ - { SFC_DONE(3), 0, 0, 0, "SFC_DONE[3]"} + { SFC_DONE(0), 0, 0, 0, 0, "SFC_DONE[0]"}, \ + { SFC_DONE(1), 0, 0, 0, 0, "SFC_DONE[1]"}, \ + { SFC_DONE(2), 0, 0, 0, 0, "SFC_DONE[2]"}, \ + { SFC_DONE(3), 0, 0, 0, 0, "SFC_DONE[3]"} /* XE_LP Global */ static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = { @@ -352,7 +352,7 @@ static const struct __ext_steer_reg xehpg_extregs[] = { static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, const struct __ext_steer_reg *extlist, - int slice_id, int subslice_id) + u32 dss_id, u16 slice_id, u16 subslice_id) { if (!ext || !extlist) return; @@ -361,6 +361,7 @@ static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1); ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id); + ext->dss_id = dss_id; ext->regname = extlist->name; } @@ -397,7 +398,7 @@ static void guc_capture_alloc_steered_lists(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); u16 slice, subslice; - int iter, i, total = 0; + int dss, i, total = 0; const struct __guc_mmio_reg_descr_group *lists = guc->capture->reglists; const struct __guc_mmio_reg_descr_group *list; struct __guc_mmio_reg_descr_group *extlists; @@ -454,15 +455,15 @@ static void guc_capture_alloc_steered_lists(struct xe_guc *guc) /* For steering registers, the list is generated at run-time */ extarray = (struct __guc_mmio_reg_descr *)extlists[0].list; - for_each_dss_steering(iter, gt, slice, subslice) { + for_each_dss_steering(dss, gt, slice, subslice) { for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) { - __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice); + __fill_ext_reg(extarray, &xe_extregs[i], dss, slice, subslice); ++extarray; } if (has_xehpg_extregs) for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) { - __fill_ext_reg(extarray, &xehpg_extregs[i], slice, subslice); + __fill_ext_reg(extarray, &xehpg_extregs[i], dss, slice, subslice); ++extarray; } } @@ -1672,18 +1673,16 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_ { struct xe_gt *gt = snapshot->hwe->gt; struct xe_device *xe = gt_to_xe(gt); - struct xe_guc *guc = >->uc.guc; struct xe_devcoredump *devcoredump = &xe->devcoredump; struct xe_devcoredump_snapshot *devcore_snapshot = &devcoredump->snapshot; struct gcap_reg_list_info *reginfo = NULL; u32 i, last_value = 0; - bool is_ext, low32_ready = false; + bool low32_ready = false; if (!list || !list->list || list->num_regs == 0) return; XE_WARN_ON(!devcore_snapshot->matched_node); - is_ext = list == guc->capture->extlists; reginfo = &devcore_snapshot->matched_node->reginfo[type]; /* @@ -1749,17 +1748,12 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_ */ XE_WARN_ON(low32_ready); - if (is_ext) { - int dss, group, instance; - - group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags); - instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags); - dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance); - - drm_printf(p, "\t%s[%u]: 0x%08x\n", 
reg_desc->regname, dss, value); - } else { + if (FIELD_GET(GUC_REGSET_STEERING_NEEDED, reg_desc->flags)) + drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, + reg_desc->dss_id, value); + else drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value); - } + break; } } diff --git a/drivers/gpu/drm/xe/xe_guc_capture_types.h b/drivers/gpu/drm/xe/xe_guc_capture_types.h index ca2d390ccbee..6cb439115597 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture_types.h +++ b/drivers/gpu/drm/xe/xe_guc_capture_types.h @@ -39,6 +39,8 @@ struct __guc_mmio_reg_descr { u32 flags; /** @mask: The mask to apply */ u32 mask; + /** @dss_id: Cached index for steered registers */ + u32 dss_id; /** @regname: Name of the register */ const char *regname; }; diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 72ad576fc18e..2447de0ebedf 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -238,7 +238,8 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -1088,6 +1089,7 @@ int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, KUNIT_STATIC_STUB_REDIRECT(xe_guc_ct_send_recv, ct, action, len, response_buffer); return guc_ct_send_recv(ct, action, len, response_buffer, false); } +ALLOW_ERROR_INJECTION(xe_guc_ct_send_recv, ERRNO); int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 *response_buffer) @@ -1828,10 +1830,10 @@ static void ct_dead_print(struct xe_dead_ct *dead) return; } - drm_printf(&lp, "CTB is dead - reason=0x%X\n", dead->reason); /* Can't generate a genuine core dump at this point, so just do the good bits */ drm_puts(&lp, "**** Xe Device Coredump ****\n"); + drm_printf(&lp, "Reason: CTB is dead - 0x%X\n", dead->reason); xe_device_snapshot_print(xe, &lp); drm_printf(&lp, "**** GT #%d ****\n", gt->info.id); diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index c569ff456e74..0b102ab46c4d 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -17,101 +17,130 @@ #include "xe_macros.h" #include "xe_pm.h" -static struct xe_guc *node_to_guc(struct drm_info_node *node) -{ - return node->info_ent->data; -} - -static int guc_info(struct seq_file *m, void *data) +/* + * guc_debugfs_show - A show callback for struct drm_info_list + * @m: the &seq_file + * @data: data used by the drm debugfs helpers + * + * This callback can be used in struct drm_info_list to describe debugfs + * files that are &xe_guc specific in a similar way to how we handle &xe_gt + * specific files using &xe_gt_debugfs_simple_show. + * + * It is assumed that those debugfs files will be created on a directory entry + * whose grandparent struct dentry's d_inode->i_private points to &xe_gt. + * + * /sys/kernel/debug/dri/0/ + * ├── gt0 # dent->d_parent->d_parent (d_inode->i_private == gt) + * │ ├── uc # dent->d_parent + * │ │ ├── guc_info # dent + * │ │ ├── guc_... + * + * This function assumes that &m->private will be set to the &struct + * drm_info_node corresponding to the instance of the info on a given &struct + * drm_minor (see struct drm_info_list.show for details).
+ * + * This function also assumes that struct drm_info_list.data will point to the + * function code that will actually print a file content:: + * + * int (*print)(struct xe_guc *, struct drm_printer *) + * + * Example:: + * + * int foo(struct xe_guc *guc, struct drm_printer *p) + * { + * drm_printf(p, "enabled %d\n", guc->submission_state.enabled); + * return 0; + * } + * + * static const struct drm_info_list bar[] = { + * { .name = "foo", .show = guc_debugfs_show, .data = foo }, + * }; + * + * parent = debugfs_create_dir("uc", gtdir); + * drm_debugfs_create_files(bar, ARRAY_SIZE(bar), parent, minor); + * + * Return: 0 on success or a negative error code on failure. + */ +static int guc_debugfs_show(struct seq_file *m, void *data) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct dentry *parent = node->dent->d_parent; + struct dentry *grandparent = parent->d_parent; + struct xe_gt *gt = grandparent->d_inode->i_private; + struct xe_device *xe = gt_to_xe(gt); + int (*print)(struct xe_guc *, struct drm_printer *) = node->info_ent->data; + int ret; xe_pm_runtime_get(xe); - xe_guc_print_info(guc, &p); + ret = print(&gt->uc.guc, &p); xe_pm_runtime_put(xe); - return 0; + return ret; } -static int guc_log(struct seq_file *m, void *data) +static int guc_log(struct xe_guc *guc, struct drm_printer *p) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); - struct drm_printer p = drm_seq_file_printer(m); - - xe_pm_runtime_get(xe); - xe_guc_log_print(&guc->log, &p); - xe_pm_runtime_put(xe); - + xe_guc_log_print(&guc->log, p); return 0; } -static int guc_log_dmesg(struct seq_file *m, void *data) +static int guc_log_dmesg(struct xe_guc *guc, struct drm_printer *p) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); - - xe_pm_runtime_get(xe); xe_guc_log_print_dmesg(&guc->log); - xe_pm_runtime_put(xe); - return 0; } -static int guc_ctb(struct seq_file *m, void *data) +static int guc_ctb(struct xe_guc *guc, struct drm_printer *p) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); - struct drm_printer p = drm_seq_file_printer(m); - - xe_pm_runtime_get(xe); - xe_guc_ct_print(&guc->ct, &p, true); - xe_pm_runtime_put(xe); - + xe_guc_ct_print(&guc->ct, p, true); return 0; } -static int guc_pc(struct seq_file *m, void *data) +static int guc_pc(struct xe_guc *guc, struct drm_printer *p) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); - struct drm_printer p = drm_seq_file_printer(m); - - xe_pm_runtime_get(xe); - xe_guc_pc_print(&guc->pc, &p); - xe_pm_runtime_put(xe); - + xe_guc_pc_print(&guc->pc, p); return 0; } -static const struct drm_info_list debugfs_list[] = { - {"guc_info", guc_info, 0}, - {"guc_log", guc_log, 0}, - {"guc_log_dmesg", guc_log_dmesg, 0}, - {"guc_ctb", guc_ctb, 0}, - {"guc_pc", guc_pc, 0}, +/* + * only for GuC debugfs files which can be safely used on the VF as well: + * - without access to the GuC privileged registers + * - without access to the PF specific GuC objects + */ +static const struct drm_info_list vf_safe_debugfs_list[] = { + { "guc_info", .show = guc_debugfs_show, .data = xe_guc_print_info }, + { "guc_ctb", .show = guc_debugfs_show, .data = guc_ctb }, +}; + +/* For GuC debugfs files that require SLPC support */ +static const struct drm_info_list slpc_debugfs_list[] = { + { "guc_pc",
.show = guc_debugfs_show, .data = guc_pc }, +}; + +/* everything else should be added here */ +static const struct drm_info_list pf_only_debugfs_list[] = { + { "guc_log", .show = guc_debugfs_show, .data = guc_log }, + { "guc_log_dmesg", .show = guc_debugfs_show, .data = guc_log_dmesg }, }; void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent) { - struct drm_minor *minor = guc_to_xe(guc)->drm.primary; - struct drm_info_list *local; - int i; - -#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) - local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL); - if (!local) - return; + struct xe_device *xe = guc_to_xe(guc); + struct drm_minor *minor = xe->drm.primary; - memcpy(local, debugfs_list, DEBUGFS_SIZE); -#undef DEBUGFS_SIZE + drm_debugfs_create_files(vf_safe_debugfs_list, + ARRAY_SIZE(vf_safe_debugfs_list), + parent, minor); - for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) - local[i].data = guc; + if (!IS_SRIOV_VF(xe)) { + drm_debugfs_create_files(pf_only_debugfs_list, + ARRAY_SIZE(pf_only_debugfs_list), + parent, minor); - drm_debugfs_create_files(local, - ARRAY_SIZE(debugfs_list), - parent, minor); + if (!xe->info.skip_guc_pc) + drm_debugfs_create_files(slpc_debugfs_list, + ARRAY_SIZE(slpc_debugfs_list), + parent, minor); + } } diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.c b/drivers/gpu/drm/xe/xe_guc_engine_activity.c index 2a457dcf31d5..0fb48f8f05d8 100644 --- a/drivers/gpu/drm/xe/xe_guc_engine_activity.c +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.c @@ -17,36 +17,61 @@ #include "xe_hw_engine.h" #include "xe_map.h" #include "xe_mmio.h" +#include "xe_sriov_pf_helpers.h" #include "xe_trace_guc.h" #define TOTAL_QUANTA 0x8000 -static struct iosys_map engine_activity_map(struct xe_guc *guc, struct xe_hw_engine *hwe) +static struct iosys_map engine_activity_map(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int index) { struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; - struct engine_activity_buffer *buffer = &engine_activity->device_buffer; + struct engine_activity_buffer *buffer; u16 guc_class = xe_engine_class_to_guc_class(hwe->class); size_t offset; - offset = offsetof(struct guc_engine_activity_data, + if (engine_activity->num_functions) { + buffer = &engine_activity->function_buffer; + offset = sizeof(struct guc_engine_activity_data) * index; + } else { + buffer = &engine_activity->device_buffer; + offset = 0; + } + + offset += offsetof(struct guc_engine_activity_data, engine_activity[guc_class][hwe->logical_instance]); return IOSYS_MAP_INIT_OFFSET(&buffer->activity_bo->vmap, offset); } -static struct iosys_map engine_metadata_map(struct xe_guc *guc) +static struct iosys_map engine_metadata_map(struct xe_guc *guc, + unsigned int index) { struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; - struct engine_activity_buffer *buffer = &engine_activity->device_buffer; + struct engine_activity_buffer *buffer; + size_t offset; - return buffer->metadata_bo->vmap; + if (engine_activity->num_functions) { + buffer = &engine_activity->function_buffer; + offset = sizeof(struct guc_engine_activity_metadata) * index; + } else { + buffer = &engine_activity->device_buffer; + offset = 0; + } + + return IOSYS_MAP_INIT_OFFSET(&buffer->metadata_bo->vmap, offset); } static int allocate_engine_activity_group(struct xe_guc *guc) { struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; struct xe_device *xe = guc_to_xe(guc); - u32 num_activity_group = 1; /* Will be 
modified for VF */ + u32 num_activity_group; + + /* + * An additional activity group is allocated for PF + */ + num_activity_group = IS_SRIOV_PF(xe) ? xe_sriov_pf_get_totalvfs(xe) + 1 : 1; engine_activity->eag = drmm_kcalloc(&xe->drm, num_activity_group, sizeof(struct engine_activity_group), GFP_KERNEL); @@ -60,10 +85,11 @@ static int allocate_engine_activity_group(struct xe_guc *guc) } static int allocate_engine_activity_buffers(struct xe_guc *guc, - struct engine_activity_buffer *buffer) + struct engine_activity_buffer *buffer, + int count) { - u32 metadata_size = sizeof(struct guc_engine_activity_metadata); - u32 size = sizeof(struct guc_engine_activity_data); + u32 metadata_size = sizeof(struct guc_engine_activity_metadata) * count; + u32 size = sizeof(struct guc_engine_activity_data) * count; struct xe_gt *gt = guc_to_gt(guc); struct xe_tile *tile = gt_to_tile(gt); struct xe_bo *bo, *metadata_bo; @@ -118,10 +144,11 @@ static bool is_engine_activity_supported(struct xe_guc *guc) return true; } -static struct engine_activity *hw_engine_to_engine_activity(struct xe_hw_engine *hwe) +static struct engine_activity *hw_engine_to_engine_activity(struct xe_hw_engine *hwe, + unsigned int index) { struct xe_guc *guc = &hwe->gt->uc.guc; - struct engine_activity_group *eag = &guc->engine_activity.eag[0]; + struct engine_activity_group *eag = &guc->engine_activity.eag[index]; u16 guc_class = xe_engine_class_to_guc_class(hwe->class); return &eag->engine[guc_class][hwe->logical_instance]; @@ -138,9 +165,10 @@ static u64 cpu_ns_to_guc_tsc_tick(ktime_t ns, u32 freq) #define read_metadata_record(xe_, map_, field_) \ xe_map_rd_field(xe_, map_, 0, struct guc_engine_activity_metadata, field_) -static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int index) { - struct engine_activity *ea = hw_engine_to_engine_activity(hwe); + struct engine_activity *ea = hw_engine_to_engine_activity(hwe, index); struct guc_engine_activity *cached_activity = &ea->activity; struct guc_engine_activity_metadata *cached_metadata = &ea->metadata; struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; @@ -151,8 +179,8 @@ static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) u64 active_ticks, gpm_ts; u16 change_num; - activity_map = engine_activity_map(guc, hwe); - metadata_map = engine_metadata_map(guc); + activity_map = engine_activity_map(guc, hwe, index); + metadata_map = engine_metadata_map(guc, index); global_change_num = read_metadata_record(xe, &metadata_map, global_change_num); /* GuC has not initialized activity data yet, return 0 */ @@ -194,9 +222,9 @@ update: return ea->total + ea->active; } -static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, unsigned int index) { - struct engine_activity *ea = hw_engine_to_engine_activity(hwe); + struct engine_activity *ea = hw_engine_to_engine_activity(hwe, index); struct guc_engine_activity_metadata *cached_metadata = &ea->metadata; struct guc_engine_activity *cached_activity = &ea->activity; struct iosys_map activity_map, metadata_map; @@ -205,8 +233,8 @@ static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) u64 numerator; u16 quanta_ratio; - activity_map = engine_activity_map(guc, hwe); - metadata_map = engine_metadata_map(guc); + activity_map = engine_activity_map(guc, hwe, index); + 
metadata_map = engine_metadata_map(guc, index); if (!cached_metadata->guc_tsc_frequency_hz) cached_metadata->guc_tsc_frequency_hz = read_metadata_record(xe, &metadata_map, @@ -245,12 +273,39 @@ static int enable_engine_activity_stats(struct xe_guc *guc) return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); } -static void engine_activity_set_cpu_ts(struct xe_guc *guc) +static int enable_function_engine_activity_stats(struct xe_guc *guc, bool enable) { struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; - struct engine_activity_group *eag = &engine_activity->eag[0]; + u32 metadata_ggtt_addr = 0, ggtt_addr = 0, num_functions = 0; + struct engine_activity_buffer *buffer = &engine_activity->function_buffer; + u32 action[6]; + int len = 0; + + if (enable) { + metadata_ggtt_addr = xe_bo_ggtt_addr(buffer->metadata_bo); + ggtt_addr = xe_bo_ggtt_addr(buffer->activity_bo); + num_functions = engine_activity->num_functions; + } + + action[len++] = XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER; + action[len++] = num_functions; + action[len++] = metadata_ggtt_addr; + action[len++] = 0; + action[len++] = ggtt_addr; + action[len++] = 0; + + /* Blocking here to ensure the buffers are ready before reading them */ + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +static void engine_activity_set_cpu_ts(struct xe_guc *guc, unsigned int index) +{ + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct engine_activity_group *eag = &engine_activity->eag[index]; int i, j; + xe_gt_assert(guc_to_gt(guc), index < engine_activity->num_activity_group); + for (i = 0; i < GUC_MAX_ENGINE_CLASSES; i++) for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; j++) eag->engine[i][j].last_cpu_ts = ktime_get(); @@ -265,34 +320,107 @@ static u32 gpm_timestamp_shift(struct xe_gt *gt) return 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); } +static bool is_function_valid(struct xe_guc *guc, unsigned int fn_id) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + + if (!IS_SRIOV_PF(xe) && fn_id) + return false; + + if (engine_activity->num_functions && fn_id >= engine_activity->num_functions) + return false; + + return true; +} + +static int engine_activity_disable_function_stats(struct xe_guc *guc) +{ + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct engine_activity_buffer *buffer = &engine_activity->function_buffer; + int ret; + + if (!engine_activity->num_functions) + return 0; + + ret = enable_function_engine_activity_stats(guc, false); + if (ret) + return ret; + + free_engine_activity_buffers(buffer); + engine_activity->num_functions = 0; + + return 0; +} + +static int engine_activity_enable_function_stats(struct xe_guc *guc, int num_vfs) +{ + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct engine_activity_buffer *buffer = &engine_activity->function_buffer; + int ret, i; + + if (!num_vfs) + return 0; + + /* This includes 1 PF and num_vfs */ + engine_activity->num_functions = num_vfs + 1; + + ret = allocate_engine_activity_buffers(guc, buffer, engine_activity->num_functions); + if (ret) + return ret; + + ret = enable_function_engine_activity_stats(guc, true); + if (ret) { + free_engine_activity_buffers(buffer); + engine_activity->num_functions = 0; + return ret; + } + + /* skip PF as it was already setup */ + for (i = 1; i < engine_activity->num_functions; i++) + engine_activity_set_cpu_ts(guc, i); + + 
return 0; +} + /** * xe_guc_engine_activity_active_ticks - Get engine active ticks * @guc: The GuC object * @hwe: The hw_engine object + * @fn_id: function id to report on * * Return: accumulated ticks @hwe was active since engine activity stats were enabled. */ -u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int fn_id) { if (!xe_guc_engine_activity_supported(guc)) return 0; - return get_engine_active_ticks(guc, hwe); + if (!is_function_valid(guc, fn_id)) + return 0; + + return get_engine_active_ticks(guc, hwe, fn_id); } /** * xe_guc_engine_activity_total_ticks - Get engine total ticks * @guc: The GuC object * @hwe: The hw_engine object + * @fn_id: function id to report on * * Return: accumulated quanta of ticks allocated for the engine */ -u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int fn_id) { if (!xe_guc_engine_activity_supported(guc)) return 0; - return get_engine_total_ticks(guc, hwe); + if (!is_function_valid(guc, fn_id)) + return 0; + + return get_engine_total_ticks(guc, hwe, fn_id); } /** @@ -311,6 +439,25 @@ bool xe_guc_engine_activity_supported(struct xe_guc *guc) } /** + * xe_guc_engine_activity_function_stats - Enable/Disable per-function engine activity stats + * @guc: The GuC object + * @num_vfs: number of vfs + * @enable: true to enable, false otherwise + * + * Return: 0 on success, negative error code otherwise + */ +int xe_guc_engine_activity_function_stats(struct xe_guc *guc, int num_vfs, bool enable) +{ + if (!xe_guc_engine_activity_supported(guc)) + return 0; + + if (enable) + return engine_activity_enable_function_stats(guc, num_vfs); + + return engine_activity_disable_function_stats(guc); +} + +/** * xe_guc_engine_activity_enable_stats - Enable engine activity stats * @guc: The GuC object * @@ -327,7 +474,7 @@ void xe_guc_engine_activity_enable_stats(struct xe_guc *guc) if (ret) xe_gt_err(guc_to_gt(guc), "failed to enable activity stats%d\n", ret); else - engine_activity_set_cpu_ts(guc); + engine_activity_set_cpu_ts(guc, 0); } static void engine_activity_fini(void *arg) @@ -360,7 +507,7 @@ int xe_guc_engine_activity_init(struct xe_guc *guc) return ret; } - ret = allocate_engine_activity_buffers(guc, &engine_activity->device_buffer); + ret = allocate_engine_activity_buffers(guc, &engine_activity->device_buffer, 1); if (ret) { xe_gt_err(gt, "failed to allocate engine activity buffers (%pe)\n", ERR_PTR(ret)); return ret; diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.h b/drivers/gpu/drm/xe/xe_guc_engine_activity.h index a042d4cb404c..b32926c2d208 100644 --- a/drivers/gpu/drm/xe/xe_guc_engine_activity.h +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.h @@ -14,6 +14,9 @@ struct xe_guc; int xe_guc_engine_activity_init(struct xe_guc *guc); bool xe_guc_engine_activity_supported(struct xe_guc *guc); void xe_guc_engine_activity_enable_stats(struct xe_guc *guc); -u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe); -u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe); +int xe_guc_engine_activity_function_stats(struct xe_guc *guc, int num_vfs, bool enable); +u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int fn_id); +u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct 
xe_hw_engine *hwe, + unsigned int fn_id); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h b/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h index 5cdd034b6b70..48f69ddefa36 100644 --- a/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h @@ -79,14 +79,24 @@ struct xe_guc_engine_activity { /** @num_activity_group: number of activity groups */ u32 num_activity_group; + /** @num_functions: number of functions */ + u32 num_functions; + /** @supported: indicates support for engine activity stats */ bool supported; - /** @eag: holds the device level engine activity data */ + /** + * @eag: holds the device level engine activity data in native mode. + * In SRIOV mode, points to an array with entries which hold the engine + * activity data for the PF and VFs + */ struct engine_activity_group *eag; /** @device_buffer: buffer object for global engine activity */ struct engine_activity_buffer device_buffer; + + /** @function_buffer: buffer object for per-function engine activity */ + struct engine_activity_buffer function_buffer; }; #endif diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 80514a446ba2..38039c411387 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -260,7 +260,8 @@ int xe_guc_log_init(struct xe_guc_log *log) bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(), XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 43b1192ba61c..18c623992035 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -462,6 +462,21 @@ static u32 get_cur_freq(struct xe_gt *gt) } /** + * xe_guc_pc_get_cur_freq_fw - With fw held, get requested frequency + * @pc: The GuC PC + * + * Returns: the requested frequency for that GT instance + */ +u32 xe_guc_pc_get_cur_freq_fw(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + return get_cur_freq(gt); +} + +/** * xe_guc_pc_get_cur_freq - Get Current requested frequency * @pc: The GuC PC * @freq: A pointer to a u32 where the freq value will be returned @@ -1170,7 +1185,8 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) bo = xe_managed_bo_create_pin_map(xe, tile, size, XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 39102b79602f..0a2664d5c811 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -22,6 +22,7 @@ void xe_guc_pc_print(struct xe_guc_pc *pc, struct drm_printer *p); u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc); int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq); +u32 xe_guc_pc_get_cur_freq_fw(struct xe_guc_pc *pc); u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc); u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc); u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 769781d577df..2ad38f6b103e 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -300,6 +300,8 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) primelockdep(guc); +
guc->submission_state.initialized = true; + return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); } @@ -834,6 +836,13 @@ void xe_guc_submit_wedge(struct xe_guc *guc) xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); + /* + * If device is being wedged even before submission_state is + * initialized, there's nothing to do here. + */ + if (!guc->submission_state.initialized) + return; + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, guc_submit_wedged_fini, guc); if (err) { @@ -1170,9 +1179,12 @@ trigger_reset: process_name = q->vm->xef->process_name; pid = q->vm->xef->pid; } - xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]", - xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), - q->guc->id, q->flags, process_name, pid); + + if (!exec_queue_killed(q)) + xe_gt_notice(guc_to_gt(guc), + "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]", + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), + q->guc->id, q->flags, process_name, pid); trace_xe_sched_job_timedout(job); diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 63bac64429a5..1fde7614fcc5 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -89,6 +89,11 @@ struct xe_guc { struct mutex lock; /** @submission_state.enabled: submission is enabled */ bool enabled; + /** + * @submission_state.initialized: mark when submission state is + * even initialized - before that not even the lock is valid + */ + bool initialized; /** @submission_state.fini_wq: submit fini wait queue */ wait_queue_head_t fini_wq; } submission_state; diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index a440442b4d72..640950172088 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -605,6 +605,7 @@ err_object: kobject_put(kobj); return err; } +ALLOW_ERROR_INJECTION(xe_add_hw_engine_class_defaults, ERRNO); /* See xe_pci_probe() */ static void hw_engine_class_sysfs_fini(void *arg) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 48d80ffdf7bb..eb293aec36a0 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -5,6 +5,7 @@ #include <linux/hwmon-sysfs.h> #include <linux/hwmon.h> +#include <linux/jiffies.h> #include <linux/types.h> #include <linux/units.h> @@ -27,6 +28,7 @@ enum xe_hwmon_reg { REG_PKG_POWER_SKU_UNIT, REG_GT_PERF_STATUS, REG_PKG_ENERGY_STATUS, + REG_FAN_SPEED, }; enum xe_hwmon_reg_operation { @@ -42,6 +44,13 @@ enum xe_hwmon_channel { CHANNEL_MAX, }; +enum xe_fan_channel { + FAN_1, + FAN_2, + FAN_3, + FAN_MAX, +}; + /* * SF_* - scale factors for particular quantities according to hwmon spec. 
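 * For example (illustrative values based on the hwmon sysfs ABI, which
 * reports power in microwatts and energy in microjoules): a reading of
 * 25 W is scaled by 10^6 and emitted as 25000000.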
*/ @@ -62,6 +71,16 @@ struct xe_hwmon_energy_info { }; /** + * struct xe_hwmon_fan_info - to cache previous fan reading + */ +struct xe_hwmon_fan_info { + /** @reg_val_prev: previous fan reg val */ + u32 reg_val_prev; + /** @time_prev: previous timestamp */ + u64 time_prev; +}; + +/** * struct xe_hwmon - xe hwmon data structure */ struct xe_hwmon { @@ -79,6 +98,8 @@ struct xe_hwmon { int scl_shift_time; /** @ei: Energy info for energyN_input */ struct xe_hwmon_energy_info ei[CHANNEL_MAX]; + /** @fi: Fan info for fanN_input */ + struct xe_hwmon_fan_info fi[FAN_MAX]; }; static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, @@ -144,6 +165,14 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg return PCU_CR_PACKAGE_ENERGY_STATUS; } break; + case REG_FAN_SPEED: + if (channel == FAN_1) + return BMG_FAN_1_SPEED; + else if (channel == FAN_2) + return BMG_FAN_2_SPEED; + else if (channel == FAN_3) + return BMG_FAN_3_SPEED; + break; default: drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg); break; @@ -454,6 +483,7 @@ static const struct hwmon_channel_info * const hwmon_info[] = { HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL), HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL), + HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT, HWMON_F_INPUT, HWMON_F_INPUT), NULL }; @@ -480,6 +510,19 @@ static int xe_hwmon_pcode_write_i1(const struct xe_hwmon *hwmon, u32 uval) (uval & POWER_SETUP_I1_DATA_MASK)); } +static int xe_hwmon_pcode_read_fan_control(const struct xe_hwmon *hwmon, u32 subcmd, u32 *uval) +{ + struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); + + /* Platforms that don't return correct value */ + if (hwmon->xe->info.platform == XE_DG2 && subcmd == FSC_READ_NUM_FANS) { + *uval = 2; + return 0; + } + + return xe_pcode_read(root_tile, PCODE_MBOX(FAN_SPEED_CONTROL, subcmd, 0), uval, NULL); +} + static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, long *value, u32 scale_factor) { @@ -706,6 +749,75 @@ xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) } static umode_t +xe_hwmon_fan_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) +{ + u32 uval; + + if (!hwmon->xe->info.has_fan_control) + return 0; + + switch (attr) { + case hwmon_fan_input: + if (xe_hwmon_pcode_read_fan_control(hwmon, FSC_READ_NUM_FANS, &uval)) + return 0; + + return channel < uval ? 0444 : 0; + default: + return 0; + } +} + +static int +xe_hwmon_fan_input_read(struct xe_hwmon *hwmon, int channel, long *val) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); + struct xe_hwmon_fan_info *fi = &hwmon->fi[channel]; + u64 rotations, time_now, time; + u32 reg_val; + int ret = 0; + + mutex_lock(&hwmon->hwmon_lock); + + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_FAN_SPEED, channel)); + time_now = get_jiffies_64(); + + /* + * HW register value is accumulated count of pulses from PWM fan with the scale + * of 2 pulses per rotation. + */ + rotations = (reg_val - fi->reg_val_prev) / 2; + + time = jiffies_delta_to_msecs(time_now - fi->time_prev); + if (unlikely(!time)) { + ret = -EAGAIN; + goto unlock; + } + + /* + * Calculate fan speed in RPM by time averaging two subsequent readings in minutes. 
+ * RPM = number of rotations * msecs per minute / time in msecs + */ + *val = DIV_ROUND_UP_ULL(rotations * (MSEC_PER_SEC * 60), time); + + fi->reg_val_prev = reg_val; + fi->time_prev = time_now; +unlock: + mutex_unlock(&hwmon->hwmon_lock); + return ret; +} + +static int +xe_hwmon_fan_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) +{ + switch (attr) { + case hwmon_fan_input: + return xe_hwmon_fan_input_read(hwmon, channel, val); + default: + return -EOPNOTSUPP; + } +} + +static umode_t xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, u32 attr, int channel) { @@ -730,6 +842,9 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, case hwmon_energy: ret = xe_hwmon_energy_is_visible(hwmon, attr, channel); break; + case hwmon_fan: + ret = xe_hwmon_fan_is_visible(hwmon, attr, channel); + break; default: ret = 0; break; @@ -765,6 +880,9 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, case hwmon_energy: ret = xe_hwmon_energy_read(hwmon, attr, channel, val); break; + case hwmon_fan: + ret = xe_hwmon_fan_read(hwmon, attr, channel, val); + break; default: ret = -EOPNOTSUPP; break; @@ -842,7 +960,7 @@ static void xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) { struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); - long energy; + long energy, fan_speed; u64 val_sku_unit = 0; int channel; struct xe_reg pkg_power_sku_unit; @@ -866,6 +984,11 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) for (channel = 0; channel < CHANNEL_MAX; channel++) if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, channel)) xe_hwmon_energy_get(hwmon, channel, &energy); + + /* Initialize 'struct xe_hwmon_fan_info' with initial fan register reading. */ + for (channel = 0; channel < FAN_MAX; channel++) + if (xe_hwmon_is_visible(hwmon, hwmon_fan, hwmon_fan_input, channel)) + xe_hwmon_fan_input_read(hwmon, channel, &fan_speed); } static void xe_hwmon_mutex_destroy(void *arg) diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 89393dcb53d9..63db66df064b 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -71,7 +71,7 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level lmtt->ops->lmtt_pte_num(level)), ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) | - XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_PINNED); + XE_BO_FLAG_NEEDS_64K); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out_free_pt; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 03bfba696b37..61a2e87990a9 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -38,6 +38,7 @@ #define LRC_ENGINE_CLASS GENMASK_ULL(63, 61) #define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) +#define LRC_PPHWSP_SIZE SZ_4K #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K static struct xe_device * @@ -51,19 +52,22 @@ size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) struct xe_device *xe = gt_to_xe(gt); size_t size; + /* Per-process HW status page (PPHWSP) */ + size = LRC_PPHWSP_SIZE; + + /* Engine context image */ switch (class) { case XE_ENGINE_CLASS_RENDER: if (GRAPHICS_VER(xe) >= 20) - size = 4 * SZ_4K; + size += 3 * SZ_4K; else - size = 14 * SZ_4K; + size += 13 * SZ_4K; break; case XE_ENGINE_CLASS_COMPUTE: - /* 14 pages since graphics_ver == 11 */ if (GRAPHICS_VER(xe) >= 20) - size = 3 * SZ_4K; + size += 2 * SZ_4K; else - size = 14 * SZ_4K; + size += 13 * SZ_4K; break; default: WARN(1, "Unknown engine class: %d", class); @@ -72,7 
+76,7 @@ size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) case XE_ENGINE_CLASS_VIDEO_DECODE: case XE_ENGINE_CLASS_VIDEO_ENHANCE: case XE_ENGINE_CLASS_OTHER: - size = 2 * SZ_4K; + size += 1 * SZ_4K; } /* Add indirect ring state page */ @@ -652,7 +656,6 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) #define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) #define LRC_PARALLEL_PPHWSP_OFFSET 2048 #define LRC_ENGINE_ID_PPHWSP_OFFSET 2096 -#define LRC_PPHWSP_SIZE SZ_4K u32 xe_lrc_regs_offset(struct xe_lrc *lrc) { @@ -997,6 +1000,8 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE; + if (vm && vm->xef) /* userspace */ + bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; /* * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address @@ -1566,6 +1571,7 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH3D(3DSTATE_CLIP_MESH); MATCH3D(3DSTATE_SBE_MESH); MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); + MATCH3D(3DSTATE_COARSE_PIXEL); MATCH3D(3DSTATE_DRAWING_RECTANGLE); MATCH3D(3DSTATE_CHROMA_KEY); diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c index 404fa2a456d5..49c45ec3e83c 100644 --- a/drivers/gpu/drm/xe/xe_memirq.c +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -86,7 +86,7 @@ static const char *guc_name(struct xe_guc *guc) * This object needs to be 4KiB aligned. * * - _`Interrupt Source Report Page`: this is the equivalent of the - * GEN11_GT_INTR_DWx registers, with each bit in those registers being + * GT_INTR_DWx registers, with each bit in those registers being * mapped to a byte here. The offsets are the same, just bytes instead * of bits. This object needs to be cacheline aligned. 
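 * For example (an illustrative reading of the mapping above), a source that GT_INTR_DW0 would report in bit 5 shows up here as a non-zero byte at byte offset 5 within the page.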
* diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 5a3e89022c38..8f8e9fdfb2a8 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -97,7 +97,7 @@ struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile) return tile->migrate->q; } -static void xe_migrate_fini(struct drm_device *dev, void *arg) +static void xe_migrate_fini(void *arg) { struct xe_migrate *m = arg; @@ -209,7 +209,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, num_entries * XE_PAGE_SIZE, ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_PINNED | XE_BO_FLAG_PAGETABLE); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -401,7 +400,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) struct xe_vm *vm; int err; - m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL); + m = devm_kzalloc(xe->drm.dev, sizeof(*m), GFP_KERNEL); if (!m) return ERR_PTR(-ENOMEM); @@ -455,7 +454,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) might_lock(&m->job_mutex); fs_reclaim_release(GFP_KERNEL); - err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m); + err = devm_add_action_or_reset(xe->drm.dev, xe_migrate_fini, m); if (err) return ERR_PTR(err); @@ -670,6 +669,7 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, u32 mocs = 0; u32 tile_y = 0; + xe_gt_assert(gt, !(pitch & 3)); xe_gt_assert(gt, size / pitch <= S16_MAX); xe_gt_assert(gt, pitch / 4 <= S16_MAX); xe_gt_assert(gt, pitch <= U16_MAX); @@ -779,10 +779,12 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, bool dst_is_pltt = dst->mem_type == XE_PL_TT; bool src_is_vram = mem_type_is_vram(src->mem_type); bool dst_is_vram = mem_type_is_vram(dst->mem_type); + bool type_device = src_bo->ttm.type == ttm_bo_type_device; + bool needs_ccs_emit = type_device && xe_migrate_needs_ccs_emit(xe); bool copy_ccs = xe_device_has_flat_ccs(xe) && xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo); bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram); - bool use_comp_pat = xe_device_has_flat_ccs(xe) && + bool use_comp_pat = type_device && xe_device_has_flat_ccs(xe) && GRAPHICS_VER(xe) >= 20 && src_is_vram && !dst_is_vram; /* Copying CCS between two different BOs is not supported yet. */ @@ -839,6 +841,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, avail_pts, avail_pts); if (copy_system_ccs) { + xe_assert(xe, type_device); ccs_size = xe_device_ccs_bytes(xe, src_L0); batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs, &ccs_pt, 0, @@ -849,7 +852,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, /* Add copy commands size here */ batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) + - ((xe_migrate_needs_ccs_emit(xe) ? EMIT_COPY_CCS_DW : 0)); + ((needs_ccs_emit ? EMIT_COPY_CCS_DW : 0)); bb = xe_bb_new(gt, batch_size, usm); if (IS_ERR(bb)) { @@ -878,7 +881,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (!copy_only_ccs) emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE); - if (xe_migrate_needs_ccs_emit(xe)) + if (needs_ccs_emit) flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, IS_DGFX(xe) ? 
src_is_vram : src_is_pltt, dst_L0_ofs, @@ -1601,55 +1604,63 @@ enum xe_migrate_copy_dir { XE_MIGRATE_COPY_TO_SRAM, }; +#define XE_CACHELINE_BYTES 64ull +#define XE_CACHELINE_MASK (XE_CACHELINE_BYTES - 1) + static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, - unsigned long npages, + unsigned long len, + unsigned long sram_offset, dma_addr_t *sram_addr, u64 vram_addr, const enum xe_migrate_copy_dir dir) { struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); + bool use_usm_batch = xe->info.has_usm; struct dma_fence *fence = NULL; u32 batch_size = 2; u64 src_L0_ofs, dst_L0_ofs; - u64 round_update_size; struct xe_sched_job *job; struct xe_bb *bb; u32 update_idx, pt_slot = 0; + unsigned long npages = DIV_ROUND_UP(len + sram_offset, PAGE_SIZE); + unsigned int pitch = len >= PAGE_SIZE && !(len & ~PAGE_MASK) ? + PAGE_SIZE : 4; int err; - if (npages * PAGE_SIZE > MAX_PREEMPTDISABLE_TRANSFER) - return ERR_PTR(-EINVAL); + if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) || + (sram_offset | vram_addr) & XE_CACHELINE_MASK)) + return ERR_PTR(-EOPNOTSUPP); + + xe_assert(xe, npages * PAGE_SIZE <= MAX_PREEMPTDISABLE_TRANSFER); - round_update_size = npages * PAGE_SIZE; - batch_size += pte_update_cmd_size(round_update_size); + batch_size += pte_update_cmd_size(len); batch_size += EMIT_COPY_DW; - bb = xe_bb_new(gt, batch_size, true); + bb = xe_bb_new(gt, batch_size, use_usm_batch); if (IS_ERR(bb)) { err = PTR_ERR(bb); return ERR_PTR(err); } build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE, - sram_addr, round_update_size); + sram_addr, len + sram_offset); if (dir == XE_MIGRATE_COPY_TO_VRAM) { - src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0); + src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; dst_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false); } else { src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false); - dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0); + dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; } bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; - emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, round_update_size, - XE_PAGE_SIZE); + emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, len, pitch); job = xe_bb_create_migration_job(m->q, bb, - xe_migrate_batch_base(m, true), + xe_migrate_batch_base(m, use_usm_batch), update_idx); if (IS_ERR(job)) { err = PTR_ERR(job); @@ -1694,7 +1705,7 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, dma_addr_t *src_addr, u64 dst_addr) { - return xe_migrate_vram(m, npages, src_addr, dst_addr, + return xe_migrate_vram(m, npages * PAGE_SIZE, 0, src_addr, dst_addr, XE_MIGRATE_COPY_TO_VRAM); } @@ -1715,10 +1726,193 @@ struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m, u64 src_addr, dma_addr_t *dst_addr) { - return xe_migrate_vram(m, npages, dst_addr, src_addr, + return xe_migrate_vram(m, npages * PAGE_SIZE, 0, dst_addr, src_addr, XE_MIGRATE_COPY_TO_SRAM); } +static void xe_migrate_dma_unmap(struct xe_device *xe, dma_addr_t *dma_addr, + int len, int write) +{ + unsigned long i, npages = DIV_ROUND_UP(len, PAGE_SIZE); + + for (i = 0; i < npages; ++i) { + if (!dma_addr[i]) + break; + + dma_unmap_page(xe->drm.dev, dma_addr[i], PAGE_SIZE, + write ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE); + } + kfree(dma_addr); +} + +static dma_addr_t *xe_migrate_dma_map(struct xe_device *xe, + void *buf, int len, int write) +{ + dma_addr_t *dma_addr; + unsigned long i, npages = DIV_ROUND_UP(len, PAGE_SIZE); + + dma_addr = kcalloc(npages, sizeof(*dma_addr), GFP_KERNEL); + if (!dma_addr) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < npages; ++i) { + dma_addr_t addr; + struct page *page; + + if (is_vmalloc_addr(buf)) + page = vmalloc_to_page(buf); + else + page = virt_to_page(buf); + + addr = dma_map_page(xe->drm.dev, + page, 0, PAGE_SIZE, + write ? DMA_TO_DEVICE : + DMA_FROM_DEVICE); + if (dma_mapping_error(xe->drm.dev, addr)) + goto err_fault; + + dma_addr[i] = addr; + buf += PAGE_SIZE; + } + + return dma_addr; + +err_fault: + xe_migrate_dma_unmap(xe, dma_addr, len, write); + return ERR_PTR(-EFAULT); +} + +/** + * xe_migrate_access_memory - Access memory of a BO via GPU + * + * @m: The migration context. + * @bo: buffer object + * @offset: access offset into buffer object + * @buf: pointer to caller memory to read into or write from + * @len: length of access + * @write: write access + * + * Access the memory of a BO via the GPU, either reading into or writing from a + * passed-in pointer. The pointer is DMA mapped for GPU access, and GPU commands + * are issued to read from or write to it. + * + * Returns: + * 0 if successful, negative error code on failure. + */ +int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, + unsigned long offset, void *buf, int len, + int write) +{ + struct xe_tile *tile = m->tile; + struct xe_device *xe = tile_to_xe(tile); + struct xe_res_cursor cursor; + struct dma_fence *fence = NULL; + dma_addr_t *dma_addr; + unsigned long page_offset = (unsigned long)buf & ~PAGE_MASK; + int bytes_left = len, current_page = 0; + void *orig_buf = buf; + + xe_bo_assert_held(bo); + + /* Use a bounce buffer for small and unaligned accesses */ + if (len & XE_CACHELINE_MASK || + ((uintptr_t)buf | offset) & XE_CACHELINE_MASK) { + int buf_offset = 0; + + /* + * Less than ideal for large unaligned access but this should be + * fairly rare, can fixup if this becomes common.
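+	 * Each pass below is effectively a read-modify-write of one aligned cacheline: read the line into the bounce buffer, patch in the caller's bytes at the intra-line offset, then (for writes) push the whole line back out.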
+ */ + do { + u8 bounce[XE_CACHELINE_BYTES]; + void *ptr = (void *)bounce; + int err; + int copy_bytes = min_t(int, bytes_left, + XE_CACHELINE_BYTES - + (offset & XE_CACHELINE_MASK)); + int ptr_offset = offset & XE_CACHELINE_MASK; + + err = xe_migrate_access_memory(m, bo, + offset & + ~XE_CACHELINE_MASK, + (void *)ptr, + sizeof(bounce), 0); + if (err) + return err; + + if (write) { + memcpy(ptr + ptr_offset, buf + buf_offset, copy_bytes); + + err = xe_migrate_access_memory(m, bo, + offset & ~XE_CACHELINE_MASK, + (void *)ptr, + sizeof(bounce), write); + if (err) + return err; + } else { + memcpy(buf + buf_offset, ptr + ptr_offset, + copy_bytes); + } + + bytes_left -= copy_bytes; + buf_offset += copy_bytes; + offset += copy_bytes; + } while (bytes_left); + + return 0; + } + + dma_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write); + if (IS_ERR(dma_addr)) + return PTR_ERR(dma_addr); + + xe_res_first(bo->ttm.resource, offset, bo->size - offset, &cursor); + + do { + struct dma_fence *__fence; + u64 vram_addr = vram_region_gpu_offset(bo->ttm.resource) + + cursor.start; + int current_bytes; + + if (cursor.size > MAX_PREEMPTDISABLE_TRANSFER) + current_bytes = min_t(int, bytes_left, + MAX_PREEMPTDISABLE_TRANSFER); + else + current_bytes = min_t(int, bytes_left, cursor.size); + + if (fence) + dma_fence_put(fence); + + __fence = xe_migrate_vram(m, current_bytes, + (unsigned long)buf & ~PAGE_MASK, + dma_addr + current_page, + vram_addr, write ? + XE_MIGRATE_COPY_TO_VRAM : + XE_MIGRATE_COPY_TO_SRAM); + if (IS_ERR(__fence)) { + if (fence) + dma_fence_wait(fence, false); + fence = __fence; + goto out_err; + } + fence = __fence; + + buf += current_bytes; + offset += current_bytes; + current_page = (int)(buf - orig_buf) / PAGE_SIZE; + bytes_left -= current_bytes; + if (bytes_left) + xe_res_next(&cursor, current_bytes); + } while (bytes_left); + + dma_fence_wait(fence, false); + dma_fence_put(fence); + +out_err: + xe_migrate_dma_unmap(xe, dma_addr, len + page_offset, write); + return IS_ERR(fence) ?
PTR_ERR(fence) : 0; +} + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) #include "tests/xe_migrate.c" #endif diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 6ff9a963425c..fb9839c1bae0 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -112,6 +112,10 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct ttm_resource *dst, bool copy_only_ccs); +int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, + unsigned long offset, void *buf, int len, + int write); + #define XE_MIGRATE_CLEAR_FLAG_BO_DATA BIT(0) #define XE_MIGRATE_CLEAR_FLAG_CCS_DATA BIT(1) #define XE_MIGRATE_CLEAR_FLAG_FULL (XE_MIGRATE_CLEAR_FLAG_BO_DATA | \ diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 46301f341773..7357458bc0d2 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -138,6 +138,7 @@ int xe_mmio_probe_early(struct xe_device *xe) return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe); } +ALLOW_ERROR_INJECTION(xe_mmio_probe_early, ERRNO); /* See xe_pci_probe() */ /** * xe_mmio_init() - Initialize an MMIO instance @@ -204,8 +205,9 @@ void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val) trace_xe_reg_rw(mmio, true, addr, val, sizeof(val)); - if (!reg.vf && mmio->sriov_vf_gt) - xe_gt_sriov_vf_write32(mmio->sriov_vf_gt, reg, val); + if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe)) + xe_gt_sriov_vf_write32(mmio->sriov_vf_gt ?: + mmio->tile->primary_gt, reg, val); else writel(val, mmio->regs + addr); } @@ -218,8 +220,9 @@ u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg) /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); - if (!reg.vf && mmio->sriov_vf_gt) - val = xe_gt_sriov_vf_read32(mmio->sriov_vf_gt, reg); + if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe)) + val = xe_gt_sriov_vf_read32(mmio->sriov_vf_gt ?: + mmio->tile->primary_gt, reg); else val = readl(mmio->regs + addr); diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index e861c694f336..e4742e27e2cd 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -11,6 +11,7 @@ #include <drm/drm_module.h> #include "xe_drv.h" +#include "xe_configfs.h" #include "xe_hw_fence.h" #include "xe_pci.h" #include "xe_pm.h" @@ -35,8 +36,8 @@ MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)"); -module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, uint, 0600); -MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)"); +module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, int, 0600); +MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size (in MiB) - <0=disable-resize, 0=max-needed-size[default], >0=force-size"); module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600); MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)"); @@ -86,6 +87,10 @@ static const struct init_funcs init_funcs[] = { .init = xe_check_nomodeset, }, { + .init = xe_configfs_init, + .exit = xe_configfs_exit, + }, + { .init = xe_hw_fence_module_init, .exit = xe_hw_fence_module_exit, }, diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 7ffc98f67e69..fb842fa0552e 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1301,7 +1301,7 @@ static int 
xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_fr int err; u32 idx; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(oa->xe, err)) return -EFAULT; @@ -1338,7 +1338,7 @@ static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from fro if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(oa->xe, err)) return -EFAULT; @@ -2221,6 +2221,7 @@ addr_err: kfree(oa_regs); return ERR_PTR(err); } +ALLOW_ERROR_INJECTION(xe_oa_alloc_regs, ERRNO); static ssize_t show_dynamic_id(struct kobject *kobj, struct kobj_attribute *attr, @@ -2280,7 +2281,7 @@ int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *fi return -EACCES; } - err = __copy_from_user(&param, u64_to_user_ptr(data), sizeof(param)); + err = copy_from_user(&param, u64_to_user_ptr(data), sizeof(param)); if (XE_IOCTL_DBG(oa->xe, err)) return -EFAULT; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index f4d108dc49b1..024175cfe61e 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -62,11 +62,13 @@ struct xe_device_desc { u8 is_dgfx:1; u8 has_display:1; + u8 has_fan_control:1; u8 has_heci_gscfi:1; u8 has_heci_cscfi:1; u8 has_llc:1; u8 has_pxp:1; u8 has_sriov:1; + u8 needs_scratch:1; u8 skip_guc_pc:1; u8 skip_mtcfg:1; u8 skip_pcode:1; @@ -303,6 +305,7 @@ static const struct xe_device_desc dg2_desc = { DG2_FEATURES, .has_display = true, + .has_fan_control = true, }; static const __maybe_unused struct xe_device_desc pvc_desc = { @@ -330,6 +333,7 @@ static const struct xe_device_desc lnl_desc = { .dma_mask_size = 46, .has_display = true, .has_pxp = true, + .needs_scratch = true, }; static const struct xe_device_desc bmg_desc = { @@ -337,7 +341,9 @@ static const struct xe_device_desc bmg_desc = { PLATFORM(BATTLEMAGE), .dma_mask_size = 46, .has_display = true, + .has_fan_control = true, .has_heci_cscfi = 1, + .needs_scratch = true, }; static const struct xe_device_desc ptl_desc = { @@ -346,6 +352,7 @@ .has_display = true, .has_sriov = true, .require_force_probe = true, + .needs_scratch = true, }; #undef PLATFORM @@ -576,6 +583,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.dma_mask_size = desc->dma_mask_size; xe->info.is_dgfx = desc->is_dgfx; + xe->info.has_fan_control = desc->has_fan_control; xe->info.has_heci_gscfi = desc->has_heci_gscfi; xe->info.has_heci_cscfi = desc->has_heci_cscfi; xe->info.has_llc = desc->has_llc; @@ -584,6 +592,7 @@ xe->info.skip_guc_pc = desc->skip_guc_pc; xe->info.skip_mtcfg = desc->skip_mtcfg; xe->info.skip_pcode = desc->skip_pcode; + xe->info.needs_scratch = desc->needs_scratch; xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && xe_modparam.probe_display && @@ -735,7 +744,7 @@ static void xe_pci_remove(struct pci_dev *pdev) return; xe_device_remove(xe); - xe_pm_runtime_fini(xe); + xe_pm_fini(xe); } /* @@ -805,18 +814,17 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return err; err = xe_device_probe_early(xe); - if (err) { - /* - * In Boot Survivability mode, no drm card is exposed and driver - * is loaded with bare minimum to allow for firmware to be - * flashed through mei.
If early probe failed, but it managed to - * enable survivability mode, return success. - */ - if (xe_survivability_mode_is_enabled(xe)) - return 0; + /* + * In Boot Survivability mode, no drm card is exposed and driver + * is loaded with bare minimum to allow for firmware to be + * flashed through mei. Return success if survivability mode + * is enabled due to a pcode failure or configfs being set + */ + if (xe_survivability_mode_is_enabled(xe)) + return 0; + if (err) return err; - } err = xe_info_init(xe, desc); if (err) @@ -922,6 +930,7 @@ static int xe_pci_suspend(struct device *dev) pci_save_state(pdev); pci_disable_device(pdev); + pci_set_power_state(pdev, PCI_D3cold); return 0; } diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 09ee8a06fe2e..8813efdcafbb 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -7,6 +7,8 @@ #include "xe_device.h" #include "xe_gt_sriov_pf_config.h" #include "xe_gt_sriov_pf_control.h" +#include "xe_gt_sriov_printk.h" +#include "xe_guc_engine_activity.h" #include "xe_pci_sriov.h" #include "xe_pm.h" #include "xe_sriov.h" @@ -111,6 +113,20 @@ static void pf_link_vfs(struct xe_device *xe, int num_vfs) } } +static void pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs, bool enable) +{ + struct xe_gt *gt; + unsigned int id; + int ret = 0; + + for_each_gt(gt, xe, id) { + ret = xe_guc_engine_activity_function_stats(&gt->uc.guc, num_vfs, enable); + if (ret) + xe_gt_sriov_info(gt, "Failed to %s engine activity function stats (%pe)\n", + str_enable_disable(enable), ERR_PTR(ret)); + } +} + static int pf_enable_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -145,6 +161,9 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) xe_sriov_info(xe, "Enabled %u of %u VF%s\n", num_vfs, total_vfs, str_plural(total_vfs)); + + pf_engine_activity_stats(xe, num_vfs, true); + return num_vfs; failed: @@ -168,6 +187,8 @@ static int pf_disable_vfs(struct xe_device *xe) if (!num_vfs) return 0; + pf_engine_activity_stats(xe, num_vfs, false); + pci_disable_sriov(pdev); pf_reset_vfs(xe, num_vfs); diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 9333ce776a6e..cf955b3ed52c 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -7,6 +7,7 @@ #include <linux/delay.h> #include <linux/errno.h> +#include <linux/error-injection.h> #include <drm/drm_managed.h> @@ -323,3 +324,4 @@ int xe_pcode_probe_early(struct xe_device *xe) { return xe_pcode_ready(xe, false); } +ALLOW_ERROR_INJECTION(xe_pcode_probe_early, ERRNO); /* See xe_pci_probe */ diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 2bae9afdbd35..127d4d26c4cf 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -34,6 +34,7 @@ #define DGFX_PCODE_STATUS 0x7E #define DGFX_GET_INIT_STATUS 0x0 #define DGFX_INIT_STATUS_COMPLETE 0x1 +#define DGFX_LINK_DOWNGRADE_STATUS REG_BIT(31) #define PCODE_POWER_SETUP 0x7C #define POWER_SETUP_SUBCOMMAND_READ_I1 0x4 @@ -49,6 +50,9 @@ /* Domain IDs (param2) */ #define PCODE_MBOX_DOMAIN_HBM 0x2 +#define FAN_SPEED_CONTROL 0x7D +#define FSC_READ_NUM_FANS 0x4 + #define PCODE_SCRATCH(x) XE_REG(0x138320 + ((x) * 4)) /* PCODE_SCRATCH0 */ #define AUXINFO_REG_OFFSET REG_GENMASK(17, 15) @@ -63,6 +67,10 @@ /* Auxiliary info bits */ #define AUXINFO_HISTORY_OFFSET REG_GENMASK(31, 29) +#define BMG_PCIE_CAP XE_REG(0x138340) +#define LINK_DOWNGRADE
REG_GENMASK(1, 0) +#define DOWNGRADE_CAPABLE 2 + struct pcode_err_decode { int errno; const char *str; diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 7b6b754ad6eb..ff749edc005b 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -16,7 +16,6 @@ #include "xe_bo.h" #include "xe_bo_evict.h" #include "xe_device.h" -#include "xe_device_sysfs.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_guc.h" @@ -188,7 +187,7 @@ int xe_pm_resume(struct xe_device *xe) * This only restores pinned memory which is the memory required for the * GT(s) to resume. */ - err = xe_bo_restore_kernel(xe); + err = xe_bo_restore_early(xe); if (err) goto err; @@ -199,7 +198,7 @@ int xe_pm_resume(struct xe_device *xe) xe_display_pm_resume(xe); - err = xe_bo_restore_user(xe); + err = xe_bo_restore_late(xe); if (err) goto err; @@ -273,6 +272,7 @@ int xe_pm_init_early(struct xe_device *xe) if (err) return err; + xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe); return 0; } ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */ @@ -286,6 +286,42 @@ static u32 vram_threshold_value(struct xe_device *xe) return DEFAULT_VRAM_THRESHOLD; } +static int xe_pm_notifier_callback(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier); + int err = 0; + + switch (action) { + case PM_HIBERNATION_PREPARE: + case PM_SUSPEND_PREPARE: + xe_pm_runtime_get(xe); + err = xe_bo_evict_all_user(xe); + if (err) { + drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err); + xe_pm_runtime_put(xe); + break; + } + + err = xe_bo_notifier_prepare_all_pinned(xe); + if (err) { + drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err); + xe_pm_runtime_put(xe); + } + break; + case PM_POST_HIBERNATION: + case PM_POST_SUSPEND: + xe_bo_notifier_unprepare_all_pinned(xe); + xe_pm_runtime_put(xe); + break; + } + + if (err) + return NOTIFY_BAD; + + return NOTIFY_DONE; +} + /** * xe_pm_init - Initialize Xe Power Management * @xe: xe device instance @@ -299,33 +335,31 @@ int xe_pm_init(struct xe_device *xe) u32 vram_threshold; int err; + xe->pm_notifier.notifier_call = xe_pm_notifier_callback; + err = register_pm_notifier(&xe->pm_notifier); + if (err) + return err; + /* For now suspend/resume is only allowed with GuC */ if (!xe_device_uc_enabled(xe)) return 0; - xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe); - if (xe->d3cold.capable) { - err = xe_device_sysfs_init(xe); - if (err) - return err; - vram_threshold = vram_threshold_value(xe); err = xe_pm_set_vram_threshold(xe, vram_threshold); if (err) - return err; + goto err_unregister; } xe_pm_runtime_init(xe); - return 0; + +err_unregister: + unregister_pm_notifier(&xe->pm_notifier); + return err; } -/** - * xe_pm_runtime_fini - Finalize Runtime PM - * @xe: xe device instance - */ -void xe_pm_runtime_fini(struct xe_device *xe) +static void xe_pm_runtime_fini(struct xe_device *xe) { struct device *dev = xe->drm.dev; @@ -333,6 +367,18 @@ void xe_pm_runtime_fini(struct xe_device *xe) pm_runtime_forbid(dev); } +/** + * xe_pm_fini - Finalize PM + * @xe: xe device instance + */ +void xe_pm_fini(struct xe_device *xe) +{ + if (xe_device_uc_enabled(xe)) + xe_pm_runtime_fini(xe); + + unregister_pm_notifier(&xe->pm_notifier); +} + static void xe_pm_write_callback_task(struct xe_device *xe, struct task_struct *task) { @@ -484,7 +530,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) * This only restores pinned memory which is the memory * required for the GT(s) 
to resume. */ - err = xe_bo_restore_kernel(xe); + err = xe_bo_restore_early(xe); if (err) goto out; } @@ -497,7 +543,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) xe_display_pm_runtime_resume(xe); if (xe->d3cold.allowed) { - err = xe_bo_restore_user(xe); + err = xe_bo_restore_late(xe); if (err) goto out; } @@ -641,7 +687,7 @@ static bool xe_pm_suspending_or_resuming(struct xe_device *xe) return dev->power.runtime_status == RPM_SUSPENDING || dev->power.runtime_status == RPM_RESUMING || - pm_suspend_target_state != PM_SUSPEND_ON; + pm_suspend_in_progress(); #else return false; #endif diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 998d1ed64556..59678b310e55 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -17,7 +17,7 @@ int xe_pm_resume(struct xe_device *xe); int xe_pm_init_early(struct xe_device *xe); int xe_pm_init(struct xe_device *xe); -void xe_pm_runtime_fini(struct xe_device *xe); +void xe_pm_fini(struct xe_device *xe); bool xe_pm_runtime_suspended(struct xe_device *xe); int xe_pm_runtime_suspend(struct xe_device *xe); int xe_pm_runtime_resume(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c index 4f62a6e515d6..69df0e3520a5 100644 --- a/drivers/gpu/drm/xe/xe_pmu.c +++ b/drivers/gpu/drm/xe/xe_pmu.c @@ -10,9 +10,11 @@ #include "xe_force_wake.h" #include "xe_gt_idle.h" #include "xe_guc_engine_activity.h" +#include "xe_guc_pc.h" #include "xe_hw_engine.h" #include "xe_pm.h" #include "xe_pmu.h" +#include "xe_sriov_pf_helpers.h" /** * DOC: Xe PMU (Performance Monitoring Unit) @@ -32,9 +34,10 @@ * gt[60:63] Selects gt for the event * engine_class[20:27] Selects engine-class for event * engine_instance[12:19] Selects the engine-instance for the event + * function[44:59] Selects the function of the event (SRIOV enabled) * * For engine specific events (engine-*), gt, engine_class and engine_instance parameters must be - * set as populated by DRM_XE_DEVICE_QUERY_ENGINES. + * set as populated by DRM_XE_DEVICE_QUERY_ENGINES and function if SRIOV is enabled. * * For gt specific events (gt-*) gt parameter must be passed. All other parameters will be 0. 
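 * As a purely illustrative encoding (hypothetical values): engine-active-ticks (0x02) for function 2, engine class 1, engine instance 0 on gt 0 would be config = (2 << 44) | (1 << 20) | 0x02 = 0x200000100002, which a system-wide perf_event_open() consumer (pid == -1, a valid cpu) would pass in attr.config.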
* @@ -49,6 +52,7 @@ */ #define XE_PMU_EVENT_GT_MASK GENMASK_ULL(63, 60) +#define XE_PMU_EVENT_FUNCTION_MASK GENMASK_ULL(59, 44) #define XE_PMU_EVENT_ENGINE_CLASS_MASK GENMASK_ULL(27, 20) #define XE_PMU_EVENT_ENGINE_INSTANCE_MASK GENMASK_ULL(19, 12) #define XE_PMU_EVENT_ID_MASK GENMASK_ULL(11, 0) @@ -58,6 +62,11 @@ static unsigned int config_to_event_id(u64 config) { return FIELD_GET(XE_PMU_EVENT_ID_MASK, config); } +static unsigned int config_to_function_id(u64 config) +{ + return FIELD_GET(XE_PMU_EVENT_FUNCTION_MASK, config); +} + static unsigned int config_to_engine_class(u64 config) { return FIELD_GET(XE_PMU_EVENT_ENGINE_CLASS_MASK, config); @@ -76,6 +85,8 @@ static unsigned int config_to_gt_id(u64 config) #define XE_PMU_EVENT_GT_C6_RESIDENCY 0x01 #define XE_PMU_EVENT_ENGINE_ACTIVE_TICKS 0x02 #define XE_PMU_EVENT_ENGINE_TOTAL_TICKS 0x03 +#define XE_PMU_EVENT_GT_ACTUAL_FREQUENCY 0x04 +#define XE_PMU_EVENT_GT_REQUESTED_FREQUENCY 0x05 static struct xe_gt *event_to_gt(struct perf_event *event) { @@ -111,6 +122,14 @@ static bool is_engine_event(u64 config) event_id == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS); } +static bool is_gt_frequency_event(struct perf_event *event) +{ + u32 id = config_to_event_id(event->attr.config); + + return id == XE_PMU_EVENT_GT_ACTUAL_FREQUENCY || + id == XE_PMU_EVENT_GT_REQUESTED_FREQUENCY; +} + static bool event_gt_forcewake(struct perf_event *event) { struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); @@ -118,7 +137,7 @@ static bool event_gt_forcewake(struct perf_event *event) struct xe_gt *gt; unsigned int *fw_ref; - if (!is_engine_event(config)) + if (!is_engine_event(config) && !is_gt_frequency_event(event)) return true; gt = xe_device_get_gt(xe, config_to_gt_id(config)); @@ -151,7 +170,7 @@ static bool event_supported(struct xe_pmu *pmu, unsigned int gt, static bool event_param_valid(struct perf_event *event) { struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); - unsigned int engine_class, engine_instance; + unsigned int engine_class, engine_instance, function_id; u64 config = event->attr.config; struct xe_gt *gt; @@ -161,16 +180,28 @@ static bool event_param_valid(struct perf_event *event) engine_class = config_to_engine_class(config); engine_instance = config_to_engine_instance(config); + function_id = config_to_function_id(config); switch (config_to_event_id(config)) { case XE_PMU_EVENT_GT_C6_RESIDENCY: - if (engine_class || engine_instance) + case XE_PMU_EVENT_GT_ACTUAL_FREQUENCY: + case XE_PMU_EVENT_GT_REQUESTED_FREQUENCY: + if (engine_class || engine_instance || function_id) return false; break; case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS: case XE_PMU_EVENT_ENGINE_TOTAL_TICKS: if (!event_to_hwe(event)) return false; + + /* PF(0) and total vfs when SRIOV is enabled */ + if (IS_SRIOV_PF(xe)) { + if (function_id > xe_sriov_pf_get_totalvfs(xe)) + return false; + } else if (function_id) { + return false; + } + break; } @@ -242,13 +273,17 @@ static int xe_pmu_event_init(struct perf_event *event) static u64 read_engine_events(struct xe_gt *gt, struct perf_event *event) { struct xe_hw_engine *hwe; - u64 val = 0; + unsigned int function_id; + u64 config, val = 0; + + config = event->attr.config; + function_id = config_to_function_id(config); hwe = event_to_hwe(event); - if (config_to_event_id(event->attr.config) == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS) - val = xe_guc_engine_activity_active_ticks(&gt->uc.guc, hwe); + if (config_to_event_id(config) == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS) + val = xe_guc_engine_activity_active_ticks(&gt->uc.guc,
hwe, function_id); else - val = xe_guc_engine_activity_total_ticks(&gt->uc.guc, hwe); + val = xe_guc_engine_activity_total_ticks(&gt->uc.guc, hwe, function_id); return val; } @@ -266,6 +301,10 @@ static u64 __xe_pmu_event_read(struct perf_event *event) case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS: case XE_PMU_EVENT_ENGINE_TOTAL_TICKS: return read_engine_events(gt, event); + case XE_PMU_EVENT_GT_ACTUAL_FREQUENCY: + return xe_guc_pc_get_act_freq(&gt->uc.guc.pc); + case XE_PMU_EVENT_GT_REQUESTED_FREQUENCY: + return xe_guc_pc_get_cur_freq_fw(&gt->uc.guc.pc); } return 0; @@ -281,7 +320,14 @@ static void xe_pmu_event_update(struct perf_event *event) new = __xe_pmu_event_read(event); } while (!local64_try_cmpxchg(&hwc->prev_count, &prev, new)); - local64_add(new - prev, &event->count); + /* + * GT frequency is not a monotonically increasing counter, so add the + * instantaneous value instead. + */ + if (is_gt_frequency_event(event)) + local64_add(new, &event->count); + else + local64_add(new - prev, &event->count); } static void xe_pmu_event_read(struct perf_event *event) @@ -351,6 +397,7 @@ static void xe_pmu_event_del(struct perf_event *event, int flags) } PMU_FORMAT_ATTR(gt, "config:60-63"); +PMU_FORMAT_ATTR(function, "config:44-59"); PMU_FORMAT_ATTR(engine_class, "config:20-27"); PMU_FORMAT_ATTR(engine_instance, "config:12-19"); PMU_FORMAT_ATTR(event, "config:0-11"); @@ -359,6 +406,7 @@ static struct attribute *pmu_format_attrs[] = { &format_attr_event.attr, &format_attr_engine_class.attr, &format_attr_engine_instance.attr, + &format_attr_function.attr, &format_attr_gt.attr, NULL, }; @@ -419,6 +467,10 @@ static ssize_t event_attr_show(struct device *dev, XE_EVENT_ATTR_SIMPLE(gt-c6-residency, gt_c6_residency, XE_PMU_EVENT_GT_C6_RESIDENCY, "ms"); XE_EVENT_ATTR_NOUNIT(engine-active-ticks, engine_active_ticks, XE_PMU_EVENT_ENGINE_ACTIVE_TICKS); XE_EVENT_ATTR_NOUNIT(engine-total-ticks, engine_total_ticks, XE_PMU_EVENT_ENGINE_TOTAL_TICKS); +XE_EVENT_ATTR_SIMPLE(gt-actual-frequency, gt_actual_frequency, + XE_PMU_EVENT_GT_ACTUAL_FREQUENCY, "MHz"); +XE_EVENT_ATTR_SIMPLE(gt-requested-frequency, gt_requested_frequency, + XE_PMU_EVENT_GT_REQUESTED_FREQUENCY, "MHz"); static struct attribute *pmu_empty_event_attrs[] = { /* Empty - all events are added as groups with .attr_update() */ @@ -434,6 +486,8 @@ static const struct attribute_group *pmu_events_attr_update[] = { &pmu_group_gt_c6_residency, &pmu_group_engine_active_ticks, &pmu_group_engine_total_ticks, + &pmu_group_gt_actual_frequency, + &pmu_group_gt_requested_frequency, NULL, }; @@ -442,8 +496,11 @@ static void set_supported_events(struct xe_pmu *pmu) { struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); struct xe_gt *gt = xe_device_get_gt(xe, 0); - if (!xe->info.skip_guc_pc) + if (!xe->info.skip_guc_pc) { pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_C6_RESIDENCY); + pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_ACTUAL_FREQUENCY); + pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_REQUESTED_FREQUENCY); + } if (xe_guc_engine_activity_supported(&gt->uc.guc)) { pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_ENGINE_ACTIVE_TICKS); diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 856038553b81..b04756a97cdc 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -103,6 +103,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, { struct xe_pt *pt; struct xe_bo *bo; + u32 bo_flags; int err; if (level) { @@ -115,14 +116,16 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, if (!pt)
return ERR_PTR(-ENOMEM); + bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | + XE_BO_FLAG_NO_RESV_EVICT | XE_BO_FLAG_PAGETABLE; + if (vm->xef) /* userspace */ + bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; + pt->level = level; bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_NO_RESV_EVICT | - XE_BO_FLAG_PAGETABLE); + bo_flags); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto err_kfree; @@ -269,8 +272,11 @@ struct xe_pt_update { bool preexisting; }; +/** + * struct xe_pt_stage_bind_walk - Walk state for the stage_bind walk. + */ struct xe_pt_stage_bind_walk { - /** base: The base class. */ + /** @base: The base class. */ struct xe_pt_walk base; /* Input parameters for the walk */ /** @vm: The vm we're building for. */ struct xe_vm *vm; /** @tile: The tile we're building for. */ struct xe_tile *tile; - /** @default_pte: PTE flag only template. No address is associated */ - u64 default_pte; + /** @default_vram_pte: PTE flag only template for VRAM. No address is associated */ + u64 default_vram_pte; + /** @default_system_pte: PTE flag only template for System. No address is associated */ + u64 default_system_pte; /** @dma_offset: DMA offset to add to the PTE. */ u64 dma_offset; /** - * @needs_64k: This address range enforces 64K alignment and - * granularity. + * @needs_64K: This address range enforces 64K alignment and + * granularity on VRAM. */ bool needs_64K; + /** @clear_pt: clear page table entries during the bind walk */ + bool clear_pt; /** * @vma: VMA being mapped */ @@ -299,6 +309,7 @@ struct xe_pt_stage_bind_walk { u64 va_curs_start; /* Output */ + /** @wupd: Walk output data for page-table updates. */ struct xe_walk_update { /** @wupd.entries: Caller provided storage. */ struct xe_vm_pgtable_update *entries; @@ -316,7 +327,7 @@ struct xe_pt_stage_bind_walk { u64 l0_end_addr; /** @addr_64K: The start address of the current 64K chunk. */ u64 addr_64K; - /** @found_64: Whether @add_64K actually points to a 64K chunk. */ + /** @found_64K: Whether @addr_64K actually points to a 64K chunk. */ bool found_64K; }; @@ -436,6 +447,10 @@ static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, if (xe_vma_is_null(xe_walk->vma)) return true; + /* If we are clearing the page table, there are no DMA addresses */ + if (xe_walk->clear_pt) + return true; + /* Is the DMA address huge PTE size aligned? */ size = next - addr; dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); @@ -515,24 +530,35 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { struct xe_res_cursor *curs = xe_walk->curs; bool is_null = xe_vma_is_null(xe_walk->vma); + bool is_vram = is_null ? false : xe_res_is_vram(curs); XE_WARN_ON(xe_walk->va_curs_start != addr); - pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, pat_index, level); - pte |= xe_walk->default_pte; + if (xe_walk->clear_pt) { + pte = 0; + } else { + pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : - xe_res_dma(curs) + + xe_walk->dma_offset, + xe_walk->vma, + pat_index, level); + if (!is_null) + pte |= is_vram ? xe_walk->default_vram_pte : + xe_walk->default_system_pte; - /* - * Set the XE_PTE_PS64 hint if possible, otherwise if - * this device *requires* 64K PTE size for VRAM, fail.
- */ - if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; - pte |= XE_PTE_PS64; - } else if (XE_WARN_ON(xe_walk->needs_64K)) { - return -EINVAL; + /* + * Set the XE_PTE_PS64 hint if possible, otherwise if + * this device *requires* 64K PTE size for VRAM, fail. + */ + if (level == 0 && !xe_parent->is_compact) { + if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { + xe_walk->vma->gpuva.flags |= + XE_VMA_PTE_64K; + pte |= XE_PTE_PS64; + } else if (XE_WARN_ON(xe_walk->needs_64K && + is_vram)) { + return -EINVAL; + } } } @@ -540,7 +566,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, if (unlikely(ret)) return ret; - if (!is_null) + if (!is_null && !xe_walk->clear_pt) xe_res_next(curs, next - addr); xe_walk->va_curs_start = next; xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); @@ -603,6 +629,44 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { .pt_entry = xe_pt_stage_bind_entry, }; +/* + * Default atomic expectations for different allocation scenarios are as follows: + * + * 1. Traditional API: When the VM is not in LR mode: + * - Device atomics are expected to function with all allocations. + * + * 2. Compute/SVM API: When the VM is in LR mode: + * - Device atomics are the default behavior when the bo is placed in a single region. + * - In all other cases device atomics will be disabled with AE=0 until an application + * request differently using a ioctl like madvise. + */ +static bool xe_atomic_for_vram(struct xe_vm *vm) +{ + return true; +} + +static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_bo *bo) +{ + struct xe_device *xe = vm->xe; + + if (!xe->info.has_device_atomics_on_smem) + return false; + + /* + * If a SMEM+LMEM allocation is backed by SMEM, a device + * atomics will cause a gpu page fault and which then + * gets migrated to LMEM, bind such allocations with + * device atomics enabled. + * + * TODO: Revisit this. Perhaps add something like a + * fault_on_atomics_in_system UAPI flag. + * Note that this also prohibits GPU atomics in LR mode for + * userptr and system memory on DGFX. + */ + return (!IS_DGFX(xe) || (!xe_vm_in_lr_mode(vm) || + (bo && xe_bo_has_single_placement(bo)))); +} + /** * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address * range. @@ -612,6 +676,7 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { * @entries: Storage for the update entries used for connecting the tree to * the main tree at commit time. * @num_entries: On output contains the number of @entries used. + * @clear_pt: Clear the page table entries. * * This function builds a disconnected page-table tree for a given address * range. 
The tree is connected to the main vm tree for the gpu using @@ -625,13 +690,13 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { static int xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, struct xe_svm_range *range, - struct xe_vm_pgtable_update *entries, u32 *num_entries) + struct xe_vm_pgtable_update *entries, + u32 *num_entries, bool clear_pt) { struct xe_device *xe = tile_to_xe(tile); struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); struct xe_res_cursor curs; + struct xe_vm *vm = xe_vma_vm(vma); struct xe_pt_stage_bind_walk xe_walk = { .base = { .ops = &xe_pt_stage_bind_ops, @@ -639,34 +704,31 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, .max_level = XE_PT_HIGHEST_LEVEL, .staging = true, }, - .vm = xe_vma_vm(vma), + .vm = vm, .tile = tile, .curs = &curs, .va_curs_start = range ? range->base.itree.start : xe_vma_start(vma), .vma = vma, .wupd.entries = entries, + .clear_pt = clear_pt, }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; + struct xe_pt *pt = vm->pt_root[tile->id]; int ret; if (range) { /* Move this entire thing to xe_svm.c? */ - xe_svm_notifier_lock(xe_vma_vm(vma)); + xe_svm_notifier_lock(vm); if (!xe_svm_range_pages_valid(range)) { xe_svm_range_debug(range, "BIND PREPARE - RETRY"); - xe_svm_notifier_unlock(xe_vma_vm(vma)); + xe_svm_notifier_unlock(vm); return -EAGAIN; } if (xe_svm_range_has_dma_mapping(range)) { xe_res_first_dma(range->base.dma_addr, 0, range->base.itree.last + 1 - range->base.itree.start, &curs); - is_devmem = xe_res_is_vram(&curs); - if (is_devmem) - xe_svm_range_debug(range, "BIND PREPARE - DMA VRAM"); - else - xe_svm_range_debug(range, "BIND PREPARE - DMA"); + xe_svm_range_debug(range, "BIND PREPARE - MIXED"); } else { xe_assert(xe, false); } @@ -674,54 +736,21 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, * Note, when unlocking the resource cursor dma addresses may become * stale, but the bind will be aborted anyway at commit time. */ - xe_svm_notifier_unlock(xe_vma_vm(vma)); + xe_svm_notifier_unlock(vm); } - xe_walk.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem; + xe_walk.needs_64K = (vm->flags & XE_VM_FLAG_64K); + if (clear_pt) + goto walk_pt; - /** - * Default atomic expectations for different allocation scenarios are as follows: - * - * 1. Traditional API: When the VM is not in LR mode: - * - Device atomics are expected to function with all allocations. - * - * 2. Compute/SVM API: When the VM is in LR mode: - * - Device atomics are the default behavior when the bo is placed in a single region. - * - In all other cases device atomics will be disabled with AE=0 until an application - * request differently using a ioctl like madvise. - */ if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { - if (bo && xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - /** - * If a SMEM+LMEM allocation is backed by SMEM, a device - * atomics will cause a gpu page fault and which then - * gets migrated to LMEM, bind such allocations with - * device atomics enabled. 
- */ - else if (is_devmem) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } else { - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } - - /** - * Unset AE if the platform(PVC) doesn't support it on an - * allocation - */ - if (!xe->info.has_device_atomics_on_smem && !is_devmem) - xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; - } - - if (is_devmem) { - xe_walk.default_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0; + xe_walk.default_vram_pte = xe_atomic_for_vram(vm) ? XE_USM_PPGTT_PTE_AE : 0; + xe_walk.default_system_pte = xe_atomic_for_system(vm, bo) ? + XE_USM_PPGTT_PTE_AE : 0; } - if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) - xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); - + xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM; + xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0; if (!range) xe_bo_assert_held(bo); @@ -739,6 +768,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, curs.size = xe_vma_size(vma); } +walk_pt: ret = xe_pt_walk_range(&pt->base, pt->level, range ? range->base.itree.start : xe_vma_start(vma), range ? range->base.itree.last + 1 : xe_vma_end(vma), @@ -1103,12 +1133,14 @@ static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries, static int xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, struct xe_svm_range *range, - struct xe_vm_pgtable_update *entries, u32 *num_entries) + struct xe_vm_pgtable_update *entries, + u32 *num_entries, bool invalidate_on_bind) { int err; *num_entries = 0; - err = xe_pt_stage_bind(tile, vma, range, entries, num_entries); + err = xe_pt_stage_bind(tile, vma, range, entries, num_entries, + invalidate_on_bind); if (!err) xe_tile_assert(tile, *num_entries); @@ -1420,6 +1452,7 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) return err; } +#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update) { struct xe_vm *vm = pt_update->vops->vm; @@ -1453,6 +1486,7 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update) return 0; } +#endif struct invalidation_fence { struct xe_gt_tlb_invalidation_fence base; @@ -1791,7 +1825,7 @@ static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) + struct xe_vma *vma, bool invalidate_on_bind) { u32 current_op = pt_update_ops->current_op; struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; @@ -1813,7 +1847,7 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, return err; err = xe_pt_prepare_bind(tile, vma, NULL, pt_op->entries, - &pt_op->num_entries); + &pt_op->num_entries, invalidate_on_bind); if (!err) { xe_tile_assert(tile, pt_op->num_entries <= ARRAY_SIZE(pt_op->entries)); @@ -1835,11 +1869,11 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, * If !rebind, and scratch enabled VMs, there is a chance the scratch * PTE is already cached in the TLB so it needs to be invalidated. * On !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. + * LR, in particular on user-space batch buffer chaining, it needs to + * be done here. 
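+	 * (An earlier access may have warmed the TLB with the scratch PTE, and an LR VM has no ring-ops flush between batches to evict it, so without this invalidation a stale scratch entry could keep masking the new binding.)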
*/ if ((!pt_op->rebind && xe_vm_has_scratch(vm) && - xe_vm_in_preempt_fence_mode(vm))) + xe_vm_in_lr_mode(vm))) pt_update_ops->needs_invalidation = true; else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) /* We bump also if batch_invalidate_tlb is true */ @@ -1875,7 +1909,7 @@ static int bind_range_prepare(struct xe_vm *vm, struct xe_tile *tile, pt_op->rebind = BIT(tile->id) & range->tile_present; err = xe_pt_prepare_bind(tile, vma, range, pt_op->entries, - &pt_op->num_entries); + &pt_op->num_entries, false); if (!err) { xe_tile_assert(tile, pt_op->num_entries <= ARRAY_SIZE(pt_op->entries)); @@ -1987,11 +2021,13 @@ static int op_prepare(struct xe_vm *vm, switch (op->base.op) { case DRM_GPUVA_OP_MAP: - if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) || + if ((!op->map.immediate && xe_vm_in_fault_mode(vm) && + !op->map.invalidate_on_bind) || op->map.is_cpu_addr_mirror) break; - err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); + err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma, + op->map.invalidate_on_bind); pt_update_ops->wait_vm_kernel = true; break; case DRM_GPUVA_OP_REMAP: @@ -2005,12 +2041,12 @@ static int op_prepare(struct xe_vm *vm, if (!err && op->remap.prev) { err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.prev); + op->remap.prev, false); pt_update_ops->wait_vm_bookkeep = true; } if (!err && op->remap.next) { err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.next); + op->remap.next, false); pt_update_ops->wait_vm_bookkeep = true; } break; @@ -2032,7 +2068,7 @@ static int op_prepare(struct xe_vm *vm, if (xe_vma_is_cpu_addr_mirror(vma)) break; - err = bind_op_prepare(vm, tile, pt_update_ops, vma); + err = bind_op_prepare(vm, tile, pt_update_ops, vma, false); pt_update_ops->wait_vm_kernel = true; break; } @@ -2115,7 +2151,7 @@ ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO); static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) + struct dma_fence *fence2, bool invalidate_on_bind) { xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma)); @@ -2132,6 +2168,8 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, } vma->tile_present |= BIT(tile->id); vma->tile_staged &= ~BIT(tile->id); + if (invalidate_on_bind) + vma->tile_invalidated |= BIT(tile->id); if (xe_vma_is_userptr(vma)) { lockdep_assert_held_read(&vm->userptr.notifier_lock); to_userptr_vma(vma)->userptr.initial_bind = true; @@ -2193,7 +2231,7 @@ static void op_commit(struct xe_vm *vm, break; bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, - fence2); + fence2, op->map.invalidate_on_bind); break; case DRM_GPUVA_OP_REMAP: { @@ -2206,10 +2244,10 @@ static void op_commit(struct xe_vm *vm, if (op->remap.prev) bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence, fence2); + fence, fence2, false); if (op->remap.next) bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence, fence2); + fence, fence2, false); break; } case DRM_GPUVA_OP_UNMAP: @@ -2227,7 +2265,7 @@ static void op_commit(struct xe_vm *vm, if (!xe_vma_is_cpu_addr_mirror(vma)) bind_op_commit(vm, tile, pt_update_ops, vma, fence, - fence2); + fence2, false); break; } case DRM_GPUVA_OP_DRIVER: @@ -2265,11 +2303,15 @@ static const struct xe_migrate_pt_update_ops userptr_migrate_ops = { .pre_commit = xe_pt_userptr_pre_commit, }; +#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) static const struct xe_migrate_pt_update_ops svm_migrate_ops = { .populate = 
xe_vm_populate_pgtable, .clear = xe_migrate_clear_pgtable_callback, .pre_commit = xe_pt_svm_pre_commit, }; +#else +static const struct xe_migrate_pt_update_ops svm_migrate_ops; +#endif /** * xe_pt_update_ops_run() - Run PT update operations diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 5e65830dad25..2dbf4066d86f 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -340,7 +340,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) if (xe_device_get_root_tile(xe)->mem.vram.usable_size) config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM; - if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_GPUSVM)) + if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_XE_GPUSVM)) config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR; config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 9475e3f74958..fc8447a838c4 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -173,6 +173,9 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) if (xa_empty(&sr->xa)) return; + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; + xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name); fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/xe/xe_ring_ops_types.h b/drivers/gpu/drm/xe/xe_ring_ops_types.h index 1ae56e2ee7b4..d7e3e150a9a5 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops_types.h +++ b/drivers/gpu/drm/xe/xe_ring_ops_types.h @@ -8,7 +8,7 @@ struct xe_sched_job; -#define MAX_JOB_SIZE_DW 48 +#define MAX_JOB_SIZE_DW 58 #define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4) /** diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 13bb62d3e615..29e694bb1219 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -258,9 +258,6 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, rtp_get_context(ctx, &hwe, &gt, &xe); - if (IS_SRIOV_VF(xe)) - return; - xe_assert(xe, entries); for (entry = entries; entry - entries < n_entries; entry++) { diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index f8fe61e25518..1d43e183ca21 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -60,7 +60,8 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3 bo = xe_managed_bo_create_pin_map(xe, tile, size, XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); if (IS_ERR(bo)) { drm_err(&xe->drm, "Failed to prepare %uKiB BO for SA manager (%pe)\n", size / SZ_1K, bo); diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c index cb813b337fd3..1f710b3fc599 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.c +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -10,6 +10,7 @@ #include <linux/pci.h> #include <linux/sysfs.h> +#include "xe_configfs.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_heci_gsc.h" @@ -28,20 +29,32 @@ * This is implemented by loading the driver with bare minimum (no drm card) to allow the firmware * to be flashed through mei and collect telemetry. The driver's probe flow is modified * such that it enters survivability mode when pcode initialization is incomplete and boot status - denotes a failure.
The driver then populates the survivability_mode PCI sysfs indicating - survivability mode and provides additional information required for debug + denotes a failure. * - * KMD exposes below admin-only readable sysfs in survivability mode + * Survivability mode can also be entered manually using the survivability mode attribute available + * through configfs which is beneficial in several use cases. It can be used to address scenarios + * where pcode does not detect failure or for validation purposes. It can also be used in + * In-Field-Repair (IFR) to repair a single card without impacting the other cards in a node. * - * device/survivability_mode: The presence of this file indicates that the card is in survivability - * mode. Also, provides additional information on why the driver entered - * survivability mode. + * Use the command below to enable survivability mode manually:: * - * Capability Information - Provides boot status - * Postcode Information - Provides information about the failure - * Overflow Information - Provides history of previous failures - * Auxiliary Information - Certain failures may have information in - * addition to postcode information + * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode + * + * Refer to :ref:`xe_configfs` for more details on how to use configfs + * + * Survivability mode is indicated by the below admin-only readable sysfs which provides additional + * debug information:: + * + * /sys/bus/pci/devices/<device>/survivability_mode + * + * Capability Information: + * Provides boot status + * Postcode Information: + * Provides information about the failure + * Overflow Information: + * Provides history of previous failures + * Auxiliary Information: + * Certain failures may have information in addition to postcode information */ static u32 aux_history_offset(u32 reg_value) @@ -133,6 +146,7 @@ static void xe_survivability_mode_fini(void *arg) struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct device *dev = &pdev->dev; + xe_configfs_clear_survivability_mode(pdev); sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr); } @@ -186,24 +200,41 @@ bool xe_survivability_mode_is_enabled(struct xe_device *xe) return xe->survivability.mode; } -/* - * survivability_mode_requested - check if it's possible to enable - * survivability mode and that was requested by firmware +/** + * xe_survivability_mode_is_requested - check if it's possible to enable survivability + * mode that was requested by firmware or userspace + * @xe: xe device instance * - * This function reads the boot status from Pcode. + * This function reads configfs and boot status from Pcode. * * Return: true if platform support is available and boot status indicates - * failure, false otherwise. + * failure or if survivability mode is requested, false otherwise.
*/ -static bool survivability_mode_requested(struct xe_device *xe) +bool xe_survivability_mode_is_requested(struct xe_device *xe) { struct xe_survivability *survivability = &xe->survivability; struct xe_mmio *mmio = xe_root_tile_mmio(xe); + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); u32 data; + bool survivability_mode; - if (!IS_DGFX(xe) || xe->info.platform < XE_BATTLEMAGE || IS_SRIOV_VF(xe)) + if (!IS_DGFX(xe) || IS_SRIOV_VF(xe)) return false; + survivability_mode = xe_configfs_get_survivability_mode(pdev); + + if (xe->info.platform < XE_BATTLEMAGE) { + if (survivability_mode) { + dev_err(&pdev->dev, "Survivability Mode is not supported on this card\n"); + xe_configfs_clear_survivability_mode(pdev); + } + return false; + } + + /* Enable survivability mode if set via configfs */ + if (survivability_mode) + return true; + data = xe_mmio_read32(mmio, PCODE_SCRATCH(0)); survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data); @@ -226,7 +257,7 @@ int xe_survivability_mode_enable(struct xe_device *xe) struct xe_survivability_info *info; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - if (!survivability_mode_requested(xe)) + if (!xe_survivability_mode_is_requested(xe)) return 0; survivability->size = MAX_SCRATCH_MMIO; diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h index d7e64885570d..02231c2bf008 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.h +++ b/drivers/gpu/drm/xe/xe_survivability_mode.h @@ -12,5 +12,6 @@ struct xe_device; int xe_survivability_mode_enable(struct xe_device *xe); bool xe_survivability_mode_is_enabled(struct xe_device *xe); +bool xe_survivability_mode_is_requested(struct xe_device *xe); #endif /* _XE_SURVIVABILITY_MODE_H_ */ diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 975094c1a582..6345896585de 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -4,6 +4,7 @@ */ #include "xe_bo.h" +#include "xe_gt_stats.h" #include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_module.h" @@ -348,6 +349,8 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w) up_write(&vm->lock); } +#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) + static struct xe_vram_region *page_to_vr(struct page *page) { return container_of(page_pgmap(page), struct xe_vram_region, pagemap); } @@ -586,6 +589,8 @@ static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = { .copy_to_ram = xe_svm_copy_to_ram, }; +#endif + static const struct drm_gpusvm_ops gpusvm_ops = { .range_alloc = xe_svm_range_alloc, .range_free = xe_svm_range_free, @@ -666,6 +671,7 @@ static bool xe_svm_range_is_valid(struct xe_svm_range *range, (!devmem_only || xe_svm_range_in_vram(range)); } +#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) { return &tile->mem.vram; } @@ -727,6 +733,14 @@ unlock: return err; } +#else +static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, + struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx) +{ + return -EOPNOTSUPP; +} +#endif static bool supports_4K_migration(struct xe_device *xe) { @@ -762,7 +776,7 @@ static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, * xe_svm_handle_pagefault() - SVM handle page fault * @vm: The VM. * @vma: The CPU address mirror VMA. - * @tile: The tile upon the fault occurred. + * @gt: The gt upon which the fault occurred. * @fault_addr: The GPU fault address. * @atomic: The fault atomic access bit.
* @@ -772,7 +786,7 @@ static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, * Return: 0 on success, negative error code on error. */ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, - struct xe_tile *tile, u64 fault_addr, + struct xe_gt *gt, u64 fault_addr, bool atomic) { struct drm_gpusvm_ctx ctx = { @@ -791,12 +805,15 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, struct drm_exec exec; struct dma_fence *fence; int migrate_try_count = ctx.devmem_only ? 3 : 1; + struct xe_tile *tile = gt_to_tile(gt); ktime_t end = 0; int err; lockdep_assert_held_write(&vm->lock); xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma)); + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1); + retry: /* Always process UNMAPs first so view SVM ranges is current */ err = xe_svm_garbage_collector(vm); @@ -930,6 +947,7 @@ int xe_svm_bo_evict(struct xe_bo *bo) } #if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) + static struct drm_pagemap_device_addr xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, struct device *dev, diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index fe58ac2f4baa..30fc78b85b30 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -6,16 +6,19 @@ #ifndef _XE_SVM_H_ #define _XE_SVM_H_ +#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) + #include <drm/drm_pagemap.h> #include <drm/drm_gpusvm.h> #define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER struct xe_bo; -struct xe_vram_region; +struct xe_gt; struct xe_tile; struct xe_vm; struct xe_vma; +struct xe_vram_region; /** struct xe_svm_range - SVM range */ struct xe_svm_range { @@ -38,7 +41,6 @@ struct xe_svm_range { u8 tile_invalidated; }; -#if IS_ENABLED(CONFIG_DRM_GPUSVM) /** * xe_svm_range_pages_valid() - SVM range pages valid * @range: SVM range @@ -59,7 +61,7 @@ void xe_svm_fini(struct xe_vm *vm); void xe_svm_close(struct xe_vm *vm); int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, - struct xe_tile *tile, u64 fault_addr, + struct xe_gt *gt, u64 fault_addr, bool atomic); bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end); @@ -68,9 +70,51 @@ int xe_svm_bo_evict(struct xe_bo *bo); void xe_svm_range_debug(struct xe_svm_range *range, const char *operation); +/** + * xe_svm_range_has_dma_mapping() - SVM range has DMA mapping + * @range: SVM range + * + * Return: True if SVM range has a DMA mapping, False otherwise + */ +static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range) +{ + lockdep_assert_held(&range->base.gpusvm->notifier_lock); + return range->base.flags.has_dma_mapping; +} + +#define xe_svm_assert_in_notifier(vm__) \ + lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) + +#define xe_svm_notifier_lock(vm__) \ + drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) + +#define xe_svm_notifier_unlock(vm__) \ + drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm) + void xe_svm_flush(struct xe_vm *vm); #else +#include <linux/interval_tree.h> + +struct drm_pagemap_device_addr; +struct xe_bo; +struct xe_gt; +struct xe_vm; +struct xe_vma; +struct xe_tile; +struct xe_vram_region; + +#define XE_INTERCONNECT_VRAM 1 + +struct xe_svm_range { + struct { + struct interval_tree_node itree; + const struct drm_pagemap_device_addr *dma_addr; + } base; + u32 tile_present; + u32 tile_invalidated; +}; + static inline bool xe_svm_range_pages_valid(struct xe_svm_range *range) { return false; @@ -100,7 +144,7 @@ void xe_svm_close(struct xe_vm *vm) static inline int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, 
- struct xe_tile *tile, u64 fault_addr, + struct xe_gt *gt, u64 fault_addr, bool atomic) { return 0; @@ -123,31 +167,19 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) { } -static inline void xe_svm_flush(struct xe_vm *vm) +#define xe_svm_assert_in_notifier(...) do {} while (0) +#define xe_svm_range_has_dma_mapping(...) false + +static inline void xe_svm_notifier_lock(struct xe_vm *vm) { } -#endif - -/** - * xe_svm_range_has_dma_mapping() - SVM range has DMA mapping - * @range: SVM range - * - * Return: True if SVM range has a DMA mapping, False otherwise - */ -static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range) +static inline void xe_svm_notifier_unlock(struct xe_vm *vm) { - lockdep_assert_held(&range->base.gpusvm->notifier_lock); - return range->base.flags.has_dma_mapping; } -#define xe_svm_assert_in_notifier(vm__) \ - lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) - -#define xe_svm_notifier_lock(vm__) \ - drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) - -#define xe_svm_notifier_unlock(vm__) \ - drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm) - +static inline void xe_svm_flush(struct xe_vm *vm) +{ +} +#endif #endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index fb0eda3d5682..2741849bbf4d 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -92,6 +92,8 @@ struct uc_fw_entry { enum xe_platform platform; + enum xe_gt_type gt_type; + struct { const char *path; u16 major; @@ -106,32 +108,37 @@ struct fw_blobs_by_type { u32 count; }; -#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(BATTLEMAGE, major_ver(xe, guc, bmg, 70, 29, 2)) \ - fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 29, 2)) \ - fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 29, 2)) \ - fw_def(DG2, major_ver(i915, guc, dg2, 70, 29, 2)) \ - fw_def(DG1, major_ver(i915, guc, dg1, 70, 29, 2)) \ - fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 29, 2)) \ - fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 29, 2)) \ - fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 29, 2)) \ - fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) \ - fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) +/* + * Add an "ANY" define just to make the intent explicit: these entries match any GT type.
+ */ +#define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED + +#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ + fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \ + fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \ + fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \ + fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 44, 1)) \ + fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \ + fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \ + fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \ + fw_def(ALDERLAKE_S, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \ + fw_def(ROCKETLAKE, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \ + fw_def(TIGERLAKE, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ - fw_def(BATTLEMAGE, no_ver(xe, huc, bmg)) \ - fw_def(LUNARLAKE, no_ver(xe, huc, lnl)) \ - fw_def(METEORLAKE, no_ver(i915, huc_gsc, mtl)) \ - fw_def(DG1, no_ver(i915, huc, dg1)) \ - fw_def(ALDERLAKE_P, no_ver(i915, huc, tgl)) \ - fw_def(ALDERLAKE_S, no_ver(i915, huc, tgl)) \ - fw_def(ROCKETLAKE, no_ver(i915, huc, tgl)) \ - fw_def(TIGERLAKE, no_ver(i915, huc, tgl)) + fw_def(BATTLEMAGE, GT_TYPE_ANY, no_ver(xe, huc, bmg)) \ + fw_def(LUNARLAKE, GT_TYPE_ANY, no_ver(xe, huc, lnl)) \ + fw_def(METEORLAKE, GT_TYPE_ANY, no_ver(i915, huc_gsc, mtl)) \ + fw_def(DG1, GT_TYPE_ANY, no_ver(i915, huc, dg1)) \ + fw_def(ALDERLAKE_P, GT_TYPE_ANY, no_ver(i915, huc, tgl)) \ + fw_def(ALDERLAKE_S, GT_TYPE_ANY, no_ver(i915, huc, tgl)) \ + fw_def(ROCKETLAKE, GT_TYPE_ANY, no_ver(i915, huc, tgl)) \ + fw_def(TIGERLAKE, GT_TYPE_ANY, no_ver(i915, huc, tgl)) /* for the GSC FW we match the compatibility version and not the release one */ #define XE_GSC_FIRMWARE_DEFS(fw_def, major_ver) \ - fw_def(LUNARLAKE, major_ver(xe, gsc, lnl, 104, 1, 0)) \ - fw_def(METEORLAKE, major_ver(i915, gsc, mtl, 102, 1, 0)) + fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, gsc, lnl, 104, 1, 0)) \ + fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, gsc, mtl, 102, 1, 0)) #define MAKE_FW_PATH(dir__, uc__, shortname__, version__) \ __stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin" @@ -159,12 +166,13 @@ struct fw_blobs_by_type { a, b, c } /* All blobs need to be declared via MODULE_FIRMWARE() */ -#define XE_UC_MODULE_FIRMWARE(platform__, fw_filename) \ +#define XE_UC_MODULE_FIRMWARE(platform__, gt_type__, fw_filename) \ MODULE_FIRMWARE(fw_filename); -#define XE_UC_FW_ENTRY(platform__, entry__) \ +#define XE_UC_FW_ENTRY(platform__, gt_type__, entry__) \ { \ .platform = XE_ ## platform__, \ + .gt_type = XE_ ## gt_type__, \ entry__, \ }, @@ -222,30 +230,38 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) [XE_UC_FW_TYPE_HUC] = { entries_huc, ARRAY_SIZE(entries_huc) }, [XE_UC_FW_TYPE_GSC] = { entries_gsc, ARRAY_SIZE(entries_gsc) }, }; - static const struct uc_fw_entry *entries; + struct xe_gt *gt = uc_fw_to_gt(uc_fw); enum xe_platform p = xe->info.platform; + const struct uc_fw_entry *entries; u32 count; int i; - xe_assert(xe, uc_fw->type < ARRAY_SIZE(blobs_all)); + xe_gt_assert(gt, uc_fw->type < ARRAY_SIZE(blobs_all)); + xe_gt_assert(gt, gt->info.type != XE_GT_TYPE_UNINITIALIZED); + entries = blobs_all[uc_fw->type].entries; count = blobs_all[uc_fw->type].count; for (i = 0; i < count && p <= entries[i].platform; i++) { - if (p == entries[i].platform) { - uc_fw->path = entries[i].path; - uc_fw->versions.wanted.major = 
entries[i].major; - uc_fw->versions.wanted.minor = entries[i].minor; - uc_fw->versions.wanted.patch = entries[i].patch; - uc_fw->full_ver_required = entries[i].full_ver_required; - - if (uc_fw->type == XE_UC_FW_TYPE_GSC) - uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY; - else - uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE; - - break; - } + if (p != entries[i].platform) + continue; + + if (entries[i].gt_type != XE_GT_TYPE_ANY && + entries[i].gt_type != gt->info.type) + continue; + + uc_fw->path = entries[i].path; + uc_fw->versions.wanted.major = entries[i].major; + uc_fw->versions.wanted.minor = entries[i].minor; + uc_fw->versions.wanted.patch = entries[i].patch; + uc_fw->full_ver_required = entries[i].full_ver_required; + + if (uc_fw->type == XE_UC_FW_TYPE_GSC) + uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY; + else + uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE; + + break; } } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 367c84b90e9e..79323c78130f 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2049,7 +2049,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && - args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) + args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && + !xe->info.needs_scratch)) return -EINVAL; if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && @@ -2201,6 +2202,20 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) } #endif +static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) +{ + if (!xe_vm_in_fault_mode(vm)) + return false; + + if (!xe_vm_has_scratch(vm)) + return false; + + if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE) + return false; + + return true; +} + /* * Create operations list from IOCTL arguments, setup operations fields so parse * and commit steps are decoupled from IOCTL arguments. This step can fail. @@ -2273,6 +2288,8 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; op->map.pat_index = pat_index; + op->map.invalidate_on_bind = + __xe_vm_needs_clear_scratch_pages(vm, flags); } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { op->prefetch.region = prefetch_region; } @@ -2472,8 +2489,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, return PTR_ERR(vma); op->map.vma = vma; - if ((op->map.immediate || !xe_vm_in_fault_mode(vm)) && - !op->map.is_cpu_addr_mirror) + if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && + !op->map.is_cpu_addr_mirror) || + op->map.invalidate_on_bind) xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); break; @@ -2726,9 +2744,10 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, switch (op->base.op) { case DRM_GPUVA_OP_MAP: - err = vma_lock_and_validate(exec, op->map.vma, - !xe_vm_in_fault_mode(vm) || - op->map.immediate); + if (!op->map.invalidate_on_bind) + err = vma_lock_and_validate(exec, op->map.vma, + !xe_vm_in_fault_mode(vm) || + op->map.immediate); break; case DRM_GPUVA_OP_REMAP: err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); @@ -3082,9 +3101,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, if (!*bind_ops) return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; - err = __copy_from_user(*bind_ops, bind_user, - sizeof(struct drm_xe_vm_bind_op) * - args->num_binds); + err = copy_from_user(*bind_ops, bind_user, + sizeof(struct drm_xe_vm_bind_op) * + args->num_binds); if (XE_IOCTL_DBG(xe, err)) { err = -EFAULT; goto free_bind_ops; } @@ -3109,7 +3128,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && (!xe_vm_in_fault_mode(vm) || - !IS_ENABLED(CONFIG_DRM_GPUSVM)))) { + !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { err = -EINVAL; goto free_bind_ops; } @@ -3243,7 +3262,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, XE_64K_PAGE_MASK) || XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { - return -EINVAL; + return -EINVAL; } } @@ -3251,7 +3270,7 @@ if (bo->cpu_caching) { if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { - return -EINVAL; + return -EINVAL; } } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { /* @@ -3260,7 +3279,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, * how it was mapped on the CPU. Just assume is it * potentially cached on CPU side. */ - return -EINVAL; + return -EINVAL; } /* If a BO is protected it can only be mapped if the key is still valid */ @@ -3846,6 +3865,9 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) } drm_puts(p, "\n"); + + if (drm_coredump_printer_is_full(p)) + return; } } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 84fa41b9fa20..1662604c4486 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -330,6 +330,8 @@ struct xe_vma_op_map { bool is_cpu_addr_mirror; /** @dumpable: whether BO is dumped on GPU hang */ bool dumpable; + /** @invalidate_on_bind: invalidate the VMA before bind */ + bool invalidate_on_bind; /** @pat_index: The pat index to use for this operation. */ u16 pat_index; }; diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index b1f81dca610d..e421a74fb87c 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -49,7 +49,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size) */ static void resize_vram_bar(struct xe_device *xe) { - u64 force_vram_bar_size = xe_modparam.force_vram_bar_size; + int force_vram_bar_size = xe_modparam.force_vram_bar_size; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct pci_bus *root = pdev->bus; resource_size_t current_size; @@ -66,6 +69,9 @@ static void resize_vram_bar(struct xe_device *xe) if (!bar_size_mask) return; + if (force_vram_bar_size < 0) + return; + /* set to a specific size?
*/ if (force_vram_bar_size) { u32 bar_size_bit; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 2f833f0d575f..67196baa4249 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -230,6 +230,18 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + /* Xe2_HPG */ + + { XE_RTP_NAME("16025250150"), + XE_RTP_RULES(GRAPHICS_VERSION(2001)), + XE_RTP_ACTIONS(SET(LSN_VC_REG2, + LSN_LNI_WGT(1) | + LSN_LNE_WGT(1) | + LSN_DIM_X_WGT(1) | + LSN_DIM_Y_WGT(1) | + LSN_DIM_Z_WGT(1))) + }, + /* Xe2_HPM */ { XE_RTP_NAME("16021867713"), diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 9b9e176992a8..9efc5accd43d 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -57,3 +57,5 @@ no_media_l3 MEDIA_VERSION(3000) GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0) 16023105232 GRAPHICS_VERSION_RANGE(2001, 3001) MEDIA_VERSION_RANGE(1301, 3000) +16026508708 GRAPHICS_VERSION_RANGE(1200, 3001) + MEDIA_VERSION_RANGE(1300, 3000)
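
As a companion to the survivability-mode documentation added in the xe_survivability_mode.c hunk above, the minimal userspace sketch below shows one way the documented configfs and sysfs interfaces could be exercised. It is illustrative only and not part of the patch: the PCI address 0000:03:00.0 is the example used in the documentation, and per that documentation the configfs attribute only takes effect when written before the driver probes the device.

/*
 * Illustrative userspace sketch (not part of this patch): request
 * survivability mode via the configfs attribute documented above,
 * then check for the sysfs file that indicates the mode is active.
 * The PCI address 0000:03:00.0 is the documentation's example.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *cfg = "/sys/kernel/config/xe/0000:03:00.0/survivability_mode";
	const char *ind = "/sys/bus/pci/devices/0000:03:00.0/survivability_mode";
	int fd = open(cfg, O_WRONLY);

	if (fd < 0) {
		perror("open configfs attribute");
		return 1;
	}
	/* Equivalent of: echo 1 > .../survivability_mode */
	if (write(fd, "1", 1) != 1) {
		perror("write configfs attribute");
		close(fd);
		return 1;
	}
	close(fd);

	/* Once the driver probes, this admin-only sysfs file appears. */
	if (access(ind, F_OK) == 0)
		printf("device is in survivability mode\n");
	else
		printf("survivability mode not (yet) indicated\n");

	return 0;
}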