87 files changed, 5355 insertions(+), 829 deletions(-)
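The devcoredump hunk below replaces the fixed five-minute DEVCD_TIMEOUT with a caller-supplied timeout, exported as dev_coredumpm_timeout(); xe then passes a one-hour XE_COREDUMP_TIMEOUT_JIFFIES further down. A minimal sketch of a caller, assuming a vmalloc'ed snapshot; the mydrv_* names are invented for illustration, only the dev_coredumpm_timeout() signature is taken from the patch itself:

	#include <linux/devcoredump.h>
	#include <linux/fs.h>		/* memory_read_from_buffer() */
	#include <linux/vmalloc.h>

	/* Copy from the snapshot previously handed over to devcoredump. */
	static ssize_t mydrv_coredump_read(char *buffer, loff_t offset, size_t count,
					   void *data, size_t datalen)
	{
		return memory_read_from_buffer(buffer, count, &offset, data, datalen);
	}

	/* Called by devcoredump once the dump has been read or has timed out. */
	static void mydrv_coredump_free(void *data)
	{
		vfree(data);
	}

	/* Hand over a snapshot; removed automatically after 10 minutes. */
	dev_coredumpm_timeout(dev, THIS_MODULE, snapshot, snapshot_len, GFP_KERNEL,
			      mydrv_coredump_read, mydrv_coredump_free,
			      10 * 60 * HZ);

xe's own call site in xe_devcoredump.c below does exactly this with its coredump state and a 60 * 60 * HZ timeout.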
diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c index 82aeb09b3d1b..c795edad1b96 100644 --- a/drivers/base/devcoredump.c +++ b/drivers/base/devcoredump.c @@ -18,9 +18,6 @@ static struct class devcd_class; /* global disable flag, for security purposes */ static bool devcd_disabled; -/* if data isn't read by userspace after 5 minutes then delete it */ -#define DEVCD_TIMEOUT (HZ * 60 * 5) - struct devcd_entry { struct device devcd_dev; void *data; @@ -328,7 +325,8 @@ void dev_coredump_put(struct device *dev) EXPORT_SYMBOL_GPL(dev_coredump_put); /** - * dev_coredumpm - create device coredump with read/free methods + * dev_coredumpm_timeout - create device coredump with read/free methods and a + * custom timeout. * @dev: the struct device for the crashed device * @owner: the module that contains the read/free functions, use %THIS_MODULE * @data: data cookie for the @read/@free functions @@ -336,17 +334,20 @@ EXPORT_SYMBOL_GPL(dev_coredump_put); * @gfp: allocation flags * @read: function to read from the given buffer * @free: function to free the given buffer + * @timeout: time in jiffies after which the coredump is removed * * Creates a new device coredump for the given device. If a previous one hasn't * been read yet, the new coredump is discarded. The data lifetime is determined * by the device coredump framework and when it is no longer needed the @free * function will be called to free the data. */ -void dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, gfp_t gfp, - ssize_t (*read)(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen), - void (*free)(void *data)) +void dev_coredumpm_timeout(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, + size_t count, void *data, + size_t datalen), + void (*free)(void *data), + unsigned long timeout) { static atomic_t devcd_count = ATOMIC_INIT(0); struct devcd_entry *devcd; @@ -403,7 +404,7 @@ void dev_coredumpm(struct device *dev, struct module *owner, dev_set_uevent_suppress(&devcd->devcd_dev, false); kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); - schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT); + schedule_delayed_work(&devcd->del_wk, timeout); mutex_unlock(&devcd->mutex); return; put_device: @@ -414,7 +415,7 @@ void dev_coredumpm(struct device *dev, struct module *owner, free: free(data); } -EXPORT_SYMBOL_GPL(dev_coredumpm); +EXPORT_SYMBOL_GPL(dev_coredumpm_timeout); /** * dev_coredumpsg - create device coredump that uses scatterlist as data diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 63f1e2d1649f..7bbe46a98ff1 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -25,7 +25,6 @@ config DRM_XE select BACKLIGHT_CLASS_DEVICE if ACPI select INPUT if ACPI select ACPI_VIDEO if X86 && ACPI - select ACPI_BUTTON if ACPI select X86_PLATFORM_DEVICES if X86 && ACPI select ACPI_WMI if X86 && ACPI select SYNC_FILE diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index cd076d0c52a5..b1e03bfe4a68 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -24,9 +24,12 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ $(call cmd,wa_oob) uses_generated_oob := \ + $(obj)/xe_ggtt.o \ $(obj)/xe_gsc.o \ + $(obj)/xe_gt.o \ $(obj)/xe_guc.o \ $(obj)/xe_guc_ads.o \ + $(obj)/xe_guc_pc.o \ $(obj)/xe_migrate.o \ $(obj)/xe_ring_ops.o \ $(obj)/xe_vm.o \ @@ -92,9
+95,11 @@ xe-y += xe_bb.o \ xe_mmio.o \ xe_mocs.o \ xe_module.o \ + xe_oa.o \ xe_pat.o \ xe_pci.o \ xe_pcode.o \ + xe_perf.o \ xe_pm.o \ xe_preempt_fence.o \ xe_pt.o \ @@ -112,6 +117,8 @@ xe-y += xe_bb.o \ xe_tile.o \ xe_tile_sysfs.o \ xe_trace.o \ + xe_trace_bo.o \ + xe_trace_guc.o \ xe_ttm_sys_mgr.o \ xe_ttm_stolen_mgr.o \ xe_ttm_vram_mgr.o \ diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index 79ba98a169f9..43ad4652c2b2 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -128,7 +128,6 @@ enum xe_guc_action { XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B, - XE_GUC_ACTION_SETUP_PC_GUCRC = 0x3004, XE_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, XE_GUC_ACTION_GET_HWCONFIG = 0x4100, XE_GUC_ACTION_REGISTER_CONTEXT = 0x4502, @@ -153,11 +152,6 @@ enum xe_guc_action { XE_GUC_ACTION_LIMIT }; -enum xe_guc_rc_options { - XE_GUCRC_HOST_CONTROL, - XE_GUCRC_FIRMWARE_CONTROL, -}; - enum xe_guc_preempt_options { XE_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4, XE_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h index c165e26c0976..85abe4f09ae2 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h @@ -246,4 +246,26 @@ struct slpc_shared_data { #define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xffu << 0) #define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N GUC_HXG_REQUEST_MSG_n_DATAn +/** + * DOC: SETUP_PC_GUCRC + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_FAST_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_SETUP_PC_GUCRC` = 0x3004 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **MODE** = GUCRC_HOST_CONTROL(0), GUCRC_FIRMWARE_CONTROL(1) | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_ACTION_HOST2GUC_SETUP_PC_GUCRC 0x3004u +#define GUCRC_HOST_CONTROL 0u +#define GUCRC_FIRMWARE_CONTROL 1u + #endif diff --git a/drivers/gpu/drm/xe/abi/guc_errors_abi.h b/drivers/gpu/drm/xe/abi/guc_errors_abi.h index d0b5fed6876f..2c627a21648f 100644 --- a/drivers/gpu/drm/xe/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_errors_abi.h @@ -8,10 +8,41 @@ enum xe_guc_response_status { XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0, + XE_GUC_RESPONSE_ERROR_PROTOCOL = 0x04, + XE_GUC_RESPONSE_INVALID_STATE = 0x0A, + XE_GUC_RESPONSE_UNSUPPORTED_VERSION = 0x0B, + XE_GUC_RESPONSE_INVALID_VFID = 0x0C, + XE_GUC_RESPONSE_UNPROVISIONED_VF = 0x0D, + XE_GUC_RESPONSE_INVALID_EVENT = 0x0E, XE_GUC_RESPONSE_NOT_SUPPORTED = 0x20, + XE_GUC_RESPONSE_UNKNOWN_ACTION = 0x30, + XE_GUC_RESPONSE_ACTION_ABORTED = 0x31, + XE_GUC_RESPONSE_NO_PERMISSION = 0x40, + XE_GUC_RESPONSE_CANNOT_COMPLETE_ACTION = 0x41, + XE_GUC_RESPONSE_INVALID_KLV_DATA = 0x50, + XE_GUC_RESPONSE_INVALID_PARAMS = 0x60, + 
XE_GUC_RESPONSE_INVALID_BUFFER_RANGE = 0x70, + XE_GUC_RESPONSE_INVALID_BUFFER = 0x71, + XE_GUC_RESPONSE_INVALID_GGTT_ADDRESS = 0x80, + XE_GUC_RESPONSE_PENDING_ACTION = 0x90, + XE_GUC_RESPONSE_INVALID_SIZE = 0x102, + XE_GUC_RESPONSE_MALFORMED_KLV = 0x103, + XE_GUC_RESPONSE_INVALID_KLV_KEY = 0x105, + XE_GUC_RESPONSE_DATA_TOO_LARGE = 0x106, + XE_GUC_RESPONSE_VF_MIGRATED = 0x107, XE_GUC_RESPONSE_NO_ATTRIBUTE_TABLE = 0x201, XE_GUC_RESPONSE_NO_DECRYPTION_KEY = 0x202, XE_GUC_RESPONSE_DECRYPTION_FAILED = 0x204, + XE_GUC_RESPONSE_VGT_DISABLED = 0x300, + XE_GUC_RESPONSE_CTB_FULL = 0x301, + XE_GUC_RESPONSE_VGT_UNAUTHORIZED_REQUEST = 0x302, + XE_GUC_RESPONSE_CTB_INVALID = 0x303, + XE_GUC_RESPONSE_CTB_NOT_REGISTERED = 0x304, + XE_GUC_RESPONSE_CTB_IN_USE = 0x305, + XE_GUC_RESPONSE_CTB_INVALID_DESC = 0x306, + XE_GUC_RESPONSE_CTB_SOURCE_INVALID_DESCRIPTOR = 0x30D, + XE_GUC_RESPONSE_CTB_DESTINATION_INVALID_DESCRIPTOR = 0x30E, + XE_GUC_RESPONSE_INVALID_CONFIG_STATE = 0x30F, XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, }; diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h index 534a39db7772..f6ed4dfd215c 100644 --- a/drivers/gpu/drm/xe/abi/guc_messages_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h @@ -92,6 +92,34 @@ #define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD /** + * DOC: HXG Fast Request + * + * The `HXG Fast Request`_ message should be used to initiate asynchronous activity + * for which confirmation or return data is not expected. + * + * If confirmation is required then `HXG Request`_ shall be used instead. + * + * The recipient of this message may only use the `HXG Failure`_ message if it was + * unable to accept this request (like invalid data). + * + * The format of the `HXG Fast Request`_ message is the same as `HXG Request`_, except for @TYPE.
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN - see `HXG Message`_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = `GUC_HXG_TYPE_FAST_REQUEST`_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 - see `HXG Request`_ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION - see `HXG Request`_ | + * +---+-------+--------------------------------------------------------------+ + * |...| | DATAn - see `HXG Request`_ | + * +---+-------+--------------------------------------------------------------+ + */ + +/** * DOC: HXG Event * * The `HXG Event`_ message should be used to initiate asynchronous activity @@ -220,17 +248,4 @@ #define GUC_HXG_RESPONSE_MSG_0_DATA0 GUC_HXG_MSG_0_AUX #define GUC_HXG_RESPONSE_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD -/* deprecated */ -#define INTEL_GUC_MSG_TYPE_SHIFT 28 -#define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT) -#define INTEL_GUC_MSG_DATA_SHIFT 16 -#define INTEL_GUC_MSG_DATA_MASK (0xFFF << INTEL_GUC_MSG_DATA_SHIFT) -#define INTEL_GUC_MSG_CODE_SHIFT 0 -#define INTEL_GUC_MSG_CODE_MASK (0xFFFF << INTEL_GUC_MSG_CODE_SHIFT) - -enum intel_guc_msg_type { - INTEL_GUC_MSG_TYPE_REQUEST = 0x0, - INTEL_GUC_MSG_TYPE_RESPONSE = 0xF, -}; - #endif diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index a2f417209124..d270bcd11686 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -171,7 +171,7 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, xe->pat.idx[XE_CACHE_NONE]); - xe_ggtt_set_pte(ggtt, *ggtt_ofs, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, *ggtt_ofs, pte); *ggtt_ofs += XE_PAGE_SIZE; src_idx -= src_stride; } @@ -217,7 +217,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, xe->pat.idx[XE_CACHE_NONE]); - xe_ggtt_set_pte(ggtt, vma->node.start + x, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node.start + x, pte); } } else { u32 i, ggtt_ofs; diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index c74ceb550dce..10ec2920d31b 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -45,6 +45,7 @@ #define MI_LRI_MMIO_REMAP_EN REG_BIT(17) #define MI_LRI_NUM_REGS(x) XE_INSTR_NUM_DW(2 * (x) + 1) #define MI_LRI_FORCE_POSTED REG_BIT(12) +#define MI_LRI_LEN(x) (((x) & 0xff) + 1) #define MI_FLUSH_DW __MI_INSTR(0x26) #define MI_FLUSH_DW_STORE_INDEX REG_BIT(21) @@ -59,6 +60,10 @@ #define MI_LOAD_REGISTER_MEM (__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4)) #define MI_LRM_USE_GGTT REG_BIT(22) +#define MI_COPY_MEM_MEM (__MI_INSTR(0x2e) | XE_INSTR_NUM_DW(5)) +#define MI_COPY_MEM_MEM_SRC_GGTT REG_BIT(22) +#define MI_COPY_MEM_MEM_DST_GGTT REG_BIT(21) + #define MI_BATCH_BUFFER_START __MI_INSTR(0x31) #endif diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 263ffc7bc2ef..c38db2a74614 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -129,6 +129,8 @@ #define RING_EXECLIST_STATUS_HI(base) 
XE_REG((base) + 0x234 + 4) #define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED) +#define CTX_CTRL_OAC_CONTEXT_ENABLE REG_BIT(8) +#define CTX_CTRL_RUN_ALONE REG_BIT(7) #define CTX_CTRL_INDIRECT_RING_STATE_ENABLE REG_BIT(4) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 011fbbe00168..d44564bad009 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -170,6 +170,8 @@ #define SQCNT1 XE_REG_MCR(0x8718) #define XELPMP_SQCNT1 XE_REG(0x8718) +#define SQCNT1_PMON_ENABLE REG_BIT(30) +#define SQCNT1_OABPC REG_BIT(29) #define ENFORCE_RAR REG_BIT(23) #define XEHP_SQCM XE_REG_MCR(0x8724) @@ -432,6 +434,7 @@ #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) #define UGM_BACKUP_MODE REG_BIT(13) #define MDQ_ARBITRATION_MODE REG_BIT(12) +#define STALL_DOP_GATING_DISABLE REG_BIT(5) #define EARLY_EOT_DIS REG_BIT(1) #define ROW_CHICKEN2 XE_REG_MCR(0xe4f4, XE_REG_OPTION_MASKED) @@ -490,9 +493,11 @@ ((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH)) #define FORCEWAKE_ACK_GT XE_REG(0x130044) -#define FORCEWAKE_KERNEL BIT(0) -#define FORCEWAKE_USER BIT(1) -#define FORCEWAKE_KERNEL_FALLBACK BIT(15) + +/* Applicable for all FORCEWAKE_DOMAIN and FORCEWAKE_ACK_DOMAIN regs */ +#define FORCEWAKE_KERNEL 0 +#define FORCEWAKE_MT(bit) BIT(bit) +#define FORCEWAKE_MT_MASK(bit) BIT((bit) + 16) #define MTL_MEDIA_PERF_LIMIT_REASONS XE_REG(0x138030) #define MTL_MEDIA_MC6 XE_REG(0x138048) diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h new file mode 100644 index 000000000000..1189f5a540a8 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __XE_OA_REGS__ +#define __XE_OA_REGS__ + +#define RPM_CONFIG1 XE_REG(0xd04) +#define GT_NOA_ENABLE REG_BIT(9) + +#define EU_PERF_CNTL0 XE_REG(0xe458) +#define EU_PERF_CNTL4 XE_REG(0xe45c) +#define EU_PERF_CNTL1 XE_REG(0xe558) +#define EU_PERF_CNTL5 XE_REG(0xe55c) +#define EU_PERF_CNTL2 XE_REG(0xe658) +#define EU_PERF_CNTL6 XE_REG(0xe65c) +#define EU_PERF_CNTL3 XE_REG(0xe758) + +#define OA_TLB_INV_CR XE_REG(0xceec) + +/* OAR unit */ +#define OAR_OACONTROL XE_REG(0x2960) +#define OAR_OACONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1) +#define OAR_OACONTROL_COUNTER_ENABLE REG_BIT(0) + +#define OACTXCONTROL(base) XE_REG((base) + 0x360) +#define OAR_OASTATUS XE_REG(0x2968) +#define OA_COUNTER_RESUME REG_BIT(0) + +/* OAG unit */ +#define OAG_OAGLBCTXCTRL XE_REG(0x2b28) +#define OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK REG_GENMASK(7, 2) +#define OAG_OAGLBCTXCTRL_TIMER_ENABLE REG_BIT(1) +#define OAG_OAGLBCTXCTRL_COUNTER_RESUME REG_BIT(0) + +#define OAG_OAHEADPTR XE_REG(0xdb00) +#define OAG_OAHEADPTR_MASK REG_GENMASK(31, 6) +#define OAG_OATAILPTR XE_REG(0xdb04) +#define OAG_OATAILPTR_MASK REG_GENMASK(31, 6) + +#define OAG_OABUFFER XE_REG(0xdb08) +#define OABUFFER_SIZE_MASK REG_GENMASK(5, 3) +#define OABUFFER_SIZE_128K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0) +#define OABUFFER_SIZE_256K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1) +#define OABUFFER_SIZE_512K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2) +#define OABUFFER_SIZE_1M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3) +#define OABUFFER_SIZE_2M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4) +#define OABUFFER_SIZE_4M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5) +#define OABUFFER_SIZE_8M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6) 
+#define OABUFFER_SIZE_16M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7) +#define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */ + +#define OAG_OACONTROL XE_REG(0xdaf4) +#define OAG_OACONTROL_OA_CCS_SELECT_MASK REG_GENMASK(18, 16) +#define OAG_OACONTROL_OA_COUNTER_SEL_MASK REG_GENMASK(4, 2) +#define OAG_OACONTROL_OA_COUNTER_ENABLE REG_BIT(0) +/* Common to all OA units */ +#define OA_OACONTROL_REPORT_BC_MASK REG_GENMASK(9, 9) +#define OA_OACONTROL_COUNTER_SIZE_MASK REG_GENMASK(8, 8) + +#define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED) +#define OAG_OA_DEBUG_DISABLE_MMIO_TRG REG_BIT(14) +#define OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL REG_BIT(13) +#define OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL REG_BIT(8) +#define OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL REG_BIT(7) +#define OAG_OA_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6) +#define OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS REG_BIT(5) +#define OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1) + +#define OAG_OASTATUS XE_REG(0xdafc) +#define OASTATUS_MMIO_TRG_Q_FULL REG_BIT(6) +#define OASTATUS_COUNTER_OVERFLOW REG_BIT(2) +#define OASTATUS_BUFFER_OVERFLOW REG_BIT(1) +#define OASTATUS_REPORT_LOST REG_BIT(0) +#define OAG_MMIOTRIGGER XE_REG(0xdb1c) +/* OAC unit */ +#define OAC_OACONTROL XE_REG(0x15114) + +/* OAM unit */ +#define OAM_HEAD_POINTER_OFFSET (0x1a0) +#define OAM_TAIL_POINTER_OFFSET (0x1a4) +#define OAM_BUFFER_OFFSET (0x1a8) +#define OAM_CONTEXT_CONTROL_OFFSET (0x1bc) +#define OAM_CONTROL_OFFSET (0x194) +#define OAM_CONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1) +#define OAM_DEBUG_OFFSET (0x198) +#define OAM_STATUS_OFFSET (0x19c) +#define OAM_MMIO_TRG_OFFSET (0x1d0) + +#define OAM_HEAD_POINTER(base) XE_REG((base) + OAM_HEAD_POINTER_OFFSET) +#define OAM_TAIL_POINTER(base) XE_REG((base) + OAM_TAIL_POINTER_OFFSET) +#define OAM_BUFFER(base) XE_REG((base) + OAM_BUFFER_OFFSET) +#define OAM_CONTEXT_CONTROL(base) XE_REG((base) + OAM_CONTEXT_CONTROL_OFFSET) +#define OAM_CONTROL(base) XE_REG((base) + OAM_CONTROL_OFFSET) +#define OAM_DEBUG(base) XE_REG((base) + OAM_DEBUG_OFFSET) +#define OAM_STATUS(base) XE_REG((base) + OAM_STATUS_OFFSET) +#define OAM_MMIO_TRG(base) XE_REG((base) + OAM_MMIO_TRG_OFFSET) + +#endif diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index 06759d754783..f217445c246a 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -91,6 +91,59 @@ static const struct rtp_test_case cases[] = { }, }, { + .name = "match-or", + .expected_reg = REGULAR_REG1, + .expected_set_bits = REG_BIT(0) | REG_BIT(1) | REG_BIT(2), + .expected_clr_bits = REG_BIT(0) | REG_BIT(1) | REG_BIT(2), + .expected_count = 1, + .entries = (const struct xe_rtp_entry_sr[]) { + { XE_RTP_NAME("first"), + XE_RTP_RULES(FUNC(match_yes), OR, FUNC(match_no)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + { XE_RTP_NAME("middle"), + XE_RTP_RULES(FUNC(match_no), FUNC(match_no), OR, + FUNC(match_yes), OR, + FUNC(match_no)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1))) + }, + { XE_RTP_NAME("last"), + XE_RTP_RULES(FUNC(match_no), OR, FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(2))) + }, + { XE_RTP_NAME("no-match"), + XE_RTP_RULES(FUNC(match_no), OR, FUNC(match_no)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(3))) + }, + {} + }, + }, + { + .name = "match-or-xfail", + .expected_reg = REGULAR_REG1, + .expected_count = 0, + .entries = (const struct xe_rtp_entry_sr[]) { + { XE_RTP_NAME("leading-or"), + XE_RTP_RULES(OR, FUNC(match_yes)), + 
XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + { XE_RTP_NAME("trailing-or"), + /* + * First condition is match_no, otherwise the failure + * wouldn't really trigger as RTP stops processing as + * soon as it has a matching set of rules + */ + XE_RTP_RULES(FUNC(match_no), OR), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1))) + }, + { XE_RTP_NAME("no-or-or-yes"), + XE_RTP_RULES(FUNC(match_no), OR, OR, FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(2))) + }, + {} + }, + }, + { .name = "no-match-no-add-multiple-rules", .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), @@ -255,9 +308,14 @@ static void xe_rtp_process_tests(struct kunit *test) } KUNIT_EXPECT_EQ(test, count, param->expected_count); - KUNIT_EXPECT_EQ(test, sr_entry->clr_bits, param->expected_clr_bits); - KUNIT_EXPECT_EQ(test, sr_entry->set_bits, param->expected_set_bits); - KUNIT_EXPECT_EQ(test, sr_entry->reg.raw, param->expected_reg.raw); + if (count) { + KUNIT_EXPECT_EQ(test, sr_entry->clr_bits, param->expected_clr_bits); + KUNIT_EXPECT_EQ(test, sr_entry->set_bits, param->expected_set_bits); + KUNIT_EXPECT_EQ(test, sr_entry->reg.raw, param->expected_reg.raw); + } else { + KUNIT_EXPECT_NULL(test, sr_entry); + } + KUNIT_EXPECT_EQ(test, reg_sr->errors, param->expected_sr_errors); } diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 2bae01ce4e5b..65c696966e96 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -25,7 +25,7 @@ #include "xe_pm.h" #include "xe_preempt_fence.h" #include "xe_res_cursor.h" -#include "xe_trace.h" +#include "xe_trace_bo.h" #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" @@ -378,6 +378,15 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_FLAG_PAGETABLE)) caching = ttm_write_combined; + if (bo->flags & XE_BO_FLAG_NEEDS_UC) { + /* + * Valid only for internally-created buffers, for + * which cpu_caching is never initialized.
+ */ + xe_assert(xe, bo->cpu_caching == 0); + caching = ttm_uncached; + } + err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages); if (err) { kfree(tt); diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index d7f2d19a77c1..62c2b10fbf1d 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -53,6 +53,9 @@ #ifdef CONFIG_DEV_COREDUMP +/* 1 hour timeout */ +#define XE_COREDUMP_TIMEOUT_JIFFIES (60 * 60 * HZ) + static struct xe_device *coredump_to_xe(const struct xe_devcoredump *coredump) { return container_of(coredump, struct xe_device, devcoredump); @@ -247,8 +250,9 @@ void xe_devcoredump(struct xe_sched_job *job) drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n", xe->drm.primary->index); - dev_coredumpm(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL, - xe_devcoredump_read, xe_devcoredump_free); + dev_coredumpm_timeout(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL, + xe_devcoredump_read, xe_devcoredump_free, + XE_COREDUMP_TIMEOUT_JIFFIES); } static void xe_driver_devcoredump_fini(void *arg) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 94dbfe5cf19c..cfda7cb5df2c 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -44,6 +44,7 @@ #include "xe_module.h" #include "xe_pat.h" #include "xe_pcode.h" +#include "xe_perf.h" #include "xe_pm.h" #include "xe_query.h" #include "xe_sriov.h" @@ -141,6 +142,7 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_PERF, xe_perf_ioctl, DRM_RENDER_ALLOW), }; static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -484,6 +486,17 @@ static int wait_for_lmem_ready(struct xe_device *xe) return 0; } +static void update_device_info(struct xe_device *xe) +{ + /* disable features that are not available/applicable to VFs */ + if (IS_SRIOV_VF(xe)) { + xe->info.enable_display = 0; + xe->info.has_heci_gscfi = 0; + xe->info.skip_guc_pc = 1; + xe->info.skip_pcode = 1; + } +} + /** * xe_device_probe_early: Device early probe * @xe: xe device instance @@ -504,6 +517,8 @@ int xe_device_probe_early(struct xe_device *xe) xe_sriov_probe_early(xe); + update_device_info(xe); + err = xe_pcode_probe_early(xe); if (err) return err; @@ -619,16 +634,16 @@ int xe_device_probe(struct xe_device *xe) err = xe_device_set_has_flat_ccs(xe); if (err) - goto err_irq_shutdown; + goto err; err = xe_vram_probe(xe); if (err) - goto err_irq_shutdown; + goto err; for_each_tile(tile, xe, id) { err = xe_tile_init_noalloc(tile); if (err) - goto err_irq_shutdown; + goto err; } /* Allocate and map stolen after potential VRAM resize */ @@ -642,7 +657,7 @@ int xe_device_probe(struct xe_device *xe) */ err = xe_display_init_noaccel(xe); if (err) - goto err_irq_shutdown; + goto err; for_each_gt(gt, xe, id) { last_gt = id; @@ -654,25 +669,37 @@ int xe_device_probe(struct xe_device *xe) xe_heci_gsc_init(xe); - err = xe_display_init(xe); + err = xe_oa_init(xe); if (err) goto err_fini_gt; + err = xe_display_init(xe); + if (err) + goto err_fini_oa; + err = drm_dev_register(&xe->drm, 0); if (err) goto err_fini_display; xe_display_register(xe); + xe_oa_register(xe); + xe_debugfs_register(xe); xe_hwmon_register(xe); + for_each_gt(gt, xe, id) + xe_gt_sanitize_freq(gt); + return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe); err_fini_display: 
xe_display_driver_remove(xe); +err_fini_oa: + xe_oa_fini(xe); + err_fini_gt: for_each_gt(gt, xe, id) { if (id < last_gt) @@ -681,8 +708,6 @@ err_fini_gt: break; } -err_irq_shutdown: - xe_irq_shutdown(xe); err: xe_display_fini(xe); return err; @@ -701,16 +726,18 @@ void xe_device_remove(struct xe_device *xe) struct xe_gt *gt; u8 id; + xe_oa_unregister(xe); + xe_device_remove_display(xe); xe_display_fini(xe); + xe_oa_fini(xe); + xe_heci_gsc_fini(xe); for_each_gt(gt, xe, id) xe_gt_remove(gt); - - xe_irq_shutdown(xe); } void xe_device_shutdown(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index f1c09824b145..c37be471d11c 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -17,6 +17,7 @@ #include "xe_gt_types.h" #include "xe_lmtt_types.h" #include "xe_memirq_types.h" +#include "xe_oa.h" #include "xe_platform_types.h" #include "xe_pt_types.h" #include "xe_sriov_types.h" @@ -462,6 +463,9 @@ struct xe_device { /** @heci_gsc: graphics security controller */ struct xe_heci_gsc heci_gsc; + /** @oa: oa perf counter subsystem */ + struct xe_oa oa; + /** @needs_flr_on_fini: requests function-reset on fini */ bool needs_flr_on_fini; diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 97eeb973e897..2d72cdec3a0b 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -141,7 +141,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) q->width != args->num_batch_buffer)) return -EINVAL; - if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) { + if (XE_IOCTL_DBG(xe, q->ops->reset_status(q))) { err = -ECANCELED; goto err_exec_queue; } @@ -259,9 +259,9 @@ retry: /* Wait behind rebinds */ if (!xe_vm_in_lr_mode(vm)) { - err = drm_sched_job_add_resv_dependencies(&job->drm, - xe_vm_resv(vm), - DMA_RESV_USAGE_KERNEL); + err = xe_sched_job_add_deps(job, + xe_vm_resv(vm), + DMA_RESV_USAGE_KERNEL); if (err) goto err_put_job; } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 27215075c799..0ba37835849b 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -67,7 +67,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, q->fence_irq = >->fence_irq[hwe->class]; q->ring_ops = gt->ring_ops[hwe->class]; q->ops = gt->exec_queue_ops; - INIT_LIST_HEAD(&q->compute.link); + INIT_LIST_HEAD(&q->lr.link); INIT_LIST_HEAD(&q->multi_gt_link); q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; @@ -633,8 +633,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, return PTR_ERR(q); if (xe_vm_in_preempt_fence_mode(vm)) { - q->compute.context = dma_fence_context_alloc(1); - spin_lock_init(&q->compute.lock); + q->lr.context = dma_fence_context_alloc(1); + spin_lock_init(&q->lr.lock); err = xe_vm_add_compute_exec_queue(vm, q); if (XE_IOCTL_DBG(xe, err)) @@ -677,7 +677,7 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, switch (args->property) { case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN: - args->value = !!(q->flags & EXEC_QUEUE_FLAG_BANNED); + args->value = q->ops->reset_status(q); ret = 0; break; default: diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 18d8b2a60928..201588ec33c3 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -70,18 +70,16 @@ struct xe_exec_queue { */ struct dma_fence 
*last_fence; -/* queue no longer allowed to submit */ -#define EXEC_QUEUE_FLAG_BANNED BIT(0) /* queue used for kernel submission only */ -#define EXEC_QUEUE_FLAG_KERNEL BIT(1) +#define EXEC_QUEUE_FLAG_KERNEL BIT(0) /* kernel engine only destroyed at driver unload */ -#define EXEC_QUEUE_FLAG_PERMANENT BIT(2) +#define EXEC_QUEUE_FLAG_PERMANENT BIT(1) /* for VM jobs. Caller needs to hold rpm ref when creating queue with this flag */ -#define EXEC_QUEUE_FLAG_VM BIT(3) +#define EXEC_QUEUE_FLAG_VM BIT(2) /* child of VM queue for multi-tile VM jobs */ -#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(4) +#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(3) /* kernel exec_queue only, set priority to highest level */ -#define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(5) +#define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(4) /** * @flags: flags for this exec queue, should statically setup aside from ban @@ -115,19 +113,19 @@ struct xe_exec_queue { enum xe_exec_queue_priority priority; } sched_props; - /** @compute: compute exec queue state */ + /** @lr: long-running exec queue state */ struct { - /** @compute.pfence: preemption fence */ + /** @lr.pfence: preemption fence */ struct dma_fence *pfence; - /** @compute.context: preemption fence context */ + /** @lr.context: preemption fence context */ u64 context; - /** @compute.seqno: preemption fence seqno */ + /** @lr.seqno: preemption fence seqno */ u32 seqno; - /** @compute.link: link into VM's list of exec queues */ + /** @lr.link: link into VM's list of exec queues */ struct list_head link; - /** @compute.lock: preemption fences lock */ + /** @lr.lock: preemption fences lock */ spinlock_t lock; - } compute; + } lr; /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 9bbe8a5040da..b263fff15273 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -10,31 +10,26 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_reg_defs.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_mmio.h" +#include "xe_sriov.h" #define XE_FORCE_WAKE_ACK_TIMEOUT_MS 50 -static struct xe_gt * -fw_to_gt(struct xe_force_wake *fw) +static const char *str_wake_sleep(bool wake) { - return fw->gt; -} - -static struct xe_device * -fw_to_xe(struct xe_force_wake *fw) -{ - return gt_to_xe(fw_to_gt(fw)); + return wake ? 
"wake" : "sleep"; } static void domain_init(struct xe_force_wake_domain *domain, enum xe_force_wake_domain_id id, - struct xe_reg reg, struct xe_reg ack, u32 val, u32 mask) + struct xe_reg reg, struct xe_reg ack) { domain->id = id; domain->reg_ctl = reg; domain->reg_ack = ack; - domain->val = val; - domain->mask = mask; + domain->val = FORCEWAKE_MT(FORCEWAKE_KERNEL); + domain->mask = FORCEWAKE_MT_MASK(FORCEWAKE_KERNEL); } void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) @@ -51,14 +46,12 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], XE_FW_DOMAIN_ID_GT, FORCEWAKE_GT, - FORCEWAKE_ACK_GT_MTL, - BIT(0), BIT(16)); + FORCEWAKE_ACK_GT_MTL); } else { domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], XE_FW_DOMAIN_ID_GT, FORCEWAKE_GT, - FORCEWAKE_ACK_GT, - BIT(0), BIT(16)); + FORCEWAKE_ACK_GT); } } @@ -73,8 +66,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER], XE_FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER, - FORCEWAKE_ACK_RENDER, - BIT(0), BIT(16)); + FORCEWAKE_ACK_RENDER); for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { if (!(gt->info.engine_mask & BIT(i))) @@ -83,8 +75,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j], XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j, FORCEWAKE_MEDIA_VDBOX(j), - FORCEWAKE_ACK_MEDIA_VDBOX(j), - BIT(0), BIT(16)); + FORCEWAKE_ACK_MEDIA_VDBOX(j)); } for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) { @@ -94,42 +85,63 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j], XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j, FORCEWAKE_MEDIA_VEBOX(j), - FORCEWAKE_ACK_MEDIA_VEBOX(j), - BIT(0), BIT(16)); + FORCEWAKE_ACK_MEDIA_VEBOX(j)); } if (gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)) domain_init(&fw->domains[XE_FW_DOMAIN_ID_GSC], XE_FW_DOMAIN_ID_GSC, FORCEWAKE_GSC, - FORCEWAKE_ACK_GSC, - BIT(0), BIT(16)); + FORCEWAKE_ACK_GSC); +} + +static void __domain_ctl(struct xe_gt *gt, struct xe_force_wake_domain *domain, bool wake) +{ + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; + + xe_mmio_write32(gt, domain->reg_ctl, domain->mask | (wake ? domain->val : 0)); +} + +static int __domain_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain, bool wake) +{ + u32 value; + int ret; + + if (IS_SRIOV_VF(gt_to_xe(gt))) + return 0; + + ret = xe_mmio_wait32(gt, domain->reg_ack, domain->val, wake ? 
domain->val : 0, + XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, + &value, true); + if (ret) + xe_gt_notice(gt, "Force wake domain %d failed to ack %s (%pe) reg[%#x] = %#x\n", + domain->id, str_wake_sleep(wake), ERR_PTR(ret), + domain->reg_ack.addr, value); + + return ret; } static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain) { - xe_mmio_write32(gt, domain->reg_ctl, domain->mask | domain->val); + __domain_ctl(gt, domain, true); } static int domain_wake_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain) { - return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val, - XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, - NULL, true); + return __domain_wait(gt, domain, true); } static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain) { - xe_mmio_write32(gt, domain->reg_ctl, domain->mask); + __domain_ctl(gt, domain, false); } static int domain_sleep_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain) { - return xe_mmio_wait32(gt, domain->reg_ack, domain->val, 0, - XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, - NULL, true); + return __domain_wait(gt, domain, false); } #define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ @@ -141,12 +153,11 @@ static int domain_sleep_wait(struct xe_gt *gt, int xe_force_wake_get(struct xe_force_wake *fw, enum xe_force_wake_domains domains) { - struct xe_device *xe = fw_to_xe(fw); - struct xe_gt *gt = fw_to_gt(fw); + struct xe_gt *gt = fw->gt; struct xe_force_wake_domain *domain; enum xe_force_wake_domains tmp, woken = 0; unsigned long flags; - int ret, ret2 = 0; + int ret = 0; spin_lock_irqsave(&fw->lock, flags); for_each_fw_domain_masked(domain, domains, fw, tmp) { @@ -156,27 +167,22 @@ int xe_force_wake_get(struct xe_force_wake *fw, } } for_each_fw_domain_masked(domain, woken, fw, tmp) { - ret = domain_wake_wait(gt, domain); - ret2 |= ret; - if (ret) - drm_notice(&xe->drm, "Force wake domain (%d) failed to ack wake, ret=%d\n", - domain->id, ret); + ret |= domain_wake_wait(gt, domain); } fw->awake_domains |= woken; spin_unlock_irqrestore(&fw->lock, flags); - return ret2; + return ret; } int xe_force_wake_put(struct xe_force_wake *fw, enum xe_force_wake_domains domains) { - struct xe_device *xe = fw_to_xe(fw); - struct xe_gt *gt = fw_to_gt(fw); + struct xe_gt *gt = fw->gt; struct xe_force_wake_domain *domain; enum xe_force_wake_domains tmp, sleep = 0; unsigned long flags; - int ret, ret2 = 0; + int ret = 0; spin_lock_irqsave(&fw->lock, flags); for_each_fw_domain_masked(domain, domains, fw, tmp) { @@ -186,14 +192,10 @@ int xe_force_wake_put(struct xe_force_wake *fw, } } for_each_fw_domain_masked(domain, sleep, fw, tmp) { - ret = domain_sleep_wait(gt, domain); - ret2 |= ret; - if (ret) - drm_notice(&xe->drm, "Force wake domain (%d) failed to ack sleep, ret=%d\n", - domain->id, ret); + ret |= domain_sleep_wait(gt, domain); } fw->awake_domains &= ~sleep; spin_unlock_irqrestore(&fw->lock, flags); - return ret2; + return ret; } diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h index 83cb157da7cc..a2577672f4e3 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.h +++ b/drivers/gpu/drm/xe/xe_force_wake.h @@ -24,14 +24,25 @@ static inline int xe_force_wake_ref(struct xe_force_wake *fw, enum xe_force_wake_domains domain) { - xe_gt_assert(fw->gt, domain); + xe_gt_assert(fw->gt, domain != XE_FORCEWAKE_ALL); return fw->domains[ffs(domain) - 1].ref; } +/** + * xe_force_wake_assert_held - asserts domain is awake + * @fw : xe_force_wake structure + * @domain: 
a single xe_force_wake_domains value; XE_FORCEWAKE_ALL is not allowed + * + * xe_force_wake_assert_held() is designed to confirm a particular + * forcewake domain's wakefulness; it doesn't verify the wakefulness of + * multiple domains. Make sure the caller doesn't pass multiple + * domains (XE_FORCEWAKE_ALL) as the parameter. + */ static inline void xe_force_wake_assert_held(struct xe_force_wake *fw, enum xe_force_wake_domains domain) { + xe_gt_assert(fw->gt, domain != XE_FORCEWAKE_ALL); xe_gt_assert(fw->gt, fw->awake_domains & domain); } diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 8ff91fd1b7c8..883cfc7f98a8 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -11,6 +11,7 @@ #include <drm/drm_drv.h> #include <drm/drm_managed.h> #include <drm/intel/i915_drm.h> +#include <generated/xe_wa_oob.h> #include "regs/xe_gt_regs.h" #include "regs/xe_gtt_defs.h" @@ -23,8 +24,10 @@ #include "xe_gt_sriov_vf.h" #include "xe_gt_tlb_invalidation.h" #include "xe_map.h" +#include "xe_mmio.h" #include "xe_pm.h" #include "xe_sriov.h" +#include "xe_wa.h" #include "xe_wopcm.h" static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, @@ -69,7 +72,22 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev) return ggms ? SZ_1M << ggms : 0; } -void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) +static void ggtt_update_access_counter(struct xe_ggtt *ggtt) +{ + /* + * Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit + * to wait for completion of prior GTT writes before letting this through. + * This needs to be done for all GGTT writes originating from the CPU. + */ + lockdep_assert_held(&ggtt->lock); + + if ((++ggtt->access_count % 63) == 0) { + xe_mmio_write32(ggtt->tile->media_gt, GMD_ID, 0x0); + ggtt->access_count = 0; + } +} + +static void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) { xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK)); xe_tile_assert(ggtt->tile, addr < ggtt->size); @@ -77,6 +95,12 @@ void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]); } +static void xe_ggtt_set_pte_and_flush(struct xe_ggtt *ggtt, u64 addr, u64 pte) +{ + xe_ggtt_set_pte(ggtt, addr, pte); + ggtt_update_access_counter(ggtt); +} + static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) { u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; @@ -92,7 +116,7 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) scratch_pte = 0; while (start < end) { - xe_ggtt_set_pte(ggtt, start, scratch_pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, start, scratch_pte); start += XE_PAGE_SIZE; } } @@ -124,10 +148,17 @@ static void primelockdep(struct xe_ggtt *ggtt) static const struct xe_ggtt_pt_ops xelp_pt_ops = { .pte_encode_bo = xelp_ggtt_pte_encode_bo, + .ggtt_set_pte = xe_ggtt_set_pte, }; static const struct xe_ggtt_pt_ops xelpg_pt_ops = { .pte_encode_bo = xelpg_ggtt_pte_encode_bo, + .ggtt_set_pte = xe_ggtt_set_pte, +}; + +static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { + .pte_encode_bo = xelpg_ggtt_pte_encode_bo, + .ggtt_set_pte = xe_ggtt_set_pte_and_flush, }; /* @@ -187,7 +218,8 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) ggtt->size = GUC_GGTT_TOP; if (GRAPHICS_VERx100(xe) >= 1270) - ggtt->pt_ops = &xelpg_pt_ops; + ggtt->pt_ops = ggtt->tile->media_gt && XE_WA(ggtt->tile->media_gt, 22019338487) ?
+ &xelpg_pt_wa_ops : &xelpg_pt_ops; else ggtt->pt_ops = &xelp_pt_ops; @@ -394,7 +426,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); - xe_ggtt_set_pte(ggtt, start + offset, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, start + offset, pte); } } @@ -502,7 +534,7 @@ static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node return; while (start < end) { - xe_ggtt_set_pte(ggtt, start, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, start, pte); start += XE_PAGE_SIZE; } diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 4a41a1762358..6a96fd54bf60 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -10,7 +10,6 @@ struct drm_printer; -void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte); int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index d8c584d9a8c3..2245d88d8f39 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -13,10 +13,6 @@ struct xe_bo; struct xe_gt; -struct xe_ggtt_pt_ops { - u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); -}; - struct xe_ggtt { struct xe_tile *tile; @@ -34,6 +30,14 @@ struct xe_ggtt { const struct xe_ggtt_pt_ops *pt_ops; struct drm_mm mm; + + /** @access_count: counts GGTT writes */ + unsigned int access_count; +}; + +struct xe_ggtt_pt_ops { + u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); + void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte); }; #endif diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 80a61934decc..f8239a13fa2b 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -22,6 +22,7 @@ #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_gt_printk.h" +#include "xe_guc_pc.h" #include "xe_huc.h" #include "xe_map.h" #include "xe_mmio.h" @@ -284,6 +285,10 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) return ret; xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); + + /* GSC load is done, restore expected GT frequencies */ + xe_gt_sanitize_freq(gt); + xe_gt_dbg(gt, "GSC FW async load completed\n"); /* HuC auth failure is not fatal */ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 57d84751e160..759634cff1d8 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -9,6 +9,7 @@ #include <drm/drm_managed.h> #include <drm/xe_drm.h> +#include <generated/xe_wa_oob.h> #include "instructions/xe_gfxpipe_commands.h" #include "instructions/xe_mi_commands.h" @@ -54,6 +55,7 @@ #include "xe_sriov.h" #include "xe_tuning.h" #include "xe_uc.h" +#include "xe_uc_fw.h" #include "xe_vm.h" #include "xe_wa.h" #include "xe_wopcm.h" @@ -678,6 +680,9 @@ static int do_gt_restart(struct xe_gt *gt) /* Get CCS mode in sync between sw/hw */ xe_gt_apply_ccs_mode(gt); + /* Restore GT freq to expected values */ + xe_gt_sanitize_freq(gt); + return 0; } @@ -801,6 +806,25 @@ err_msg: return err; } +/** + * xe_gt_sanitize_freq() - Restore saved frequencies if necessary. + * @gt: the GT object + * + * Called after driver init/GSC load completes to restore GT frequencies if we + * limited them for any WAs. 
+ */ +int xe_gt_sanitize_freq(struct xe_gt *gt) +{ + int ret = 0; + + if ((!xe_uc_fw_is_available(>->uc.gsc.fw) || + xe_uc_fw_is_loaded(>->uc.gsc.fw)) && + XE_WA(gt, 22019338487)) + ret = xe_guc_pc_restore_stashed_freq(>->uc.guc.pc); + + return ret; +} + int xe_gt_resume(struct xe_gt *gt) { int err; diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 9073ac68a777..1123fdfc4ebc 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -56,6 +56,7 @@ int xe_gt_suspend(struct xe_gt *gt); int xe_gt_resume(struct xe_gt *gt); void xe_gt_reset_async(struct xe_gt *gt); void xe_gt_sanitize(struct xe_gt *gt); +int xe_gt_sanitize_freq(struct xe_gt *gt); void xe_gt_remove(struct xe_gt *gt); /** diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index 5d4cdbd69bc3..d2e4dc3aaf61 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -12,6 +12,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sysfs.h" #include "xe_mmio.h" +#include "xe_sriov.h" static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) { @@ -75,7 +76,7 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) void xe_gt_apply_ccs_mode(struct xe_gt *gt) { - if (!gt->ccs_mode) + if (!gt->ccs_mode || IS_SRIOV_VF(gt_to_xe(gt))) return; __xe_gt_apply_ccs_mode(gt, gt->ccs_mode); @@ -110,6 +111,12 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, u32 num_engines, num_slices; int ret; + if (IS_SRIOV(xe)) { + xe_gt_dbg(gt, "Can't change compute mode when running as %s\n", + xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); + return -EOPNOTSUPP; + } + ret = kstrtou32(buff, 0, &num_engines); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index 9ff2061133df..86c2d62b4bdc 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel Corporation */ +#include <linux/math64.h> + #include "xe_gt_clock.h" #include "regs/xe_gt_regs.h" @@ -79,3 +81,21 @@ int xe_gt_clock_init(struct xe_gt *gt) gt->info.reference_clock = freq; return 0; } + +static u64 div_u64_roundup(u64 n, u32 d) +{ + return div_u64(n + d - 1, d); +} + +/** + * xe_gt_clock_interval_to_ms - Convert sampled GT clock ticks to msec + * + * @gt: the &xe_gt + * @count: count of GT clock ticks + * + * Returns: time in msec + */ +u64 xe_gt_clock_interval_to_ms(struct xe_gt *gt, u64 count) +{ + return div_u64_roundup(count * MSEC_PER_SEC, gt->info.reference_clock); +} diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h index 44fa0371b973..3adeb7baaca4 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.h +++ b/drivers/gpu/drm/xe/xe_gt_clock.h @@ -11,5 +11,6 @@ struct xe_gt; int xe_gt_clock_init(struct xe_gt *gt); +u64 xe_gt_clock_interval_to_ms(struct xe_gt *gt, u64 count); #endif diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 5d6181117ab2..67aba4140510 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -15,6 +15,7 @@ #include "xe_macros.h" #include "xe_mmio.h" #include "xe_pm.h" +#include "xe_sriov.h" /** * DOC: Xe GT Idle @@ -100,6 +101,9 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) u32 pg_enable; int i, j; + if (IS_SRIOV_VF(xe)) + return; + /* Disable CPG for PVC */ if (xe->info.platform == XE_PVC) return; @@ -130,6 +134,9 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) void xe_gt_idle_disable_pg(struct xe_gt *gt) { + if 
(IS_SRIOV_VF(gt_to_xe(gt))) + return; + xe_device_assert_mem_access(gt_to_xe(gt)); XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); @@ -214,6 +221,9 @@ int xe_gt_idle_init(struct xe_gt_idle *gtidle) struct kobject *kobj; int err; + if (IS_SRIOV_VF(xe)) + return 0; + kobj = kobject_create_and_add("gtidle", gt->sysfs); if (!kobj) return -ENOMEM; @@ -246,6 +256,9 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt) xe_device_assert_mem_access(gt_to_xe(gt)); xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; + /* Units of 1280 ns for a total of 5s */ xe_mmio_write32(gt, RC_IDLE_HYSTERSIS, 0x3B9ACA); /* Enable RC6 */ @@ -258,6 +271,9 @@ void xe_gt_idle_disable_c6(struct xe_gt *gt) xe_device_assert_mem_access(gt_to_xe(gt)); xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; + xe_mmio_write32(gt, RC_CONTROL, 0); xe_mmio_write32(gt, RC_STATE, 0); } diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 040dd142c49c..9292d5468868 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -19,7 +19,7 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_migrate.h" -#include "xe_trace.h" +#include "xe_trace_bo.h" #include "xe_vm.h" struct pagefault { @@ -125,126 +125,108 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, return 0; } -static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) +static int handle_vma_pagefault(struct xe_tile *tile, struct pagefault *pf, + struct xe_vma *vma) { - struct xe_device *xe = gt_to_xe(gt); - struct xe_tile *tile = gt_to_tile(gt); + struct xe_vm *vm = xe_vma_vm(vma); struct drm_exec exec; - struct xe_vm *vm; - struct xe_vma *vma = NULL; struct dma_fence *fence; - bool write_locked; - int ret = 0; + ktime_t end = 0; + int err; bool atomic; - /* SW isn't expected to handle TRTT faults */ - if (pf->trva_fault) - return -EFAULT; - - /* ASID to VM */ - mutex_lock(&xe->usm.lock); - vm = xa_load(&xe->usm.asid_to_vm, pf->asid); - if (vm && xe_vm_in_fault_mode(vm)) - xe_vm_get(vm); - else - vm = NULL; - mutex_unlock(&xe->usm.lock); - if (!vm) - return -EINVAL; - -retry_userptr: - /* - * TODO: Avoid exclusive lock if VM doesn't have userptrs, or - * start out read-locked? 
- */ - down_write(&vm->lock); - write_locked = true; - vma = lookup_vma(vm, pf->page_addr); - if (!vma) { - ret = -EINVAL; - goto unlock_vm; - } - - if (!xe_vma_is_userptr(vma) || - !xe_vma_userptr_check_repin(to_userptr_vma(vma))) { - downgrade_write(&vm->lock); - write_locked = false; - } - trace_xe_vma_pagefault(vma); - atomic = access_is_atomic(pf->access_type); /* Check if VMA is valid */ if (vma_is_valid(tile, vma) && !atomic) - goto unlock_vm; - - /* TODO: Validate fault */ + return 0; - if (xe_vma_is_userptr(vma) && write_locked) { +retry_userptr: + if (xe_vma_is_userptr(vma) && + xe_vma_userptr_check_repin(to_userptr_vma(vma))) { struct xe_userptr_vma *uvma = to_userptr_vma(vma); - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&uvma->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - - ret = xe_vma_userptr_pin_pages(uvma); - if (ret) - goto unlock_vm; - - downgrade_write(&vm->lock); - write_locked = false; + err = xe_vma_userptr_pin_pages(uvma); + if (err) + return err; } /* Lock VM and BOs dma-resv */ drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { - ret = xe_pf_begin(&exec, vma, atomic, tile->id); + err = xe_pf_begin(&exec, vma, atomic, tile->id); drm_exec_retry_on_contention(&exec); - if (ret) + if (xe_vm_validate_should_retry(&exec, err, &end)) + err = -EAGAIN; + if (err) goto unlock_dma_resv; /* Bind VMA only to the GT that has faulted */ trace_xe_vma_pf_bind(vma); fence = xe_vma_rebind(vm, vma, BIT(tile->id)); if (IS_ERR(fence)) { - ret = PTR_ERR(fence); + err = PTR_ERR(fence); + if (xe_vm_validate_should_retry(&exec, err, &end)) + err = -EAGAIN; goto unlock_dma_resv; } } - /* - * XXX: Should we drop the lock before waiting? This only helps if doing - * GPU binds which is currently only done if we have to wait for more - * than 10ms on a move. - */ dma_fence_wait(fence, false); dma_fence_put(fence); - - if (xe_vma_is_userptr(vma)) - ret = xe_vma_userptr_check_repin(to_userptr_vma(vma)); vma->tile_invalidated &= ~BIT(tile->id); unlock_dma_resv: drm_exec_fini(&exec); -unlock_vm: - if (!ret) - vm->usm.last_fault_vma = vma; - if (write_locked) - up_write(&vm->lock); - else - up_read(&vm->lock); - if (ret == -EAGAIN) + if (err == -EAGAIN) goto retry_userptr; - if (!ret) { - ret = xe_gt_tlb_invalidation_vma(gt, NULL, vma); - if (ret >= 0) - ret = 0; + return err; +} + +static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_vm *vm; + struct xe_vma *vma = NULL; + int err; + + /* SW isn't expected to handle TRTT faults */ + if (pf->trva_fault) + return -EFAULT; + + /* ASID to VM */ + mutex_lock(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, pf->asid); + if (vm && xe_vm_in_fault_mode(vm)) + xe_vm_get(vm); + else + vm = NULL; + mutex_unlock(&xe->usm.lock); + if (!vm) + return -EINVAL; + + /* + * TODO: Change to read lock? Using write lock for simplicity. 
+ */ + down_write(&vm->lock); + vma = lookup_vma(vm, pf->page_addr); + if (!vma) { + err = -EINVAL; + goto unlock_vm; } + + err = handle_vma_pagefault(tile, pf, vma); + +unlock_vm: + if (!err) + vm->usm.last_fault_vma = vma; + up_write(&vm->lock); xe_vm_put(vm); - return ret; + return err; } static int send_pagefault_reply(struct xe_guc *guc, diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index f49fc2917f93..694671497f6e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1290,6 +1290,9 @@ static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid) struct xe_tile *tile; unsigned int tid; + xe_assert(xe, IS_DGFX(xe)); + xe_assert(xe, IS_SRIOV_PF(xe)); + for_each_tile(tile, xe, tid) { lmtt = &tile->sriov.pf.lmtt; xe_lmtt_drop_pages(lmtt, vfid); @@ -1308,6 +1311,9 @@ static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid) unsigned int tid; int err; + xe_assert(xe, IS_DGFX(xe)); + xe_assert(xe, IS_SRIOV_PF(xe)); + total = 0; for_each_tile(tile, xe, tid) total += pf_get_vf_config_lmem(tile->primary_gt, vfid); @@ -1353,6 +1359,7 @@ fail: static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config) { + xe_gt_assert(gt, IS_DGFX(gt_to_xe(gt))); xe_gt_assert(gt, !xe_gt_is_media_type(gt)); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); @@ -1371,6 +1378,7 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) int err; xe_gt_assert(gt, vfid); + xe_gt_assert(gt, IS_DGFX(xe)); xe_gt_assert(gt, !xe_gt_is_media_type(gt)); size = round_up(size, pf_get_lmem_alignment(gt)); @@ -1838,11 +1846,14 @@ u32 xe_gt_sriov_pf_config_get_threshold(struct xe_gt *gt, unsigned int vfid, static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + struct xe_device *xe = gt_to_xe(gt); if (!xe_gt_is_media_type(gt)) { pf_release_vf_config_ggtt(gt, config); - pf_release_vf_config_lmem(gt, config); - pf_update_vf_lmtt(gt_to_xe(gt), vfid); + if (IS_DGFX(xe)) { + pf_release_vf_config_lmem(gt, config); + pf_update_vf_lmtt(xe, vfid); + } } pf_release_config_ctxs(gt, config); pf_release_config_dbs(gt, config); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 40b8f881fe04..ebf06e037750 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -130,6 +130,27 @@ int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid) } /** + * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid) +{ + int err; + + /* XXX pf_send_vf_flr_start() expects ct->lock */ + mutex_lock(>->uc.guc.ct.lock); + err = pf_send_vf_flr_start(gt, vfid); + mutex_unlock(>->uc.guc.ct.lock); + + return err; +} + +/** * DOC: The VF FLR Flow with GuC * * PF GUC PCI diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h index 850a3e37661f..405d1586f991 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h @@ -14,6 +14,7 @@ struct xe_gt; int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid); #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 105797776a6c..e1f1ccb01143 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -13,6 +13,7 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_mmio.h" +#include "xe_sriov.h" #include "xe_trace.h" #include "regs/xe_guc_regs.h" @@ -22,6 +23,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) { struct xe_gt *gt = container_of(work, struct xe_gt, tlb_invalidation.fence_tdr.work); + struct xe_device *xe = gt_to_xe(gt); struct xe_gt_tlb_invalidation_fence *fence, *next; spin_lock_irq(>->tlb_invalidation.pending_lock); @@ -33,7 +35,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT) break; - trace_xe_gt_tlb_invalidation_fence_timeout(fence); + trace_xe_gt_tlb_invalidation_fence_timeout(xe, fence); xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d", fence->seqno, gt->tlb_invalidation.seqno_recv); @@ -71,18 +73,18 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) } static void -__invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) +__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) { - trace_xe_gt_tlb_invalidation_fence_signal(fence); + trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); dma_fence_signal(&fence->base); dma_fence_put(&fence->base); } static void -invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) +invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) { list_del(&fence->link); - __invalidation_fence_signal(fence); + __invalidation_fence_signal(xe, fence); } /** @@ -121,7 +123,7 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) - invalidation_fence_signal(fence); + invalidation_fence_signal(gt_to_xe(gt), fence); spin_unlock_irq(>->tlb_invalidation.pending_lock); mutex_unlock(>->uc.guc.ct.lock); } @@ -144,6 +146,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, u32 *action, int len) { struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); int seqno; int ret; @@ -157,7 +160,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, seqno = gt->tlb_invalidation.seqno; if (fence) { fence->seqno = seqno; - trace_xe_gt_tlb_invalidation_fence_send(fence); + trace_xe_gt_tlb_invalidation_fence_send(xe, 
fence); } action[1] = seqno; ret = xe_guc_ct_send_locked(&guc->ct, action, len, @@ -171,7 +174,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, * we can just go ahead and signal the fence here. */ if (tlb_invalidation_seqno_past(gt, seqno)) { - __invalidation_fence_signal(fence); + __invalidation_fence_signal(xe, fence); } else { fence->invalidation_time = ktime_get(); list_add_tail(&fence->link, @@ -184,7 +187,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, } spin_unlock_irq(&gt->tlb_invalidation.pending_lock); } else if (ret < 0 && fence) { - __invalidation_fence_signal(fence); + __invalidation_fence_signal(xe, fence); } if (!ret) { gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) % @@ -247,6 +250,9 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) xe_gt_tlb_invalidation_wait(gt, seqno); } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { + if (IS_SRIOV_VF(xe)) + return 0; + xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, @@ -294,7 +300,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, /* Execlists not supported */ if (gt_to_xe(gt)->info.force_execlist) { if (fence) - __invalidation_fence_signal(fence); + __invalidation_fence_signal(xe, fence); return 0; } @@ -432,6 +438,7 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); struct xe_gt_tlb_invalidation_fence *fence, *next; unsigned long flags; @@ -468,12 +475,12 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) list_for_each_entry_safe(fence, next, &gt->tlb_invalidation.pending_fences, link) { - trace_xe_gt_tlb_invalidation_fence_recv(fence); + trace_xe_gt_tlb_invalidation_fence_recv(xe, fence); if (!tlb_invalidation_seqno_past(gt, fence->seqno)) break; - invalidation_fence_signal(fence); + invalidation_fence_signal(xe, fence); } if (!list_empty(&gt->tlb_invalidation.pending_fences)) diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 10a9a9529377..24bb95de920f 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -12,6 +12,7 @@ #include "xe_gt_sriov_vf_types.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" +#include "xe_oa.h" #include "xe_reg_sr_types.h" #include "xe_sa_types.h" #include "xe_uc_types.h" @@ -387,6 +388,9 @@ struct xe_gt { */ u8 instances_per_class[XE_ENGINE_CLASS_MAX]; } user_engines; + + /** @oa: oa perf counter subsystem per gt info */ + struct xe_oa_gt oa; }; #endif diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 0bf6e01b8910..eb655cee19f7 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -476,6 +476,9 @@ static void guc_prepare_xfer(struct xe_guc *guc) xe_mmio_write32(gt, GUC_SHIM_CONTROL, shim_flags); xe_mmio_write32(gt, GT_PM_CONFIG, GT_DOORBELL_ENABLE); + + /* Make sure GuC receives ARAT interrupts */ + xe_mmio_rmw32(gt, PMINTRMSK, ARAT_EXPIRED_INTRMSK, 0); } /* @@ -699,6 +702,9 @@ static int __xe_guc_upload(struct xe_guc *guc) { int ret; + /* Raise GT freq to speed up HuC/GuC load */ + xe_guc_pc_raise_unslice(&guc->pc); + guc_write_params(guc); guc_prepare_xfer(guc); @@ -784,7 +790,6 @@ int xe_guc_min_load_for_hwconfig(struct xe_guc *guc) xe_guc_ads_populate_minimal(&guc->ads); - /* Raise GT freq to speed up
HuC/GuC load */ xe_guc_pc_init_early(&guc->pc); ret = __xe_guc_upload(guc); @@ -854,8 +859,6 @@ int xe_guc_enable_communication(struct xe_guc *guc) struct xe_device *xe = guc_to_xe(guc); int err; - guc_enable_irq(guc); - if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)) { struct xe_gt *gt = guc_to_gt(guc); struct xe_tile *tile = gt_to_tile(gt); @@ -863,11 +866,10 @@ int xe_guc_enable_communication(struct xe_guc *guc) err = xe_memirq_init_guc(&tile->sriov.vf.memirq, guc); if (err) return err; + } else { + guc_enable_irq(guc); } - xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK, - ARAT_EXPIRED_INTRMSK, 0); - err = xe_guc_ct_enable(&guc->ct); if (err) return err; @@ -1094,7 +1096,7 @@ void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir) void xe_guc_sanitize(struct xe_guc *guc) { - xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); + xe_uc_fw_sanitize(&guc->fw); xe_guc_ct_disable(&guc->ct); guc->submission_state.enabled = false; } @@ -1111,7 +1113,13 @@ void xe_guc_reset_wait(struct xe_guc *guc) void xe_guc_stop_prepare(struct xe_guc *guc) { - XE_WARN_ON(xe_guc_pc_stop(&guc->pc)); + if (!IS_SRIOV_VF(guc_to_xe(guc))) { + int err; + + err = xe_guc_pc_stop(&guc->pc); + xe_gt_WARN(guc_to_gt(guc), err, "Failed to stop GuC PC: %pe\n", + ERR_PTR(err)); + } } void xe_guc_stop(struct xe_guc *guc) @@ -1123,10 +1131,13 @@ void xe_guc_stop(struct xe_guc *guc) int xe_guc_start(struct xe_guc *guc) { - int ret; + if (!IS_SRIOV_VF(guc_to_xe(guc))) { + int err; - ret = xe_guc_pc_start(&guc->pc); - XE_WARN_ON(ret); + err = xe_guc_pc_start(&guc->pc); + xe_gt_WARN(guc_to_gt(guc), err, "Failed to start GuC PC: %pe\n", + ERR_PTR(err)); + } return xe_guc_submit_start(guc); } diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index c1f258348f5c..873d1bcbedd7 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -29,7 +29,7 @@ #include "xe_guc_submit.h" #include "xe_map.h" #include "xe_pm.h" -#include "xe_trace.h" +#include "xe_trace_guc.h" /* Used when a CT send wants to block and / or receive data */ struct g2h_fence { @@ -126,7 +126,9 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) xa_destroy(&ct->fence_lookup); } +static void receive_g2h(struct xe_guc_ct *ct); static void g2h_worker_func(struct work_struct *w); +static void safe_mode_worker_func(struct work_struct *w); static void primelockdep(struct xe_guc_ct *ct) { @@ -155,6 +157,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) spin_lock_init(&ct->fast_lock); xa_init(&ct->fence_lookup); INIT_WORK(&ct->g2h_worker, g2h_worker_func); + INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func); init_waitqueue_head(&ct->wq); init_waitqueue_head(&ct->g2h_fence_wq); @@ -321,6 +324,42 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct, mutex_unlock(&ct->lock); } +static bool ct_needs_safe_mode(struct xe_guc_ct *ct) +{ + return !pci_dev_msi_enabled(to_pci_dev(ct_to_xe(ct)->drm.dev)); +} + +static bool ct_restart_safe_mode_worker(struct xe_guc_ct *ct) +{ + if (!ct_needs_safe_mode(ct)) + return false; + + queue_delayed_work(ct->g2h_wq, &ct->safe_mode_worker, HZ / 10); + return true; +} + +static void safe_mode_worker_func(struct work_struct *w) +{ + struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, safe_mode_worker.work); + + receive_g2h(ct); + + if (!ct_restart_safe_mode_worker(ct)) + xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode canceled\n"); +} + +static void ct_enter_safe_mode(struct xe_guc_ct *ct) +{ + if (ct_restart_safe_mode_worker(ct)) + xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode 
enabled\n"); +} + +static void ct_exit_safe_mode(struct xe_guc_ct *ct) +{ + if (cancel_delayed_work_sync(&ct->safe_mode_worker)) + xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode disabled\n"); +} + int xe_guc_ct_enable(struct xe_guc_ct *ct) { struct xe_device *xe = ct_to_xe(ct); @@ -350,6 +389,9 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) wake_up_all(&ct->wq); xe_gt_dbg(gt, "GuC CT communication channel enabled\n"); + if (ct_needs_safe_mode(ct)) + ct_enter_safe_mode(ct); + return 0; err_out: @@ -373,6 +415,7 @@ static void stop_g2h_handler(struct xe_guc_ct *ct) void xe_guc_ct_disable(struct xe_guc_ct *ct) { xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED); + ct_exit_safe_mode(ct); stop_g2h_handler(ct); } @@ -528,7 +571,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, /* Update descriptor */ desc_write(xe, h2g, tail, h2g->info.tail); - trace_xe_guc_ctb_h2g(gt->info.id, *(action - 1), full_len, + trace_xe_guc_ctb_h2g(xe, gt->info.id, *(action - 1), full_len, desc_read(xe, h2g, head), h2g->info.tail); return 0; @@ -641,6 +684,7 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence) { + struct xe_device *xe = ct_to_xe(ct); struct xe_gt *gt = ct_to_gt(ct); struct drm_printer p = xe_gt_info_printer(gt); unsigned int sleep_period_ms = 1; @@ -668,7 +712,7 @@ try_again: if (sleep_period_ms == 1024) goto broken; - trace_xe_guc_ct_h2g_flow_control(h2g->info.head, h2g->info.tail, + trace_xe_guc_ct_h2g_flow_control(xe, h2g->info.head, h2g->info.tail, h2g->info.size, h2g->info.space, len + GUC_CTB_HDR_LEN); @@ -680,7 +724,7 @@ try_again: struct xe_device *xe = ct_to_xe(ct); struct guc_ctb *g2h = &ct->ctbs.g2h; - trace_xe_guc_ct_g2h_flow_control(g2h->info.head, + trace_xe_guc_ct_g2h_flow_control(xe, g2h->info.head, desc_read(xe, g2h, tail), g2h->info.size, g2h->info.space, @@ -833,12 +877,12 @@ retry_same_fence: } if (g2h_fence.retry) { - xe_gt_warn(gt, "H2G retry, action 0x%04x, reason %u", - action[0], g2h_fence.reason); + xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n", + action[0], g2h_fence.reason); goto retry; } if (g2h_fence.fail) { - xe_gt_err(gt, "H2G send failed, action 0x%04x, error %d, hint %u", + xe_gt_err(gt, "H2G request %#x failed: error %#x hint %#x\n", action[0], g2h_fence.error, g2h_fence.hint); ret = -EIO; } @@ -1170,8 +1214,8 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) g2h->info.head = (head + avail) % g2h->info.size; desc_write(xe, g2h, head, g2h->info.head); - trace_xe_guc_ctb_g2h(ct_to_gt(ct)->info.id, action, len, - g2h->info.head, tail); + trace_xe_guc_ctb_g2h(xe, ct_to_gt(ct)->info.id, + action, len, g2h->info.head, tail); return len; } @@ -1260,9 +1304,8 @@ static int dequeue_one_g2h(struct xe_guc_ct *ct) return 1; } -static void g2h_worker_func(struct work_struct *w) +static void receive_g2h(struct xe_guc_ct *ct) { - struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker); struct xe_gt *gt = ct_to_gt(ct); bool ongoing; int ret; @@ -1311,6 +1354,13 @@ static void g2h_worker_func(struct work_struct *w) xe_pm_runtime_put(ct_to_xe(ct)); } +static void g2h_worker_func(struct work_struct *w) +{ + struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker); + + receive_g2h(ct); +} + static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb, struct guc_ctb_snapshot *snapshot, bool atomic) diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index 
fede4c6e93cb..761cb9031298 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -110,6 +110,8 @@ struct xe_guc_ct { u32 g2h_outstanding; /** @g2h_worker: worker to process G2H messages */ struct work_struct g2h_worker; + /** @safe_mode_worker: worker to check G2H messages with IRQ disabled */ + struct delayed_work safe_mode_worker; /** @state: CT state */ enum xe_guc_ct_state state; /** @fence_seqno: G2H fence seqno - 16 bits used by CT */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 508f0d39b4ad..d88f5e960fbd 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -8,8 +8,8 @@ #include <linux/delay.h> #include <drm/drm_managed.h> +#include <generated/xe_wa_oob.h> -#include "abi/guc_actions_abi.h" #include "abi/guc_actions_slpc_abi.h" #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" @@ -18,12 +18,15 @@ #include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_idle.h" -#include "xe_gt_sysfs.h" +#include "xe_gt_printk.h" #include "xe_gt_types.h" +#include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_pcode.h" +#include "xe_pm.h" +#include "xe_wa.h" #define MCHBAR_MIRROR_BASE_SNB 0x140000 @@ -41,6 +44,8 @@ #define GT_FREQUENCY_MULTIPLIER 50 #define GT_FREQUENCY_SCALER 3 +#define LNL_MERT_FREQ_CAP 800 + /** * DOC: GuC Power Conservation (PC) * @@ -67,29 +72,27 @@ * */ -static struct xe_guc * -pc_to_guc(struct xe_guc_pc *pc) +static struct xe_guc *pc_to_guc(struct xe_guc_pc *pc) { return container_of(pc, struct xe_guc, pc); } -static struct xe_device * -pc_to_xe(struct xe_guc_pc *pc) +static struct xe_guc_ct *pc_to_ct(struct xe_guc_pc *pc) { - struct xe_guc *guc = pc_to_guc(pc); - struct xe_gt *gt = container_of(guc, struct xe_gt, uc.guc); + return &pc_to_guc(pc)->ct; +} - return gt_to_xe(gt); +static struct xe_gt *pc_to_gt(struct xe_guc_pc *pc) +{ + return guc_to_gt(pc_to_guc(pc)); } -static struct xe_gt * -pc_to_gt(struct xe_guc_pc *pc) +static struct xe_device *pc_to_xe(struct xe_guc_pc *pc) { - return container_of(pc, struct xe_gt, uc.guc.pc); + return guc_to_xe(pc_to_guc(pc)); } -static struct iosys_map * -pc_to_maps(struct xe_guc_pc *pc) +static struct iosys_map *pc_to_maps(struct xe_guc_pc *pc) { return &pc->bo->vmap; } @@ -130,32 +133,33 @@ static int wait_for_pc_state(struct xe_guc_pc *pc, static int pc_action_reset(struct xe_guc_pc *pc) { - struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; - int ret; + struct xe_guc_ct *ct = pc_to_ct(pc); u32 action[] = { GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, SLPC_EVENT(SLPC_EVENT_RESET, 2), xe_bo_ggtt_addr(pc->bo), 0, }; + int ret; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); if (ret) - drm_err(&pc_to_xe(pc)->drm, "GuC PC reset: %pe", ERR_PTR(ret)); + xe_gt_err(pc_to_gt(pc), "GuC PC reset failed: %pe\n", + ERR_PTR(ret)); return ret; } static int pc_action_query_task_state(struct xe_guc_pc *pc) { - struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; - int ret; + struct xe_guc_ct *ct = pc_to_ct(pc); u32 action[] = { GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2), xe_bo_ggtt_addr(pc->bo), 0, }; + int ret; if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) return -EAGAIN; @@ -163,47 +167,68 @@ static int pc_action_query_task_state(struct xe_guc_pc *pc) /* Blocking here to ensure the results are ready before reading them */ ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action)); if (ret) - drm_err(&pc_to_xe(pc)->drm, - "GuC PC query task state failed: %pe", 
ERR_PTR(ret)); + xe_gt_err(pc_to_gt(pc), "GuC PC query task state failed: %pe\n", + ERR_PTR(ret)); return ret; } static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) { - struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; - int ret; + struct xe_guc_ct *ct = pc_to_ct(pc); u32 action[] = { GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), id, value, }; + int ret; if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) return -EAGAIN; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); if (ret) - drm_err(&pc_to_xe(pc)->drm, "GuC PC set param failed: %pe", - ERR_PTR(ret)); + xe_gt_err(pc_to_gt(pc), "GuC PC set param[%u]=%u failed: %pe\n", + id, value, ERR_PTR(ret)); return ret; } -static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) +static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id) { + u32 action[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1), + id, + }; struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + int ret; + + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + return -EAGAIN; + + ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); + if (ret) + xe_gt_err(pc_to_gt(pc), "GuC PC unset param failed: %pe\n", + ERR_PTR(ret)); + + return ret; +} + +static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) +{ + struct xe_guc_ct *ct = pc_to_ct(pc); u32 action[] = { - XE_GUC_ACTION_SETUP_PC_GUCRC, + GUC_ACTION_HOST2GUC_SETUP_PC_GUCRC, mode, }; int ret; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); if (ret) - drm_err(&pc_to_xe(pc)->drm, "GuC RC enable failed: %pe", - ERR_PTR(ret)); + xe_gt_err(pc_to_gt(pc), "GuC RC enable mode=%u failed: %pe\n", + mode, ERR_PTR(ret)); return ret; } @@ -674,18 +699,39 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc) tgl_init_fused_rp_values(pc); } +static u32 pc_max_freq_cap(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + + if (XE_WA(gt, 22019338487)) + return min(LNL_MERT_FREQ_CAP, pc->rp0_freq); + else + return pc->rp0_freq; +} + /** - * xe_guc_pc_init_early - Initialize RPx values and request a higher GT + * xe_guc_pc_raise_unslice - Request a higher GT * frequency to allow faster GuC load times * @pc: Xe_GuC_PC instance */ +void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + pc_set_cur_freq(pc, pc_max_freq_cap(pc)); +} + +/** + * xe_guc_pc_init_early - Initialize RPx values + * @pc: Xe_GuC_PC instance + */ void xe_guc_pc_init_early(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); pc_init_fused_rp_values(pc); - pc_set_cur_freq(pc, pc->rp0_freq); } static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) @@ -741,6 +787,53 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) return ret; } +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +{ + int ret = 0; + + if (XE_WA(pc_to_gt(pc), 22019338487)) { + /* + * Get updated min/max and stash them. + */ + ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq); + if (!ret) + ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq); + if (ret) + return ret; + + /* + * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
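+ * With the 22019338487 workaround active this limits both bounds to at most LNL_MERT_FREQ_CAP (800 MHz); the stashed values are put back by xe_guc_pc_restore_stashed_freq() once load is done.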
+ */ + mutex_lock(&pc->freq_lock); + ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); + if (!ret) + ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); + mutex_unlock(&pc->freq_lock); + } + + return ret; +} + +/** + * xe_guc_pc_restore_stashed_freq - Set min/max back to stashed values + * @pc: The GuC PC + * + * Returns: 0 on success, + * error code on failure + */ +int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc) +{ + int ret = 0; + + mutex_lock(&pc->freq_lock); + ret = pc_set_max_freq(pc, pc->stashed_max_freq); + if (!ret) + ret = pc_set_min_freq(pc, pc->stashed_min_freq); + mutex_unlock(&pc->freq_lock); + + return ret; +} + /** * xe_guc_pc_gucrc_disable - Disable GuC RC * @pc: Xe_GuC_PC instance @@ -758,7 +851,7 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) if (xe->info.skip_guc_pc) return 0; - ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL); + ret = pc_action_setup_gucrc(pc, GUCRC_HOST_CONTROL); if (ret) return ret; @@ -773,6 +866,41 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) return 0; } +/** + * xe_guc_pc_override_gucrc_mode - override GUCRC mode + * @pc: Xe_GuC_PC instance + * @mode: new value of the mode. + * + * Return: 0 on success, negative error code on error + */ +int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mode) +{ + int ret; + + xe_pm_runtime_get(pc_to_xe(pc)); + ret = pc_action_set_param(pc, SLPC_PARAM_PWRGATE_RC_MODE, mode); + xe_pm_runtime_put(pc_to_xe(pc)); + + return ret; +} + +/** + * xe_guc_pc_unset_gucrc_mode - unset GUCRC mode override + * @pc: Xe_GuC_PC instance + * + * Return: 0 on success, negative error code on error + */ +int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc) +{ + int ret; + + xe_pm_runtime_get(pc_to_xe(pc)); + ret = pc_action_unset_param(pc, SLPC_PARAM_PWRGATE_RC_MODE); + xe_pm_runtime_put(pc_to_xe(pc)); + + return ret; +} + static void pc_init_pcode_freq(struct xe_guc_pc *pc) { u32 min = DIV_ROUND_CLOSEST(pc->rpn_freq, GT_FREQUENCY_MULTIPLIER); @@ -846,7 +974,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) goto out; if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) { - drm_err(&pc_to_xe(pc)->drm, "GuC PC Start failed\n"); + xe_gt_err(gt, "GuC PC Start failed\n"); ret = -EIO; goto out; } @@ -855,13 +983,17 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) if (ret) goto out; + ret = pc_set_mert_freq_cap(pc); + if (ret) + goto out; + if (xe->info.platform == XE_PVC) { xe_guc_pc_gucrc_disable(pc); ret = 0; goto out; } - ret = pc_action_setup_gucrc(pc, XE_GUCRC_FIRMWARE_CONTROL); + ret = pc_action_setup_gucrc(pc, GUCRC_FIRMWARE_CONTROL); out: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); @@ -903,6 +1035,10 @@ static void xe_guc_pc_fini_hw(void *arg) XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); XE_WARN_ON(xe_guc_pc_stop(pc)); + + /* Bind requested freq to mert_freq_cap before unload */ + pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq)); + xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 532cac985a6d..efda432fadfc 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -9,11 +9,14 @@ #include <linux/types.h> struct xe_guc_pc; +enum slpc_gucrc_mode; int xe_guc_pc_init(struct xe_guc_pc *pc); int xe_guc_pc_start(struct xe_guc_pc *pc); int xe_guc_pc_stop(struct xe_guc_pc *pc); int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc); +int 
xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mode); +int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc); u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc); int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq); @@ -29,5 +32,7 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc); u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc); u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); void xe_guc_pc_init_early(struct xe_guc_pc *pc); +int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc); +void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 2afd0dbc3542..13810be015db 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -25,6 +25,10 @@ struct xe_guc_pc { u32 user_requested_min; /** @user_requested_max: Stash the maximum requested freq by user */ u32 user_requested_max; + /** @stashed_min_freq: Stash the current minimum freq */ + u32 stashed_min_freq; + /** @stashed_max_freq: Stash the current maximum freq */ + u32 stashed_max_freq; /** @freq_lock: Let's protect the frequencies */ struct mutex freq_lock; /** @freq_ready: Only handle freq changes, if they are really ready */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 47aab04cf34f..373447758a60 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -10,6 +10,7 @@ #include <linux/circ_buf.h> #include <linux/delay.h> #include <linux/dma-fence-array.h> +#include <linux/math64.h> #include <drm/drm_managed.h> @@ -23,6 +24,7 @@ #include "xe_force_wake.h" #include "xe_gpu_scheduler.h" #include "xe_gt.h" +#include "xe_gt_clock.h" #include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_guc_ct.h" @@ -61,6 +63,9 @@ exec_queue_to_guc(struct xe_exec_queue *q) #define EXEC_QUEUE_STATE_RESET (1 << 6) #define EXEC_QUEUE_STATE_KILLED (1 << 7) #define EXEC_QUEUE_STATE_WEDGED (1 << 8) +#define EXEC_QUEUE_STATE_BANNED (1 << 9) +#define EXEC_QUEUE_STATE_CHECK_TIMEOUT (1 << 10) +#define EXEC_QUEUE_STATE_EXTRA_REF (1 << 11) static bool exec_queue_registered(struct xe_exec_queue *q) { @@ -134,12 +139,12 @@ static void set_exec_queue_destroyed(struct xe_exec_queue *q) static bool exec_queue_banned(struct xe_exec_queue *q) { - return (q->flags & EXEC_QUEUE_FLAG_BANNED); + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED; } static void set_exec_queue_banned(struct xe_exec_queue *q) { - q->flags |= EXEC_QUEUE_FLAG_BANNED; + atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state); } static bool exec_queue_suspended(struct xe_exec_queue *q) @@ -187,10 +192,36 @@ static void set_exec_queue_wedged(struct xe_exec_queue *q) atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state); } +static bool exec_queue_check_timeout(struct xe_exec_queue *q) +{ + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT; +} + +static void set_exec_queue_check_timeout(struct xe_exec_queue *q) +{ + atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); +} + +static void clear_exec_queue_check_timeout(struct xe_exec_queue *q) +{ + atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); +} + +static bool exec_queue_extra_ref(struct xe_exec_queue *q) +{ + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF; +} + +static void set_exec_queue_extra_ref(struct xe_exec_queue *q) +{ + atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); +} + static bool 
exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) { - return exec_queue_banned(q) || (atomic_read(&q->guc->state) & - (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED)); + return (atomic_read(&q->guc->state) & + (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED | + EXEC_QUEUE_STATE_BANNED)); } #ifdef CONFIG_PROVE_LOCKING @@ -918,6 +949,109 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) xe_sched_submission_start(sched); } +#define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100) + +static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) +{ + struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q)); + u32 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]); + u32 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); + u32 timeout_ms = q->sched_props.job_timeout_ms; + u32 diff; + u64 running_time_ms; + + /* + * Counter wraps at ~223s at the usual 19.2MHz, so be paranoid and catch + * possible overflows with a high timeout. + */ + xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC); + + if (ctx_timestamp < ctx_job_timestamp) + diff = ctx_timestamp + U32_MAX - ctx_job_timestamp; + else + diff = ctx_timestamp - ctx_job_timestamp; + + /* + * Pad the running time by 5% to account for any GuC scheduling latency + */ + running_time_ms = + ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff)); + + xe_gt_dbg(gt, + "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x", + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), + q->guc->id, running_time_ms, timeout_ms, diff); + + return running_time_ms >= timeout_ms; +} + +static void enable_scheduling(struct xe_exec_queue *q) +{ + MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); + struct xe_guc *guc = exec_queue_to_guc(q); + int ret; + + xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); + + set_exec_queue_pending_enable(q); + set_exec_queue_enabled(q); + trace_xe_exec_queue_scheduling_enable(q); + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); + + ret = wait_event_timeout(guc->ct.wq, + !exec_queue_pending_enable(q) || + guc_read_stopped(guc), HZ * 5); + if (!ret || guc_read_stopped(guc)) { + xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond"); + set_exec_queue_banned(q); + xe_gt_reset_async(q->gt); + xe_sched_tdr_queue_imm(&q->guc->sched); + } +} + +static void disable_scheduling(struct xe_exec_queue *q, bool immediate) +{ + MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); + struct xe_guc *guc = exec_queue_to_guc(q); + + xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); + + if (immediate) + set_min_preemption_timeout(guc, q); + clear_exec_queue_enabled(q); + set_exec_queue_pending_disable(q); + trace_xe_exec_queue_scheduling_disable(q); + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); +} + +static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) +{ + u32 action[] = { + XE_GUC_ACTION_DEREGISTER_CONTEXT, + q->guc->id, + }; + + xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); + xe_gt_assert(guc_to_gt(guc),
!exec_queue_pending_disable(q)); + + set_exec_queue_destroyed(q); + trace_xe_exec_queue_deregister(q); + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_DEREGISTER_CONTEXT, 1); +} + static enum drm_gpu_sched_stat guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) { @@ -925,10 +1059,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) struct xe_sched_job *tmp_job; struct xe_exec_queue *q = job->q; struct xe_gpu_scheduler *sched = &q->guc->sched; - struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q)); + struct xe_guc *guc = exec_queue_to_guc(q); int err = -ETIME; int i = 0; - bool wedged; + bool wedged, skip_timeout_check; /* * TDR has fired before free job worker. Common if exec queue @@ -940,49 +1074,53 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) return DRM_GPU_SCHED_STAT_NOMINAL; } - drm_notice(&xe->drm, "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", - xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), - q->guc->id, q->flags); - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, - "Kernel-submitted job timed out\n"); - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), - "VM job timed out on non-killed execqueue\n"); - - if (!exec_queue_killed(q)) - xe_devcoredump(job); - - trace_xe_sched_job_timedout(job); - - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); - /* Kill the run_job entry point */ xe_sched_submission_stop(sched); + /* Must check all state after stopping scheduler */ + skip_timeout_check = exec_queue_reset(q) || + exec_queue_killed_or_banned_or_wedged(q) || + exec_queue_destroyed(q); + + /* Job hasn't started, can't be timed out */ + if (!skip_timeout_check && !xe_sched_job_started(job)) + goto rearm; + /* - * Kernel jobs should never fail, nor should VM jobs if they do - * somethings has gone wrong and the GT needs a reset + * XXX: Sampling timeout doesn't work in wedged mode as we have to + * modify scheduling state to read timestamp. We could read the + * timestamp from a register to accumulate current running time but this + * doesn't work for SRIOV. For now assuming timeouts in wedged mode are + * genuine timeouts. */ - if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL || - (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) { - if (!xe_sched_invalidate_job(job, 2)) { - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); - xe_gt_reset_async(q->gt); - goto out; - } - } + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); - /* Engine state now stable, disable scheduling if needed */ + /* Engine state now stable, disable scheduling to check timestamp */ if (!wedged && exec_queue_registered(q)) { - struct xe_guc *guc = exec_queue_to_guc(q); int ret; if (exec_queue_reset(q)) err = -EIO; - set_exec_queue_banned(q); + if (!exec_queue_destroyed(q)) { - xe_exec_queue_get(q); - disable_scheduling_deregister(guc, q); + /* + * Wait for any pending G2H to flush out before + * modifying state + */ + ret = wait_event_timeout(guc->ct.wq, + !exec_queue_pending_enable(q) || + guc_read_stopped(guc), HZ * 5); + if (!ret || guc_read_stopped(guc)) + goto trigger_reset; + + /* + * Flag communicates to G2H handler that schedule + * disable originated from a timeout check. The G2H handler then + * avoids triggering cleanup or deregistering the exec + * queue.
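+ * The TDR clears the flag again via clear_exec_queue_check_timeout() once it has decided whether the job really timed out.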
+ */ + set_exec_queue_check_timeout(q); + disable_scheduling(q, skip_timeout_check); } /* @@ -998,15 +1136,60 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) !exec_queue_pending_disable(q) || guc_read_stopped(guc), HZ * 5); if (!ret || guc_read_stopped(guc)) { - drm_warn(&xe->drm, "Schedule disable failed to respond"); - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); +trigger_reset: + if (!ret) + xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond"); + set_exec_queue_extra_ref(q); + xe_exec_queue_get(q); /* GT reset owns this */ + set_exec_queue_banned(q); xe_gt_reset_async(q->gt); xe_sched_tdr_queue_imm(sched); - goto out; + goto rearm; } } + /* + * Check if job is actually timed out, if not restart job execution and TDR + */ + if (!wedged && !skip_timeout_check && !check_timeout(q, job) && + !exec_queue_reset(q) && exec_queue_registered(q)) { + clear_exec_queue_check_timeout(q); + goto sched_enable; + } + + xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), + q->guc->id, q->flags); + trace_xe_sched_job_timedout(job); + + if (!exec_queue_killed(q)) + xe_devcoredump(job); + + /* + * Kernel jobs should never fail, nor should VM jobs; if they do, + * something has gone wrong and the GT needs a reset + */ + xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, + "Kernel-submitted job timed out\n"); + xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), + "VM job timed out on non-killed execqueue\n"); + if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL || + (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) { + if (!xe_sched_invalidate_job(job, 2)) { + clear_exec_queue_check_timeout(q); + xe_gt_reset_async(q->gt); + goto rearm; + } + } + + /* Finish cleaning up exec queue via deregister */ + set_exec_queue_banned(q); + if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) { + set_exec_queue_extra_ref(q); + xe_exec_queue_get(q); + __deregister_exec_queue(guc, q); + } + /* Stop fence signaling */ xe_hw_fence_irq_stop(q->fence_irq); @@ -1028,7 +1211,19 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) /* Start fence signaling */ xe_hw_fence_irq_start(q->fence_irq); -out: + return DRM_GPU_SCHED_STAT_NOMINAL; + +sched_enable: + enable_scheduling(q); +rearm: + /* + * XXX: Ideally want to adjust timeout based on current execution time + * but there is not currently an easy way to do so in the DRM scheduler. With + * some thought, do this in a follow up.
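+ * Rearming puts the job back on the pending list and restarts submission, so a job that is genuinely stuck simply trips the TDR again after another full timeout period.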
+ */ + xe_sched_add_pending_job(sched, job); + xe_sched_submission_start(sched); + return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -1131,7 +1326,6 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) guc_read_stopped(guc)); if (!guc_read_stopped(guc)) { - MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); s64 since_resume_ms = ktime_ms_delta(ktime_get(), q->guc->resume_time); @@ -1142,12 +1336,7 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) msleep(wait_ms); set_exec_queue_suspended(q); - clear_exec_queue_enabled(q); - set_exec_queue_pending_disable(q); - trace_xe_exec_queue_scheduling_disable(q); - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); + disable_scheduling(q, false); } } else if (q->guc->suspend_pending) { set_exec_queue_suspended(q); @@ -1158,19 +1347,11 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) { struct xe_exec_queue *q = msg->private_data; - struct xe_guc *guc = exec_queue_to_guc(q); if (guc_exec_queue_allowed_to_change_state(q)) { - MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); - q->guc->resume_time = RESUME_PENDING; clear_exec_queue_suspended(q); - set_exec_queue_pending_enable(q); - set_exec_queue_enabled(q); - trace_xe_exec_queue_scheduling_enable(q); - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); + enable_scheduling(q); } else { clear_exec_queue_suspended(q); } @@ -1432,8 +1613,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) /* Clean up lost G2H + reset engine state */ if (exec_queue_registered(q)) { - if ((exec_queue_banned(q) && exec_queue_destroyed(q)) || - xe_exec_queue_is_lr(q)) + if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) xe_exec_queue_put(q); else if (exec_queue_destroyed(q)) __guc_exec_queue_fini(guc, q); @@ -1442,7 +1622,9 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) set_exec_queue_suspended(q); suspend_fence_signal(q); } - atomic_and(EXEC_QUEUE_STATE_DESTROYED | EXEC_QUEUE_STATE_SUSPENDED, + atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED | + EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED | + EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); q->guc->resume_time = 0; trace_xe_exec_queue_stop(q); @@ -1585,30 +1767,44 @@ static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) q->guc->id, }; + xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); + trace_xe_exec_queue_deregister(q); xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); } -static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q) +static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, + u32 runnable_state) { trace_xe_exec_queue_scheduling_done(q); - if (exec_queue_pending_enable(q)) { + if (runnable_state == 1) { + xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q)); + q->guc->resume_time = ktime_get(); clear_exec_queue_pending_enable(q); smp_wmb(); wake_up_all(&guc->ct.wq); } else { + bool check_timeout = exec_queue_check_timeout(q); + + xe_gt_assert(guc_to_gt(guc), runnable_state == 0); + xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q)); + clear_exec_queue_pending_disable(q); if 
(q->guc->suspend_pending) { suspend_fence_signal(q); } else { - if (exec_queue_banned(q)) { + if (exec_queue_banned(q) || check_timeout) { smp_wmb(); wake_up_all(&guc->ct.wq); } - deregister_exec_queue(guc, q); + if (!check_timeout) + deregister_exec_queue(guc, q); } } } @@ -1618,6 +1814,7 @@ int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; u32 guc_id = msg[0]; + u32 runnable_state = msg[1]; if (unlikely(len < 2)) { drm_err(&xe->drm, "Invalid length %u", len); @@ -1630,12 +1827,14 @@ int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) if (unlikely(!exec_queue_pending_enable(q) && !exec_queue_pending_disable(q))) { - drm_err(&xe->drm, "Unexpected engine state 0x%04x", - atomic_read(&q->guc->state)); + xe_gt_err(guc_to_gt(guc), + "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u", + atomic_read(&q->guc->state), q->guc->id, + runnable_state); return -EPROTO; } - handle_sched_done(guc, q); + handle_sched_done(guc, q, runnable_state); return 0; } @@ -1646,7 +1845,7 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) clear_exec_queue_registered(q); - if (exec_queue_banned(q) || xe_exec_queue_is_lr(q)) + if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) xe_exec_queue_put(q); else __guc_exec_queue_fini(guc, q); @@ -1669,8 +1868,9 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || exec_queue_pending_enable(q) || exec_queue_enabled(q)) { - drm_err(&xe->drm, "Unexpected engine state 0x%04x", - atomic_read(&q->guc->state)); + xe_gt_err(guc_to_gt(guc), + "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d", + atomic_read(&q->guc->state), q->guc->id); return -EPROTO; } @@ -1709,7 +1909,7 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) * guc_exec_queue_timedout_job. 
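+ * If CHECK_TIMEOUT is set, the TDR is already handling this queue and will observe the reset bit itself, so cleanup is not triggered from here.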
*/ set_exec_queue_reset(q); - if (!exec_queue_banned(q)) + if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) xe_guc_exec_queue_trigger_cleanup(q); return 0; @@ -1739,7 +1939,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, /* Treat the same as engine reset */ set_exec_queue_reset(q); - if (!exec_queue_banned(q)) + if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) xe_guc_exec_queue_trigger_cleanup(q); return 0; diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index b039ff49341b..bec4366e5513 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -18,9 +18,11 @@ #include "xe_force_wake.h" #include "xe_gsc_submit.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_map.h" #include "xe_mmio.h" +#include "xe_sriov.h" #include "xe_uc_fw.h" static struct xe_gt * @@ -92,6 +94,9 @@ int xe_huc_init(struct xe_huc *huc) if (!xe_uc_fw_is_enabled(&huc->fw)) return 0; + if (IS_SRIOV_VF(xe)) + return 0; + if (huc->fw.has_gsc_headers) { ret = huc_alloc_gsc_pkt(huc); if (ret) @@ -103,7 +108,7 @@ int xe_huc_init(struct xe_huc *huc) return 0; out: - drm_err(&xe->drm, "HuC init failed with %d", ret); + xe_gt_err(gt, "HuC: initialization failed: %pe\n", ERR_PTR(ret)); return ret; } @@ -191,14 +196,14 @@ static int huc_auth_via_gsccs(struct xe_huc *huc) } while (--retry && err == -EBUSY); if (err) { - drm_err(&xe->drm, "failed to submit GSC request to auth: %d\n", err); + xe_gt_err(gt, "HuC: failed to submit GSC request to auth: %pe\n", ERR_PTR(err)); return err; } err = xe_gsc_read_out_header(xe, &pkt->vmap, PXP43_HUC_AUTH_INOUT_SIZE, sizeof(struct pxp43_huc_auth_out), &rd_offset); if (err) { - drm_err(&xe->drm, "HuC: invalid GSC reply for auth (err=%d)\n", err); + xe_gt_err(gt, "HuC: invalid GSC reply for auth: %pe\n", ERR_PTR(err)); return err; } @@ -209,7 +214,7 @@ static int huc_auth_via_gsccs(struct xe_huc *huc) */ out_status = huc_auth_msg_rd(xe, &pkt->vmap, rd_offset, header.status); if (out_status != PXP_STATUS_SUCCESS && out_status != PXP_STATUS_OP_NOT_PERMITTED) { - drm_err(&xe->drm, "auth failed with GSC error = 0x%x\n", out_status); + xe_gt_err(gt, "HuC: authentication failed with GSC error = %#x\n", out_status); return -EIO; } @@ -238,7 +243,6 @@ bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type) int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type) { - struct xe_device *xe = huc_to_xe(huc); struct xe_gt *gt = huc_to_gt(huc); struct xe_guc *guc = huc_to_guc(huc); int ret; @@ -268,26 +272,26 @@ int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type) return -EINVAL; } if (ret) { - drm_err(&xe->drm, "Failed to trigger HuC auth via %s: %d\n", - huc_auth_modes[type].name, ret); + xe_gt_err(gt, "HuC: failed to trigger auth via %s: %pe\n", + huc_auth_modes[type].name, ERR_PTR(ret)); goto fail; } ret = xe_mmio_wait32(gt, huc_auth_modes[type].reg, huc_auth_modes[type].val, huc_auth_modes[type].val, 100000, NULL, false); if (ret) { - drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret); + xe_gt_err(gt, "HuC: firmware not verified: %pe\n", ERR_PTR(ret)); goto fail; } xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING); - drm_dbg(&xe->drm, "HuC authenticated via %s\n", huc_auth_modes[type].name); + xe_gt_dbg(gt, "HuC: authenticated via %s\n", huc_auth_modes[type].name); return 0; fail: - drm_err(&xe->drm, "HuC: Auth via %s failed: %d\n", - huc_auth_modes[type].name, ret); + xe_gt_err(gt, "HuC: authentication via %s failed: %pe\n", 
+ huc_auth_modes[type].name, ERR_PTR(ret)); xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOAD_FAIL); return ret; @@ -295,9 +299,7 @@ fail: void xe_huc_sanitize(struct xe_huc *huc) { - if (!xe_uc_fw_is_loadable(&huc->fw)) - return; - xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE); + xe_uc_fw_sanitize(&huc->fw); } void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 0a83506e1ad8..78b50d3a6501 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -26,6 +26,7 @@ #include "xe_macros.h" #include "xe_mmio.h" #include "xe_reg_sr.h" +#include "xe_reg_whitelist.h" #include "xe_rtp.h" #include "xe_sched_job.h" #include "xe_sriov.h" @@ -546,7 +547,8 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, if (hwe->class == XE_ENGINE_CLASS_OTHER) hwe->irq_handler = xe_gsc_hwe_irq_handler; - xe_hw_engine_enable_ring(hwe); + if (!IS_SRIOV_VF(xe)) + xe_hw_engine_enable_ring(hwe); } /* We reserve the highest BCS instance for USM */ diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 580bbd7e83b2..70e6434f150d 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -148,6 +148,8 @@ struct xe_hw_engine { enum xe_hw_engine_id engine_id; /** @eclass: pointer to per hw engine class interface */ struct xe_hw_engine_class_intf *eclass; + /** @oa_unit: oa unit for this hw engine */ + struct xe_oa_unit *oa_unit; }; /** diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 8ee3c300c5e4..85733f993d09 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -134,6 +134,9 @@ void xe_irq_enable_hwe(struct xe_gt *gt) u32 gsc_mask = 0; u32 heci_mask = 0; + if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)) + return; + if (xe_device_uc_enabled(xe)) { irqs = GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; @@ -735,11 +738,6 @@ free_irq_handler: return err; } -void xe_irq_shutdown(struct xe_device *xe) -{ - irq_uninstall(xe); -} - void xe_irq_suspend(struct xe_device *xe) { int irq = to_pci_dev(xe->drm.dev)->irq; diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h index bc42bc90d967..067514e13675 100644 --- a/drivers/gpu/drm/xe/xe_irq.h +++ b/drivers/gpu/drm/xe/xe_irq.h @@ -11,7 +11,6 @@ struct xe_tile; struct xe_gt; int xe_irq_install(struct xe_device *xe); -void xe_irq_shutdown(struct xe_device *xe); void xe_irq_suspend(struct xe_device *xe); void xe_irq_resume(struct xe_device *xe); void xe_irq_enable_hwe(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index c1bb85d2e243..94ff62e1d95e 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -49,6 +49,8 @@ struct xe_lrc_snapshot { } tail; u32 start_seqno; u32 seqno; + u32 ctx_timestamp; + u32 ctx_job_timestamp; }; static struct xe_device * @@ -649,12 +651,19 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) /* Make the magic macros work */ #define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset +#define __xe_lrc_regs_offset xe_lrc_regs_offset #define LRC_SEQNO_PPHWSP_OFFSET 512 #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) +#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) #define LRC_PARALLEL_PPHWSP_OFFSET 2048 #define LRC_PPHWSP_SIZE SZ_4K +u32 xe_lrc_regs_offset(struct xe_lrc *lrc) +{ + return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; 
+} + static size_t lrc_reg_size(struct xe_device *xe) { if (GRAPHICS_VERx100(xe) >= 1250) @@ -680,15 +689,21 @@ static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET; } +static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) +{ + /* The job timestamp is stored in the driver-defined portion of PPHWSP */ + return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; +} + static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) { /* The parallel is stored in the driver-defined portion of PPHWSP */ return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; } -static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc) +static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc) { - return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; + return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32); } static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) @@ -716,11 +731,65 @@ DECL_MAP_ADDR_HELPERS(pphwsp) DECL_MAP_ADDR_HELPERS(seqno) DECL_MAP_ADDR_HELPERS(regs) DECL_MAP_ADDR_HELPERS(start_seqno) +DECL_MAP_ADDR_HELPERS(ctx_job_timestamp) +DECL_MAP_ADDR_HELPERS(ctx_timestamp) DECL_MAP_ADDR_HELPERS(parallel) DECL_MAP_ADDR_HELPERS(indirect_ring) #undef DECL_MAP_ADDR_HELPERS +/** + * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address + * @lrc: Pointer to the lrc. + * + * Returns: ctx timestamp GGTT address + */ +u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_ctx_timestamp_ggtt_addr(lrc); +} + +/** + * xe_lrc_ctx_timestamp() - Read ctx timestamp value + * @lrc: Pointer to the lrc. + * + * Returns: ctx timestamp value + */ +u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_ctx_timestamp_map(lrc); + return xe_map_read32(xe, &map); +} + +/** + * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address + * @lrc: Pointer to the lrc. + * + * Returns: ctx job timestamp GGTT address + */ +u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); +} + +/** + * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value + * @lrc: Pointer to the lrc.
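+ * + * The job timestamp is a copy of CTX_TIMESTAMP kept in the driver-defined portion of the PPHWSP, presumably written when the job starts; check_timeout() compares it against the live ctx timestamp to work out how long a job has actually been running.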
+ * + * Returns: ctx job timestamp value + */ +u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_ctx_job_timestamp_map(lrc); + return xe_map_read32(xe, &map); +} + u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) { return __xe_lrc_pphwsp_ggtt_addr(lrc); @@ -1576,6 +1645,8 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; snapshot->lrc_snapshot = NULL; + snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); + snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); return snapshot; } @@ -1624,6 +1695,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer snapshot->tail.internal, snapshot->tail.memory); drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); + drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp); + drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp); if (!snapshot->lrc_snapshot) return; @@ -1659,11 +1732,21 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) kfree(snapshot); } +/** + * xe_lrc_update_timestamp() - Update ctx timestamp + * @lrc: Pointer to the lrc. + * @old_ts: Old timestamp value + * + * Populate @old_ts with the current saved ctx timestamp, then read the new ctx + * timestamp and update the saved value. + * + * Returns: New ctx timestamp value + */ u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) { *old_ts = lrc->ctx_timestamp; - lrc->ctx_timestamp = xe_lrc_read_ctx_reg(lrc, CTX_TIMESTAMP); + lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); return lrc->ctx_timestamp; } diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 882c3437ba5c..c24542e89318 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -52,6 +52,7 @@ static inline void xe_lrc_put(struct xe_lrc *lrc) size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class); u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc); +u32 xe_lrc_regs_offset(struct xe_lrc *lrc); void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail); u32 xe_lrc_ring_tail(struct xe_lrc *lrc); @@ -94,6 +95,11 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p); void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot); +u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc); +u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc); +u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc); +u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc); + /** * xe_lrc_update_timestamp - readout LRC timestamp and update cached value * @lrc: logical ring context for this exec queue diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 7e3fb33110d9..05f933787860 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -32,7 +32,7 @@ #include "xe_res_cursor.h" #include "xe_sched_job.h" #include "xe_sync.h" -#include "xe_trace.h" +#include "xe_trace_bo.h" #include "xe_vm.h" /** @@ -647,12 +647,6 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, bb->cs[bb->len++] = upper_32_bits(src_ofs); } -static int job_add_deps(struct xe_sched_job *job, struct dma_resv *resv, - enum dma_resv_usage usage) -{ - return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage); -} - static u64
xe_migrate_batch_base(struct xe_migrate *m, bool usm) { return usm ? m->usm_batch_base_ofs : m->batch_base_ofs; @@ -849,11 +843,11 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, xe_sched_job_add_migrate_flush(job, flush_flags); if (!fence) { - err = job_add_deps(job, src_bo->ttm.base.resv, - DMA_RESV_USAGE_BOOKKEEP); + err = xe_sched_job_add_deps(job, src_bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP); if (!err && src_bo != dst_bo) - err = job_add_deps(job, dst_bo->ttm.base.resv, - DMA_RESV_USAGE_BOOKKEEP); + err = xe_sched_job_add_deps(job, dst_bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP); if (err) goto err_job; } @@ -1091,8 +1085,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, * fences, which are always tracked as * DMA_RESV_USAGE_KERNEL. */ - err = job_add_deps(job, bo->ttm.base.resv, - DMA_RESV_USAGE_KERNEL); + err = xe_sched_job_add_deps(job, bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL); if (err) goto err_job; } @@ -1417,8 +1411,8 @@ xe_migrate_update_pgtables(struct xe_migrate *m, /* Wait on BO move */ if (bo) { - err = job_add_deps(job, bo->ttm.base.resv, - DMA_RESV_USAGE_KERNEL); + err = xe_sched_job_add_deps(job, bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL); if (err) goto err_job; } @@ -1428,8 +1422,8 @@ xe_migrate_update_pgtables(struct xe_migrate *m, * trigger preempts before moving forward */ if (first_munmap_rebind) { - err = job_add_deps(job, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); + err = xe_sched_job_add_deps(job, xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP); if (err) goto err_job; } diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 7962eeb9adb7..f92faad4b96d 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -21,6 +21,7 @@ #include "xe_gt_sriov_vf.h" #include "xe_macros.h" #include "xe_sriov.h" +#include "xe_trace.h" static void tiles_fini(void *arg) { @@ -124,16 +125,24 @@ u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); + u8 val; - return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); + val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); + trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); + + return val; } u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); + u16 val; + + val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); + trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); - return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); + return val; } void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) @@ -141,6 +150,7 @@ void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) struct xe_tile *tile = gt_to_tile(gt); u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); + trace_xe_reg_rw(gt, true, addr, val, sizeof(val)); writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); } @@ -148,11 +158,16 @@ u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); + u32 val; if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt))) - return xe_gt_sriov_vf_read32(gt, reg); + val = xe_gt_sriov_vf_read32(gt, reg); + else + val = readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); + + trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); - return readl((reg.ext ? 
tile->mmio_ext.regs : tile->mmio.regs) + addr); + return val; } u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set) diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 3edeb30d5ccb..893858a2eea0 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -11,6 +11,7 @@ #include "xe_drv.h" #include "xe_hw_fence.h" #include "xe_pci.h" +#include "xe_perf.h" #include "xe_sched_job.h" struct xe_modparam xe_modparam = { @@ -78,6 +79,10 @@ static const struct init_funcs init_funcs[] = { .init = xe_register_pci_driver, .exit = xe_unregister_pci_driver, }, + { + .init = xe_perf_sysctl_register, + .exit = xe_perf_sysctl_unregister, + }, }; static int __init xe_init(void) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c new file mode 100644 index 000000000000..6cc3f0217341 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -0,0 +1,2510 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include <linux/anon_inodes.h> +#include <linux/delay.h> +#include <linux/nospec.h> +#include <linux/poll.h> + +#include <drm/drm_drv.h> +#include <drm/drm_managed.h> +#include <drm/xe_drm.h> + +#include "abi/guc_actions_slpc_abi.h" +#include "instructions/xe_mi_commands.h" +#include "regs/xe_engine_regs.h" +#include "regs/xe_gt_regs.h" +#include "regs/xe_lrc_layout.h" +#include "regs/xe_oa_regs.h" +#include "xe_assert.h" +#include "xe_bb.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_exec_queue.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_gt_mcr.h" +#include "xe_gt_printk.h" +#include "xe_guc_pc.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_mmio.h" +#include "xe_oa.h" +#include "xe_perf.h" +#include "xe_pm.h" +#include "xe_sched_job.h" +#include "xe_sriov.h" + +#define DEFAULT_POLL_FREQUENCY_HZ 200 +#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) +#define XE_OA_UNIT_INVALID U32_MAX + +struct xe_oa_reg { + struct xe_reg addr; + u32 value; +}; + +struct xe_oa_config { + struct xe_oa *oa; + + char uuid[UUID_STRING_LEN + 1]; + int id; + + const struct xe_oa_reg *regs; + u32 regs_len; + + struct attribute_group sysfs_metric; + struct attribute *attrs[2]; + struct kobj_attribute sysfs_metric_id; + + struct kref ref; + struct rcu_head rcu; +}; + +struct flex { + struct xe_reg reg; + u32 offset; + u32 value; +}; + +struct xe_oa_open_param { + u32 oa_unit_id; + bool sample; + u32 metric_set; + enum xe_oa_format_name oa_format; + int period_exponent; + bool disabled; + int exec_queue_id; + int engine_instance; + struct xe_exec_queue *exec_q; + struct xe_hw_engine *hwe; + bool no_preempt; +}; + +struct xe_oa_config_bo { + struct llist_node node; + + struct xe_oa_config *oa_config; + struct xe_bb *bb; +}; + +#define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x + +static const struct xe_oa_format oa_formats[] = { + [XE_OA_FORMAT_C4_B8] = { 7, 64, DRM_FMT(OAG) }, + [XE_OA_FORMAT_A12] = { 0, 64, DRM_FMT(OAG) }, + [XE_OA_FORMAT_A12_B8_C8] = { 2, 128, DRM_FMT(OAG) }, + [XE_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, + [XE_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAR) }, + [XE_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, + [XE_OAC_FORMAT_A24u64_B8_C8] = { 1, 320, DRM_FMT(OAC), HDR_64_BIT }, + [XE_OAC_FORMAT_A22u32_R2u32_B8_C8] = { 2, 192, DRM_FMT(OAC), HDR_64_BIT }, + [XE_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, DRM_FMT(OAM_MPEC), HDR_64_BIT }, + [XE_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, 
DRM_FMT(OAM_MPEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC64u64] = { 1, 576, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC64u64_B8_C8] = { 1, 640, DRM_FMT(PEC), HDR_64_BIT, 1, 1 }, + [XE_OA_FORMAT_PEC64u32] = { 1, 320, DRM_FMT(PEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC32u64_G1] = { 5, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC32u32_G1] = { 5, 192, DRM_FMT(PEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC32u64_G2] = { 6, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC32u32_G2] = { 6, 192, DRM_FMT(PEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC36u64_G1_32_G2_4] = { 3, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, +}; + +static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head) +{ + return tail >= head ? tail - head : + tail + stream->oa_buffer.circ_size - head; +} + +static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n) +{ + return ptr + n >= stream->oa_buffer.circ_size ? + ptr + n - stream->oa_buffer.circ_size : ptr + n; +} + +static void xe_oa_config_release(struct kref *ref) +{ + struct xe_oa_config *oa_config = + container_of(ref, typeof(*oa_config), ref); + + kfree(oa_config->regs); + + kfree_rcu(oa_config, rcu); +} + +static void xe_oa_config_put(struct xe_oa_config *oa_config) +{ + if (!oa_config) + return; + + kref_put(&oa_config->ref, xe_oa_config_release); +} + +static struct xe_oa_config *xe_oa_config_get(struct xe_oa_config *oa_config) +{ + return kref_get_unless_zero(&oa_config->ref) ? oa_config : NULL; +} + +static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_set) +{ + struct xe_oa_config *oa_config; + + rcu_read_lock(); + oa_config = idr_find(&oa->metrics_idr, metrics_set); + if (oa_config) + oa_config = xe_oa_config_get(oa_config); + rcu_read_unlock(); + + return oa_config; +} + +static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo) +{ + xe_oa_config_put(oa_bo->oa_config); + xe_bb_free(oa_bo->bb, NULL); + kfree(oa_bo); +} + +static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) +{ + return &stream->hwe->oa_unit->regs; +} + +static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) +{ + return xe_mmio_read32(stream->gt, __oa_regs(stream)->oa_tail_ptr) & + OAG_OATAILPTR_MASK; +} + +#define oa_report_header_64bit(__s) \ + ((__s)->oa_buffer.format->header == HDR_64_BIT) + +static u64 oa_report_id(struct xe_oa_stream *stream, void *report) +{ + return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; +} + +static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report) +{ + if (oa_report_header_64bit(stream)) + *(u64 *)report = 0; + else + *report = 0; +} + +static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) +{ + return oa_report_header_64bit(stream) ? + *((u64 *)report + 1) : + *((u32 *)report + 1); +} + +static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) +{ + if (oa_report_header_64bit(stream)) + *(u64 *)&report[2] = 0; + else + report[1] = 0; +} + +static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) +{ + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + int report_size = stream->oa_buffer.format->size; + u32 tail, hw_tail; + unsigned long flags; + bool pollin; + u32 partial_report_size; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + + hw_tail = xe_oa_hw_tail_read(stream); + hw_tail -= gtt_offset; + + /* + * The tail pointer increases in 64 byte (cacheline size), not in report_size + * increments. 
Also report size may not be a power of 2. Compute potential + * partially landed report in OA buffer. + */ + partial_report_size = xe_oa_circ_diff(stream, hw_tail, stream->oa_buffer.tail); + partial_report_size %= report_size; + + /* Subtract partial amount off the tail */ + hw_tail = xe_oa_circ_diff(stream, hw_tail, partial_report_size); + + tail = hw_tail; + + /* + * Walk the stream backward until we find a report with report id and timestamp + * not 0. We can't tell whether a report has fully landed in memory before the + * report id and timestamp of the following report have landed. + * + * This is assuming that the writes of the OA unit land in memory in the order + * they were written. If not : (╯°□°)╯︵ ┻━┻ + */ + while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) { + void *report = stream->oa_buffer.vaddr + tail; + + if (oa_report_id(stream, report) || oa_timestamp(stream, report)) + break; + + tail = xe_oa_circ_diff(stream, tail, report_size); + } + + if (xe_oa_circ_diff(stream, hw_tail, tail) > report_size) + drm_dbg(&stream->oa->xe->drm, + "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", + stream->oa_buffer.head, tail, hw_tail); + + stream->oa_buffer.tail = tail; + + pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail, + stream->oa_buffer.head) >= report_size; + + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + return pollin; +} + +static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) +{ + struct xe_oa_stream *stream = + container_of(hrtimer, typeof(*stream), poll_check_timer); + + if (xe_oa_buffer_check_unlocked(stream)) { + stream->pollin = true; + wake_up(&stream->poll_wq); + } + + hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns)); + + return HRTIMER_RESTART; +} + +static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf, + size_t count, size_t *offset, const u8 *report) +{ + int report_size = stream->oa_buffer.format->size; + int report_size_partial; + u8 *oa_buf_end; + + if ((count - *offset) < report_size) + return -ENOSPC; + + buf += *offset; + + oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; + report_size_partial = oa_buf_end - report; + + if (report_size_partial < report_size) { + if (copy_to_user(buf, report, report_size_partial)) + return -EFAULT; + buf += report_size_partial; + + if (copy_to_user(buf, stream->oa_buffer.vaddr, + report_size - report_size_partial)) + return -EFAULT; + } else if (copy_to_user(buf, report, report_size)) { + return -EFAULT; + } + + *offset += report_size; + + return 0; +} + +static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, + size_t count, size_t *offset) +{ + int report_size = stream->oa_buffer.format->size; + u8 *oa_buf_base = stream->oa_buffer.vaddr; + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + size_t start_offset = *offset; + unsigned long flags; + u32 head, tail; + int ret = 0; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + head = stream->oa_buffer.head; + tail = stream->oa_buffer.tail; + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + xe_assert(stream->oa->xe, + head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size); + + for (; xe_oa_circ_diff(stream, tail, head); + head = xe_oa_circ_incr(stream, head, report_size)) { + u8 *report = oa_buf_base + head; + + ret = xe_oa_append_report(stream, buf, count, offset, report); + if (ret) + break; + + if (!(stream->oa_buffer.circ_size % report_size)) { + /* Clear out 
report id and timestamp to detect unlanded reports */ + oa_report_id_clear(stream, (void *)report); + oa_timestamp_clear(stream, (void *)report); + } else { + u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; + u32 part = oa_buf_end - report; + + /* Zero out the entire report */ + if (report_size <= part) { + memset(report, 0, report_size); + } else { + memset(report, 0, part); + memset(oa_buf_base, 0, report_size - part); + } + } + } + + if (start_offset != *offset) { + struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + xe_mmio_write32(stream->gt, oaheadptr, + (head + gtt_offset) & OAG_OAHEADPTR_MASK); + stream->oa_buffer.head = head; + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + } + + return ret; +} + +static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) +{ + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT; + unsigned long flags; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_status, 0); + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr, + gtt_offset & OAG_OAHEADPTR_MASK); + stream->oa_buffer.head = 0; + /* + * PRM says: "This MMIO must be set before the OATAILPTR register and after the + * OAHEADPTR register. This is to enable proper functionality of the overflow bit". + */ + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_buffer, oa_buf); + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_tail_ptr, + gtt_offset & OAG_OATAILPTR_MASK); + + /* Mark that we need updated tail pointer to read from */ + stream->oa_buffer.tail = 0; + + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ + memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size); +} + +static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) +{ + return ((format->counter_select << (ffs(counter_sel_mask) - 1)) & counter_sel_mask) | + REG_FIELD_PREP(OA_OACONTROL_REPORT_BC_MASK, format->bc_report) | + REG_FIELD_PREP(OA_OACONTROL_COUNTER_SIZE_MASK, format->counter_size); +} + +static u32 __oa_ccs_select(struct xe_oa_stream *stream) +{ + u32 val; + + if (stream->hwe->class != XE_ENGINE_CLASS_COMPUTE) + return 0; + + val = REG_FIELD_PREP(OAG_OACONTROL_OA_CCS_SELECT_MASK, stream->hwe->instance); + xe_assert(stream->oa->xe, + REG_FIELD_GET(OAG_OACONTROL_OA_CCS_SELECT_MASK, val) == stream->hwe->instance); + return val; +} + +static void xe_oa_enable(struct xe_oa_stream *stream) +{ + const struct xe_oa_format *format = stream->oa_buffer.format; + const struct xe_oa_regs *regs; + u32 val; + + /* + * BSpec: 46822: Bit 0. 
Even if stream->sample is 0, for OAR to function, the OA + buffer must be correctly initialized + */ + xe_oa_init_oa_buffer(stream); + + regs = __oa_regs(stream); + val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) | + __oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE; + + xe_mmio_write32(stream->gt, regs->oa_ctrl, val); +} + +static void xe_oa_disable(struct xe_oa_stream *stream) +{ + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, 0); + if (xe_mmio_wait32(stream->gt, __oa_regs(stream)->oa_ctrl, + OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false)) + drm_err(&stream->oa->xe->drm, + "wait for OA to be disabled timed out\n"); + + if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) { + /* <= XE_METEORLAKE except XE_PVC */ + xe_mmio_write32(stream->gt, OA_TLB_INV_CR, 1); + if (xe_mmio_wait32(stream->gt, OA_TLB_INV_CR, 1, 0, 50000, NULL, false)) + drm_err(&stream->oa->xe->drm, + "wait for OA tlb invalidate timed out\n"); + } +} + +static int xe_oa_wait_unlocked(struct xe_oa_stream *stream) +{ + /* We might wait indefinitely if periodic sampling is not enabled */ + if (!stream->periodic) + return -EINVAL; + + return wait_event_interruptible(stream->poll_wq, + xe_oa_buffer_check_unlocked(stream)); +} + +#define OASTATUS_RELEVANT_BITS (OASTATUS_MMIO_TRG_Q_FULL | OASTATUS_COUNTER_OVERFLOW | \ + OASTATUS_BUFFER_OVERFLOW | OASTATUS_REPORT_LOST) + +static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf, + size_t count, size_t *offset) +{ + /* Only clear our bits to avoid side-effects */ + stream->oa_status = xe_mmio_rmw32(stream->gt, __oa_regs(stream)->oa_status, + OASTATUS_RELEVANT_BITS, 0); + /* + * Signal to userspace that there is non-zero OA status to read via + * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl + */ + if (stream->oa_status & OASTATUS_RELEVANT_BITS) + return -EIO; + + return xe_oa_append_reports(stream, buf, count, offset); +} + +static ssize_t xe_oa_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct xe_oa_stream *stream = file->private_data; + size_t offset = 0; + int ret; + + /* Can't read from disabled streams */ + if (!stream->enabled || !stream->sample) + return -EINVAL; + + if (!(file->f_flags & O_NONBLOCK)) { + do { + ret = xe_oa_wait_unlocked(stream); + if (ret) + return ret; + + mutex_lock(&stream->stream_lock); + ret = __xe_oa_read(stream, buf, count, &offset); + mutex_unlock(&stream->stream_lock); + } while (!offset && !ret); + } else { + mutex_lock(&stream->stream_lock); + ret = __xe_oa_read(stream, buf, count, &offset); + mutex_unlock(&stream->stream_lock); + } + + /* + * Typically we clear pollin here in order to wait for the new hrtimer callback + * before unblocking. The exception to this is if __xe_oa_read returns -ENOSPC, + * which means that more OA data is available than could fit in the user provided + * buffer. In this case we want the next poll() call to not block. + * + * Also in case of -EIO, we have already waited for data before returning + * -EIO, so there is no need to wait again + */ + if (ret != -ENOSPC && ret != -EIO) + stream->pollin = false; + + /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, -EINVAL, ...
*/ + return offset ?: (ret ?: -EAGAIN); +} + +static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream, + struct file *file, poll_table *wait) +{ + __poll_t events = 0; + + poll_wait(file, &stream->poll_wq, wait); + + /* + * We don't explicitly check whether there's something to read here since this + * path may be hot depending on what else userspace is polling, or on the timeout + * in use. We rely on hrtimer xe_oa_poll_check_timer_cb to notify us when there + * are samples to read + */ + if (stream->pollin) + events |= EPOLLIN; + + return events; +} + +static __poll_t xe_oa_poll(struct file *file, poll_table *wait) +{ + struct xe_oa_stream *stream = file->private_data; + __poll_t ret; + + mutex_lock(&stream->stream_lock); + ret = xe_oa_poll_locked(stream, file, wait); + mutex_unlock(&stream->stream_lock); + + return ret; +} + +static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) +{ + struct xe_sched_job *job; + struct dma_fence *fence; + long timeout; + int err = 0; + + /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ + job = xe_bb_create_job(stream->k_exec_q, bb); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto exit; + } + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + timeout = dma_fence_wait_timeout(fence, false, HZ); + dma_fence_put(fence); + if (timeout < 0) + err = timeout; + else if (!timeout) + err = -ETIME; +exit: + return err; +} + +static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs) +{ + u32 i; + +#define MI_LOAD_REGISTER_IMM_MAX_REGS (126) + + for (i = 0; i < n_regs; i++) { + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { + u32 n_lri = min_t(u32, n_regs - i, + MI_LOAD_REGISTER_IMM_MAX_REGS); + + bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(n_lri); + } + bb->cs[bb->len++] = reg_data[i].addr.addr; + bb->cs[bb->len++] = reg_data[i].value; + } +} + +static int num_lri_dwords(int num_regs) +{ + int count = 0; + + if (num_regs > 0) { + count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS); + count += num_regs * 2; + } + + return count; +} + +static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream) +{ + xe_bo_unpin_map_no_vm(stream->oa_buffer.bo); +} + +static void xe_oa_free_configs(struct xe_oa_stream *stream) +{ + struct xe_oa_config_bo *oa_bo, *tmp; + + xe_oa_config_put(stream->oa_config); + llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node) + free_oa_config_bo(oa_bo); +} + +static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, + struct xe_bb *bb, const struct flex *flex, u32 count) +{ + u32 offset = xe_bo_ggtt_addr(lrc->bo); + + do { + bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2; + bb->cs[bb->len++] = offset + flex->offset * sizeof(u32); + bb->cs[bb->len++] = 0; + bb->cs[bb->len++] = flex->value; + + } while (flex++, --count); +} + +static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc, + const struct flex *flex, u32 count) +{ + struct xe_bb *bb; + int err; + + bb = xe_bb_new(stream->gt, 4 * count, false); + if (IS_ERR(bb)) { + err = PTR_ERR(bb); + goto exit; + } + + xe_oa_store_flex(stream, lrc, bb, flex, count); + + err = xe_oa_submit_bb(stream, bb); + xe_bb_free(bb, NULL); +exit: + return err; +} + +static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri) +{ + struct xe_bb *bb; + int err; + + bb = xe_bb_new(stream->gt, 3, false); + if (IS_ERR(bb)) { + err = PTR_ERR(bb); 
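+ /* nothing to unwind here: the 3-dword bb (num_lri_dwords(1), i.e. one MI_LOAD_REGISTER_IMM header plus a single addr/value pair) was never created */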
+ goto exit; + } + + write_cs_mi_lri(bb, reg_lri, 1); + + err = xe_oa_submit_bb(stream, bb); + xe_bb_free(bb, NULL); +exit: + return err; +} + +static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) +{ + const struct xe_oa_format *format = stream->oa_buffer.format; + struct xe_lrc *lrc = stream->exec_q->lrc[0]; + u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); + u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | + (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); + + struct flex regs_context[] = { + { + OACTXCONTROL(stream->hwe->mmio_base), + stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, + enable ? OA_COUNTER_RESUME : 0, + }, + { + RING_CONTEXT_CONTROL(stream->hwe->mmio_base), + regs_offset + CTX_CONTEXT_CONTROL, + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, + enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) + }, + }; + struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; + int err; + + /* Modify stream hwe context image with regs_context */ + err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], + regs_context, ARRAY_SIZE(regs_context)); + if (err) + return err; + + /* Apply reg_lri using LRI */ + return xe_oa_load_with_lri(stream, &reg_lri); +} + +static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) +{ + const struct xe_oa_format *format = stream->oa_buffer.format; + struct xe_lrc *lrc = stream->exec_q->lrc[0]; + u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); + u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | + (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); + struct flex regs_context[] = { + { + OACTXCONTROL(stream->hwe->mmio_base), + stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, + enable ? OA_COUNTER_RESUME : 0, + }, + { + RING_CONTEXT_CONTROL(stream->hwe->mmio_base), + regs_offset + CTX_CONTEXT_CONTROL, + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, + enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | + _MASKED_FIELD(CTX_CTRL_RUN_ALONE, + enable ? CTX_CTRL_RUN_ALONE : 0), + }, + }; + struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; + int err; + + /* Set ccs select to enable programming of OAC_OACONTROL */ + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream)); + + /* Modify stream hwe context image with regs_context */ + err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], + regs_context, ARRAY_SIZE(regs_context)); + if (err) + return err; + + /* Apply reg_lri using LRI */ + return xe_oa_load_with_lri(stream, &reg_lri); +} + +static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable) +{ + switch (stream->hwe->class) { + case XE_ENGINE_CLASS_RENDER: + return xe_oa_configure_oar_context(stream, enable); + case XE_ENGINE_CLASS_COMPUTE: + return xe_oa_configure_oac_context(stream, enable); + default: + /* Video engines do not support MI_REPORT_PERF_COUNT */ + return 0; + } +} + +#define HAS_OA_BPC_REPORTING(xe) (GRAPHICS_VERx100(xe) >= 1255) + +static u32 oag_configure_mmio_trigger(const struct xe_oa_stream *stream, bool enable) +{ + return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_MMIO_TRG, + enable && stream && stream->sample ? + 0 : OAG_OA_DEBUG_DISABLE_MMIO_TRG); +} + +static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) +{ + u32 sqcnt1; + + /* + * Wa_1508761755:xehpsdv, dg2 + * Enable thread stall DOP gating and EU DOP gating.
+ */ + if (stream->oa->xe->info.platform == XE_DG2) { + xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, + _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE)); + xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, + _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); + } + + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug, + oag_configure_mmio_trigger(stream, false)); + + /* disable the context save/restore or OAR counters */ + if (stream->exec_q) + xe_oa_configure_oa_context(stream, false); + + /* Make sure we disable noa to save power. */ + xe_mmio_rmw32(stream->gt, RPM_CONFIG1, GT_NOA_ENABLE, 0); + + sqcnt1 = SQCNT1_PMON_ENABLE | + (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); + + /* Reset PMON Enable to save power. */ + xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, sqcnt1, 0); +} + +static void xe_oa_stream_destroy(struct xe_oa_stream *stream) +{ + struct xe_oa_unit *u = stream->hwe->oa_unit; + struct xe_gt *gt = stream->hwe->gt; + + if (WARN_ON(stream != u->exclusive_stream)) + return; + + WRITE_ONCE(u->exclusive_stream, NULL); + + xe_oa_disable_metric_set(stream); + xe_exec_queue_put(stream->k_exec_q); + + xe_oa_free_oa_buffer(stream); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_pm_runtime_put(stream->oa->xe); + + /* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */ + if (stream->override_gucrc) + xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc)); + + xe_oa_free_configs(stream); +} + +static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) +{ + struct xe_bo *bo; + + BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE); + BUILD_BUG_ON(XE_OA_BUFFER_SIZE < SZ_128K || XE_OA_BUFFER_SIZE > SZ_16M); + + bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL, + XE_OA_BUFFER_SIZE, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + stream->oa_buffer.bo = bo; + /* mmap implementation requires OA buffer to be in system memory */ + xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0); + stream->oa_buffer.vaddr = bo->vmap.vaddr; + return 0; +} + +static struct xe_oa_config_bo * +__xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config) +{ + struct xe_oa_config_bo *oa_bo; + size_t config_length; + struct xe_bb *bb; + + oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL); + if (!oa_bo) + return ERR_PTR(-ENOMEM); + + config_length = num_lri_dwords(oa_config->regs_len); + config_length = ALIGN(sizeof(u32) * config_length, XE_PAGE_SIZE) / sizeof(u32); + + bb = xe_bb_new(stream->gt, config_length, false); + if (IS_ERR(bb)) + goto err_free; + + write_cs_mi_lri(bb, oa_config->regs, oa_config->regs_len); + + oa_bo->bb = bb; + oa_bo->oa_config = xe_oa_config_get(oa_config); + llist_add(&oa_bo->node, &stream->oa_config_bos); + + return oa_bo; +err_free: + kfree(oa_bo); + return ERR_CAST(bb); +} + +static struct xe_oa_config_bo * +xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config) +{ + struct xe_oa_config_bo *oa_bo; + + /* Look for the buffer in the already allocated BOs attached to the stream */ + llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) { + if (oa_bo->oa_config == oa_config && + memcmp(oa_bo->oa_config->uuid, oa_config->uuid, + sizeof(oa_config->uuid)) == 0) + goto out; + } + + oa_bo = __xe_oa_alloc_config_buffer(stream, oa_config); +out: + return oa_bo; +} + +static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config) +{ +#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 + struct
xe_oa_config_bo *oa_bo; + int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US; + + oa_bo = xe_oa_alloc_config_buffer(stream, config); + if (IS_ERR(oa_bo)) { + err = PTR_ERR(oa_bo); + goto exit; + } + + err = xe_oa_submit_bb(stream, oa_bo->bb); + + /* Additional empirical delay needed for NOA programming after registers are written */ + usleep_range(us, 2 * us); +exit: + return err; +} + +static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) +{ + /* If user didn't require OA reports, ask HW not to emit ctx switch reports */ + return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS, + stream->sample ? + 0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); +} + +static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) +{ + u32 oa_debug, sqcnt1; + int ret; + + /* + * Wa_1508761755:xehpsdv, dg2 + * EU NOA signals behave incorrectly if EU clock gating is enabled. + * Disable thread stall DOP gating and EU DOP gating. + */ + if (stream->oa->xe->info.platform == XE_DG2) { + xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, + _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); + xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DISABLE_DOP_GATING)); + } + + /* Disable clk ratio reports */ + oa_debug = OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | + OAG_OA_DEBUG_INCLUDE_CLK_RATIO; + + if (GRAPHICS_VER(stream->oa->xe) >= 20) + oa_debug |= + /* The three bits below are needed to get PEC counters running */ + OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL | + OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL | + OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL; + + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug, + _MASKED_BIT_ENABLE(oa_debug) | + oag_report_ctx_switches(stream) | + oag_configure_mmio_trigger(stream, true)); + + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ? + (OAG_OAGLBCTXCTRL_COUNTER_RESUME | + OAG_OAGLBCTXCTRL_TIMER_ENABLE | + REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK, + stream->period_exponent)) : 0); + + /* + * Initialize Super Queue Internal Cnt Register + * Set PMON Enable in order to collect valid metrics + * Enable bytes per clock reporting + */ + sqcnt1 = SQCNT1_PMON_ENABLE | + (HAS_OA_BPC_REPORTING(stream->oa->xe) ? 
SQCNT1_OABPC : 0); + + xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, 0, sqcnt1); + + /* Configure OAR/OAC */ + if (stream->exec_q) { + ret = xe_oa_configure_oa_context(stream, true); + if (ret) + return ret; + } + + return xe_oa_emit_oa_config(stream, stream->oa_config); +} + +static void xe_oa_stream_enable(struct xe_oa_stream *stream) +{ + stream->pollin = false; + + xe_oa_enable(stream); + + if (stream->sample) + hrtimer_start(&stream->poll_check_timer, + ns_to_ktime(stream->poll_period_ns), + HRTIMER_MODE_REL_PINNED); +} + +static void xe_oa_stream_disable(struct xe_oa_stream *stream) +{ + xe_oa_disable(stream); + + if (stream->sample) + hrtimer_cancel(&stream->poll_check_timer); +} + +static int xe_oa_enable_preempt_timeslice(struct xe_oa_stream *stream) +{ + struct xe_exec_queue *q = stream->exec_q; + int ret1, ret2; + + /* Best effort recovery: try to revert both to original, irrespective of error */ + ret1 = q->ops->set_timeslice(q, stream->hwe->eclass->sched_props.timeslice_us); + ret2 = q->ops->set_preempt_timeout(q, stream->hwe->eclass->sched_props.preempt_timeout_us); + if (ret1 || ret2) + goto err; + return 0; +err: + drm_dbg(&stream->oa->xe->drm, "%s failed ret1 %d ret2 %d\n", __func__, ret1, ret2); + return ret1 ?: ret2; +} + +static int xe_oa_disable_preempt_timeslice(struct xe_oa_stream *stream) +{ + struct xe_exec_queue *q = stream->exec_q; + int ret; + + /* Setting values to 0 will disable timeslice and preempt_timeout */ + ret = q->ops->set_timeslice(q, 0); + if (ret) + goto err; + + ret = q->ops->set_preempt_timeout(q, 0); + if (ret) + goto err; + + return 0; +err: + xe_oa_enable_preempt_timeslice(stream); + drm_dbg(&stream->oa->xe->drm, "%s failed %d\n", __func__, ret); + return ret; +} + +static int xe_oa_enable_locked(struct xe_oa_stream *stream) +{ + if (stream->enabled) + return 0; + + if (stream->no_preempt) { + int ret = xe_oa_disable_preempt_timeslice(stream); + + if (ret) + return ret; + } + + xe_oa_stream_enable(stream); + + stream->enabled = true; + return 0; +} + +static int xe_oa_disable_locked(struct xe_oa_stream *stream) +{ + int ret = 0; + + if (!stream->enabled) + return 0; + + xe_oa_stream_disable(stream); + + if (stream->no_preempt) + ret = xe_oa_enable_preempt_timeslice(stream); + + stream->enabled = false; + return ret; +} + +static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) +{ + struct drm_xe_ext_set_property ext; + long ret = stream->oa_config->id; + struct xe_oa_config *config; + int err; + + err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext)); + if (XE_IOCTL_DBG(stream->oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) || + XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) || + XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) || + XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET)) + return -EINVAL; + + config = xe_oa_get_oa_config(stream->oa, ext.value); + if (!config) + return -ENODEV; + + if (config != stream->oa_config) { + err = xe_oa_emit_oa_config(stream, config); + if (!err) + config = xchg(&stream->oa_config, config); + else + ret = err; + } + + xe_oa_config_put(config); + + return ret; +} + +static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) +{ + struct drm_xe_oa_stream_status status = {}; + void __user *uaddr = (void __user *)arg; + + /* Map from register to uapi bits */ + if (stream->oa_status & OASTATUS_REPORT_LOST) + status.oa_status |= DRM_XE_OASTATUS_REPORT_LOST; + if 
(stream->oa_status & OASTATUS_BUFFER_OVERFLOW) + status.oa_status |= DRM_XE_OASTATUS_BUFFER_OVERFLOW; + if (stream->oa_status & OASTATUS_COUNTER_OVERFLOW) + status.oa_status |= DRM_XE_OASTATUS_COUNTER_OVERFLOW; + if (stream->oa_status & OASTATUS_MMIO_TRG_Q_FULL) + status.oa_status |= DRM_XE_OASTATUS_MMIO_TRG_Q_FULL; + + if (copy_to_user(uaddr, &status, sizeof(status))) + return -EFAULT; + + return 0; +} + +static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) +{ + struct drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, }; + void __user *uaddr = (void __user *)arg; + + if (copy_to_user(uaddr, &info, sizeof(info))) + return -EFAULT; + + return 0; +} + +static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, + unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case DRM_XE_PERF_IOCTL_ENABLE: + return xe_oa_enable_locked(stream); + case DRM_XE_PERF_IOCTL_DISABLE: + return xe_oa_disable_locked(stream); + case DRM_XE_PERF_IOCTL_CONFIG: + return xe_oa_config_locked(stream, arg); + case DRM_XE_PERF_IOCTL_STATUS: + return xe_oa_status_locked(stream, arg); + case DRM_XE_PERF_IOCTL_INFO: + return xe_oa_info_locked(stream, arg); + } + + return -EINVAL; +} + +static long xe_oa_ioctl(struct file *file, + unsigned int cmd, + unsigned long arg) +{ + struct xe_oa_stream *stream = file->private_data; + long ret; + + mutex_lock(&stream->stream_lock); + ret = xe_oa_ioctl_locked(stream, cmd, arg); + mutex_unlock(&stream->stream_lock); + + return ret; +} + +static void xe_oa_destroy_locked(struct xe_oa_stream *stream) +{ + if (stream->enabled) + xe_oa_disable_locked(stream); + + xe_oa_stream_destroy(stream); + + if (stream->exec_q) + xe_exec_queue_put(stream->exec_q); + + kfree(stream); +} + +static int xe_oa_release(struct inode *inode, struct file *file) +{ + struct xe_oa_stream *stream = file->private_data; + struct xe_gt *gt = stream->gt; + + mutex_lock(&gt->oa.gt_lock); + xe_oa_destroy_locked(stream); + mutex_unlock(&gt->oa.gt_lock); + + /* Release the reference the perf stream kept on the driver */ + drm_dev_put(&gt_to_xe(gt)->drm); + + return 0; +} + +static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct xe_oa_stream *stream = file->private_data; + struct xe_bo *bo = stream->oa_buffer.bo; + unsigned long start = vma->vm_start; + int i, ret; + + if (xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&stream->oa->xe->drm, "Insufficient privilege to map OA buffer\n"); + return -EACCES; + } + + /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ + if (vma->vm_end - vma->vm_start != XE_OA_BUFFER_SIZE) { + drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); + return -EINVAL; + } + + /* + * Only support VM_READ, enforce MAP_PRIVATE by checking for + * VM_MAYSHARE, don't copy the vma on fork + */ + if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_SHARED | VM_MAYSHARE)) { + drm_dbg(&stream->oa->xe->drm, "mmap must be read only\n"); + return -EINVAL; + } + vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY, + VM_MAYWRITE | VM_MAYEXEC); + + xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == + (vma->vm_end - vma->vm_start) >> PAGE_SHIFT); + for (i = 0; i < bo->ttm.ttm->num_pages; i++) { + ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]), + PAGE_SIZE, vma->vm_page_prot); + if (ret) + break; + + start += PAGE_SIZE; + } + + return ret; +} + +static const struct file_operations xe_oa_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .release =
xe_oa_release, + .poll = xe_oa_poll, + .read = xe_oa_read, + .unlocked_ioctl = xe_oa_ioctl, + .mmap = xe_oa_mmap, +}; + +static bool engine_supports_mi_query(struct xe_hw_engine *hwe) +{ + return hwe->class == XE_ENGINE_CLASS_RENDER || + hwe->class == XE_ENGINE_CLASS_COMPUTE; +} + +static bool xe_oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end) +{ + u32 idx = *offset; + u32 len = min(MI_LRI_LEN(state[idx]) + idx, end); + bool found = false; + + idx++; + for (; idx < len; idx += 2) { + if (state[idx] == reg) { + found = true; + break; + } + } + + *offset = idx; + return found; +} + +#define IS_MI_LRI_CMD(x) (REG_FIELD_GET(MI_OPCODE, (x)) == \ + REG_FIELD_GET(MI_OPCODE, MI_LOAD_REGISTER_IMM)) + +static u32 xe_oa_context_image_offset(struct xe_oa_stream *stream, u32 reg) +{ + struct xe_lrc *lrc = stream->exec_q->lrc[0]; + u32 len = (xe_gt_lrc_size(stream->gt, stream->hwe->class) + + lrc->ring.size) / sizeof(u32); + u32 offset = xe_lrc_regs_offset(lrc) / sizeof(u32); + u32 *state = (u32 *)lrc->bo->vmap.vaddr; + + if (drm_WARN_ON(&stream->oa->xe->drm, !state)) + return U32_MAX; + + for (; offset < len; ) { + if (IS_MI_LRI_CMD(state[offset])) { + /* + * We expect reg-value pairs in MI_LRI command, so + * MI_LRI_LEN() should be even + */ + drm_WARN_ON(&stream->oa->xe->drm, + MI_LRI_LEN(state[offset]) & 0x1); + + if (xe_oa_find_reg_in_lri(state, reg, &offset, len)) + break; + } else { + offset++; + } + } + + return offset < len ? offset : U32_MAX; +} + +static int xe_oa_set_ctx_ctrl_offset(struct xe_oa_stream *stream) +{ + struct xe_reg reg = OACTXCONTROL(stream->hwe->mmio_base); + u32 offset = stream->oa->ctx_oactxctrl_offset[stream->hwe->class]; + + /* Do this only once. Failure is stored as offset of U32_MAX */ + if (offset) + goto exit; + + offset = xe_oa_context_image_offset(stream, reg.addr); + stream->oa->ctx_oactxctrl_offset[stream->hwe->class] = offset; + + drm_dbg(&stream->oa->xe->drm, "%s oa ctx control at 0x%08x dword offset\n", + stream->hwe->name, offset); +exit: + return offset && offset != U32_MAX ? 
0 : -ENODEV; +} + +static int xe_oa_stream_init(struct xe_oa_stream *stream, + struct xe_oa_open_param *param) +{ + struct xe_oa_unit *u = param->hwe->oa_unit; + struct xe_gt *gt = param->hwe->gt; + int ret; + + stream->exec_q = param->exec_q; + stream->poll_period_ns = DEFAULT_POLL_PERIOD_NS; + stream->hwe = param->hwe; + stream->gt = stream->hwe->gt; + stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format]; + + stream->sample = param->sample; + stream->periodic = param->period_exponent > 0; + stream->period_exponent = param->period_exponent; + stream->no_preempt = param->no_preempt; + + /* + * For Xe2+, when overrun mode is enabled, there are no partial reports at the end + * of buffer, making the OA buffer effectively a non-power-of-2 size circular + * buffer whose size, circ_size, is a multiple of the report size + */ + if (GRAPHICS_VER(stream->oa->xe) >= 20 && + stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) + stream->oa_buffer.circ_size = + XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size; + else + stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE; + + if (stream->exec_q && engine_supports_mi_query(stream->hwe)) { + /* If we don't find the context offset, just return error */ + ret = xe_oa_set_ctx_ctrl_offset(stream); + if (ret) { + drm_err(&stream->oa->xe->drm, + "xe_oa_set_ctx_ctrl_offset failed for %s\n", + stream->hwe->name); + goto exit; + } + } + + stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set); + if (!stream->oa_config) { + drm_dbg(&stream->oa->xe->drm, "Invalid OA config id=%i\n", param->metric_set); + ret = -EINVAL; + goto exit; + } + + /* + * Wa_1509372804:pvc + * + * GuC reset of engines causes OA to lose configuration + * state. Prevent this by overriding GUCRC mode. 
+ */ + if (stream->oa->xe->info.platform == XE_PVC) { + ret = xe_guc_pc_override_gucrc_mode(&gt->uc.guc.pc, + SLPC_GUCRC_MODE_GUCRC_NO_RC6); + if (ret) + goto err_free_configs; + + stream->override_gucrc = true; + } + + /* Take runtime pm ref and forcewake to disable RC6 */ + xe_pm_runtime_get(stream->oa->xe); + XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + + ret = xe_oa_alloc_oa_buffer(stream); + if (ret) + goto err_fw_put; + + stream->k_exec_q = xe_exec_queue_create(stream->oa->xe, NULL, + BIT(stream->hwe->logical_instance), 1, + stream->hwe, EXEC_QUEUE_FLAG_KERNEL, 0); + if (IS_ERR(stream->k_exec_q)) { + ret = PTR_ERR(stream->k_exec_q); + drm_err(&stream->oa->xe->drm, "gt%d, hwe %s, xe_exec_queue_create failed=%d", + stream->gt->info.id, stream->hwe->name, ret); + goto err_free_oa_buf; + } + + ret = xe_oa_enable_metric_set(stream); + if (ret) { + drm_dbg(&stream->oa->xe->drm, "Unable to enable metric set\n"); + goto err_put_k_exec_q; + } + + drm_dbg(&stream->oa->xe->drm, "opening stream oa config uuid=%s\n", + stream->oa_config->uuid); + + WRITE_ONCE(u->exclusive_stream, stream); + + hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + stream->poll_check_timer.function = xe_oa_poll_check_timer_cb; + init_waitqueue_head(&stream->poll_wq); + + spin_lock_init(&stream->oa_buffer.ptr_lock); + mutex_init(&stream->stream_lock); + + return 0; + +err_put_k_exec_q: + xe_oa_disable_metric_set(stream); + xe_exec_queue_put(stream->k_exec_q); +err_free_oa_buf: + xe_oa_free_oa_buffer(stream); +err_fw_put: + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_pm_runtime_put(stream->oa->xe); + if (stream->override_gucrc) + xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc)); +err_free_configs: + xe_oa_free_configs(stream); +exit: + return ret; +} + +static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, + struct xe_oa_open_param *param) +{ + struct xe_oa_stream *stream; + int stream_fd; + int ret; + + /* We currently only allow exclusive access */ + if (param->hwe->oa_unit->exclusive_stream) { + drm_dbg(&oa->xe->drm, "OA unit already in use\n"); + ret = -EBUSY; + goto exit; + } + + stream = kzalloc(sizeof(*stream), GFP_KERNEL); + if (!stream) { + ret = -ENOMEM; + goto exit; + } + + stream->oa = oa; + ret = xe_oa_stream_init(stream, param); + if (ret) + goto err_free; + + if (!param->disabled) { + ret = xe_oa_enable_locked(stream); + if (ret) + goto err_destroy; + } + + stream_fd = anon_inode_getfd("[xe_oa]", &xe_oa_fops, stream, 0); + if (stream_fd < 0) { + ret = stream_fd; + goto err_disable; + } + + /* Hold a reference on the drm device till stream_fd is released */ + drm_dev_get(&stream->oa->xe->drm); + + return stream_fd; +err_disable: + if (!param->disabled) + xe_oa_disable_locked(stream); +err_destroy: + xe_oa_stream_destroy(stream); +err_free: + kfree(stream); +exit: + return ret; +} + +/** + * xe_oa_timestamp_frequency - Return OA timestamp frequency + * @gt: @xe_gt + * + * OA timestamp frequency = CS timestamp frequency in most platforms. On some + * platforms OA unit ignores the CTC_SHIFT and the 2 timestamps differ. In such + * cases, return the adjusted CS timestamp frequency to the user.
+ */ +u32 xe_oa_timestamp_frequency(struct xe_gt *gt) +{ + u32 reg, shift; + + /* + * Wa_18013179988:dg2 + * Wa_14015568240:pvc + * Wa_14015846243:mtl + */ + switch (gt_to_xe(gt)->info.platform) { + case XE_DG2: + case XE_PVC: + case XE_METEORLAKE: + xe_pm_runtime_get(gt_to_xe(gt)); + reg = xe_mmio_read32(gt, RPM_CONFIG0); + xe_pm_runtime_put(gt_to_xe(gt)); + + shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); + return gt->info.reference_clock << (3 - shift); + + default: + return gt->info.reference_clock; + } +} + +static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent) +{ + u64 nom = (2ULL << exponent) * NSEC_PER_SEC; + u32 den = xe_oa_timestamp_frequency(gt); + + return div_u64(nom + den - 1, den); +} + +static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type) +{ + switch (hwe->oa_unit->type) { + case DRM_XE_OA_UNIT_TYPE_OAG: + return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR || + type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; + case DRM_XE_OA_UNIT_TYPE_OAM: + return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; + default: + return false; + } +} + +static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name) +{ + u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt); + u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt); + u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); + u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt); + int idx; + + for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) { + const struct xe_oa_format *f = &oa->oa_formats[idx]; + + if (counter_size == f->counter_size && bc_report == f->bc_report && + type == f->type && counter_sel == f->counter_select) { + *name = idx; + return 0; + } + } + + return -EINVAL; +} + +/** + * xe_oa_unit_id - Return OA unit ID for a hardware engine + * @hwe: @xe_hw_engine + * + * Return OA unit ID for a hardware engine when available + */ +u16 xe_oa_unit_id(struct xe_hw_engine *hwe) +{ + return hwe->oa_unit && hwe->oa_unit->num_engines ? + hwe->oa_unit->oa_unit_id : U16_MAX; +} + +static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) +{ + struct xe_gt *gt; + int i, ret = 0; + + if (param->exec_q) { + /* When we have an exec_q, get hwe from the exec_q */ + param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class, + param->engine_instance, true); + } else { + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + /* Else just get the first hwe attached to the oa unit */ + for_each_gt(gt, oa->xe, i) { + for_each_hw_engine(hwe, gt, id) { + if (xe_oa_unit_id(hwe) == param->oa_unit_id) { + param->hwe = hwe; + goto out; + } + } + } + } +out: + if (!param->hwe || xe_oa_unit_id(param->hwe) != param->oa_unit_id) { + drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", + param->exec_q ? 
param->exec_q->class : -1, + param->engine_instance, param->oa_unit_id); + ret = -EINVAL; + } + + return ret; +} + +static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (value >= oa->oa_unit_ids) { + drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); + return -EINVAL; + } + param->oa_unit_id = value; + return 0; +} + +static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->sample = value; + return 0; +} + +static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->metric_set = value; + return 0; +} + +static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + int ret = decode_oa_format(oa, value, &param->oa_format); + + if (ret) { + drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value); + return ret; + } + return 0; +} + +static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ +#define OA_EXPONENT_MAX 31 + + if (value > OA_EXPONENT_MAX) { + drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX); + return -EINVAL; + } + param->period_exponent = value; + return 0; +} + +static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->disabled = value; + return 0; +} + +static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->exec_queue_id = value; + return 0; +} + +static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->engine_instance = value; + return 0; +} + +static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->no_preempt = value; + return 0; +} + +typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param); +static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { + [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id, + [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa, + [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, + [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format, + [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent, + [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, + [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, + [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, + [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, +}; + +static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension, + struct xe_oa_open_param *param) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_ext_set_property ext; + int err; + u32 idx; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs)) || + XE_IOCTL_DBG(oa->xe, ext.pad)) + return -EINVAL; + + idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs)); + return xe_oa_set_property_funcs[idx](oa, ext.value, param); +} + +typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension, + struct xe_oa_open_param *param); +static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { + [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, +}; + +#define MAX_USER_EXTENSIONS 16
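As a usage illustration for the extension walk implemented by xe_oa_user_extensions() just below: userspace passes a singly linked chain of drm_xe_ext_set_property structs, each link naming one property and pointing at the next via base.next_extension, with a zero terminator; the kernel-side recursion is bounded by MAX_USER_EXTENSIONS. This minimal sketch uses only names already visible in this file (the property enums and DRM_XE_OA_EXTENSION_SET_PROPERTY) and omits the actual perf stream-open ioctl plumbing, so treat it as an assumption-laden example rather than the canonical uapi recipe.

#include <stdint.h>
#include <string.h>
#include <drm/xe_drm.h>

/* Build a three-link chain: OA unit, metric set, then periodic sampling on.
 * Returns the u64 'extension' value the open path hands to
 * xe_oa_user_extensions(); ext[] must stay alive across the ioctl. */
static uint64_t build_oa_open_chain(struct drm_xe_ext_set_property ext[3],
				    uint32_t oa_unit_id, uint32_t metric_set)
{
	memset(ext, 0, 3 * sizeof(*ext));

	ext[0].base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY;
	ext[0].property = DRM_XE_OA_PROPERTY_OA_UNIT_ID;
	ext[0].value = oa_unit_id;
	ext[0].base.next_extension = (uint64_t)(uintptr_t)&ext[1];

	ext[1].base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY;
	ext[1].property = DRM_XE_OA_PROPERTY_OA_METRIC_SET;
	ext[1].value = metric_set;
	ext[1].base.next_extension = (uint64_t)(uintptr_t)&ext[2];

	ext[2].base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY;
	ext[2].property = DRM_XE_OA_PROPERTY_SAMPLE_OA;
	ext[2].value = 1;
	/* Last link: next_extension stays 0, which terminates the kernel-side walk */

	return (uint64_t)(uintptr_t)&ext[0];
}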
+static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number, + struct xe_oa_open_param *param) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_user_extension ext; + int err; + u32 idx; + + if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) + return -E2BIG; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, ext.pad) || + XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs))) + return -EINVAL; + + idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs)); + err = xe_oa_user_extension_funcs[idx](oa, extension, param); + if (XE_IOCTL_DBG(oa->xe, err)) + return err; + + if (ext.next_extension) + return xe_oa_user_extensions(oa, ext.next_extension, ++ext_number, param); + + return 0; +} + +/** + * xe_oa_stream_open_ioctl - Opens an OA stream + * @dev: @drm_device + * @data: pointer to struct @drm_xe_oa_config + * @file: @drm_file + * + * The function opens an OA stream. An OA stream, opened with specified + * properties, enables perf counter samples to be collected, either + * periodically (time-based sampling), or on request (using perf queries) + */ +int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_oa *oa = &xe->oa; + struct xe_file *xef = to_xe_file(file); + struct xe_oa_open_param param = {}; + const struct xe_oa_format *f; + bool privileged_op = true; + int ret; + + if (!oa->xe) { + drm_dbg(&xe->drm, "xe oa interface not available for this system\n"); + return -ENODEV; + } + + ret = xe_oa_user_extensions(oa, data, 0, &param); + if (ret) + return ret; + + if (param.exec_queue_id > 0) { + param.exec_q = xe_exec_queue_lookup(xef, param.exec_queue_id); + if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) + return -ENOENT; + + if (param.exec_q->width > 1) + drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n"); + } + + /* + * Query-based sampling (using MI_REPORT_PERF_COUNT) with OAR/OAC, + * without global stream access, can be an unprivileged operation + */ + if (param.exec_q && !param.sample) + privileged_op = false; + + if (param.no_preempt) { + if (!param.exec_q) { + drm_dbg(&oa->xe->drm, "Preemption disable without exec_q!\n"); + ret = -EINVAL; + goto err_exec_q; + } + privileged_op = true; + } + + if (privileged_op && xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe perf stream\n"); + ret = -EACCES; + goto err_exec_q; + } + + if (!param.exec_q && !param.sample) { + drm_dbg(&oa->xe->drm, "Only OA report sampling supported\n"); + ret = -EINVAL; + goto err_exec_q; + } + + ret = xe_oa_assign_hwe(oa, &param); + if (ret) + goto err_exec_q; + + f = &oa->oa_formats[param.oa_format]; + if (!param.oa_format || !f->size || + !engine_supports_oa_format(param.hwe, f->type)) { + drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n", + param.oa_format, f->type, f->size, param.hwe->class); + ret = -EINVAL; + goto err_exec_q; + } + + if (param.period_exponent > 0) { + u64 oa_period, oa_freq_hz; + + /* Requesting samples from OAG buffer is a privileged operation */ + if (!param.sample) { + drm_dbg(&oa->xe->drm, "OA_EXPONENT specified without SAMPLE_OA\n"); + ret = -EINVAL; + goto err_exec_q; + } + oa_period = oa_exponent_to_ns(param.hwe->gt, param.period_exponent); + oa_freq_hz = div64_u64(NSEC_PER_SEC, oa_period); + drm_dbg(&oa->xe->drm, "Using
periodic sampling freq %lld Hz\n", oa_freq_hz); + } + + mutex_lock(&param.hwe->gt->oa.gt_lock); + ret = xe_oa_stream_open_ioctl_locked(oa, &param); + mutex_unlock(&param.hwe->gt->oa.gt_lock); +err_exec_q: + if (ret < 0 && param.exec_q) + xe_exec_queue_put(param.exec_q); + return ret; +} + +static bool xe_oa_is_valid_flex_addr(struct xe_oa *oa, u32 addr) +{ + static const struct xe_reg flex_eu_regs[] = { + EU_PERF_CNTL0, + EU_PERF_CNTL1, + EU_PERF_CNTL2, + EU_PERF_CNTL3, + EU_PERF_CNTL4, + EU_PERF_CNTL5, + EU_PERF_CNTL6, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) { + if (flex_eu_regs[i].addr == addr) + return true; + } + return false; +} + +static bool xe_oa_reg_in_range_table(u32 addr, const struct xe_mmio_range *table) +{ + while (table->start && table->end) { + if (addr >= table->start && addr <= table->end) + return true; + + table++; + } + + return false; +} + +static const struct xe_mmio_range xehp_oa_b_counters[] = { + { .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */ + { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */ + {} +}; + +static const struct xe_mmio_range gen12_oa_b_counters[] = { + { .start = 0x2b2c, .end = 0x2b2c }, /* OAG_OA_PESS */ + { .start = 0xd900, .end = 0xd91c }, /* OAG_OASTARTTRIG[1-8] */ + { .start = 0xd920, .end = 0xd93c }, /* OAG_OAREPORTTRIG1[1-8] */ + { .start = 0xd940, .end = 0xd97c }, /* OAG_CEC[0-7][0-1] */ + { .start = 0xdc00, .end = 0xdc3c }, /* OAG_SCEC[0-7][0-1] */ + { .start = 0xdc40, .end = 0xdc40 }, /* OAG_SPCTR_CNF */ + { .start = 0xdc44, .end = 0xdc44 }, /* OAA_DBG_REG */ + {} +}; + +static const struct xe_mmio_range mtl_oam_b_counters[] = { + { .start = 0x393000, .end = 0x39301c }, /* OAM_STARTTRIG1[1-8] */ + { .start = 0x393020, .end = 0x39303c }, /* OAM_REPORTTRIG1[1-8] */ + { .start = 0x393040, .end = 0x39307c }, /* OAM_CEC[0-7][0-1] */ + { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */ + {} +}; + +static const struct xe_mmio_range xe2_oa_b_counters[] = { + { .start = 0x393200, .end = 0x39323C }, /* MPES_0_MPES_SAG - MPES_7_UPPER_MPES_SAG */ + { .start = 0x394200, .end = 0x39423C }, /* MPES_0_MPES_SCMI0 - MPES_7_UPPER_MPES_SCMI0 */ + { .start = 0x394A00, .end = 0x394A3C }, /* MPES_0_MPES_SCMI1 - MPES_7_UPPER_MPES_SCMI1 */ + {}, +}; + +static bool xe_oa_is_valid_b_counter_addr(struct xe_oa *oa, u32 addr) +{ + return xe_oa_reg_in_range_table(addr, xehp_oa_b_counters) || + xe_oa_reg_in_range_table(addr, gen12_oa_b_counters) || + xe_oa_reg_in_range_table(addr, mtl_oam_b_counters) || + (GRAPHICS_VER(oa->xe) >= 20 && + xe_oa_reg_in_range_table(addr, xe2_oa_b_counters)); +} + +static const struct xe_mmio_range mtl_oa_mux_regs[] = { + { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ + { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ + { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ + { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ + { .start = 0x38d100, .end = 0x38d114}, /* VISACTL */ + {} +}; + +static const struct xe_mmio_range gen12_oa_mux_regs[] = { + { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ + { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ + { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ + { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ + { .start = 0x20cc, .end = 0x20cc }, /* WAIT_FOR_RC6_EXIT */ + {} +}; + +static const struct xe_mmio_range xe2_oa_mux_regs[] = { + { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ + { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ + { .start =
0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ + { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ + { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ + { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ + {}, +}; + +static bool xe_oa_is_valid_mux_addr(struct xe_oa *oa, u32 addr) +{ + if (GRAPHICS_VER(oa->xe) >= 20) + return xe_oa_reg_in_range_table(addr, xe2_oa_mux_regs); + else if (GRAPHICS_VERx100(oa->xe) >= 1270) + return xe_oa_reg_in_range_table(addr, mtl_oa_mux_regs); + else + return xe_oa_reg_in_range_table(addr, gen12_oa_mux_regs); +} + +static bool xe_oa_is_valid_config_reg_addr(struct xe_oa *oa, u32 addr) +{ + return xe_oa_is_valid_flex_addr(oa, addr) || + xe_oa_is_valid_b_counter_addr(oa, addr) || + xe_oa_is_valid_mux_addr(oa, addr); +} + +static struct xe_oa_reg * +xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr), + u32 __user *regs, u32 n_regs) +{ + struct xe_oa_reg *oa_regs; + int err; + u32 i; + + oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL); + if (!oa_regs) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < n_regs; i++) { + u32 addr, value; + + err = get_user(addr, regs); + if (err) + goto addr_err; + + if (!is_valid(oa, addr)) { + drm_dbg(&oa->xe->drm, "Invalid oa_reg address: %X\n", addr); + err = -EINVAL; + goto addr_err; + } + + err = get_user(value, regs + 1); + if (err) + goto addr_err; + + oa_regs[i].addr = XE_REG(addr); + oa_regs[i].value = value; + + regs += 2; + } + + return oa_regs; + +addr_err: + kfree(oa_regs); + return ERR_PTR(err); +} + +static ssize_t show_dynamic_id(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct xe_oa_config *oa_config = + container_of(attr, typeof(*oa_config), sysfs_metric_id); + + return sysfs_emit(buf, "%d\n", oa_config->id); +} + +static int create_dynamic_oa_sysfs_entry(struct xe_oa *oa, + struct xe_oa_config *oa_config) +{ + sysfs_attr_init(&oa_config->sysfs_metric_id.attr); + oa_config->sysfs_metric_id.attr.name = "id"; + oa_config->sysfs_metric_id.attr.mode = 0444; + oa_config->sysfs_metric_id.show = show_dynamic_id; + oa_config->sysfs_metric_id.store = NULL; + + oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr; + oa_config->attrs[1] = NULL; + + oa_config->sysfs_metric.name = oa_config->uuid; + oa_config->sysfs_metric.attrs = oa_config->attrs; + + return sysfs_create_group(oa->metrics_kobj, &oa_config->sysfs_metric); +} + +/** + * xe_oa_add_config_ioctl - Adds one OA config + * @dev: @drm_device + * @data: pointer to struct @drm_xe_oa_config + * @file: @drm_file + * + * The function adds an OA config to the set of OA configs maintained in + * the kernel. The config determines which OA metrics are collected for an + * OA stream.
+ */ +int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_oa *oa = &xe->oa; + struct drm_xe_oa_config param; + struct drm_xe_oa_config *arg = &param; + struct xe_oa_config *oa_config, *tmp; + struct xe_oa_reg *regs; + int err, id; + + if (!oa->xe) { + drm_dbg(&xe->drm, "xe oa interface not available for this system\n"); + return -ENODEV; + } + + if (xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to add xe OA config\n"); + return -EACCES; + } + + err = __copy_from_user(&param, u64_to_user_ptr(data), sizeof(param)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, arg->extensions) || + XE_IOCTL_DBG(oa->xe, !arg->regs_ptr) || + XE_IOCTL_DBG(oa->xe, !arg->n_regs)) + return -EINVAL; + + oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); + if (!oa_config) + return -ENOMEM; + + oa_config->oa = oa; + kref_init(&oa_config->ref); + + if (!uuid_is_valid(arg->uuid)) { + drm_dbg(&oa->xe->drm, "Invalid uuid format for OA config\n"); + err = -EINVAL; + goto reg_err; + } + + /* Last character in oa_config->uuid will be 0 because oa_config is kzalloc'd */ + memcpy(oa_config->uuid, arg->uuid, sizeof(arg->uuid)); + + oa_config->regs_len = arg->n_regs; + regs = xe_oa_alloc_regs(oa, xe_oa_is_valid_config_reg_addr, + u64_to_user_ptr(arg->regs_ptr), + arg->n_regs); + if (IS_ERR(regs)) { + drm_dbg(&oa->xe->drm, "Failed to create OA config for mux_regs\n"); + err = PTR_ERR(regs); + goto reg_err; + } + oa_config->regs = regs; + + err = mutex_lock_interruptible(&oa->metrics_lock); + if (err) + goto reg_err; + + /* We shouldn't have too many configs, so this iteration shouldn't be too costly */ + idr_for_each_entry(&oa->metrics_idr, tmp, id) { + if (!strcmp(tmp->uuid, oa_config->uuid)) { + drm_dbg(&oa->xe->drm, "OA config already exists with this uuid\n"); + err = -EADDRINUSE; + goto sysfs_err; + } + } + + err = create_dynamic_oa_sysfs_entry(oa, oa_config); + if (err) { + drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n"); + goto sysfs_err; + } + + oa_config->id = idr_alloc(&oa->metrics_idr, oa_config, 1, 0, GFP_KERNEL); + if (oa_config->id < 0) { + drm_dbg(&oa->xe->drm, "Failed to allocate id for OA config\n"); + err = oa_config->id; + goto sysfs_err; + } + + mutex_unlock(&oa->metrics_lock); + + drm_dbg(&oa->xe->drm, "Added config %s id=%i\n", oa_config->uuid, oa_config->id); + + return oa_config->id; + +sysfs_err: + mutex_unlock(&oa->metrics_lock); +reg_err: + xe_oa_config_put(oa_config); + drm_dbg(&oa->xe->drm, "Failed to add new OA config\n"); + return err; +} + +/** + * xe_oa_remove_config_ioctl - Removes one OA config + * @dev: @drm_device + * @data: pointer to struct @drm_xe_perf_param + * @file: @drm_file + */ +int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_oa *oa = &xe->oa; + struct xe_oa_config *oa_config; + u64 arg, *ptr = u64_to_user_ptr(data); + int ret; + + if (!oa->xe) { + drm_dbg(&xe->drm, "xe oa interface not available for this system\n"); + return -ENODEV; + } + + if (xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to remove xe OA config\n"); + return -EACCES; + } + + ret = get_user(arg, ptr); + if (XE_IOCTL_DBG(oa->xe, ret)) + return ret; + + ret = mutex_lock_interruptible(&oa->metrics_lock); + if (ret) + return ret; + + oa_config = idr_find(&oa->metrics_idr, arg);
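+ /* Lookup runs under metrics_lock, so a racing removal cannot drop the config between this idr_find() and the idr_remove() below */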
+ +/** + * xe_oa_remove_config_ioctl - Removes one OA config + * @dev: @drm_device + * @data: pointer to struct @drm_xe_perf_param + * @file: @drm_file + */ +int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_oa *oa = &xe->oa; + struct xe_oa_config *oa_config; + u64 arg, *ptr = u64_to_user_ptr(data); + int ret; + + if (!oa->xe) { + drm_dbg(&xe->drm, "xe oa interface not available for this system\n"); + return -ENODEV; + } + + if (xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to remove xe OA config\n"); + return -EACCES; + } + + ret = get_user(arg, ptr); + if (XE_IOCTL_DBG(oa->xe, ret)) + return ret; + + ret = mutex_lock_interruptible(&oa->metrics_lock); + if (ret) + return ret; + + oa_config = idr_find(&oa->metrics_idr, arg); + if (!oa_config) { + drm_dbg(&oa->xe->drm, "Failed to remove unknown OA config\n"); + ret = -ENOENT; + goto err_unlock; + } + + WARN_ON(arg != oa_config->id); + + sysfs_remove_group(oa->metrics_kobj, &oa_config->sysfs_metric); + idr_remove(&oa->metrics_idr, arg); + + mutex_unlock(&oa->metrics_lock); + + drm_dbg(&oa->xe->drm, "Removed config %s id=%i\n", oa_config->uuid, oa_config->id); + + xe_oa_config_put(oa_config); + + return 0; + +err_unlock: + mutex_unlock(&oa->metrics_lock); + return ret; +} + +/** + * xe_oa_register - Xe OA registration + * @xe: @xe_device + * + * Exposes the metrics sysfs directory upon completion of module initialization + */ +void xe_oa_register(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + if (!oa->xe) + return; + + oa->metrics_kobj = kobject_create_and_add("metrics", + &xe->drm.primary->kdev->kobj); +} + +/** + * xe_oa_unregister - Xe OA de-registration + * @xe: @xe_device + */ +void xe_oa_unregister(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + if (!oa->metrics_kobj) + return; + + kobject_put(oa->metrics_kobj); + oa->metrics_kobj = NULL; +} + +static u32 num_oa_units_per_gt(struct xe_gt *gt) +{ + return 1; +} + +static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) +{ + if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) { + /* + * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices + * within the gt use the same OAM. All MTL/LNL SKUs list 1 SA MEDIA + */ + xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA); + + return 0; + } + + return XE_OA_UNIT_INVALID; +} + +static u32 __hwe_oa_unit(struct xe_hw_engine *hwe) +{ + switch (hwe->class) { + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + return 0; + + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return __hwe_oam_unit(hwe); + + default: + return XE_OA_UNIT_INVALID; + } +} + +static struct xe_oa_regs __oam_regs(u32 base) +{ + return (struct xe_oa_regs) { + base, + OAM_HEAD_POINTER(base), + OAM_TAIL_POINTER(base), + OAM_BUFFER(base), + OAM_CONTEXT_CONTROL(base), + OAM_CONTROL(base), + OAM_DEBUG(base), + OAM_STATUS(base), + OAM_CONTROL_COUNTER_SEL_MASK, + }; +} + +static struct xe_oa_regs __oag_regs(void) +{ + return (struct xe_oa_regs) { + 0, + OAG_OAHEADPTR, + OAG_OATAILPTR, + OAG_OABUFFER, + OAG_OAGLBCTXCTRL, + OAG_OACONTROL, + OAG_OA_DEBUG, + OAG_OASTATUS, + OAG_OACONTROL_OA_COUNTER_SEL_MASK, + }; +} + +static void __xe_oa_init_oa_units(struct xe_gt *gt) +{ + const u32 mtl_oa_base[] = { 0x13000 }; + int i, num_units = gt->oa.num_oa_units; + + for (i = 0; i < num_units; i++) { + struct xe_oa_unit *u = &gt->oa.oa_unit[i]; + + if (gt->info.type != XE_GT_TYPE_MEDIA) { + u->regs = __oag_regs(); + u->type = DRM_XE_OA_UNIT_TYPE_OAG; + } else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { + u->regs = __oam_regs(mtl_oa_base[i]); + u->type = DRM_XE_OA_UNIT_TYPE_OAM; + } + + /* Ensure MMIO trigger remains disabled till there is a stream */ + xe_mmio_write32(gt, u->regs.oa_debug, + oag_configure_mmio_trigger(NULL, false)); + + /* Set oa_unit_ids now to ensure ids remain contiguous */ + u->oa_unit_id = gt_to_xe(gt)->oa.oa_unit_ids++; + } +}
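The xe_oa_regs table filled in by __oag_regs()/__oam_regs() above is what lets the rest of the OA code stay unit-agnostic: accesses go through u->regs rather than OAG- or OAM-specific defines. A hypothetical helper, not part of this patch, would read OA status identically for both unit types:

/* Illustrative only: u->regs hides the OAG vs OAM register layout. */
static u32 oa_unit_read_status(struct xe_gt *gt, struct xe_oa_unit *u)
{
	return xe_mmio_read32(gt, u->regs.oa_status);
}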
+ +static int xe_oa_init_gt(struct xe_gt *gt) +{ + u32 num_oa_units = num_oa_units_per_gt(gt); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_oa_unit *u; + + u = drmm_kcalloc(&gt_to_xe(gt)->drm, num_oa_units, sizeof(*u), GFP_KERNEL); + if (!u) + return -ENOMEM; + + for_each_hw_engine(hwe, gt, id) { + u32 index = __hwe_oa_unit(hwe); + + hwe->oa_unit = NULL; + if (index < num_oa_units) { + u[index].num_engines++; + hwe->oa_unit = &u[index]; + } + } + + /* + * Fused off engines can result in oa_units with num_engines == 0. These units + * will appear in OA unit query, but no perf streams can be opened on them. + */ + gt->oa.num_oa_units = num_oa_units; + gt->oa.oa_unit = u; + + __xe_oa_init_oa_units(gt); + + drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->oa.gt_lock); + + return 0; +} + +static int xe_oa_init_oa_units(struct xe_oa *oa) +{ + struct xe_gt *gt; + int i, ret; + + for_each_gt(gt, oa->xe, i) { + ret = xe_oa_init_gt(gt); + if (ret) + return ret; + } + + return 0; +} + +static void oa_format_add(struct xe_oa *oa, enum xe_oa_format_name format) +{ + __set_bit(format, oa->format_mask); +} + +static void xe_oa_init_supported_formats(struct xe_oa *oa) +{ + if (GRAPHICS_VER(oa->xe) >= 20) { + /* Xe2+ */ + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8); + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_PEC64u64); + oa_format_add(oa, XE_OA_FORMAT_PEC64u64_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_PEC64u32); + oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G1); + oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G1); + oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G2); + oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G2); + oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_32_G2_4); + oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_4_G2_32); + } else if (GRAPHICS_VERx100(oa->xe) >= 1270) { + /* XE_METEORLAKE */ + oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8); + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8); + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8); + } else if (GRAPHICS_VERx100(oa->xe) >= 1255) { + /* XE_DG2, XE_PVC */ + oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8); + } else { + /* Gen12+ */ + xe_assert(oa->xe, GRAPHICS_VER(oa->xe) >= 12); + oa_format_add(oa, XE_OA_FORMAT_A12); + oa_format_add(oa, XE_OA_FORMAT_A12_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_C4_B8); + } +} + +/** + * xe_oa_init - OA initialization during device probe + * @xe: @xe_device + * + * Return: 0 on success or a negative error code on failure + */ +int xe_oa_init(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + int ret; + + /* Support OA only with GuC submission and Gen12+ */ + if (!xe_device_uc_enabled(xe) || GRAPHICS_VER(xe) < 12) + return 0; + + if (IS_SRIOV_VF(xe)) + return 0; + + oa->xe = xe; + oa->oa_formats = oa_formats; + + drmm_mutex_init(&oa->xe->drm, &oa->metrics_lock); + idr_init_base(&oa->metrics_idr, 1); + + ret = xe_oa_init_oa_units(oa); + if (ret) { + drm_err(&xe->drm, "OA initialization failed (%pe)\n", ERR_PTR(ret)); + goto exit; + } + + xe_oa_init_supported_formats(oa); + return 0; +exit: + oa->xe = NULL; + return ret; +} + +static int destroy_config(int id, void *p, void *data) +{ + xe_oa_config_put(p); + return 0; +} + +/** + * xe_oa_fini - OA de-initialization during device remove + * @xe: @xe_device + */ +void xe_oa_fini(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + if (!oa->xe) + return; + + idr_for_each(&oa->metrics_idr, destroy_config, oa); + idr_destroy(&oa->metrics_idr); + + oa->xe = NULL; +}
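Since format_mask (declared in xe_oa_types.h below) is a plain bitmap indexed by enum xe_oa_format_name, checking whether a platform supports a requested format reduces to a bit test. A hypothetical helper, not part of this patch:

static bool xe_oa_format_valid(struct xe_oa *oa, enum xe_oa_format_name f)
{
	return f < __XE_OA_FORMAT_MAX && test_bit(f, oa->format_mask);
}

diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h new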
file mode 100644 index 000000000000..87a38820c317 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_oa.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_OA_H_ +#define _XE_OA_H_ + +#include "xe_oa_types.h" + +struct drm_device; +struct drm_file; +struct xe_device; +struct xe_gt; +struct xe_hw_engine; + +int xe_oa_init(struct xe_device *xe); +void xe_oa_fini(struct xe_device *xe); +void xe_oa_register(struct xe_device *xe); +void xe_oa_unregister(struct xe_device *xe); +int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); +int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); +int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); +u32 xe_oa_timestamp_frequency(struct xe_gt *gt); +u16 xe_oa_unit_id(struct xe_hw_engine *hwe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h new file mode 100644 index 000000000000..540c3ec53a6d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -0,0 +1,242 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_OA_TYPES_H_ +#define _XE_OA_TYPES_H_ + +#include <linux/bitops.h> +#include <linux/idr.h> +#include <linux/mutex.h> +#include <linux/types.h> + +#include <drm/xe_drm.h> +#include "regs/xe_reg_defs.h" +#include "xe_hw_engine_types.h" + +#define XE_OA_BUFFER_SIZE SZ_16M + +enum xe_oa_report_header { + HDR_32_BIT = 0, + HDR_64_BIT, +}; + +enum xe_oa_format_name { + XE_OA_FORMAT_C4_B8, + + /* Gen8+ */ + XE_OA_FORMAT_A12, + XE_OA_FORMAT_A12_B8_C8, + XE_OA_FORMAT_A32u40_A4u32_B8_C8, + + /* DG2 */ + XE_OAR_FORMAT_A32u40_A4u32_B8_C8, + XE_OA_FORMAT_A24u40_A14u32_B8_C8, + + /* DG2/MTL OAC */ + XE_OAC_FORMAT_A24u64_B8_C8, + XE_OAC_FORMAT_A22u32_R2u32_B8_C8, + + /* MTL OAM */ + XE_OAM_FORMAT_MPEC8u64_B8_C8, + XE_OAM_FORMAT_MPEC8u32_B8_C8, + + /* Xe2+ */ + XE_OA_FORMAT_PEC64u64, + XE_OA_FORMAT_PEC64u64_B8_C8, + XE_OA_FORMAT_PEC64u32, + XE_OA_FORMAT_PEC32u64_G1, + XE_OA_FORMAT_PEC32u32_G1, + XE_OA_FORMAT_PEC32u64_G2, + XE_OA_FORMAT_PEC32u32_G2, + XE_OA_FORMAT_PEC36u64_G1_32_G2_4, + XE_OA_FORMAT_PEC36u64_G1_4_G2_32, + + __XE_OA_FORMAT_MAX, +}; + +/** + * struct xe_oa_format - Format fields for supported OA formats. OA format + * properties are specified in PRM/Bspec 52198 and 60942 + */ +struct xe_oa_format { + /** @counter_select: counter select value (see Bspec 52198/60942) */ + u32 counter_select; + /** @size: record size as written by HW (multiple of 64 byte cachelines) */ + int size; + /** @type: of enum @drm_xe_oa_format_type */ + int type; + /** @header: 32 or 64 bit report headers */ + enum xe_oa_report_header header; + /** @counter_size: counter size value (see Bspec 60942) */ + u16 counter_size; + /** @bc_report: BC report value (see Bspec 60942) */ + u16 bc_report; +}; + +/** struct xe_oa_regs - Registers for each OA unit */ +struct xe_oa_regs { + u32 base; + struct xe_reg oa_head_ptr; + struct xe_reg oa_tail_ptr; + struct xe_reg oa_buffer; + struct xe_reg oa_ctx_ctrl; + struct xe_reg oa_ctrl; + struct xe_reg oa_debug; + struct xe_reg oa_status; + u32 oa_ctrl_counter_select_mask; +}; + +/** + * struct xe_oa_unit - Hardware OA unit + */ +struct xe_oa_unit { + /** @oa_unit_id: identifier for the OA unit */ + u16 oa_unit_id; + + /** @type: Type of OA unit - OAM, OAG etc. 
*/ + enum drm_xe_oa_unit_type type; + + /** @regs: OA registers for programming the OA unit */ + struct xe_oa_regs regs; + + /** @num_engines: number of engines attached to this OA unit */ + u32 num_engines; + + /** @exclusive_stream: The stream currently using the OA unit */ + struct xe_oa_stream *exclusive_stream; +}; + +/** + * struct xe_oa_gt - OA per-gt information + */ +struct xe_oa_gt { + /** @gt_lock: lock protecting create/destroy OA streams */ + struct mutex gt_lock; + + /** @num_oa_units: number of oa units for each gt */ + u32 num_oa_units; + + /** @oa_unit: array of oa_units */ + struct xe_oa_unit *oa_unit; +}; + +/** + * struct xe_oa - OA device level information + */ +struct xe_oa { + /** @xe: back pointer to xe device */ + struct xe_device *xe; + + /** @metrics_kobj: kobj for metrics sysfs */ + struct kobject *metrics_kobj; + + /** @metrics_lock: lock protecting add/remove configs */ + struct mutex metrics_lock; + + /** @metrics_idr: List of dynamic configurations (struct xe_oa_config) */ + struct idr metrics_idr; + + /** @ctx_oactxctrl_offset: offset of OACTXCONTROL register in context image */ + u32 ctx_oactxctrl_offset[XE_ENGINE_CLASS_MAX]; + + /** @oa_formats: tracks all OA formats across platforms */ + const struct xe_oa_format *oa_formats; + + /** @format_mask: tracks valid OA formats for a platform */ + unsigned long format_mask[BITS_TO_LONGS(__XE_OA_FORMAT_MAX)]; + + /** @oa_unit_ids: tracks oa unit ids assigned across gt's */ + u16 oa_unit_ids; +}; + +/** + * struct xe_oa_buffer - State of the stream OA buffer + */ +struct xe_oa_buffer { + /** @format: data format */ + const struct xe_oa_format *format; + + /** @bo: xe_bo backing the OA buffer */ + struct xe_bo *bo; + + /** @vaddr: mapped vaddr of the OA buffer */ + u8 *vaddr; + + /** @ptr_lock: Lock protecting reads/writes to head/tail pointers */ + spinlock_t ptr_lock; + + /** @head: Cached head to read from */ + u32 head; + + /** @tail: The last verified cached tail where HW has completed writing */ + u32 tail; + + /** @circ_size: The effective circular buffer size, for Xe2+ */ + u32 circ_size; +}; + +/** + * struct xe_oa_stream - state for a single open stream FD + */ +struct xe_oa_stream { + /** @oa: xe_oa backpointer */ + struct xe_oa *oa; + + /** @gt: gt associated with the oa stream */ + struct xe_gt *gt; + + /** @hwe: hardware engine associated with this oa stream */ + struct xe_hw_engine *hwe; + + /** @stream_lock: Lock serializing stream operations */ + struct mutex stream_lock; + + /** @sample: true if DRM_XE_OA_PROPERTY_SAMPLE_OA is provided */ + bool sample; + + /** @exec_q: Exec queue corresponding to DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID */ + struct xe_exec_queue *exec_q; + + /** @k_exec_q: kernel exec_q used for OA programming batch submissions */ + struct xe_exec_queue *k_exec_q; + + /** @enabled: Whether the stream is currently enabled */ + bool enabled; + + /** @oa_config: OA configuration used by the stream */ + struct xe_oa_config *oa_config; + + /** @oa_config_bos: List of struct @xe_oa_config_bo's */ + struct llist_head oa_config_bos; + + /** @poll_check_timer: Timer to periodically check for data in the OA buffer */ + struct hrtimer poll_check_timer; + + /** @poll_wq: Wait queue for waiting for OA data to be available */ + wait_queue_head_t poll_wq; + + /** @pollin: Whether there is data available to read */ + bool pollin; + + /** @periodic: Whether periodic sampling is currently enabled */ + bool periodic; + + /** @period_exponent: OA unit sampling frequency is derived from this */ + int period_exponent; + 
+ /** @oa_buffer: OA buffer for the stream */ + struct xe_oa_buffer oa_buffer; + + /** @poll_period_ns: hrtimer period for checking OA buffer for available data */ + u64 poll_period_ns; + + /** @override_gucrc: GuC RC has been overridden for the OA stream */ + bool override_gucrc; + + /** @oa_status: temporary storage for oa_status register value */ + u32 oa_status; + + /** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */ + u32 no_preempt; +}; +#endif diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 973c14fbbbf3..f5d5a368e595 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -340,7 +340,7 @@ static const struct xe_device_desc lnl_desc = { .require_force_probe = true, }; -static const struct xe_device_desc bmg_desc __maybe_unused = { +static const struct xe_device_desc bmg_desc = { DGFX_FEATURES, PLATFORM(BATTLEMAGE), .has_display = true, @@ -390,6 +390,7 @@ static const struct pci_device_id pciidlist[] = { XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc), XE_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc), + XE_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc), { } }; MODULE_DEVICE_TABLE(pci, pciidlist); @@ -747,6 +748,11 @@ static void xe_pci_remove(struct pci_dev *pdev) if (!xe) /* driver load aborted, nothing to cleanup */ return; +#ifdef CONFIG_PCI_IOV + if (IS_SRIOV_PF(xe)) + xe_pci_sriov_configure(pdev, 0); +#endif + xe_device_remove(xe); xe_pm_runtime_fini(xe); pci_set_drvdata(pdev, NULL); diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 06d0fceb5114..74c8fadc9365 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -6,6 +6,7 @@ #include "xe_assert.h" #include "xe_device.h" #include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_control.h" #include "xe_pci_sriov.h" #include "xe_pm.h" #include "xe_sriov.h" @@ -37,6 +38,17 @@ static void pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs) xe_gt_sriov_pf_config_release(gt, n, true); } +static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + struct xe_gt *gt; + unsigned int id; + unsigned int n; + + for_each_gt(gt, xe, id) + for (n = 1; n <= num_vfs; n++) + xe_gt_sriov_pf_control_trigger_flr(gt, n); +} + static int pf_enable_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -94,6 +106,8 @@ static int pf_disable_vfs(struct xe_device *xe) pci_disable_sriov(pdev); + pf_reset_vfs(xe, num_vfs); + pf_unprovision_vfs(xe, num_vfs); /* not needed anymore - see pf_enable_vfs() */ diff --git a/drivers/gpu/drm/xe/xe_perf.c b/drivers/gpu/drm/xe/xe_perf.c new file mode 100644 index 000000000000..d6cd74cadf34 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_perf.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include <linux/errno.h> +#include <linux/sysctl.h> + +#include <drm/xe_drm.h> + +#include "xe_oa.h" +#include "xe_perf.h" + +u32 xe_perf_stream_paranoid = true; +static struct ctl_table_header *sysctl_header; + +static int xe_oa_ioctl(struct drm_device *dev, struct drm_xe_perf_param *arg, + struct drm_file *file) +{ + switch (arg->perf_op) { + case DRM_XE_PERF_OP_STREAM_OPEN: + return xe_oa_stream_open_ioctl(dev, arg->param, file); + case DRM_XE_PERF_OP_ADD_CONFIG: + return xe_oa_add_config_ioctl(dev, arg->param, file); + case DRM_XE_PERF_OP_REMOVE_CONFIG: + return xe_oa_remove_config_ioctl(dev, arg->param, file); + default: + return -EINVAL; + } +} 
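The op dispatch above keeps the uapi surface to a single ioctl. Removal, for example, passes param as a pointer to a single u64 holding the config id; a minimal userspace sketch under the same assumptions as the add-config example (DRM_IOCTL_XE_PERF name assumed):

#include <stdint.h>
#include <sys/ioctl.h>

#include <xe_drm.h>

/* Remove a dynamic OA config previously returned by ADD_CONFIG. */
static int remove_oa_config(int drm_fd, uint64_t config_id)
{
	struct drm_xe_perf_param p = {
		.perf_type = DRM_XE_PERF_TYPE_OA,
		.perf_op = DRM_XE_PERF_OP_REMOVE_CONFIG,
		.param = (uintptr_t)&config_id,	/* kernel get_user()s one u64 */
	};

	return ioctl(drm_fd, DRM_IOCTL_XE_PERF, &p);
}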
+ +/** + * xe_perf_ioctl - The top level perf layer ioctl + * @dev: @drm_device + * @data: pointer to struct @drm_xe_perf_param + * @file: @drm_file + * + * The function is called for different perf stream types and allows execution + * of different operations supported by those perf stream types. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_xe_perf_param *arg = data; + + if (arg->extensions) + return -EINVAL; + + switch (arg->perf_type) { + case DRM_XE_PERF_TYPE_OA: + return xe_oa_ioctl(dev, arg, file); + default: + return -EINVAL; + } +} + +static struct ctl_table perf_ctl_table[] = { + { + .procname = "perf_stream_paranoid", + .data = &xe_perf_stream_paranoid, + .maxlen = sizeof(xe_perf_stream_paranoid), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; + +/** + * xe_perf_sysctl_register - Register "perf_stream_paranoid" sysctl + * + * Normally only superuser/root can access perf counter data. However, + * superuser can set perf_stream_paranoid sysctl to 0 to allow non-privileged + * users to also access perf data. + * + * Return: always returns 0 + */ +int xe_perf_sysctl_register(void) +{ + sysctl_header = register_sysctl("dev/xe", perf_ctl_table); + return 0; +} + +/** + * xe_perf_sysctl_unregister - Unregister "perf_stream_paranoid" sysctl + */ +void xe_perf_sysctl_unregister(void) +{ + unregister_sysctl_table(sysctl_header); +} diff --git a/drivers/gpu/drm/xe/xe_perf.h b/drivers/gpu/drm/xe/xe_perf.h new file mode 100644 index 000000000000..53a8377a1bb1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_perf.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_PERF_H_ +#define _XE_PERF_H_ + +#include <linux/types.h> + +struct drm_device; +struct drm_file; + +extern u32 xe_perf_stream_paranoid; + +int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int xe_perf_sysctl_register(void); +void xe_perf_sysctl_unregister(void); + +#endif diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 5b243b7feb59..e8b8ae5c6485 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -129,7 +129,7 @@ xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q, list_del_init(&pfence->link); pfence->q = xe_exec_queue_get(q); dma_fence_init(&pfence->base, &preempt_fence_ops, - &q->compute.lock, context, seqno); + &q->lr.lock, context, seqno); return &pfence->base; } diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index cd60c009b679..ade9e7a3a0ad 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1137,8 +1137,9 @@ static void invalidation_fence_cb(struct dma_fence *fence, { struct invalidation_fence *ifence = container_of(cb, struct invalidation_fence, cb); + struct xe_device *xe = gt_to_xe(ifence->gt); - trace_xe_gt_tlb_invalidation_fence_cb(&ifence->base); + trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); if (!ifence->fence->error) { queue_work(system_wq, &ifence->work); } else { @@ -1153,8 +1154,9 @@ static void invalidation_fence_work_func(struct work_struct *w) { struct invalidation_fence *ifence = container_of(w, struct invalidation_fence, work); + struct xe_device *xe = gt_to_xe(ifence->gt); - trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base); + 
trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, ifence->end, ifence->asid); } @@ -1166,7 +1168,7 @@ static int invalidation_fence_init(struct xe_gt *gt, { int ret; - trace_xe_gt_tlb_invalidation_fence_create(&ifence->base); + trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); spin_lock_irq(&gt->tlb_invalidation.lock); dma_fence_init(&ifence->base.base, &invalidation_fence_ops, diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 995effcb904b..4e01df6b1b7a 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -602,6 +602,82 @@ query_uc_fw_version(struct xe_device *xe, struct drm_xe_device_query *query) return 0; } +static size_t calc_oa_unit_query_size(struct xe_device *xe) +{ + size_t size = sizeof(struct drm_xe_query_oa_units); + struct xe_gt *gt; + int i, id; + + for_each_gt(gt, xe, id) { + for (i = 0; i < gt->oa.num_oa_units; i++) { + size += sizeof(struct drm_xe_oa_unit); + size += gt->oa.oa_unit[i].num_engines * + sizeof(struct drm_xe_engine_class_instance); + } + } + + return size; +} + +static int query_oa_units(struct xe_device *xe, + struct drm_xe_device_query *query) +{ + void __user *query_ptr = u64_to_user_ptr(query->data); + size_t size = calc_oa_unit_query_size(xe); + struct drm_xe_query_oa_units *qoa; + enum xe_hw_engine_id hwe_id; + struct drm_xe_oa_unit *du; + struct xe_hw_engine *hwe; + struct xe_oa_unit *u; + int gt_id, i, j, ret; + struct xe_gt *gt; + u8 *pdu; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_DBG(xe, query->size != size)) { + return -EINVAL; + } + + qoa = kzalloc(size, GFP_KERNEL); + if (!qoa) + return -ENOMEM; + + pdu = (u8 *)&qoa->oa_units[0]; + for_each_gt(gt, xe, gt_id) { + for (i = 0; i < gt->oa.num_oa_units; i++) { + u = &gt->oa.oa_unit[i]; + du = (struct drm_xe_oa_unit *)pdu; + + du->oa_unit_id = u->oa_unit_id; + du->oa_unit_type = u->type; + du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); + du->capabilities = DRM_XE_OA_CAPS_BASE; + + j = 0; + for_each_hw_engine(hwe, gt, hwe_id) { + if (!xe_hw_engine_is_reserved(hwe) && + xe_oa_unit_id(hwe) == u->oa_unit_id) { + du->eci[j].engine_class = + xe_to_user_engine_class[hwe->class]; + du->eci[j].engine_instance = hwe->logical_instance; + du->eci[j].gt_id = gt->info.id; + j++; + } + } + du->num_engines = j; + pdu += sizeof(*du) + j * sizeof(du->eci[0]); + qoa->num_oa_units++; + } + } + + ret = copy_to_user(query_ptr, qoa, size); + kfree(qoa); + + return ret ? 
-EFAULT : 0; +} + static int (* const xe_query_funcs[])(struct xe_device *xe, struct drm_xe_device_query *query) = { query_engines, @@ -612,6 +688,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe, query_gt_topology, query_engine_cycles, query_uc_fw_version, + query_oa_units, }; int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 3fa2ece7d228..3996934974fa 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -7,6 +7,7 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_oa_regs.h" #include "regs/xe_regs.h" #include "xe_gt_types.h" #include "xe_platform_types.h" @@ -63,7 +64,28 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(WHITELIST(CSBE_DEBUG_STATUS(RENDER_RING_BASE), 0)) }, - + { XE_RTP_NAME("oa_reg_render"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(WHITELIST(OAG_MMIOTRIGGER, + RING_FORCE_TO_NONPRIV_ACCESS_RW), + WHITELIST(OAG_OASTATUS, + RING_FORCE_TO_NONPRIV_ACCESS_RD), + WHITELIST(OAG_OAHEADPTR, + RING_FORCE_TO_NONPRIV_ACCESS_RD | + RING_FORCE_TO_NONPRIV_RANGE_4)) + }, + { XE_RTP_NAME("oa_reg_compute"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED), + ENGINE_CLASS(COMPUTE)), + XE_RTP_ACTIONS(WHITELIST(OAG_MMIOTRIGGER, + RING_FORCE_TO_NONPRIV_ACCESS_RW), + WHITELIST(OAG_OASTATUS, + RING_FORCE_TO_NONPRIV_ACCESS_RD), + WHITELIST(OAG_OAHEADPTR, + RING_FORCE_TO_NONPRIV_ACCESS_RD | + RING_FORCE_TO_NONPRIV_RANGE_4)) + }, {} }; diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index db630d27beba..0be4f489d3e1 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -224,6 +224,19 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job) return job->q->vm ? 
BIT(8) : 0; } +static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) +{ + dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT | + MI_COPY_MEM_MEM_DST_GGTT; + dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); + dw[i++] = 0; + dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc); + dw[i++] = 0; + dw[i++] = MI_NOOP; + + return i; +} + /* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc, u64 batch_addr, u32 seqno) @@ -232,6 +245,8 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc u32 ppgtt_flag = get_ppgtt_flag(job); struct xe_gt *gt = job->q->gt; + i = emit_copy_timestamp(lrc, dw, i); + if (job->ring_ops_flush_tlb) { dw[i++] = preparser_disable(true); i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), @@ -283,6 +298,8 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, struct xe_device *xe = gt_to_xe(gt); bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE; + i = emit_copy_timestamp(lrc, dw, i); + dw[i++] = preparser_disable(true); /* hsdes: 1809175790 */ @@ -332,6 +349,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); u32 mask_flags = 0; + i = emit_copy_timestamp(lrc, dw, i); + dw[i++] = preparser_disable(true); if (lacks_render) mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS; @@ -375,6 +394,8 @@ static void emit_migration_job_gen12(struct xe_sched_job *job, { u32 dw[MAX_JOB_SIZE_DW], i = 0; + i = emit_copy_timestamp(lrc, dw, i); + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, dw, i); diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 01c32a932780..5b27f7c45ea3 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -13,6 +13,7 @@ #include "xe_gt_topology.h" #include "xe_macros.h" #include "xe_reg_sr.h" +#include "xe_sriov.h" /** * DOC: Register Table Processing @@ -35,11 +36,18 @@ static bool rule_matches(const struct xe_device *xe, unsigned int n_rules) { const struct xe_rtp_rule *r; - unsigned int i; + unsigned int i, rcount = 0; bool match; for (r = rules, i = 0; i < n_rules; r = &rules[++i]) { switch (r->match_type) { + case XE_RTP_MATCH_OR: + /* + * This is only reached if a complete set of + * rules passed or none were evaluated. For both cases, + * shortcut the other rules and return the proper value. 
+ */ + goto done; case XE_RTP_MATCH_PLATFORM: match = xe->info.platform == r->platform; break; @@ -56,6 +64,9 @@ static bool rule_matches(const struct xe_device *xe, xe->info.graphics_verx100 <= r->ver_end && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; + case XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT: + match = xe->info.graphics_verx100 == r->ver_start; + break; case XE_RTP_MATCH_GRAPHICS_STEP: match = xe->info.step.graphics >= r->step_start && xe->info.step.graphics < r->step_end && @@ -75,6 +86,9 @@ static bool rule_matches(const struct xe_device *xe, xe->info.step.media < r->step_end && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; + case XE_RTP_MATCH_MEDIA_VERSION_ANY_GT: + match = xe->info.media_verx100 == r->ver_start; + break; case XE_RTP_MATCH_INTEGRATED: match = !xe->info.is_dgfx; break; @@ -102,10 +116,27 @@ static bool rule_matches(const struct xe_device *xe, match = false; } - if (!match) - return false; + if (!match) { + /* + * Advance rules until we find XE_RTP_MATCH_OR to check + * if there's another set of conditions to check + */ + while (i < n_rules && rules[++i].match_type != XE_RTP_MATCH_OR) + ; + + if (i >= n_rules) + return false; + + rcount = 0; + } else { + rcount++; + } } +done: + if (drm_WARN_ON(&xe->drm, !rcount)) + return false; + return true; } @@ -227,6 +258,9 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, rtp_get_context(ctx, &hwe, &gt, &xe); + if (IS_SRIOV_VF(xe)) + return; + for (entry = entries; entry && entry->name; entry++) { bool match = false; @@ -324,8 +358,3 @@ bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, return dss >= dss_per_gslice; } -bool xe_rtp_match_when_media2000(const struct xe_gt *gt, - const struct xe_hw_engine *hwe) -{ - return (gt_to_xe(gt))->info.media_verx100 == 2000; -}
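With this, rule_matches() treats a rule list as groups of AND'ed rules separated by XE_RTP_MATCH_OR: a group that matches in full jumps to done, a mismatch skips past the next OR and clears rcount, and the drm_WARN_ON(!rcount) rejects malformed lists such as a leading OR or two consecutive ORs. A hypothetical entry, in the style of the XE_RTP_RULES() kernel-doc below:

	{ XE_RTP_NAME("or-example"),
	  /* matches on (DG2 && render engine) or on TIGERLAKE, any engine */
	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER),
		       OR, PLATFORM(TIGERLAKE)),
	  ...
	},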
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index a32645f5f80b..ad446731192c 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -140,9 +140,23 @@ struct xe_reg_sr; .ver_start = ver_start__, .ver_end = ver_end__, } /** - * XE_RTP_RULE_MEDIA_VERSION - Create rule matching media version + * XE_RTP_RULE_GRAPHICS_VERSION_ANY_GT - Create rule matching graphics version on any GT * @ver__: Graphics IP version to match * + * Like XE_RTP_RULE_GRAPHICS_VERSION, but it matches even if the current GT + * being checked is not of the graphics type. It allows adding RTP entries to + * another GT when the device contains a Graphics IP with that version. + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_GRAPHICS_VERSION_ANY_GT(ver__) \ + { .match_type = XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT, \ + .ver_start = ver__, } + +/** + * XE_RTP_RULE_MEDIA_VERSION - Create rule matching media version + * @ver__: Media IP version to match + * * Refer to XE_RTP_RULES() for expected usage. */ #define XE_RTP_RULE_MEDIA_VERSION(ver__) \ @@ -164,6 +178,20 @@ struct xe_reg_sr; .ver_start = ver_start__, .ver_end = ver_end__, } /** + * XE_RTP_RULE_MEDIA_VERSION_ANY_GT - Create rule matching media version on any GT + * @ver__: Media IP version to match + * + * Like XE_RTP_RULE_MEDIA_VERSION, but it matches even if the current GT being + * checked is not of the media type. It allows adding RTP entries to another + * GT when the device contains a Media IP with that version. + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_MEDIA_VERSION_ANY_GT(ver__) \ + { .match_type = XE_RTP_MATCH_MEDIA_VERSION_ANY_GT, \ + .ver_start = ver__, } + +/** * XE_RTP_RULE_IS_INTEGRATED - Create a rule matching integrated graphics devices * * Refer to XE_RTP_RULES() for expected usage. @@ -180,6 +208,27 @@ struct xe_reg_sr; { .match_type = XE_RTP_MATCH_DISCRETE } /** + * XE_RTP_RULE_OR - Create an OR condition for rtp rules + * + * RTP rules are AND'ed when evaluated and all of them need to match. + * XE_RTP_RULE_OR allows creating a set of rules where any of them matching is + * sufficient for the action to trigger. Example: + * + * .. code-block:: c + * + * const struct xe_rtp_entry_sr entries[] = { + * ... + * { XE_RTP_NAME("test-entry"), + * XE_RTP_RULES(PLATFORM(DG2), OR, PLATFORM(TIGERLAKE)), + * ... + * }, + * ... + * }; + */ +#define XE_RTP_RULE_OR \ + { .match_type = XE_RTP_MATCH_OR } + +/** * XE_RTP_ACTION_WR - Helper to write a value to the register, overriding all * the bits * @reg_: Register @@ -325,7 +374,7 @@ struct xe_reg_sr; * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry_sr entry * @...: Rules * - * At least one rule is needed and up to 4 are supported. Multiple rules are + * At least one rule is needed and up to 6 are supported. Multiple rules are * AND'ed together, i.e. all the rules must evaluate to true for the entry to * be processed. See XE_RTP_MATCH_* for the possible match rules. Example: * @@ -350,7 +399,7 @@ struct xe_reg_sr; * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry_sr * @...: Actions to be taken * - * At least one action is needed and up to 4 are supported. See XE_RTP_ACTION_* + * At least one action is needed and up to 6 are supported. See XE_RTP_ACTION_* * for the possible actions. Example: * * .. code-block:: c @@ -427,18 +476,4 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, const struct xe_hw_engine *hwe); -/* - * xe_rtp_match_when_media2000 - Match when media GT version 2000 - * - * @gt: GT structure - * @hwe: Engine instance - * - * Its one of the case where we need to apply workaround on primary GT - * based on if media GT version 2000 is present. Thus this API will help - * us to match media version 2000. - * - * Returns: true if media GT version 2000, false otherwise. 
- */ -bool xe_rtp_match_when_media2000(const struct xe_gt *gt, - const struct xe_hw_engine *hwe); #endif diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h index 7735f217ba71..c59e40fd7fff 100644 --- a/drivers/gpu/drm/xe/xe_rtp_helpers.h +++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h @@ -58,6 +58,8 @@ #define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) #define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) #define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_5(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_4(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_6(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_5(prefix_, sep_, _XE_TUPLE_TAIL args_) /* * XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 637acc7626a4..1b76b947c706 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -42,15 +42,18 @@ enum { XE_RTP_MATCH_SUBPLATFORM, XE_RTP_MATCH_GRAPHICS_VERSION, XE_RTP_MATCH_GRAPHICS_VERSION_RANGE, + XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT, XE_RTP_MATCH_GRAPHICS_STEP, XE_RTP_MATCH_MEDIA_VERSION, XE_RTP_MATCH_MEDIA_VERSION_RANGE, + XE_RTP_MATCH_MEDIA_VERSION_ANY_GT, XE_RTP_MATCH_MEDIA_STEP, XE_RTP_MATCH_INTEGRATED, XE_RTP_MATCH_DISCRETE, XE_RTP_MATCH_ENGINE_CLASS, XE_RTP_MATCH_NOT_ENGINE_CLASS, XE_RTP_MATCH_FUNC, + XE_RTP_MATCH_OR, }; /** struct xe_rtp_rule - match rule for processing entry */ diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 5c013904877a..44d534e362cd 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -363,3 +363,9 @@ xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot, for (i = 0; i < snapshot->batch_addr_len; i++) drm_printf(p, "batch_addr[%u]: 0x%016llx\n", i, snapshot->batch_addr[i]); } + +int xe_sched_job_add_deps(struct xe_sched_job *job, struct dma_resv *resv, + enum dma_resv_usage usage) +{ + return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage); +} diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h index f362e28455db..3dc72c5c1f13 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.h +++ b/drivers/gpu/drm/xe/xe_sched_job.h @@ -90,4 +90,7 @@ struct xe_sched_job_snapshot *xe_sched_job_snapshot_capture(struct xe_sched_job void xe_sched_job_snapshot_free(struct xe_sched_job_snapshot *snapshot); void xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot, struct drm_printer *p); +int xe_sched_job_add_deps(struct xe_sched_job *job, struct dma_resv *resv, + enum dma_resv_usage usage); + #endif diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h index 486bb21c3256..688fbabf08f1 100644 --- a/drivers/gpu/drm/xe/xe_sriov.h +++ b/drivers/gpu/drm/xe/xe_sriov.h @@ -19,18 +19,18 @@ void xe_sriov_probe_early(struct xe_device *xe); void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p); int xe_sriov_init(struct xe_device *xe); -static inline enum 
xe_sriov_mode xe_device_sriov_mode(struct xe_device *xe) +static inline enum xe_sriov_mode xe_device_sriov_mode(const struct xe_device *xe) { xe_assert(xe, xe->sriov.__mode); return xe->sriov.__mode; } -static inline bool xe_device_is_sriov_pf(struct xe_device *xe) +static inline bool xe_device_is_sriov_pf(const struct xe_device *xe) { return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_PF; } -static inline bool xe_device_is_sriov_vf(struct xe_device *xe) +static inline bool xe_device_is_sriov_vf(const struct xe_device *xe) { return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_VF; } diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index e4cba64474e6..09ca1ad057b0 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -12,8 +12,6 @@ #include <linux/tracepoint.h> #include <linux/types.h> -#include "xe_bo.h" -#include "xe_bo_types.h" #include "xe_exec_queue_types.h" #include "xe_gpu_scheduler_types.h" #include "xe_gt_tlb_invalidation_types.h" @@ -22,110 +20,64 @@ #include "xe_sched_job.h" #include "xe_vm.h" +#define __dev_name_xe(xe) dev_name((xe)->drm.dev) +#define __dev_name_gt(gt) __dev_name_xe(gt_to_xe((gt))) +#define __dev_name_eq(q) __dev_name_gt((q)->gt) + DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(fence), + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(xe, fence), TP_STRUCT__entry( + __string(dev, __dev_name_xe(xe)) __field(struct xe_gt_tlb_invalidation_fence *, fence) __field(int, seqno) ), TP_fast_assign( + __assign_str(dev); __entry->fence = fence; __entry->seqno = fence->seqno; ), - TP_printk("fence=%p, seqno=%d", - __entry->fence, __entry->seqno) + TP_printk("dev=%s, fence=%p, seqno=%d", + __get_str(dev), __entry->fence, __entry->seqno) ); DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_create, - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(fence) + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(xe, fence) ); DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_work_func, - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(fence) + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(xe, fence) ); DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_cb, - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(fence) + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(xe, fence) ); DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_send, - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(fence) + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(xe, fence) ); DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_recv, - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(fence) + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(xe, fence) ); DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal, - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(fence) + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(xe, fence) ); DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_timeout, - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(fence) -); - -DECLARE_EVENT_CLASS(xe_bo, - 
TP_PROTO(struct xe_bo *bo), - TP_ARGS(bo), - - TP_STRUCT__entry( - __field(size_t, size) - __field(u32, flags) - __field(struct xe_vm *, vm) - ), - - TP_fast_assign( - __entry->size = bo->size; - __entry->flags = bo->flags; - __entry->vm = bo->vm; - ), - - TP_printk("size=%zu, flags=0x%02x, vm=%p", - __entry->size, __entry->flags, __entry->vm) -); - -DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, - TP_PROTO(struct xe_bo *bo), - TP_ARGS(bo) -); - -TRACE_EVENT(xe_bo_move, - TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, - bool move_lacks_source), - TP_ARGS(bo, new_placement, old_placement, move_lacks_source), - TP_STRUCT__entry( - __field(struct xe_bo *, bo) - __field(size_t, size) - __field(u32, new_placement) - __field(u32, old_placement) - __array(char, device_id, 12) - __field(bool, move_lacks_source) - ), - - TP_fast_assign( - __entry->bo = bo; - __entry->size = bo->size; - __entry->new_placement = new_placement; - __entry->old_placement = old_placement; - strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12); - __entry->move_lacks_source = move_lacks_source; - ), - TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", - __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size, - xe_mem_type_to_name[__entry->old_placement], - xe_mem_type_to_name[__entry->new_placement], __entry->device_id) + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(xe, fence) ); DECLARE_EVENT_CLASS(xe_exec_queue, @@ -133,6 +85,7 @@ DECLARE_EVENT_CLASS(xe_exec_queue, TP_ARGS(q), TP_STRUCT__entry( + __string(dev, __dev_name_eq(q)) __field(enum xe_engine_class, class) __field(u32, logical_mask) __field(u8, gt_id) @@ -143,6 +96,7 @@ DECLARE_EVENT_CLASS(xe_exec_queue, ), TP_fast_assign( + __assign_str(dev); __entry->class = q->class; __entry->logical_mask = q->logical_mask; __entry->gt_id = q->gt->info.id; @@ -152,8 +106,8 @@ DECLARE_EVENT_CLASS(xe_exec_queue, __entry->flags = q->flags; ), - TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x", - __entry->class, __entry->logical_mask, + TP_printk("dev=%s, %d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x", + __get_str(dev), __entry->class, __entry->logical_mask, __entry->gt_id, __entry->width, __entry->guc_id, __entry->guc_state, __entry->flags) ); @@ -253,6 +207,7 @@ DECLARE_EVENT_CLASS(xe_sched_job, TP_ARGS(job), TP_STRUCT__entry( + __string(dev, __dev_name_eq(job->q)) __field(u32, seqno) __field(u32, lrc_seqno) __field(u16, guc_id) @@ -264,6 +219,7 @@ DECLARE_EVENT_CLASS(xe_sched_job, ), TP_fast_assign( + __assign_str(dev); __entry->seqno = xe_sched_job_seqno(job); __entry->lrc_seqno = xe_sched_job_lrc_seqno(job); __entry->guc_id = job->q->guc->id; @@ -275,8 +231,8 @@ DECLARE_EVENT_CLASS(xe_sched_job, __entry->batch_addr = (u64)job->ptrs[0].batch_addr; ), - TP_printk("fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", - __entry->fence, __entry->seqno, + TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", + __get_str(dev), __entry->fence, __entry->seqno, __entry->lrc_seqno, __entry->guc_id, __entry->batch_addr, __entry->guc_state, __entry->flags, __entry->error) @@ -322,17 +278,19 @@ DECLARE_EVENT_CLASS(xe_sched_msg, TP_ARGS(msg), TP_STRUCT__entry( + __string(dev, __dev_name_eq(((struct xe_exec_queue *)msg->private_data))) __field(u32, opcode) __field(u16, guc_id) ), 
TP_fast_assign( + __assign_str(dev); __entry->opcode = msg->opcode; __entry->guc_id = ((struct xe_exec_queue *)msg->private_data)->guc->id; ), - TP_printk("guc_id=%d, opcode=%u", __entry->guc_id, + TP_printk("dev=%s, guc_id=%d, opcode=%u", __get_str(dev), __entry->guc_id, __entry->opcode) ); @@ -351,19 +309,21 @@ DECLARE_EVENT_CLASS(xe_hw_fence, TP_ARGS(fence), TP_STRUCT__entry( + __string(dev, __dev_name_gt(fence->ctx->gt)) __field(u64, ctx) __field(u32, seqno) __field(struct xe_hw_fence *, fence) ), TP_fast_assign( + __assign_str(dev); __entry->ctx = fence->dma.context; __entry->seqno = fence->dma.seqno; __entry->fence = fence; ), - TP_printk("ctx=0x%016llx, fence=%p, seqno=%u", - __entry->ctx, __entry->fence, __entry->seqno) + TP_printk("dev=%s, ctx=0x%016llx, fence=%p, seqno=%u", + __get_str(dev), __entry->ctx, __entry->fence, __entry->seqno) ); DEFINE_EVENT(xe_hw_fence, xe_hw_fence_create, @@ -386,242 +346,32 @@ DEFINE_EVENT(xe_hw_fence, xe_hw_fence_free, TP_ARGS(fence) ); -DECLARE_EVENT_CLASS(xe_vma, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma), - - TP_STRUCT__entry( - __field(struct xe_vma *, vma) - __field(u32, asid) - __field(u64, start) - __field(u64, end) - __field(u64, ptr) - ), - - TP_fast_assign( - __entry->vma = vma; - __entry->asid = xe_vma_vm(vma)->usm.asid; - __entry->start = xe_vma_start(vma); - __entry->end = xe_vma_end(vma) - 1; - __entry->ptr = xe_vma_userptr(vma); - ), - - TP_printk("vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,", - __entry->vma, __entry->asid, __entry->start, - __entry->end, __entry->ptr) -) - -DEFINE_EVENT(xe_vma, xe_vma_flush, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_pagefault, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_acc, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_fail, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_bind, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_pf_bind, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_unbind, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_worker, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_exec, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_rebind_worker, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_rebind_exec, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_invalidate, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_evict, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate_complete, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DECLARE_EVENT_CLASS(xe_vm, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm), - - TP_STRUCT__entry( - __field(struct xe_vm *, vm) - __field(u32, asid) - ), - - TP_fast_assign( - __entry->vm = vm; - __entry->asid = vm->usm.asid; - ), - - TP_printk("vm=%p, asid=0x%05x", __entry->vm, - __entry->asid) -); - -DEFINE_EVENT(xe_vm, xe_vm_kill, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm) -); - -DEFINE_EVENT(xe_vm, xe_vm_create, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm) -); - -DEFINE_EVENT(xe_vm, xe_vm_free, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm) 
-); - -DEFINE_EVENT(xe_vm, xe_vm_cpu_bind, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm) -); - -DEFINE_EVENT(xe_vm, xe_vm_restart, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm) -); - -DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_enter, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm) -); - -DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_retry, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm) -); - -DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm) -); - -/* GuC */ -DECLARE_EVENT_CLASS(xe_guc_ct_flow_control, - TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), - TP_ARGS(_head, _tail, size, space, len), - - TP_STRUCT__entry( - __field(u32, _head) - __field(u32, _tail) - __field(u32, size) - __field(u32, space) - __field(u32, len) - ), - - TP_fast_assign( - __entry->_head = _head; - __entry->_tail = _tail; - __entry->size = size; - __entry->space = space; - __entry->len = len; - ), - - TP_printk("h2g flow control: head=%u, tail=%u, size=%u, space=%u, len=%u", - __entry->_head, __entry->_tail, __entry->size, - __entry->space, __entry->len) -); - -DEFINE_EVENT(xe_guc_ct_flow_control, xe_guc_ct_h2g_flow_control, - TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), - TP_ARGS(_head, _tail, size, space, len) -); - -DEFINE_EVENT_PRINT(xe_guc_ct_flow_control, xe_guc_ct_g2h_flow_control, - TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), - TP_ARGS(_head, _tail, size, space, len), - - TP_printk("g2h flow control: head=%u, tail=%u, size=%u, space=%u, len=%u", - __entry->_head, __entry->_tail, __entry->size, - __entry->space, __entry->len) -); - -DECLARE_EVENT_CLASS(xe_guc_ctb, - TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), - TP_ARGS(gt_id, action, len, _head, tail), - - TP_STRUCT__entry( - __field(u8, gt_id) - __field(u32, action) - __field(u32, len) - __field(u32, tail) - __field(u32, _head) - ), - - TP_fast_assign( - __entry->gt_id = gt_id; - __entry->action = action; - __entry->len = len; - __entry->tail = tail; - __entry->_head = _head; - ), - - TP_printk("gt%d: H2G CTB: action=0x%x, len=%d, tail=%d, head=%d\n", - __entry->gt_id, __entry->action, __entry->len, - __entry->tail, __entry->_head) -); - -DEFINE_EVENT(xe_guc_ctb, xe_guc_ctb_h2g, - TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), - TP_ARGS(gt_id, action, len, _head, tail) -); - -DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h, - TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), - TP_ARGS(gt_id, action, len, _head, tail), - - TP_printk("gt%d: G2H CTB: action=0x%x, len=%d, tail=%d, head=%d\n", - __entry->gt_id, __entry->action, __entry->len, - __entry->tail, __entry->_head) - +TRACE_EVENT(xe_reg_rw, + TP_PROTO(struct xe_gt *gt, bool write, u32 reg, u64 val, int len), + + TP_ARGS(gt, write, reg, val, len), + + TP_STRUCT__entry( + __string(dev, __dev_name_gt(gt)) + __field(u64, val) + __field(u32, reg) + __field(u16, write) + __field(u16, len) + ), + + TP_fast_assign( + __assign_str(dev); + __entry->val = val; + __entry->reg = reg; + __entry->write = write; + __entry->len = len; + ), + + TP_printk("dev=%s, %s reg=0x%x, len=%d, val=(0x%x, 0x%x)", + __get_str(dev), __entry->write ? 
"write" : "read", + __entry->reg, __entry->len, + (u32)(__entry->val & 0xffffffff), + (u32)(__entry->val >> 32)) ); #endif diff --git a/drivers/gpu/drm/xe/xe_trace_bo.c b/drivers/gpu/drm/xe/xe_trace_bo.c new file mode 100644 index 000000000000..6d5e66ce4c50 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace_bo.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "xe_trace_bo.h" +#endif diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h new file mode 100644 index 000000000000..f39f09ed3495 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright © 2024 Intel Corporation + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM xe + +#if !defined(_XE_TRACE_BO_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _XE_TRACE_BO_H_ + +#include <linux/tracepoint.h> +#include <linux/types.h> + +#include "xe_bo.h" +#include "xe_bo_types.h" +#include "xe_vm.h" + +#define __dev_name_bo(bo) dev_name(xe_bo_device(bo)->drm.dev) +#define __dev_name_vm(vm) dev_name((vm)->xe->drm.dev) +#define __dev_name_vma(vma) __dev_name_vm(xe_vma_vm(vma)) + +DECLARE_EVENT_CLASS(xe_bo, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo), + + TP_STRUCT__entry( + __string(dev, __dev_name_bo(bo)) + __field(size_t, size) + __field(u32, flags) + __field(struct xe_vm *, vm) + ), + + TP_fast_assign( + __assign_str(dev); + __entry->size = bo->size; + __entry->flags = bo->flags; + __entry->vm = bo->vm; + ), + + TP_printk("dev=%s, size=%zu, flags=0x%02x, vm=%p", + __get_str(dev), __entry->size, + __entry->flags, __entry->vm) +); + +DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo) +); + +TRACE_EVENT(xe_bo_move, + TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, + bool move_lacks_source), + TP_ARGS(bo, new_placement, old_placement, move_lacks_source), + TP_STRUCT__entry( + __field(struct xe_bo *, bo) + __field(size_t, size) + __field(u32, new_placement) + __field(u32, old_placement) + __string(device_id, __dev_name_bo(bo)) + __field(bool, move_lacks_source) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->size = bo->size; + __entry->new_placement = new_placement; + __entry->old_placement = old_placement; + __assign_str(device_id); + __entry->move_lacks_source = move_lacks_source; + ), + TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", + __entry->move_lacks_source ? 
"yes" : "no", __entry->bo, __entry->size, + xe_mem_type_to_name[__entry->old_placement], + xe_mem_type_to_name[__entry->new_placement], __get_str(device_id)) +); + +DECLARE_EVENT_CLASS(xe_vma, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma), + + TP_STRUCT__entry( + __string(dev, __dev_name_vma(vma)) + __field(struct xe_vma *, vma) + __field(u32, asid) + __field(u64, start) + __field(u64, end) + __field(u64, ptr) + ), + + TP_fast_assign( + __assign_str(dev); + __entry->vma = vma; + __entry->asid = xe_vma_vm(vma)->usm.asid; + __entry->start = xe_vma_start(vma); + __entry->end = xe_vma_end(vma) - 1; + __entry->ptr = xe_vma_userptr(vma); + ), + + TP_printk("dev=%s, vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,", + __get_str(dev), __entry->vma, __entry->asid, __entry->start, + __entry->end, __entry->ptr) +) + +DEFINE_EVENT(xe_vma, xe_vma_flush, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_pagefault, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_acc, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_fail, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_bind, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_pf_bind, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_unbind, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_worker, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_exec, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_rebind_worker, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_rebind_exec, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_invalidate, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_evict, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate_complete, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DECLARE_EVENT_CLASS(xe_vm, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm), + + TP_STRUCT__entry( + __string(dev, __dev_name_vm(vm)) + __field(struct xe_vm *, vm) + __field(u32, asid) + ), + + TP_fast_assign( + __assign_str(dev); + __entry->vm = vm; + __entry->asid = vm->usm.asid; + ), + + TP_printk("dev=%s, vm=%p, asid=0x%05x", __get_str(dev), + __entry->vm, __entry->asid) +); + +DEFINE_EVENT(xe_vm, xe_vm_kill, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_create, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_free, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_cpu_bind, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_restart, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_enter, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_retry, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe +#define TRACE_INCLUDE_FILE 
xe_trace_bo +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/xe/xe_trace_guc.c b/drivers/gpu/drm/xe/xe_trace_guc.c new file mode 100644 index 000000000000..fcdf6888ff2f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace_guc.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "xe_trace_guc.h" +#endif diff --git a/drivers/gpu/drm/xe/xe_trace_guc.h b/drivers/gpu/drm/xe/xe_trace_guc.h new file mode 100644 index 000000000000..23abdd55dc62 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace_guc.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright © 2024 Intel Corporation + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM xe + +#if !defined(_XE_TRACE_GUC_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _XE_TRACE_GUC_H_ + +#include <linux/tracepoint.h> +#include <linux/types.h> + +#include "xe_device_types.h" +#include "xe_guc_exec_queue_types.h" + +#define __dev_name_xe(xe) dev_name((xe)->drm.dev) + +DECLARE_EVENT_CLASS(xe_guc_ct_flow_control, + TP_PROTO(struct xe_device *xe, u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(xe, _head, _tail, size, space, len), + + TP_STRUCT__entry( + __string(dev, __dev_name_xe(xe)) + __field(u32, _head) + __field(u32, _tail) + __field(u32, size) + __field(u32, space) + __field(u32, len) + ), + + TP_fast_assign( + __assign_str(dev); + __entry->_head = _head; + __entry->_tail = _tail; + __entry->size = size; + __entry->space = space; + __entry->len = len; + ), + + TP_printk("h2g flow control: dev=%s, head=%u, tail=%u, size=%u, space=%u, len=%u", + __get_str(dev), __entry->_head, __entry->_tail, __entry->size, + __entry->space, __entry->len) +); + +DEFINE_EVENT(xe_guc_ct_flow_control, xe_guc_ct_h2g_flow_control, + TP_PROTO(struct xe_device *xe, u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(xe, _head, _tail, size, space, len) +); + +DEFINE_EVENT_PRINT(xe_guc_ct_flow_control, xe_guc_ct_g2h_flow_control, + TP_PROTO(struct xe_device *xe, u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(xe, _head, _tail, size, space, len), + + TP_printk("g2h flow control: dev=%s, head=%u, tail=%u, size=%u, space=%u, len=%u", + __get_str(dev), __entry->_head, __entry->_tail, __entry->size, + __entry->space, __entry->len) +); + +DECLARE_EVENT_CLASS(xe_guc_ctb, + TP_PROTO(struct xe_device *xe, u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), + TP_ARGS(xe, gt_id, action, len, _head, tail), + + TP_STRUCT__entry( + __string(dev, __dev_name_xe(xe)) + __field(u8, gt_id) + __field(u32, action) + __field(u32, len) + __field(u32, tail) + __field(u32, _head) + ), + + TP_fast_assign( + __assign_str(dev); + __entry->gt_id = gt_id; + __entry->action = action; + __entry->len = len; + __entry->tail = tail; + __entry->_head = _head; + ), + + TP_printk("H2G CTB: dev=%s, gt%d: action=0x%x, len=%d, tail=%d, head=%d\n", + __get_str(dev), __entry->gt_id, __entry->action, __entry->len, + __entry->tail, __entry->_head) +); + +DEFINE_EVENT(xe_guc_ctb, xe_guc_ctb_h2g, + TP_PROTO(struct xe_device *xe, u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), + TP_ARGS(xe, gt_id, action, len, _head, tail) +); + +DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h, + TP_PROTO(struct xe_device *xe, u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), + TP_ARGS(xe, gt_id, action, len, _head, tail), + + TP_printk("G2H CTB: dev=%s, gt%d: action=0x%x, len=%d, tail=%d, head=%d\n", + __get_str(dev), __entry->gt_id, __entry->action, 
__entry->len, + __entry->tail, __entry->_head) + +); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe +#define TRACE_INCLUDE_FILE xe_trace_guc +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h index 35078038797e..c108e9d08e70 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.h +++ b/drivers/gpu/drm/xe/xe_uc_fw.h @@ -158,7 +158,7 @@ static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw) static inline void xe_uc_fw_sanitize(struct xe_uc_fw *uc_fw) { - if (xe_uc_fw_is_loaded(uc_fw)) + if (xe_uc_fw_is_loadable(uc_fw)) xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOADABLE); } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 99bf7412475c..5b166fa03684 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -36,7 +36,7 @@ #include "xe_pt.h" #include "xe_res_cursor.h" #include "xe_sync.h" -#include "xe_trace.h" +#include "xe_trace_bo.h" #include "xe_wa.h" #include "xe_hmm.h" @@ -83,10 +83,10 @@ static bool preempt_fences_waiting(struct xe_vm *vm) lockdep_assert_held(&vm->lock); xe_vm_assert_held(vm); - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { - if (!q->compute.pfence || + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { + if (!q->lr.pfence || test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &q->compute.pfence->flags)) { + &q->lr.pfence->flags)) { return true; } } @@ -129,14 +129,14 @@ static int wait_for_existing_preempt_fences(struct xe_vm *vm) xe_vm_assert_held(vm); - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { - if (q->compute.pfence) { - long timeout = dma_fence_wait(q->compute.pfence, false); + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { + if (q->lr.pfence) { + long timeout = dma_fence_wait(q->lr.pfence, false); if (timeout < 0) return -ETIME; - dma_fence_put(q->compute.pfence); - q->compute.pfence = NULL; + dma_fence_put(q->lr.pfence); + q->lr.pfence = NULL; } } @@ -148,7 +148,7 @@ static bool xe_vm_is_idle(struct xe_vm *vm) struct xe_exec_queue *q; xe_vm_assert_held(vm); - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { if (!xe_exec_queue_is_idle(q)) return false; } @@ -161,17 +161,17 @@ static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) struct list_head *link; struct xe_exec_queue *q; - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { struct dma_fence *fence; link = list->next; xe_assert(vm->xe, link != list); fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), - q, q->compute.context, - ++q->compute.seqno); - dma_fence_put(q->compute.pfence); - q->compute.pfence = fence; + q, q->lr.context, + ++q->lr.seqno); + dma_fence_put(q->lr.pfence); + q->lr.pfence = fence; } } @@ -180,27 +180,23 @@ static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) struct xe_exec_queue *q; int err; + xe_bo_assert_held(bo); + if (!vm->preempt.num_exec_queues) return 0; - err = xe_bo_lock(bo, true); - if (err) - return err; - err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); if (err) - goto out_unlock; + return err; - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) - if (q->compute.pfence) { + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) + if (q->lr.pfence) { 
dma_resv_add_fence(bo->ttm.base.resv, - q->compute.pfence, + q->lr.pfence, DMA_RESV_USAGE_BOOKKEEP); } -out_unlock: - xe_bo_unlock(bo); - return err; + return 0; } static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, @@ -211,10 +207,10 @@ static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, lockdep_assert_held(&vm->lock); xe_vm_assert_held(vm); - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { q->ops->resume(q); - drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->compute.pfence, + drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence, DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); } } @@ -238,16 +234,16 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) if (err) goto out_up_write; - pfence = xe_preempt_fence_create(q, q->compute.context, - ++q->compute.seqno); + pfence = xe_preempt_fence_create(q, q->lr.context, + ++q->lr.seqno); if (!pfence) { err = -ENOMEM; goto out_fini; } - list_add(&q->compute.link, &vm->preempt.exec_queues); + list_add(&q->lr.link, &vm->preempt.exec_queues); ++vm->preempt.num_exec_queues; - q->compute.pfence = pfence; + q->lr.pfence = pfence; down_read(&vm->userptr.notifier_lock); @@ -284,12 +280,12 @@ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) return; down_write(&vm->lock); - list_del(&q->compute.link); + list_del(&q->lr.link); --vm->preempt.num_exec_queues; - if (q->compute.pfence) { - dma_fence_enable_sw_signaling(q->compute.pfence); - dma_fence_put(q->compute.pfence); - q->compute.pfence = NULL; + if (q->lr.pfence) { + dma_fence_enable_sw_signaling(q->lr.pfence); + dma_fence_put(q->lr.pfence); + q->lr.pfence = NULL; } up_write(&vm->lock); } @@ -327,7 +323,7 @@ static void xe_vm_kill(struct xe_vm *vm, bool unlocked) vm->flags |= XE_VM_FLAG_BANNED; trace_xe_vm_kill(vm); - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) q->ops->kill(q); if (unlocked) @@ -2140,7 +2136,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; struct drm_exec exec; struct xe_vma *vma; - int err; + int err = 0; lockdep_assert_held_write(&vm->lock); @@ -2165,23 +2161,22 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, vma = xe_vma_create(vm, bo, op->gem.offset, op->va.addr, op->va.addr + op->va.range - 1, pat_index, flags); - if (bo) - drm_exec_fini(&exec); + if (IS_ERR(vma)) + goto err_unlock; - if (xe_vma_is_userptr(vma)) { + if (xe_vma_is_userptr(vma)) err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); - if (err) { - prep_vma_destroy(vm, vma, false); - xe_vma_destroy_unlocked(vma); - return ERR_PTR(err); - } - } else if (!xe_vma_has_no_bo(vma) && !bo->vm) { + else if (!xe_vma_has_no_bo(vma) && !bo->vm) err = add_preempt_fences(vm, bo); - if (err) { - prep_vma_destroy(vm, vma, false); - xe_vma_destroy_unlocked(vma); - return ERR_PTR(err); - } + +err_unlock: + if (bo) + drm_exec_fini(&exec); + + if (err) { + prep_vma_destroy(vm, vma, false); + xe_vma_destroy_unlocked(vma); + vma = ERR_PTR(err); } return vma; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 26b170a0cdc7..c7bf0862b231 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -21,6 +21,7 @@ #include "xe_mmio.h" #include "xe_platform_types.h" #include "xe_rtp.h" +#include "xe_sriov.h" #include "xe_step.h" /** @@ -629,7 +630,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) }, { XE_RTP_NAME("14019877138"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) }, @@ -678,9 +679,19 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP)) }, { XE_RTP_NAME("14020756599"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER), OR, + MEDIA_VERSION_ANY_GT(2000), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) }, + { XE_RTP_NAME("14021490052"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(FF_MODE, + DIS_MESH_PARTIAL_AUTOSTRIP | + DIS_MESH_AUTOSTRIP), + SET(VFLSKPD, + DIS_PARTIAL_AUTOSTRIP | + DIS_AUTOSTRIP)) + }, /* Xe2_HPG */ { XE_RTP_NAME("15010599737"), @@ -705,13 +716,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_AUTOSTRIP)) }, - /* Xe2_LPM */ - - { XE_RTP_NAME("14020756599"), - XE_RTP_RULES(ENGINE_CLASS(RENDER), FUNC(xe_rtp_match_when_media2000)), - XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) - }, - {} }; @@ -862,6 +866,9 @@ void xe_wa_apply_tile_workarounds(struct xe_tile *tile) { struct xe_gt *mmio = tile->primary_gt; + if (IS_SRIOV_VF(tile->xe)) + return; + if (XE_WA(mmio, 22010954014)) xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS); } diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h index 1b24d66f9d80..db9ddeaf69bf 100644 --- a/drivers/gpu/drm/xe/xe_wa.h +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -17,8 +17,6 @@ void xe_wa_process_gt(struct xe_gt *gt); void xe_wa_process_engine(struct xe_hw_engine *hwe); void xe_wa_process_lrc(struct xe_hw_engine *hwe); void xe_wa_apply_tile_workarounds(struct xe_tile *tile); - -void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); /** diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules 
b/drivers/gpu/drm/xe/xe_wa_oob.rules index 12fe88796a49..a6b897030fde 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -27,3 +27,4 @@ 16022287689 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) 13011645652 GRAPHICS_VERSION(2004) +22019338487 MEDIA_VERSION(2000) diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h index adb37bc541e4..644872a35c35 100644 --- a/include/drm/intel/xe_pciids.h +++ b/include/drm/intel/xe_pciids.h @@ -192,4 +192,11 @@ MACRO__(0x64A0, ## __VA_ARGS__), \ MACRO__(0x64B0, ## __VA_ARGS__) +#define XE_BMG_IDS(MACRO__, ...) \ + MACRO__(0xE202, ## __VA_ARGS__), \ + MACRO__(0xE20B, ## __VA_ARGS__), \ + MACRO__(0xE20C, ## __VA_ARGS__), \ + MACRO__(0xE20D, ## __VA_ARGS__), \ + MACRO__(0xE212, ## __VA_ARGS__) + #endif diff --git a/include/linux/devcoredump.h b/include/linux/devcoredump.h index c8f7eb6cc191..377892604ff4 100644 --- a/include/linux/devcoredump.h +++ b/include/linux/devcoredump.h @@ -12,6 +12,9 @@ #include <linux/scatterlist.h> #include <linux/slab.h> +/* if data isn't read by userspace after 5 minutes then delete it */ +#define DEVCD_TIMEOUT (HZ * 60 * 5) + /* * _devcd_free_sgtable - free all the memory of the given scatterlist table * (i.e. both pages and scatterlist instances) @@ -50,16 +53,17 @@ static inline void _devcd_free_sgtable(struct scatterlist *table) kfree(delete_iter); } - #ifdef CONFIG_DEV_COREDUMP void dev_coredumpv(struct device *dev, void *data, size_t datalen, gfp_t gfp); -void dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, gfp_t gfp, - ssize_t (*read)(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen), - void (*free)(void *data)); +void dev_coredumpm_timeout(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, + size_t count, void *data, + size_t datalen), + void (*free)(void *data), + unsigned long timeout); void dev_coredumpsg(struct device *dev, struct scatterlist *table, size_t datalen, gfp_t gfp); @@ -73,11 +77,13 @@ static inline void dev_coredumpv(struct device *dev, void *data, } static inline void -dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, gfp_t gfp, - ssize_t (*read)(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen), - void (*free)(void *data)) +dev_coredumpm_timeout(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, + size_t count, void *data, + size_t datalen), + void (*free)(void *data), + unsigned long timeout) { free(data); } @@ -92,4 +98,29 @@ static inline void dev_coredump_put(struct device *dev) } #endif /* CONFIG_DEV_COREDUMP */ +/** + * dev_coredumpm - create device coredump with read/free methods + * @dev: the struct device for the crashed device + * @owner: the module that contains the read/free functions, use %THIS_MODULE + * @data: data cookie for the @read/@free functions + * @datalen: length of the data + * @gfp: allocation flags + * @read: function to read from the given buffer + * @free: function to free the given buffer + * + * Creates a new device coredump for the given device. If a previous one hasn't + * been read yet, the new coredump is discarded. The data lifetime is determined + * by the device coredump framework and when it is no longer needed the @free + * function will be called to free the data. 
+ */ +static inline void dev_coredumpm(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen), + void (*free)(void *data)) +{ + dev_coredumpm_timeout(dev, owner, data, datalen, gfp, read, free, + DEVCD_TIMEOUT); +} + #endif /* __DEVCOREDUMP_H */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index d7b0903c22b2..12eaa8532b5c 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -80,6 +80,7 @@ extern "C" { * - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY * - &DRM_IOCTL_XE_EXEC * - &DRM_IOCTL_XE_WAIT_USER_FENCE + * - &DRM_IOCTL_XE_PERF */ /* @@ -100,6 +101,8 @@ extern "C" { #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08 #define DRM_XE_EXEC 0x09 #define DRM_XE_WAIT_USER_FENCE 0x0a +#define DRM_XE_PERF 0x0b + /* Must be kept compact -- no holes */ #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) @@ -113,6 +116,7 @@ extern "C" { #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) +#define DRM_IOCTL_XE_PERF DRM_IOW(DRM_COMMAND_BASE + DRM_XE_PERF, struct drm_xe_perf_param) /** * DOC: Xe IOCTL Extensions @@ -685,6 +689,7 @@ struct drm_xe_device_query { #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5 #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6 #define DRM_XE_DEVICE_QUERY_UC_FW_VERSION 7 +#define DRM_XE_DEVICE_QUERY_OA_UNITS 8 /** @query: The type of data to query */ __u32 query; @@ -1370,6 +1375,309 @@ struct drm_xe_wait_user_fence { __u64 reserved[2]; }; +/** + * enum drm_xe_perf_type - Perf stream types + */ +enum drm_xe_perf_type { + /** @DRM_XE_PERF_TYPE_OA: OA perf stream type */ + DRM_XE_PERF_TYPE_OA, +}; + +/** + * enum drm_xe_perf_op - Perf stream ops + */ +enum drm_xe_perf_op { + /** @DRM_XE_PERF_OP_STREAM_OPEN: Open a perf counter stream */ + DRM_XE_PERF_OP_STREAM_OPEN, + + /** @DRM_XE_PERF_OP_ADD_CONFIG: Add perf stream config */ + DRM_XE_PERF_OP_ADD_CONFIG, + + /** @DRM_XE_PERF_OP_REMOVE_CONFIG: Remove perf stream config */ + DRM_XE_PERF_OP_REMOVE_CONFIG, +}; + +/** + * struct drm_xe_perf_param - Input of &DRM_XE_PERF + * + * The perf layer enables multiplexing perf counter streams of multiple + * types. The actual params for a particular stream operation are supplied + * via the @param pointer (use __copy_from_user to get these params). 
+ */ +struct drm_xe_perf_param { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + /** @perf_type: Perf stream type, of enum @drm_xe_perf_type */ + __u64 perf_type; + /** @perf_op: Perf op, of enum @drm_xe_perf_op */ + __u64 perf_op; + /** @param: Pointer to actual stream params */ + __u64 param; +}; + +/** + * enum drm_xe_perf_ioctls - Perf fd ioctl's + * + * Information exchanged between userspace and kernel for perf fd ioctl's + * is stream type specific + */ +enum drm_xe_perf_ioctls { + /** @DRM_XE_PERF_IOCTL_ENABLE: Enable data capture for a stream */ + DRM_XE_PERF_IOCTL_ENABLE = _IO('i', 0x0), + + /** @DRM_XE_PERF_IOCTL_DISABLE: Disable data capture for a stream */ + DRM_XE_PERF_IOCTL_DISABLE = _IO('i', 0x1), + + /** @DRM_XE_PERF_IOCTL_CONFIG: Change stream configuration */ + DRM_XE_PERF_IOCTL_CONFIG = _IO('i', 0x2), + + /** @DRM_XE_PERF_IOCTL_STATUS: Return stream status */ + DRM_XE_PERF_IOCTL_STATUS = _IO('i', 0x3), + + /** @DRM_XE_PERF_IOCTL_INFO: Return stream info */ + DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), +}; + +/** + * enum drm_xe_oa_unit_type - OA unit types + */ +enum drm_xe_oa_unit_type { + /** + * @DRM_XE_OA_UNIT_TYPE_OAG: OAG OA unit. OAR/OAC are considered + * sub-types of OAG. For OAR/OAC, use OAG. + */ + DRM_XE_OA_UNIT_TYPE_OAG, + + /** @DRM_XE_OA_UNIT_TYPE_OAM: OAM OA unit */ + DRM_XE_OA_UNIT_TYPE_OAM, +}; + +/** + * struct drm_xe_oa_unit - describe OA unit + */ +struct drm_xe_oa_unit { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_unit_id: OA unit ID */ + __u32 oa_unit_id; + + /** @oa_unit_type: OA unit type of @drm_xe_oa_unit_type */ + __u32 oa_unit_type; + + /** @capabilities: OA capabilities bit-mask */ + __u64 capabilities; +#define DRM_XE_OA_CAPS_BASE (1 << 0) + + /** @oa_timestamp_freq: OA timestamp freq */ + __u64 oa_timestamp_freq; + + /** @reserved: MBZ */ + __u64 reserved[4]; + + /** @num_engines: number of engines in @eci array */ + __u64 num_engines; + + /** @eci: engines attached to this OA unit */ + struct drm_xe_engine_class_instance eci[]; +}; + +/** + * struct drm_xe_query_oa_units - describe OA units + * + * If a query is made with a struct drm_xe_device_query where .query + * is equal to DRM_XE_DEVICE_QUERY_OA_UNITS, then the reply uses struct + * drm_xe_query_oa_units in .data. + * + * OA unit properties for all OA units can be accessed using a code block + * such as the one below: + * + * .. code-block:: C + * + * struct drm_xe_query_oa_units *qoa; + * struct drm_xe_oa_unit *oau; + * u8 *poau; + * + * // malloc qoa and issue DRM_XE_DEVICE_QUERY_OA_UNITS. Then: + * poau = (u8 *)&qoa->oa_units[0]; + * for (int i = 0; i < qoa->num_oa_units; i++) { + * oau = (struct drm_xe_oa_unit *)poau; + * // Access 'struct drm_xe_oa_unit' fields here + * poau += sizeof(*oau) + oau->num_engines * sizeof(oau->eci[0]); + * } + */ +struct drm_xe_query_oa_units { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + /** @num_oa_units: number of OA units returned in oau[] */ + __u32 num_oa_units; + /** @pad: MBZ */ + __u32 pad; + /** + * @oa_units: struct @drm_xe_oa_unit array returned for this device. 
+ * Written below as a u64 array to avoid problems with nested flexible + * arrays with some compilers + */ + __u64 oa_units[]; +}; + +/** + * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec + * 52198/60942 + */ +enum drm_xe_oa_format_type { + /** @DRM_XE_OA_FMT_TYPE_OAG: OAG report format */ + DRM_XE_OA_FMT_TYPE_OAG, + /** @DRM_XE_OA_FMT_TYPE_OAR: OAR report format */ + DRM_XE_OA_FMT_TYPE_OAR, + /** @DRM_XE_OA_FMT_TYPE_OAM: OAM report format */ + DRM_XE_OA_FMT_TYPE_OAM, + /** @DRM_XE_OA_FMT_TYPE_OAC: OAC report format */ + DRM_XE_OA_FMT_TYPE_OAC, + /** @DRM_XE_OA_FMT_TYPE_OAM_MPEC: OAM SAMEDIA or OAM MPEC report format */ + DRM_XE_OA_FMT_TYPE_OAM_MPEC, + /** @DRM_XE_OA_FMT_TYPE_PEC: PEC report format */ + DRM_XE_OA_FMT_TYPE_PEC, +}; + +/** + * enum drm_xe_oa_property_id - OA stream property id's + * + * Stream params are specified as a chain of @drm_xe_ext_set_property + * struct's, with @property values from enum @drm_xe_oa_property_id and + * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. + * @param field in struct @drm_xe_perf_param points to the first + * @drm_xe_ext_set_property struct. + * + * Exactly the same mechanism is also used for stream reconfiguration using + * the @DRM_XE_PERF_IOCTL_CONFIG perf fd ioctl, though only a subset of + * properties below can be specified for stream reconfiguration. + */ +enum drm_xe_oa_property_id { +#define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 + /** + * @DRM_XE_OA_PROPERTY_OA_UNIT_ID: ID of the OA unit on which to open + * the OA stream, see @oa_unit_id in 'struct + * drm_xe_query_oa_units'. Defaults to 0 if not provided. + */ + DRM_XE_OA_PROPERTY_OA_UNIT_ID = 1, + + /** + * @DRM_XE_OA_PROPERTY_SAMPLE_OA: A value of 1 requests inclusion of raw + * OA unit reports or stream samples in a global buffer attached to an + * OA unit. + */ + DRM_XE_OA_PROPERTY_SAMPLE_OA, + + /** + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA + * reports, previously added via @DRM_XE_PERF_OP_ADD_CONFIG. + */ + DRM_XE_OA_PROPERTY_OA_METRIC_SET, + + /** @DRM_XE_OA_PROPERTY_OA_FORMAT: Perf counter report format */ + DRM_XE_OA_PROPERTY_OA_FORMAT, + /* + * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942, + * in terms of the following quantities: a. enum @drm_xe_oa_format_type + * b. Counter select c. Counter size and d. BC report. Also refer to the + * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. + */ +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24) + + /** + * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit + * sampling with sampling frequency proportional to 2^(period_exponent + 1) + */ + DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, + + /** + * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA + * stream in a DISABLED state (see @DRM_XE_PERF_IOCTL_ENABLE). + */ + DRM_XE_OA_PROPERTY_OA_DISABLED, + + /** + * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific + * @exec_queue_id. Perf queries can be executed on this exec queue. + */ + DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, + + /** + * @DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE: Optional engine instance to + * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0. 
+ */ + DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, + + /** + * @DRM_XE_OA_PROPERTY_NO_PREEMPT: Allow preemption and timeslicing + * to be disabled for the stream exec queue. + */ + DRM_XE_OA_PROPERTY_NO_PREEMPT, +}; + +/** + * struct drm_xe_oa_config - OA metric configuration + * + * Multiple OA configs can be added using @DRM_XE_PERF_OP_ADD_CONFIG. A + * particular config can be specified when opening an OA stream using + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property. + */ +struct drm_xe_oa_config { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @uuid: String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x" */ + char uuid[36]; + + /** @n_regs: Number of regs in @regs_ptr */ + __u32 n_regs; + + /** + * @regs_ptr: Pointer to (register address, value) pairs for OA config + * registers. Expected length of buffer is: (2 * sizeof(u32) * @n_regs). + */ + __u64 regs_ptr; +}; + +/** + * struct drm_xe_oa_stream_status - OA stream status returned from + * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to + * query stream status in response to EIO errno from perf fd read(). + */ +struct drm_xe_oa_stream_status { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_status: OA stream status (see Bspec 46717/61226) */ + __u64 oa_status; +#define DRM_XE_OASTATUS_MMIO_TRG_Q_FULL (1 << 3) +#define DRM_XE_OASTATUS_COUNTER_OVERFLOW (1 << 2) +#define DRM_XE_OASTATUS_BUFFER_OVERFLOW (1 << 1) +#define DRM_XE_OASTATUS_REPORT_LOST (1 << 0) + + /** @reserved: reserved for future use */ + __u64 reserved[3]; +}; + +/** + * struct drm_xe_oa_stream_info - OA stream info returned from + * @DRM_XE_PERF_IOCTL_INFO perf fd ioctl + */ +struct drm_xe_oa_stream_info { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_buf_size: OA buffer size */ + __u64 oa_buf_size; + + /** @reserved: reserved for future use */ + __u64 reserved[3]; +}; + #if defined(__cplusplus) } #endif
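The devcoredump changes above turn dev_coredumpm() into an inline wrapper and export dev_coredumpm_timeout(), so a caller can override the five-minute DEVCD_TIMEOUT default. A minimal sketch of a driver using the new export follows; the my_dump_* names, the snapshot buffer, and the 30-minute value are hypothetical, only the dev_coredumpm_timeout() signature comes from this series:

.. code-block:: C

	#include <linux/devcoredump.h>
	#include <linux/fs.h>
	#include <linux/module.h>
	#include <linux/slab.h>

	/* Hypothetical read callback: serve the snapshot from one kmalloc'ed buffer. */
	static ssize_t my_dump_read(char *buffer, loff_t offset, size_t count,
				    void *data, size_t datalen)
	{
		return memory_read_from_buffer(buffer, count, &offset, data, datalen);
	}

	/* Called by the devcoredump core once the dump is read or times out. */
	static void my_dump_free(void *data)
	{
		kfree(data);
	}

	static void my_report_crash(struct device *dev, void *snapshot, size_t len)
	{
		/* Keep the dump around for 30 minutes instead of DEVCD_TIMEOUT. */
		dev_coredumpm_timeout(dev, THIS_MODULE, snapshot, len, GFP_KERNEL,
				      my_dump_read, my_dump_free, 30 * 60 * HZ);
	}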
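The drm_xe_query_oa_units kernel-doc above shows how to walk the variable-length oa_units array; obtaining the buffer itself follows the usual two-call pattern of DRM_IOCTL_XE_DEVICE_QUERY, where the first call with .size = 0 reports the required size. A userspace sketch, assuming the standard .size/.data semantics of struct drm_xe_device_query already in xe_drm.h:

.. code-block:: C

	#include <stdint.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <drm/xe_drm.h>

	/* Query DRM_XE_DEVICE_QUERY_OA_UNITS; caller frees the result. */
	static struct drm_xe_query_oa_units *query_oa_units(int fd)
	{
		struct drm_xe_device_query q = {
			.query = DRM_XE_DEVICE_QUERY_OA_UNITS,
		};
		struct drm_xe_query_oa_units *qoa;

		/* First call: .size == 0, the kernel fills in the needed size. */
		if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &q) || !q.size)
			return NULL;

		qoa = calloc(1, q.size);
		if (!qoa)
			return NULL;

		/* Second call: the kernel copies out the OA unit descriptions. */
		q.data = (uintptr_t)qoa;
		if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &q)) {
			free(qoa);
			return NULL;
		}
		return qoa;
	}

The returned buffer is then iterated exactly as in the kernel-doc code block, stepping by sizeof(*oau) + oau->num_engines * sizeof(oau->eci[0]) per unit.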
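Opening a stream through the new DRM_XE_PERF ioctl uses the property-chain scheme described in the drm_xe_oa_property_id kernel-doc: a linked list of drm_xe_ext_set_property structs, with @param in struct drm_xe_perf_param pointing at the first one. A hedged sketch; the chosen values (OA unit 0, a config_id previously returned by DRM_XE_PERF_OP_ADD_CONFIG, period exponent 14) are placeholders, and fmt is assumed to be packed per the DRM_XE_OA_FORMAT_MASK_* layout above:

.. code-block:: C

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/xe_drm.h>

	#define NUM_PROPS 5

	/* Open an OA stream on OA unit 0 with a previously added metric set.
	 * fmt = fmt_type | counter_sel << 8 | counter_size << 16 | bc_report << 24
	 */
	static int xe_oa_stream_open(int fd, uint64_t config_id, uint64_t fmt)
	{
		struct drm_xe_ext_set_property props[NUM_PROPS] = {
			{ .property = DRM_XE_OA_PROPERTY_OA_UNIT_ID, .value = 0 },
			{ .property = DRM_XE_OA_PROPERTY_SAMPLE_OA, .value = 1 },
			{ .property = DRM_XE_OA_PROPERTY_OA_METRIC_SET, .value = config_id },
			{ .property = DRM_XE_OA_PROPERTY_OA_FORMAT, .value = fmt },
			{ .property = DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, .value = 14 },
		};
		struct drm_xe_perf_param arg = {
			.perf_type = DRM_XE_PERF_TYPE_OA,
			.perf_op = DRM_XE_PERF_OP_STREAM_OPEN,
			.param = (uintptr_t)&props[0],
		};
		int i;

		/* Chain the set-property extensions into a linked list. */
		for (i = 0; i < NUM_PROPS; i++) {
			props[i].base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY;
			if (i + 1 < NUM_PROPS)
				props[i].base.next_extension = (uintptr_t)&props[i + 1];
		}

		/* On success this should return the perf stream fd that read()
		 * and the DRM_XE_PERF_IOCTL_* ioctls operate on. */
		return ioctl(fd, DRM_IOCTL_XE_PERF, &arg);
	}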
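Per the drm_xe_oa_stream_status kernel-doc, a read() on the stream fd that fails with EIO can be followed by DRM_XE_PERF_IOCTL_STATUS to find out why. A short sketch under the same assumptions as above:

.. code-block:: C

	#include <errno.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <drm/xe_drm.h>

	static ssize_t oa_read(int stream_fd, void *buf, size_t len)
	{
		ssize_t ret = read(stream_fd, buf, len);

		if (ret < 0 && errno == EIO) {
			struct drm_xe_oa_stream_status st = {};

			/* EIO from read(): ask the stream what went wrong. */
			if (!ioctl(stream_fd, DRM_XE_PERF_IOCTL_STATUS, &st) &&
			    (st.oa_status & DRM_XE_OASTATUS_BUFFER_OVERFLOW))
				fprintf(stderr, "OA buffer overflow, reports lost\n");
		}
		return ret;
	}

Note that a stream opened with DRM_XE_OA_PROPERTY_OA_DISABLED set produces no data until DRM_XE_PERF_IOCTL_ENABLE is issued on the stream fd.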