summaryrefslogtreecommitdiff
path: root/tools/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-03-27 23:42:32 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2022-03-27 23:42:32 +0300
commit7b58b82b86c8b65a2b57a4c6cb96a460654f9e09 (patch)
treea13e19f216389f16f1cb6641d54751f167482515 /tools/arch/x86
parent02f9a04d76b76b80b05ddc33ceabe806b84fda3c (diff)
parentab0809af0bee88b689ba289ec8c40aa2be3a17ec (diff)
downloadlinux-7b58b82b86c8b65a2b57a4c6cb96a460654f9e09.tar.xz
Merge tag 'perf-tools-for-v5.18-2022-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools updates from Arnaldo Carvalho de Melo: "New features: perf ftrace: - Add -n/--use-nsec option to the 'latency' subcommand. Default: usecs: $ sudo perf ftrace latency -T dput -a sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 2098375 | ############################# | 1 - 2 us | 61 | | 2 - 4 us | 33 | | 4 - 8 us | 13 | | 8 - 16 us | 124 | | 16 - 32 us | 123 | | 32 - 64 us | 1 | | 64 - 128 us | 0 | | 128 - 256 us | 1 | | 256 - 512 us | 0 | | Better granularity with nsec: $ sudo perf ftrace latency -T dput -a -n sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 0 | | 1 - 2 ns | 0 | | 2 - 4 ns | 0 | | 4 - 8 ns | 0 | | 8 - 16 ns | 0 | | 16 - 32 ns | 0 | | 32 - 64 ns | 0 | | 64 - 128 ns | 1163434 | ############## | 128 - 256 ns | 914102 | ############# | 256 - 512 ns | 884 | | 512 - 1024 ns | 613 | | 1 - 2 us | 31 | | 2 - 4 us | 17 | | 4 - 8 us | 7 | | 8 - 16 us | 123 | | 16 - 32 us | 83 | | perf lock: - Add -c/--combine-locks option to merge lock instances in the same class into a single entry. # perf lock report -c Name acquired contended avg wait(ns) total wait(ns) max wait(ns) min wait(ns) rcu_read_lock 251225 0 0 0 0 0 hrtimer_bases.lock 39450 0 0 0 0 0 &sb->s_type->i_l... 10301 1 662 662 662 662 ptlock_ptr(page) 10173 2 701 1402 760 642 &(ei->i_block_re... 8732 0 0 0 0 0 &xa->xa_lock 8088 0 0 0 0 0 &base->lock 6705 0 0 0 0 0 &p->pi_lock 5549 0 0 0 0 0 &dentry->d_lockr... 5010 4 1274 5097 1844 789 &ep->lock 3958 0 0 0 0 0 - Add -F/--field option to customize the list of fields to output: $ perf lock report -F contended,wait_max -k avg_wait Name contended max wait(ns) avg wait(ns) slock-AF_INET6 1 23543 23543 &lruvec->lru_lock 5 18317 11254 slock-AF_INET6 1 10379 10379 rcu_node_1 1 2104 2104 &dentry->d_lockr... 1 1844 1844 &dentry->d_lockr... 1 1672 1672 &newf->file_lock 15 2279 1025 &dentry->d_lockr... 1 792 792 - Add --synth=no option for record, as there is no need to symbolize, lock names comes from the tracepoints. perf record: - Threaded recording, opt-in, via the new --threads command line option. - Improve AMD IBS (Instruction-Based Sampling) error handling messages. perf script: - Add 'brstackinsnlen' field (use it with -F) for branch stacks. - Output branch sample type in 'perf script'. perf report: - Add "addr_from" and "addr_to" sort dimensions. - Print branch stack entry type in 'perf report --dump-raw-trace' - Fix symbolization for chrooted workloads. Hardware tracing: Intel PT: - Add CFE (Control Flow Event) and EVD (Event Data) packets support. - Add MODE.Exec IFLAG bit support. Explanation about these features from the "Intel® 64 and IA-32 architectures software developer’s manual combined volumes: 1, 2A, 2B, 2C, 2D, 3A, 3B, 3C, 3D, and 4" PDF at: https://cdrdv2.intel.com/v1/dl/getContent/671200 At page 3951: "32.2.4 Event Trace is a capability that exposes details about the asynchronous events, when they are generated, and when their corresponding software event handler completes execution. These include: o Interrupts, including NMI and SMI, including the interrupt vector when defined. o Faults, exceptions including the fault vector. - Page faults additionally include the page fault address, when in context. o Event handler returns, including IRET and RSM. o VM exits and VM entries.¹ - VM exits include the values written to the “exit reason” and “exit qualification” VMCS fields. INIT and SIPI events. o TSX aborts, including the abort status returned for the RTM instructions. o Shutdown. Additionally, it provides indication of the status of the Interrupt Flag (IF), to indicate when interrupts are masked" ARM CoreSight: - Use advertised caps/min_interval as default sample_period on ARM spe. - Update deduction of TRCCONFIGR register for branch broadcast on ARM's CoreSight ETM. Vendor Events (JSON): Intel: - Update events and metrics for: Alderlake, Broadwell, Broadwell DE, BroadwellX, CascadelakeX, Elkhartlake, Bonnell, Goldmont, GoldmontPlus, Westmere EP-DP, Haswell, HaswellX, Icelake, IcelakeX, Ivybridge, Ivytown, Jaketown, Knights Landing, Nehalem EP, Sandybridge, Silvermont, Skylake, Skylake Server, SkylakeX, Tigerlake, TremontX, Westmere EP-SP, and Westmere EX. ARM: - Add support for HiSilicon CPA PMU aliasing. perf stat: - Fix forked applications enablement of counters. - The 'slots' should only be printed on a different order than the one specified on the command line when 'topdown' events are present, fix it. Miscellaneous: - Sync msr-index, cpufeatures header files with the kernel sources. - Stop using some deprecated libbpf APIs in 'perf trace'. - Fix some spelling mistakes. - Refactor the maps pointers usage to pave the way for using refcount debugging. - Only offer the --tui option on perf top, report and annotate when perf was built with libslang. - Don't mention --to-ctf in 'perf data --help' when not linking with the required library, libbabeltrace. - Use ARRAY_SIZE() instead of ad hoc equivalent, spotted by array_size.cocci. - Enhance the matching of sub-commands abbreviations: 'perf c2c rec' -> 'perf c2c record' 'perf c2c recport -> error - Set build-id using build-id header on new mmap records. - Fix generation of 'perf --version' string. perf test: - Add test for the arm_spe event. - Add test to check unwinding using fame-pointer (fp) mode on arm64. - Make metric testing more robust in 'perf test'. - Add error message for unsupported branch stack cases. libperf: - Add API for allocating new thread map array. - Fix typo in perf_evlist__open() failure error messages in libperf tests. perf c2c: - Replace bitmap_weight() with bitmap_empty() where appropriate" * tag 'perf-tools-for-v5.18-2022-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (143 commits) perf evsel: Improve AMD IBS (Instruction-Based Sampling) error handling messages perf python: Add perf_env stubs that will be needed in evsel__open_strerror() perf tools: Enhance the matching of sub-commands abbreviations libperf tests: Fix typo in perf_evlist__open() failure error messages tools arm64: Import cputype.h perf lock: Add -F/--field option to control output perf lock: Extend struct lock_key to have print function perf lock: Add --synth=no option for record tools headers cpufeatures: Sync with the kernel sources tools headers cpufeatures: Sync with the kernel sources perf stat: Fix forked applications enablement of counters tools arch x86: Sync the msr-index.h copy with the kernel sources perf evsel: Make evsel__env() always return a valid env perf build-id: Fix spelling mistake "Cant" -> "Can't" perf header: Fix spelling mistake "could't" -> "couldn't" perf script: Add 'brstackinsnlen' for branch stacks perf parse-events: Move slots only with topdown perf ftrace latency: Update documentation perf ftrace latency: Add -n/--use-nsec option perf tools: Fix version kernel tag ...
Diffstat (limited to 'tools/arch/x86')
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h7
-rw-r--r--tools/arch/x86/include/asm/disabled-features.h7
-rw-r--r--tools/arch/x86/include/asm/msr-index.h6
3 files changed, 15 insertions, 5 deletions
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 65d147974f8d..3edf05e98e58 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -299,9 +299,6 @@
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
-#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
-#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
-#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
@@ -330,6 +327,7 @@
#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */
/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
@@ -390,7 +388,10 @@
#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
+#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */
+#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
+#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 8f28fafa98b3..1231d63f836d 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -56,8 +56,11 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
-/* Force disable because it's broken beyond repair */
-#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
+#ifdef CONFIG_INTEL_IOMMU_SVM
+# define DISABLE_ENQCMD 0
+#else
+# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
+#endif
#ifdef CONFIG_X86_SGX
# define DISABLE_SGX 0
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index a4a39c3e0f19..0e7f303542bf 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -705,12 +705,14 @@
#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0)
#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10)
+#define PACKAGE_THERM_STATUS_HFI_UPDATED (1 << 26)
#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2
#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0)
#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)
+#define PACKAGE_THERM_INT_HFI_ENABLE (1 << 25)
/* Thermal Thresholds Support */
#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
@@ -959,4 +961,8 @@
#define MSR_VM_IGNNE 0xc0010115
#define MSR_VM_HSAVE_PA 0xc0010117
+/* Hardware Feedback Interface */
+#define MSR_IA32_HW_FEEDBACK_PTR 0x17d0
+#define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1
+
#endif /* _ASM_X86_MSR_INDEX_H */