diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-07 00:16:36 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-07 00:16:36 +0300 |
commit | 90489a72fba9529c85e051067ecb41183b8e982e (patch) | |
tree | 6c61660a380c3ddf25607b5892d173c3f4feb0d2 /arch/x86/include/asm | |
parent | 007dc78fea62610bf06829e38f1d8c69b6ea5af6 (diff) | |
parent | d15d356887e770c5f2dcf963b52c7cb510c9e42d (diff) | |
download | linux-90489a72fba9529c85e051067ecb41183b8e982e.tar.xz |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"The main kernel changes were:
- add support for Intel's "adaptive PEBS v4" - which embedds LBS data
in PEBS records and can thus batch up and reduce the IRQ (NMI) rate
significantly - reducing overhead and making call-graph profiling
less intrusive.
- add Intel CPU core and uncore support updates for Tremont, Icelake,
- extend the x86 PMU constraints scheduler with 'constraint ranges'
to better support Icelake hw constraints,
- make x86 call-chain support work better with CONFIG_FRAME_POINTER=y
- misc other changes
Tooling changes:
- updates to the main tools: 'perf record', 'perf trace', 'perf
stat'
- updated Intel and S/390 vendor events
- libtraceevent updates
- misc other updates and fixes"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (69 commits)
perf/x86: Make perf callchains work without CONFIG_FRAME_POINTER
watchdog: Fix typo in comment
perf/x86/intel: Add Tremont core PMU support
perf/x86/intel/uncore: Add Intel Icelake uncore support
perf/x86/msr: Add Icelake support
perf/x86/intel/rapl: Add Icelake support
perf/x86/intel/cstate: Add Icelake support
perf/x86/intel: Add Icelake support
perf/x86: Support constraint ranges
perf/x86/lbr: Avoid reading the LBRs when adaptive PEBS handles them
perf/x86/intel: Support adaptive PEBS v4
perf/x86/intel/ds: Extract code of event update in short period
perf/x86/intel: Extract memory code PEBS parser for reuse
perf/x86: Support outputting XMM registers
perf/x86/intel: Force resched when TFA sysctl is modified
perf/core: Add perf_pmu_resched() as global function
perf/headers: Fix stale comment for struct perf_addr_filter
perf/core: Make perf_swevent_init_cpu() static
perf/x86: Add sanity checks to x86_schedule_events()
perf/x86: Optimize x86_schedule_events()
...
Diffstat (limited to 'arch/x86/include/asm')
-rw-r--r-- | arch/x86/include/asm/intel_ds.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/msr-index.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/perf_event.h | 57 | ||||
-rw-r--r-- | arch/x86/include/asm/stacktrace.h | 13 |
4 files changed, 52 insertions, 21 deletions
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h index ae26df1c2789..8380c3ddd4b2 100644 --- a/arch/x86/include/asm/intel_ds.h +++ b/arch/x86/include/asm/intel_ds.h @@ -8,7 +8,7 @@ /* The maximal number of PEBS events: */ #define MAX_PEBS_EVENTS 8 -#define MAX_FIXED_PEBS_EVENTS 3 +#define MAX_FIXED_PEBS_EVENTS 4 /* * A debug store configuration. diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ca5bc0eacb95..1378518cf63f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -116,6 +116,7 @@ #define LBR_INFO_CYCLES 0xffff #define MSR_IA32_PEBS_ENABLE 0x000003f1 +#define MSR_PEBS_DATA_CFG 0x000003f2 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8bdf74902293..1392d5e6e8d6 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -7,7 +7,7 @@ */ #define INTEL_PMC_MAX_GENERIC 32 -#define INTEL_PMC_MAX_FIXED 3 +#define INTEL_PMC_MAX_FIXED 4 #define INTEL_PMC_IDX_FIXED 32 #define X86_PMC_IDX_MAX 64 @@ -32,6 +32,8 @@ #define HSW_IN_TX (1ULL << 32) #define HSW_IN_TX_CHECKPOINTED (1ULL << 33) +#define ICL_EVENTSEL_ADAPTIVE (1ULL << 34) +#define ICL_FIXED_0_ADAPTIVE (1ULL << 32) #define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) @@ -87,6 +89,12 @@ #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 #define ARCH_PERFMON_EVENTS_COUNT 7 +#define PEBS_DATACFG_MEMINFO BIT_ULL(0) +#define PEBS_DATACFG_GP BIT_ULL(1) +#define PEBS_DATACFG_XMMS BIT_ULL(2) +#define PEBS_DATACFG_LBRS BIT_ULL(3) +#define PEBS_DATACFG_LBR_SHIFT 24 + /* * Intel "Architectural Performance Monitoring" CPUID * detection/enumeration details: @@ -177,6 +185,41 @@ struct x86_pmu_capability { #define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) /* + * Adaptive PEBS v4 + */ + +struct pebs_basic { + u64 format_size; + u64 ip; + u64 applicable_counters; + u64 tsc; +}; + +struct pebs_meminfo { + u64 address; + u64 aux; + u64 latency; + u64 tsx_tuning; +}; + +struct pebs_gprs { + u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di; + u64 r8, r9, r10, r11, r12, r13, r14, r15; +}; + +struct pebs_xmm { + u64 xmm[16*2]; /* two entries for each register */ +}; + +struct pebs_lbr_entry { + u64 from, to, info; +}; + +struct pebs_lbr { + struct pebs_lbr_entry lbr[0]; /* Variable length */ +}; + +/* * IBS cpuid feature detection */ @@ -248,6 +291,11 @@ extern void perf_events_lapic_init(void); #define PERF_EFLAGS_VM (1UL << 5) struct pt_regs; +struct x86_perf_regs { + struct pt_regs regs; + u64 *xmm_regs; +}; + extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) @@ -260,14 +308,9 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); */ #define perf_arch_fetch_caller_regs(regs, __ip) { \ (regs)->ip = (__ip); \ - (regs)->bp = caller_frame_pointer(); \ + (regs)->sp = (unsigned long)__builtin_frame_address(0); \ (regs)->cs = __KERNEL_CS; \ regs->flags = 0; \ - asm volatile( \ - _ASM_MOV "%%"_ASM_SP ", %0\n" \ - : "=m" ((regs)->sp) \ - :: "memory" \ - ); \ } struct perf_guest_switch_msr { diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index f335aad404a4..beef7ad9e43a 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -98,19 +98,6 @@ struct stack_frame_ia32 { u32 return_address; }; -static inline unsigned long caller_frame_pointer(void) -{ - struct stack_frame *frame; - - frame = __builtin_frame_address(0); - -#ifdef CONFIG_FRAME_POINTER - frame = frame->next_frame; -#endif - - return (unsigned long)frame; -} - void show_opcodes(struct pt_regs *regs, const char *loglvl); void show_ip(struct pt_regs *regs, const char *loglvl); #endif /* _ASM_X86_STACKTRACE_H */ |