From 771ceddaadd0a2b31603034b36dca50943ff6836 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Mon, 20 Feb 2017 12:50:40 +0100 Subject: perf vendor events: Add mapping for KnightsMill PMU events Reuse events from KnightsLanding for KnightsMill Signed-off-by: Karol Wachowski Cc: Alexander Shishkin Cc: Andi Kleen Cc: Dave Hansen Cc: Kan Liang Cc: Peter Zijlstra Cc: Piotr Luc Cc: Srinivas Pandruvada Link: http://lkml.kernel.org/r/1487591440-25172-1-git-send-email-karol.wachowski@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/mapfile.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 12181bb1da2a..d1a12e584c1b 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -17,6 +17,7 @@ GenuineIntel-6-3A,v18,ivybridge,core GenuineIntel-6-3E,v19,ivytown,core GenuineIntel-6-2D,v20,jaketown,core GenuineIntel-6-57,v9,knightslanding,core +GenuineIntel-6-85,v9,knightslanding,core GenuineIntel-6-1E,v2,nehalemep,core GenuineIntel-6-1F,v2,nehalemep,core GenuineIntel-6-1A,v2,nehalemep,core -- cgit v1.2.3 From 02d492e5dcb72c004d213756eb87c9d62a6d76a7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 7 Feb 2017 01:40:05 +0100 Subject: perf stat: Issue a HW watchdog disable hint When using perf stat on an AMD F15h system with the default hw events attributes, some of the events don't get counted: Performance counter stats for 'sleep 1': 0.749208 task-clock (msec) # 0.001 CPUs utilized 1 context-switches # 0.001 M/sec 0 cpu-migrations # 0.000 K/sec 54 page-faults # 0.072 M/sec 1,122,815 cycles # 1.499 GHz 286,740 stalled-cycles-frontend # 25.54% frontend cycles idle stalled-cycles-backend (0.00%) ^^^^^^^^^^^^ instructions (0.00%) ^^^^^^^^^^^^ branches (0.00%) branch-misses (0.00%) 1.001550070 seconds time elapsed The reason is that we have the HW watchdog consuming one PMU counter and when perf 
tries to schedule 6 events on 6 counters and some of those counters are constrained to only a specific subset of PMCs by the hardware, the event scheduling fails. So issue a hint to disable the HW watchdog around a perf stat session. Committer note: Testing it... # perf stat -d usleep 1 Performance counter stats for 'usleep 1': 1.180203 task-clock (msec) # 0.490 CPUs utilized 1 context-switches # 0.847 K/sec 0 cpu-migrations # 0.000 K/sec 54 page-faults # 0.046 M/sec 184,754 cycles # 0.157 GHz 714,553 instructions # 3.87 insn per cycle 154,661 branches # 131.046 M/sec 7,247 branch-misses # 4.69% of all branches 219,984 L1-dcache-loads # 186.395 M/sec 17,600 L1-dcache-load-misses # 8.00% of all L1-dcache hits (90.16%) LLC-loads (0.00%) LLC-load-misses (0.00%) 0.002406823 seconds time elapsed Some events weren't counted. Try disabling the NMI watchdog: echo 0 > /proc/sys/kernel/nmi_watchdog perf stat ... echo 1 > /proc/sys/kernel/nmi_watchdog # Signed-off-by: Borislav Petkov Acked-by: Ingo Molnar Tested-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Robert Richter Cc: Vince Weaver Link: http://lkml.kernel.org/r/20170211183218.ijnvb5f7ciyuunx4@pd.tnic Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 13b54999ad79..f4f555a67e9b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -146,6 +146,7 @@ static aggr_get_id_t aggr_get_id; static bool append_file; static const char *output_name; static int output_fd; +static int print_free_counters_hint; struct perf_stat { bool record; @@ -1109,6 +1110,9 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, csv_sep); + if (counter->supported) + print_free_counters_hint = 1; + fprintf(stat_config.output, "%-*s%s", csv_output ? 
0 : unit_width, counter->unit, csv_sep); @@ -1477,6 +1481,13 @@ static void print_footer(void) avg_stats(&walltime_nsecs_stats)); } fprintf(output, "\n\n"); + + if (print_free_counters_hint) + fprintf(output, +"Some events weren't counted. Try disabling the NMI watchdog:\n" +" echo 0 > /proc/sys/kernel/nmi_watchdog\n" +" perf stat ...\n" +" echo 1 > /proc/sys/kernel/nmi_watchdog\n"); } static void print_counters(struct timespec *ts, int argc, const char **argv) -- cgit v1.2.3 From 4900653829175f60356efc279695bb23c59483c3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 22 Feb 2017 16:48:24 -0300 Subject: tools include: Adopt __compiletime_error From the kernel, get the gcc one and provide the fallback so that we can continue to build with other compilers, such as with clang. Will be used by tools/arch/x86/include/asm/cmpxchg.h. Cc: Adrian Hunter Cc: David Ahern Cc: Elena Reshetova Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-pecgz6efai4a9euuk4rxuotr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler-gcc.h | 4 ++++ tools/include/linux/compiler.h | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 48af2f10a42d..616935f1ff56 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -12,3 +12,7 @@ #if GCC_VERSION >= 70000 && !defined(__CHECKER__) # define __fallthrough __attribute__ ((fallthrough)) #endif + +#if GCC_VERSION >= 40300 +# define __compiletime_error(message) __attribute__((error(message))) +#endif /* GCC_VERSION >= 40300 */ diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 8de163b17c0d..c9e65e8faacd 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -5,6 +5,10 @@ #include #endif +#ifndef __compiletime_error +# define __compiletime_error(message) +#endif + /* Optimization barrier */ /* The 
"volatile" is due to gcc bugs */ #define barrier() __asm__ __volatile__("": : :"memory") -- cgit v1.2.3 From 3337e682d9f3043bb0b925d976558ed5c41b0a09 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 22 Feb 2017 16:54:53 -0300 Subject: tools arch x86: Include asm/cmpxchg.h Will be included from atomic.h and used in refcount.h Cc: Adrian Hunter Cc: David Ahern Cc: Elena Reshetova Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-pzrydfee75mhq64kazxmf9it@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cmpxchg.h | 89 ++++++++++++++++++++++++++++++++++++ tools/perf/MANIFEST | 1 + tools/scripts/Makefile.include | 9 ++++ 3 files changed, 99 insertions(+) create mode 100644 tools/arch/x86/include/asm/cmpxchg.h diff --git a/tools/arch/x86/include/asm/cmpxchg.h b/tools/arch/x86/include/asm/cmpxchg.h new file mode 100644 index 000000000000..f5253260f3cc --- /dev/null +++ b/tools/arch/x86/include/asm/cmpxchg.h @@ -0,0 +1,89 @@ +#ifndef TOOLS_ASM_X86_CMPXCHG_H +#define TOOLS_ASM_X86_CMPXCHG_H + +#include + +/* + * Non-existent functions to indicate usage errors at link time + * (or compile-time if the compiler implements __compiletime_error()). + */ +extern void __cmpxchg_wrong_size(void) + __compiletime_error("Bad argument size for cmpxchg"); + +/* + * Constants for operation sizes. On 32-bit, the 64-bit size is set to + * -1 because sizeof will never return -1, thereby making those switch + * case statements guaranteed dead code which the compiler will + * eliminate, and allowing the "missing symbol in the default case" to + * indicate a usage error. + */ +#define __X86_CASE_B 1 +#define __X86_CASE_W 2 +#define __X86_CASE_L 4 +#ifdef __x86_64__ +#define __X86_CASE_Q 8 +#else +#define __X86_CASE_Q -1 /* sizeof will never return -1 */ +#endif + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. 
Success is + * indicated by comparing RETURN with OLD. + */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case __X86_CASE_B: \ + { \ + volatile u8 *__ptr = (volatile u8 *)(ptr); \ + asm volatile(lock "cmpxchgb %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "q" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_W: \ + { \ + volatile u16 *__ptr = (volatile u16 *)(ptr); \ + asm volatile(lock "cmpxchgw %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_L: \ + { \ + volatile u32 *__ptr = (volatile u32 *)(ptr); \ + asm volatile(lock "cmpxchgl %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_Q: \ + { \ + volatile u64 *__ptr = (volatile u64 *)(ptr); \ + asm volatile(lock "cmpxchgq %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) + +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) + +#define cmpxchg(ptr, old, new) \ + __cmpxchg(ptr, old, new, sizeof(*(ptr))) + + +#endif /* TOOLS_ASM_X86_CMPXCHG_H */ diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 8672f835ae4e..e2c52190cf28 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -12,6 +12,7 @@ tools/arch/sparc/include/asm/barrier_32.h tools/arch/sparc/include/asm/barrier_64.h tools/arch/tile/include/asm/barrier.h tools/arch/x86/include/asm/barrier.h +tools/arch/x86/include/asm/cmpxchg.h tools/arch/x86/include/asm/cpufeatures.h tools/arch/x86/include/asm/disabled-features.h tools/arch/x86/include/asm/required-features.h diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 621578aa12d6..fc74db62fef4 
100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -43,6 +43,15 @@ ifneq ($(CC), clang) EXTRA_WARNINGS += -Wstrict-aliasing=3 endif +# Hack to avoid type-punned warnings on old systems such as RHEL5: +# We should be changing CFLAGS and checking gcc version, but this +# will do for now and keep the above -Wstrict-aliasing=3 in place +# in newer systems. +# Needed for the __raw_cmpxchg in tools/arch/x86/include/asm/cmpxchg.h +ifneq ($(filter 3.%,$(MAKE_VERSION)),) # make-3 +EXTRA_WARNINGS += -fno-strict-aliasing +endif + ifneq ($(findstring $(MAKEFLAGS), w),w) PRINT_DIR = --no-print-directory else -- cgit v1.2.3 From 8a73615df3b8973df2de1455c00e9169522d8257 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 22 Feb 2017 16:55:43 -0300 Subject: tools arch x86: Introduce atomic_cmpxchg() Will be used by atomic_cmpxchg_relaxed(), in turn used by refcount.h. Cc: Adrian Hunter Cc: David Ahern Cc: Elena Reshetova Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-kdmovd3l4gw5b1w31ypr6ddv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/atomic.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h index 059e33e94260..328eeceec709 100644 --- a/tools/arch/x86/include/asm/atomic.h +++ b/tools/arch/x86/include/asm/atomic.h @@ -7,6 +7,8 @@ #define LOCK_PREFIX "\n\tlock; " +#include + /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. 
@@ -62,4 +64,9 @@ static inline int atomic_dec_and_test(atomic_t *v) GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); } +static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) +{ + return cmpxchg(&v->counter, old, new); +} + #endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */ -- cgit v1.2.3 From 2bcdeadbc094b4f6511aedea1e5b8052bf0cc89c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 22 Feb 2017 16:57:53 -0300 Subject: tools include: Introduce atomic_cmpxchg_{relaxed,release}() Will be used by refcnt.h Cc: Adrian Hunter Cc: David Ahern Cc: Elena Reshetova Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-jszriruqfqpez1bkivwfj6qb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/atomic.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h index 4e3d3d18ebab..9f21fc2b092b 100644 --- a/tools/include/linux/atomic.h +++ b/tools/include/linux/atomic.h @@ -3,4 +3,10 @@ #include +/* atomic_cmpxchg_relaxed */ +#ifndef atomic_cmpxchg_relaxed +#define atomic_cmpxchg_relaxed atomic_cmpxchg +#define atomic_cmpxchg_release atomic_cmpxchg +#endif /* atomic_cmpxchg_relaxed */ + #endif /* __TOOLS_LINUX_ATOMIC_H */ -- cgit v1.2.3 From ed4aad50ea0384737034b39f952f29cfb2da52ac Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 23 Feb 2017 15:33:02 -0300 Subject: tools include: Provide gcc based cmpxchg fallback for !x86 We've been using an atomic_t implementation subset based on the gcc builtin functions for a while, now, with refcount.h we need cmpxchg(), use gcc's __sync_val_compare_and_swap() for that. 
Cc: Adrian Hunter Cc: David Ahern Cc: Elena Reshetova Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-b9zovyxgpa0c4vi3nm0kjo97@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/asm-generic/atomic-gcc.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h index 2ba78c9f5701..5e9738f97bf3 100644 --- a/tools/include/asm-generic/atomic-gcc.h +++ b/tools/include/asm-generic/atomic-gcc.h @@ -60,4 +60,12 @@ static inline int atomic_dec_and_test(atomic_t *v) return __sync_sub_and_fetch(&v->counter, 1) == 0; } +#define cmpxchg(ptr, oldval, newval) \ + __sync_val_compare_and_swap(ptr, oldval, newval) + +static inline int atomic_cmpxchg(atomic_t *v, int oldval, int newval) +{ + return cmpxchg(&(v)->counter, oldval, newval); +} + #endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */ -- cgit v1.2.3 From eaa75b5117d52adf1efd3c6c3fb4bd8f97de648b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 22 Feb 2017 17:42:40 -0300 Subject: tools include: Add UINT_MAX def to kernel.h The kernel has it and some files we got from there would require us including the userland header for that, so add it conditionally. 
Cc: Adrian Hunter Cc: David Ahern Cc: Elena Reshetova Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-gmwyal7c9vzzttlyk6u59rzn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/kernel.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 28607db02bd3..adb4d0147755 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -5,6 +5,10 @@ #include #include +#ifndef UINT_MAX +#define UINT_MAX (~0U) +#endif + #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1) -- cgit v1.2.3 From 73a9bf95ed1c05698ecabe2f28c47aedfa61b52b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 22 Feb 2017 17:00:53 -0300 Subject: tools include: Adopt kernel's refcount.h To aid in catching bugs when using atomics as a reference count. This is a trimmed down version with just what is used by tools/ at this point. After this, the patches submitted by Elena for tools/ doing the conversion from atomic_ to refcount_ methods can be applied and tested. To activate it, build perf with: make DEBUG=1 -C tools/perf Cc: Adrian Hunter Cc: David Ahern Cc: Elena Reshetova Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-dqtxsumns9ov0l9r5x398f19@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/refcount.h | 151 +++++++++++++++++++++++++++++++++++++++++ tools/perf/MANIFEST | 1 + 2 files changed, 152 insertions(+) create mode 100644 tools/include/linux/refcount.h diff --git a/tools/include/linux/refcount.h b/tools/include/linux/refcount.h new file mode 100644 index 000000000000..a0177c1f55b1 --- /dev/null +++ b/tools/include/linux/refcount.h @@ -0,0 +1,151 @@ +#ifndef _TOOLS_LINUX_REFCOUNT_H +#define _TOOLS_LINUX_REFCOUNT_H + +/* + * Variant of atomic_t specialized for reference counts. 
+ * + * The interface matches the atomic_t interface (to aid in porting) but only + * provides the few functions one should use for reference counting. + * + * It differs in that the counter saturates at UINT_MAX and will not move once + * there. This avoids wrapping the counter and causing 'spurious' + * use-after-free issues. + * + * Memory ordering rules are slightly relaxed wrt regular atomic_t functions + * and provide only what is strictly required for refcounts. + * + * The increments are fully relaxed; these will not provide ordering. The + * rationale is that whatever is used to obtain the object we're increasing the + * reference count on will provide the ordering. For locked data structures, + * it's the lock acquire, for RCU/lockless data structures it's the dependent + * load. + * + * Do note that inc_not_zero() provides a control dependency which will order + * future stores against the inc, this ensures we'll never modify the object + * if we did not in fact acquire a reference. + * + * The decrements will provide release order, such that all the prior loads and + * stores will be issued before, it also provides a control dependency, which + * will order us against the subsequent free(). + * + * The control dependency is against the load of the cmpxchg (ll/sc) that + * succeeded. This means the stores aren't fully ordered, but this is fine + * because the 1->0 transition indicates no concurrency. + * + * Note that the allocator is responsible for ordering things between free() + * and alloc(). 
+ * + */ + +#include +#include + +#ifdef NDEBUG +#define REFCOUNT_WARN(cond, str) (void)(cond) +#define __refcount_check +#else +#define REFCOUNT_WARN(cond, str) BUG_ON(cond) +#define __refcount_check __must_check +#endif + +typedef struct refcount_struct { + atomic_t refs; +} refcount_t; + +#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } + +static inline void refcount_set(refcount_t *r, unsigned int n) +{ + atomic_set(&r->refs, n); +} + +static inline unsigned int refcount_read(const refcount_t *r) +{ + return atomic_read(&r->refs); +} + +/* + * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + */ +static inline __refcount_check +bool refcount_inc_not_zero(refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + new = val + 1; + + if (!val) + return false; + + if (unlikely(!new)) + return true; + + old = atomic_cmpxchg_relaxed(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + + return true; +} + +/* + * Similar to atomic_inc(), will saturate at UINT_MAX and WARN. + * + * Provides no memory ordering, it is assumed the caller already has a + * reference on the object, will WARN when this is not so. + */ +static inline void refcount_inc(refcount_t *r) +{ + REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); +} + +/* + * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to + * decrement when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. + * See the comment on top. 
+ */ +static inline __refcount_check +bool refcount_sub_and_test(unsigned int i, refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + if (unlikely(val == UINT_MAX)) + return false; + + new = val - i; + if (new > val) { + REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + return false; + } + + old = atomic_cmpxchg_release(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + return !new; +} + +static inline __refcount_check +bool refcount_dec_and_test(refcount_t *r) +{ + return refcount_sub_and_test(1, r); +} + + +#endif /* _ATOMIC_LINUX_REFCOUNT_H */ diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index e2c52190cf28..28648c09dcd6 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -79,6 +79,7 @@ tools/include/uapi/linux/perf_event.h tools/include/linux/poison.h tools/include/linux/rbtree.h tools/include/linux/rbtree_augmented.h +tools/include/linux/refcount.h tools/include/linux/string.h tools/include/linux/stringify.h tools/include/linux/types.h -- cgit v1.2.3 From 79c5fe6db8c70558d3a64959f55596d137ccc6e6 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Tue, 21 Feb 2017 17:34:55 +0200 Subject: perf cgroup: Convert cgroup_sel.refcnt from atomic_t to refcount_t The refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. 
Signed-off-by: Elena Reshetova Signed-off-by: David Windsor Signed-off-by: Hans Liljestrand Signed-off-by: Kees Kook Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: alsa-devel@alsa-project.org Cc: Andrew Morton Cc: David Windsor Cc: Greg Kroah-Hartman Cc: Hans Liljestrand Cc: Jiri Olsa Cc: Kees Kook Cc: Mark Rutland Cc: Matija Glavinic Pecotic Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1487691303-31858-2-git-send-email-elena.reshetova@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cgroup.c | 6 +++--- tools/perf/util/cgroup.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index eafbf11442b2..86399eda3684 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -127,19 +127,19 @@ static int add_cgroup(struct perf_evlist *evlist, char *str) goto found; n++; } - if (atomic_read(&cgrp->refcnt) == 0) + if (refcount_read(&cgrp->refcnt) == 0) free(cgrp); return -1; found: - atomic_inc(&cgrp->refcnt); + refcount_inc(&cgrp->refcnt); counter->cgrp = cgrp; return 0; } void close_cgroup(struct cgroup_sel *cgrp) { - if (cgrp && atomic_dec_and_test(&cgrp->refcnt)) { + if (cgrp && refcount_dec_and_test(&cgrp->refcnt)) { close(cgrp->fd); zfree(&cgrp->name); free(cgrp); diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index 31f8dcdbd7ef..d91966b97cbd 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -1,14 +1,14 @@ #ifndef __CGROUP_H__ #define __CGROUP_H__ -#include +#include struct option; struct cgroup_sel { char *name; int fd; - atomic_t refcnt; + refcount_t refcnt; }; -- cgit v1.2.3 From ec09a42a6dbd2afde9b8fd4bb8f98bbd94ca904c Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Tue, 21 Feb 2017 17:34:56 +0200 Subject: perf cpumap: Convert cpu_map.refcnt from atomic_t to refcount_t The refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. 
This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: David Windsor Signed-off-by: Hans Liljestrand Signed-off-by: Kees Kook Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andrew Morton Cc: David Windsor Cc: Greg Kroah-Hartman Cc: Hans Liljestrand Cc: Jiri Olsa Cc: Kees Kook Cc: Mark Rutland Cc: Matija Glavinic Pecotic Cc: Peter Zijlstra Cc: alsa-devel@alsa-project.org Link: http://lkml.kernel.org/r/1487691303-31858-3-git-send-email-elena.reshetova@intel.com [ fixed mixed conversion to refcount in tests/cpumap.c ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/cpumap.c | 2 +- tools/perf/util/cpumap.c | 16 ++++++++-------- tools/perf/util/cpumap.h | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index f168a85992d0..4478773cdb97 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -66,7 +66,7 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong nr", map->nr == 2); TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1); TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256); - TEST_ASSERT_VAL("wrong refcnt", atomic_read(&map->refcnt) == 1); + TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1); cpu_map__put(map); return 0; } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 8c7504939113..39ad2caccf56 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -29,7 +29,7 @@ static struct cpu_map *cpu_map__default_new(void) cpus->map[i] = i; cpus->nr = nr_cpus; - atomic_set(&cpus->refcnt, 1); + refcount_set(&cpus->refcnt, 1); } return cpus; @@ -43,7 +43,7 @@ static struct cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus) if (cpus != NULL) { cpus->nr = nr_cpus; memcpy(cpus->map, tmp_cpus, payload_size); - atomic_set(&cpus->refcnt, 1); + refcount_set(&cpus->refcnt, 1); } 
return cpus; @@ -252,7 +252,7 @@ struct cpu_map *cpu_map__dummy_new(void) if (cpus != NULL) { cpus->nr = 1; cpus->map[0] = -1; - atomic_set(&cpus->refcnt, 1); + refcount_set(&cpus->refcnt, 1); } return cpus; @@ -269,7 +269,7 @@ struct cpu_map *cpu_map__empty_new(int nr) for (i = 0; i < nr; i++) cpus->map[i] = -1; - atomic_set(&cpus->refcnt, 1); + refcount_set(&cpus->refcnt, 1); } return cpus; @@ -278,7 +278,7 @@ struct cpu_map *cpu_map__empty_new(int nr) static void cpu_map__delete(struct cpu_map *map) { if (map) { - WARN_ONCE(atomic_read(&map->refcnt) != 0, + WARN_ONCE(refcount_read(&map->refcnt) != 0, "cpu_map refcnt unbalanced\n"); free(map); } @@ -287,13 +287,13 @@ static void cpu_map__delete(struct cpu_map *map) struct cpu_map *cpu_map__get(struct cpu_map *map) { if (map) - atomic_inc(&map->refcnt); + refcount_inc(&map->refcnt); return map; } void cpu_map__put(struct cpu_map *map) { - if (map && atomic_dec_and_test(&map->refcnt)) + if (map && refcount_dec_and_test(&map->refcnt)) cpu_map__delete(map); } @@ -357,7 +357,7 @@ int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, /* ensure we process id in increasing order */ qsort(c->map, c->nr, sizeof(int), cmp_ids); - atomic_set(&c->refcnt, 1); + refcount_set(&c->refcnt, 1); *res = c; return 0; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 1a0549af8f5c..e84491636c1b 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -3,13 +3,13 @@ #include #include -#include +#include #include "perf.h" #include "util/debug.h" struct cpu_map { - atomic_t refcnt; + refcount_t refcnt; int nr; int map[]; }; -- cgit v1.2.3 From 6df74bc08bc2201f65fb0e81cd5feb787575f7ce Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Tue, 21 Feb 2017 17:34:57 +0200 Subject: perf comm: Convert comm_str.refcnt from atomic_t to refcount_t The refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. 
This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: David Windsor Signed-off-by: Hans Liljestrand Signed-off-by: Kees Kook Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andrew Morton Cc: David Windsor Cc: Greg Kroah-Hartman Cc: Hans Liljestrand Cc: Jiri Olsa Cc: Kees Kook Cc: Mark Rutland Cc: Matija Glavinic Pecotic Cc: Peter Zijlstra Cc: alsa-devel@alsa-project.org Link: http://lkml.kernel.org/r/1487691303-31858-4-git-send-email-elena.reshetova@intel.com [ Reinstated comm_str__get() function, needed when reusing entries in the rbtree ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/comm.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 21b7ff382c3f..32837b6f7879 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -2,12 +2,12 @@ #include "util.h" #include #include -#include +#include struct comm_str { char *str; struct rb_node rb_node; - atomic_t refcnt; + refcount_t refcnt; }; /* Should perhaps be moved to struct machine */ @@ -16,13 +16,13 @@ static struct rb_root comm_str_root; static struct comm_str *comm_str__get(struct comm_str *cs) { if (cs) - atomic_inc(&cs->refcnt); + refcount_inc(&cs->refcnt); return cs; } static void comm_str__put(struct comm_str *cs) { - if (cs && atomic_dec_and_test(&cs->refcnt)) { + if (cs && refcount_dec_and_test(&cs->refcnt)) { rb_erase(&cs->rb_node, &comm_str_root); zfree(&cs->str); free(cs); @@ -43,7 +43,7 @@ static struct comm_str *comm_str__alloc(const char *str) return NULL; } - atomic_set(&cs->refcnt, 0); + refcount_set(&cs->refcnt, 1); return cs; } @@ -61,7 +61,7 @@ static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root) cmp = strcmp(str, iter->str); if (!cmp) - return iter; + return comm_str__get(iter); if (cmp < 0) p = &(*p)->rb_left; @@ -95,8 +95,6 @@ struct comm 
*comm__new(const char *str, u64 timestamp, bool exec) return NULL; } - comm_str__get(comm->comm_str); - return comm; } @@ -108,7 +106,6 @@ int comm__override(struct comm *comm, const char *str, u64 timestamp, bool exec) if (!new) return -ENOMEM; - comm_str__get(new); comm_str__put(old); comm->comm_str = new; comm->start = timestamp; -- cgit v1.2.3 From 7100810a75b9854f1b05550b54500497c5914d4b Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Tue, 21 Feb 2017 17:34:58 +0200 Subject: perf dso: Convert dso.refcnt from atomic_t to refcount_t The refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: David Windsor Signed-off-by: Hans Liljestrand Signed-off-by: Kees Kook Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andrew Morton Cc: David Windsor Cc: Greg Kroah-Hartman Cc: Hans Liljestrand Cc: Jiri Olsa Cc: Kees Kook Cc: Mark Rutland Cc: Matija Glavinic Pecotic Cc: Peter Zijlstra Cc: alsa-devel@alsa-project.org Link: http://lkml.kernel.org/r/1487691303-31858-5-git-send-email-elena.reshetova@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 6 +++--- tools/perf/util/dso.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index d38b62a700ca..42db00d78573 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1109,7 +1109,7 @@ struct dso *dso__new(const char *name) INIT_LIST_HEAD(&dso->node); INIT_LIST_HEAD(&dso->data.open_entry); pthread_mutex_init(&dso->lock, NULL); - atomic_set(&dso->refcnt, 1); + refcount_set(&dso->refcnt, 1); } return dso; @@ -1147,13 +1147,13 @@ void dso__delete(struct dso *dso) struct dso *dso__get(struct dso *dso) { if (dso) - atomic_inc(&dso->refcnt); + refcount_inc(&dso->refcnt); return dso; } void dso__put(struct 
dso *dso) { - if (dso && atomic_dec_and_test(&dso->refcnt)) + if (dso && refcount_dec_and_test(&dso->refcnt)) dso__delete(dso); } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index ecc4bbd3f82e..12350b171727 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -1,7 +1,7 @@ #ifndef __PERF_DSO #define __PERF_DSO -#include +#include #include #include #include @@ -187,7 +187,7 @@ struct dso { void *priv; u64 db_id; }; - atomic_t refcnt; + refcount_t refcnt; char name[0]; }; -- cgit v1.2.3 From e3a42cdd3e35d6c2181d5acfa191eb448aea6ace Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Tue, 21 Feb 2017 17:34:59 +0200 Subject: perf map: Convert map.refcnt from atomic_t to refcount_t The refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: David Windsor Signed-off-by: Hans Liljestrand Signed-off-by: Kees Kook Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andrew Morton Cc: David Windsor Cc: Greg Kroah-Hartman Cc: Hans Liljestrand Cc: Jiri Olsa Cc: Kees Kook Cc: Mark Rutland Cc: Matija Glavinic Pecotic Cc: Peter Zijlstra Cc: alsa-devel@alsa-project.org Link: http://lkml.kernel.org/r/1487691303-31858-6-git-send-email-elena.reshetova@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 6 +++--- tools/perf/util/map.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 0a943e7b1ea7..f0e2428efd0b 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -141,7 +141,7 @@ void map__init(struct map *map, enum map_type type, RB_CLEAR_NODE(&map->rb_node); map->groups = NULL; map->erange_warned = false; - atomic_set(&map->refcnt, 1); + refcount_set(&map->refcnt, 1); } struct map *map__new(struct machine *machine, u64 start, 
u64 len, @@ -255,7 +255,7 @@ void map__delete(struct map *map) void map__put(struct map *map) { - if (map && atomic_dec_and_test(&map->refcnt)) + if (map && refcount_dec_and_test(&map->refcnt)) map__delete(map); } @@ -354,7 +354,7 @@ struct map *map__clone(struct map *from) struct map *map = memdup(from, sizeof(*map)); if (map != NULL) { - atomic_set(&map->refcnt, 1); + refcount_set(&map->refcnt, 1); RB_CLEAR_NODE(&map->rb_node); dso__get(map->dso); map->groups = NULL; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index abdacf800c98..9545ff343ec5 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -1,7 +1,7 @@ #ifndef __PERF_MAP_H #define __PERF_MAP_H -#include +#include #include #include #include @@ -51,7 +51,7 @@ struct map { struct dso *dso; struct map_groups *groups; - atomic_t refcnt; + refcount_t refcnt; }; struct kmap { @@ -150,7 +150,7 @@ struct map *map__clone(struct map *map); static inline struct map *map__get(struct map *map) { if (map) - atomic_inc(&map->refcnt); + refcount_inc(&map->refcnt); return map; } -- cgit v1.2.3 From ead05e8f3fffe2860f0f8d1c23d74c526e9f2a3c Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Tue, 21 Feb 2017 17:35:00 +0200 Subject: perf map: Convert map_groups.refcnt from atomic_t to refcount_t The refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. 
Signed-off-by: Elena Reshetova Signed-off-by: David Windsor Signed-off-by: Hans Liljestrand Signed-off-by: Kees Kook Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andrew Morton Cc: David Windsor Cc: Greg Kroah-Hartman Cc: Hans Liljestrand Cc: Jiri Olsa Cc: Kees Kook Cc: Mark Rutland Cc: Matija Glavinic Pecotic Cc: Peter Zijlstra Cc: alsa-devel@alsa-project.org Link: http://lkml.kernel.org/r/1487691303-31858-7-git-send-email-elena.reshetova@intel.com [ Did the missing conversion of tests/thread-mg-share.c too ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/thread-mg-share.c | 12 ++++++------ tools/perf/util/map.c | 4 ++-- tools/perf/util/map.h | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c index 188b63140fc8..76686dd6f5ec 100644 --- a/tools/perf/tests/thread-mg-share.c +++ b/tools/perf/tests/thread-mg-share.c @@ -43,7 +43,7 @@ int test__thread_mg_share(int subtest __maybe_unused) leader && t1 && t2 && t3 && other); mg = leader->mg; - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 4); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 4); /* test the map groups pointer is shared */ TEST_ASSERT_VAL("map groups don't match", mg == t1->mg); @@ -71,25 +71,25 @@ int test__thread_mg_share(int subtest __maybe_unused) machine__remove_thread(machine, other_leader); other_mg = other->mg; - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&other_mg->refcnt), 2); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&other_mg->refcnt), 2); TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg); /* release thread group */ thread__put(leader); - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 3); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 3); thread__put(t1); - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 2); + TEST_ASSERT_EQUAL("wrong refcnt", 
refcount_read(&mg->refcnt), 2); thread__put(t2); - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 1); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 1); thread__put(t3); /* release other group */ thread__put(other_leader); - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&other_mg->refcnt), 1); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&other_mg->refcnt), 1); thread__put(other); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index f0e2428efd0b..1d9ebcf9e38e 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -485,7 +485,7 @@ void map_groups__init(struct map_groups *mg, struct machine *machine) maps__init(&mg->maps[i]); } mg->machine = machine; - atomic_set(&mg->refcnt, 1); + refcount_set(&mg->refcnt, 1); } static void __maps__purge(struct maps *maps) @@ -547,7 +547,7 @@ void map_groups__delete(struct map_groups *mg) void map_groups__put(struct map_groups *mg) { - if (mg && atomic_dec_and_test(&mg->refcnt)) + if (mg && refcount_dec_and_test(&mg->refcnt)) map_groups__delete(mg); } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 9545ff343ec5..c8a5a644c0a9 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -67,7 +67,7 @@ struct maps { struct map_groups { struct maps maps[MAP__NR_TYPES]; struct machine *machine; - atomic_t refcnt; + refcount_t refcnt; }; struct map_groups *map_groups__new(struct machine *machine); @@ -77,7 +77,7 @@ bool map_groups__empty(struct map_groups *mg); static inline struct map_groups *map_groups__get(struct map_groups *mg) { if (mg) - atomic_inc(&mg->refcnt); + refcount_inc(&mg->refcnt); return mg; } -- cgit v1.2.3 From 25a3720cf45779900246ec17e238fbb674ce4e67 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Tue, 21 Feb 2017 17:35:01 +0200 Subject: perf evlist: Convert perf_map.refcnt from atomic_t to refcount_t The refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. 
This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: David Windsor Signed-off-by: Hans Liljestrand Signed-off-by: Kees Kook Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andrew Morton Cc: David Windsor Cc: Greg Kroah-Hartman Cc: Hans Liljestrand Cc: Jiri Olsa Cc: Kees Kook Cc: Mark Rutland Cc: Matija Glavinic Pecotic Cc: Peter Zijlstra Cc: alsa-devel@alsa-project.org Link: http://lkml.kernel.org/r/1487691303-31858-8-git-send-email-elena.reshetova@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 18 +++++++++--------- tools/perf/util/evlist.h | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b601f2814a30..564b924fb48a 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -777,7 +777,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *md, bool check_messu /* * Check if event was unmapped due to a POLLHUP/POLLERR. */ - if (!atomic_read(&md->refcnt)) + if (!refcount_read(&md->refcnt)) return NULL; head = perf_mmap__read_head(md); @@ -794,7 +794,7 @@ perf_mmap__read_backward(struct perf_mmap *md) /* * Check if event was unmapped due to a POLLHUP/POLLERR. 
*/ - if (!atomic_read(&md->refcnt)) + if (!refcount_read(&md->refcnt)) return NULL; head = perf_mmap__read_head(md); @@ -856,7 +856,7 @@ void perf_mmap__read_catchup(struct perf_mmap *md) { u64 head; - if (!atomic_read(&md->refcnt)) + if (!refcount_read(&md->refcnt)) return; head = perf_mmap__read_head(md); @@ -875,14 +875,14 @@ static bool perf_mmap__empty(struct perf_mmap *md) static void perf_mmap__get(struct perf_mmap *map) { - atomic_inc(&map->refcnt); + refcount_inc(&map->refcnt); } static void perf_mmap__put(struct perf_mmap *md) { - BUG_ON(md->base && atomic_read(&md->refcnt) == 0); + BUG_ON(md->base && refcount_read(&md->refcnt) == 0); - if (atomic_dec_and_test(&md->refcnt)) + if (refcount_dec_and_test(&md->refcnt)) perf_mmap__munmap(md); } @@ -894,7 +894,7 @@ void perf_mmap__consume(struct perf_mmap *md, bool overwrite) perf_mmap__write_tail(md, old); } - if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md)) + if (refcount_read(&md->refcnt) == 1 && perf_mmap__empty(md)) perf_mmap__put(md); } @@ -937,7 +937,7 @@ static void perf_mmap__munmap(struct perf_mmap *map) munmap(map->base, perf_mmap__mmap_len(map)); map->base = NULL; map->fd = -1; - atomic_set(&map->refcnt, 0); + refcount_set(&map->refcnt, 0); } auxtrace_mmap__munmap(&map->auxtrace_mmap); } @@ -1001,7 +1001,7 @@ static int perf_mmap__mmap(struct perf_mmap *map, * evlist layer can't just drop it when filtering events in * perf_evlist__filter_pollfd(). 
*/ - atomic_set(&map->refcnt, 2); + refcount_set(&map->refcnt, 2); map->prev = 0; map->mask = mp->mask; map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 389b9ccdf8c7..39942995f537 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -1,7 +1,7 @@ #ifndef __PERF_EVLIST_H #define __PERF_EVLIST_H 1 -#include +#include #include #include #include @@ -29,7 +29,7 @@ struct perf_mmap { void *base; int mask; int fd; - atomic_t refcnt; + refcount_t refcnt; u64 prev; struct auxtrace_mmap auxtrace_mmap; char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8))); -- cgit v1.2.3 From e34f5b11cd51fbe723e481c1db03a77260be6f4c Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Tue, 21 Feb 2017 17:35:02 +0200 Subject: perf thread: convert thread.refcnt from atomic_t to refcount_t The refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. 
Signed-off-by: Elena Reshetova Signed-off-by: David Windsor Signed-off-by: Hans Liljestrand Signed-off-by: Kees Kook Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andrew Morton Cc: David Windsor Cc: Greg Kroah-Hartman Cc: Hans Liljestrand Cc: Jiri Olsa Cc: Kees Kook Cc: Mark Rutland Cc: Matija Glavinic Pecotic Cc: Peter Zijlstra Cc: alsa-devel@alsa-project.org Link: http://lkml.kernel.org/r/1487691303-31858-9-git-send-email-elena.reshetova@intel.com [ Did missing conversion in __machine__remove_thread() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 2 +- tools/perf/util/thread.c | 6 +++--- tools/perf/util/thread.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 71c9720d4973..b9974fe41bc1 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1439,7 +1439,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, if (machine->last_match == th) machine->last_match = NULL; - BUG_ON(atomic_read(&th->refcnt) == 0); + BUG_ON(refcount_read(&th->refcnt) == 0); if (lock) pthread_rwlock_wrlock(&machine->threads_lock); rb_erase_init(&th->rb_node, &machine->threads); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index f5af87f66663..74e79d26b421 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -53,7 +53,7 @@ struct thread *thread__new(pid_t pid, pid_t tid) goto err_thread; list_add(&comm->list, &thread->comm_list); - atomic_set(&thread->refcnt, 1); + refcount_set(&thread->refcnt, 1); RB_CLEAR_NODE(&thread->rb_node); } @@ -88,13 +88,13 @@ void thread__delete(struct thread *thread) struct thread *thread__get(struct thread *thread) { if (thread) - atomic_inc(&thread->refcnt); + refcount_inc(&thread->refcnt); return thread; } void thread__put(struct thread *thread) { - if (thread && atomic_dec_and_test(&thread->refcnt)) { + if (thread && 
refcount_dec_and_test(&thread->refcnt)) { /* * Remove it from the dead_threads list, as last reference * is gone. diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 99263cb6e6b6..e57188546465 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -1,7 +1,7 @@ #ifndef __PERF_THREAD_H #define __PERF_THREAD_H -#include +#include #include #include #include @@ -23,7 +23,7 @@ struct thread { pid_t tid; pid_t ppid; int cpu; - atomic_t refcnt; + refcount_t refcnt; char shortname[3]; bool comm_set; int comm_len; -- cgit v1.2.3 From 364fed351369e0193244fa2c78df855724cdddb9 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Tue, 21 Feb 2017 17:35:03 +0200 Subject: perf thread_map: Convert thread_map.refcnt from atomic_t to refcount_t The refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: David Windsor Signed-off-by: Hans Liljestrand Signed-off-by: Kees Kook Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andrew Morton Cc: David Windsor Cc: Greg Kroah-Hartman Cc: Hans Liljestrand Cc: Jiri Olsa Cc: Kees Kook Cc: Mark Rutland Cc: Matija Glavinic Pecotic Cc: Peter Zijlstra Cc: alsa-devel@alsa-project.org Link: http://lkml.kernel.org/r/1487691303-31858-10-git-send-email-elena.reshetova@intel.com [ Did missing tests/thread-map.c conversion ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/thread-map.c | 6 +++--- tools/perf/util/thread_map.c | 20 ++++++++++---------- tools/perf/util/thread_map.h | 4 ++-- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index f2d2e542d0ee..a63d6945807b 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -29,7 +29,7 @@ int test__thread_map(int subtest __maybe_unused) 
thread_map__comm(map, 0) && !strcmp(thread_map__comm(map, 0), NAME)); TEST_ASSERT_VAL("wrong refcnt", - atomic_read(&map->refcnt) == 1); + refcount_read(&map->refcnt) == 1); thread_map__put(map); /* test dummy pid */ @@ -44,7 +44,7 @@ int test__thread_map(int subtest __maybe_unused) thread_map__comm(map, 0) && !strcmp(thread_map__comm(map, 0), "dummy")); TEST_ASSERT_VAL("wrong refcnt", - atomic_read(&map->refcnt) == 1); + refcount_read(&map->refcnt) == 1); thread_map__put(map); return 0; } @@ -71,7 +71,7 @@ static int process_event(struct perf_tool *tool __maybe_unused, thread_map__comm(threads, 0) && !strcmp(thread_map__comm(threads, 0), NAME)); TEST_ASSERT_VAL("wrong refcnt", - atomic_read(&threads->refcnt) == 1); + refcount_read(&threads->refcnt) == 1); thread_map__put(threads); return 0; } diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 7c3fcc538a70..9026408ea55b 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -66,7 +66,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid) for (i = 0; i < items; i++) thread_map__set_pid(threads, i, atoi(namelist[i]->d_name)); threads->nr = items; - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); } for (i=0; inr = 1; - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); } return threads; @@ -105,7 +105,7 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) goto out_free_threads; threads->nr = 0; - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); while ((dirent = readdir(proc)) != NULL) { char *end; @@ -235,7 +235,7 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) out: strlist__delete(slist); if (threads) - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); return threads; out_free_namelist: @@ -255,7 +255,7 @@ struct thread_map *thread_map__new_dummy(void) if (threads != NULL) { thread_map__set_pid(threads, 0, -1); threads->nr = 1; - 
atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); } return threads; } @@ -300,7 +300,7 @@ struct thread_map *thread_map__new_by_tid_str(const char *tid_str) } out: if (threads) - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); return threads; out_free_threads: @@ -326,7 +326,7 @@ static void thread_map__delete(struct thread_map *threads) if (threads) { int i; - WARN_ONCE(atomic_read(&threads->refcnt) != 0, + WARN_ONCE(refcount_read(&threads->refcnt) != 0, "thread map refcnt unbalanced\n"); for (i = 0; i < threads->nr; i++) free(thread_map__comm(threads, i)); @@ -337,13 +337,13 @@ static void thread_map__delete(struct thread_map *threads) struct thread_map *thread_map__get(struct thread_map *map) { if (map) - atomic_inc(&map->refcnt); + refcount_inc(&map->refcnt); return map; } void thread_map__put(struct thread_map *map) { - if (map && atomic_dec_and_test(&map->refcnt)) + if (map && refcount_dec_and_test(&map->refcnt)) thread_map__delete(map); } @@ -423,7 +423,7 @@ static void thread_map__copy_event(struct thread_map *threads, threads->map[i].comm = strndup(event->entries[i].comm, 16); } - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); } struct thread_map *thread_map__new_event(struct thread_map_event *event) diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index ea0ef08c6303..bd34d7a0b9fa 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -3,7 +3,7 @@ #include #include -#include +#include struct thread_map_data { pid_t pid; @@ -11,7 +11,7 @@ struct thread_map_data { }; struct thread_map { - atomic_t refcnt; + refcount_t refcnt; int nr; struct thread_map_data map[]; }; -- cgit v1.2.3 From 4738ca30b4a7a113084d7863846175094f95c62f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 23 Feb 2017 13:24:34 -0300 Subject: perf evlist: Clarify a bit the use of perf_mmap->refcnt This is an odd refcount use case, so add some more comments to 
help understand that when it hits zero it really means that the mmap()ed area (on a perf_event_open() returned fd) has been munmap()ed. Cc: Adrian Hunter Cc: David Ahern Cc: Elena Reshetova Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20170223162344.GD3595@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 564b924fb48a..50420cd35446 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -974,8 +974,19 @@ static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) if (!map) return NULL; - for (i = 0; i < evlist->nr_mmaps; i++) + for (i = 0; i < evlist->nr_mmaps; i++) { map[i].fd = -1; + /* + * When the perf_mmap() call is made we grab one refcount, plus + * one extra to let perf_evlist__mmap_consume() get the last + * events after all real references (perf_mmap__get()) are + * dropped. + * + * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and + * thus does perf_mmap__get() on it. + */ + refcount_set(&map[i].refcnt, 0); + } return map; } -- cgit v1.2.3 From 7768f8dada66d6052fccbc2ddc375f3e650455b9 Mon Sep 17 00:00:00 2001 From: Charles Baylis Date: Fri, 24 Feb 2017 13:32:56 +0000 Subject: perf tools: Allow sorting by symbol size Add new sort key 'symbol_size' to allow user to sort by symbol size, or (more usefully) display the symbol size using --fields=...,symbol_size. 
Committer note: Testing it together with the recently added -q, to remove the headers, and using the '+' sign with -s, to add the symbol_size sort order to the default, which is '-s/--sort comm,dso,symbol': # perf report -q -s +symbol_size | head -10 10.39% swapper [kernel.vmlinux] [k] intel_idle 270 3.45% swapper [kernel.vmlinux] [k] update_blocked_averages 1546 2.61% swapper [kernel.vmlinux] [k] update_load_avg 1292 2.36% swapper [kernel.vmlinux] [k] update_cfs_shares 240 1.83% swapper [kernel.vmlinux] [k] __hrtimer_run_queues 606 1.74% swapper [kernel.vmlinux] [k] update_cfs_rq_load_avg. 1187 1.66% swapper [kernel.vmlinux] [k] apic_timer_interrupt 152 1.60% CPU 0/KVM [kvm] [k] kvm_set_msr_common 3046 1.60% gnome-shell libglib-2.0.so.0 [.] g_slist_find 37 1.46% gnome-termina libglib-2.0.so.0 [.] g_hash_table_lookup 370 # Signed-off-by: Charles Baylis Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Maxim Kuvyrkov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1487943176-13840-1-git-send-email-charles.baylis@linaro.org [ Use symbol__size(), remove needless %lld + (long long) casting ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 1 + tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 41 ++++++++++++++++++++++++++++++++ tools/perf/util/sort.h | 1 + 4 files changed, 44 insertions(+) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index c04cc0647c16..33f91906f5dc 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -80,6 +80,7 @@ OPTIONS - pid: command and tid of the task - dso: name of library or module executed at the time of sample - symbol: name of function executed at the time of sample + - symbol_size: size of function executed at the time of sample - parent: name of function matched to the parent regex filter. Unmatched entries are displayed as "[other]". 
- cpu: cpu number the task ran at the time of sample diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 28c216e3d5b7..2e839bf40bdd 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -57,6 +57,7 @@ enum hist_column { HISTC_SRCLINE_FROM, HISTC_SRCLINE_TO, HISTC_TRACE, + HISTC_SYM_SIZE, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 0ff622288d24..f8f16c0e20b6 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1396,6 +1396,46 @@ struct sort_entry sort_transaction = { .se_width_idx = HISTC_TRANSACTION, }; +/* --sort symbol_size */ + +static int64_t _sort__sym_size_cmp(struct symbol *sym_l, struct symbol *sym_r) +{ + int64_t size_l = sym_l != NULL ? symbol__size(sym_l) : 0; + int64_t size_r = sym_r != NULL ? symbol__size(sym_r) : 0; + + return size_l < size_r ? -1 : + size_l == size_r ? 0 : 1; +} + +static int64_t +sort__sym_size_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return _sort__sym_size_cmp(right->ms.sym, left->ms.sym); +} + +static int _hist_entry__sym_size_snprintf(struct symbol *sym, char *bf, + size_t bf_size, unsigned int width) +{ + if (sym) + return repsep_snprintf(bf, bf_size, "%*d", width, symbol__size(sym)); + + return repsep_snprintf(bf, bf_size, "%*s", width, "unknown"); +} + +static int hist_entry__sym_size_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return _hist_entry__sym_size_snprintf(he->ms.sym, bf, size, width); +} + +struct sort_entry sort_sym_size = { + .se_header = "Symbol size", + .se_cmp = sort__sym_size_cmp, + .se_snprintf = hist_entry__sym_size_snprintf, + .se_width_idx = HISTC_SYM_SIZE, +}; + + struct sort_dimension { const char *name; struct sort_entry *entry; @@ -1418,6 +1458,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight), DIM(SORT_TRANSACTION, "transaction", sort_transaction), DIM(SORT_TRACE, "trace", 
sort_trace), + DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 796c847e2f00..f583325a3743 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -211,6 +211,7 @@ enum sort_type { SORT_GLOBAL_WEIGHT, SORT_TRANSACTION, SORT_TRACE, + SORT_SYM_SIZE, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, -- cgit v1.2.3 From a9af6be5bc25214f7870fef2b6d3490fe8b87bf7 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 24 Feb 2017 10:12:48 +0900 Subject: perf ftrace: Add support for --pid option The -p (--pid) option enables tracing an existing process by its pid. Committer notes: Testing it: Using the function_graph tracer on a process that is just waiting for user input and thus will make 'perf ftrace' sit there waiting for that, then press any key on that mutt session and see what happens: # perf ftrace -t function_graph -p `pidof mutt` | head -40 2) 1.038 us | switch_mm_irqs_off(); ------------------------------------------ 2) <idle>-0 => mutt-3595 ------------------------------------------ 2) | finish_task_switch() { 2) | smp_irq_work_interrupt() { 2) | irq_enter() { 2) 0.180 us | rcu_irq_enter(); 2) 1.248 us | } 2) | __wake_up() { 2) 0.126 us | _raw_spin_lock_irqsave(); 2) | __wake_up_common() { 2) | pollwake() { 2) | default_wake_function() { 2) | try_to_wake_up() { 2) 0.662 us | _raw_spin_lock_irqsave(); 2) | select_task_rq_fair() { 2) 1.719 us | effective_load.isra.41(); 2) 1.343 us | effective_load.isra.41(); 2) | select_idle_sibling() { 2) 0.331 us | idle_cpu(); 2) 1.458 us | } 2) 8.350 us | } 2) 0.200 us | _raw_spin_lock(); 2) | ttwu_do_activate() { 2) | activate_task() { 2) 0.136 us | update_rq_clock.part.77(); 2) | enqueue_task_fair() { 2) | enqueue_entity() { 2) 0.146 us | update_curr(); 2) 0.330 us | account_entity_enqueue(); 2) 0.280 us | update_cfs_shares(); 2) 0.321 us | place_entity(); 2) 0.206 us | __enqueue_entity(); 2) 6.926 us | } 2) | enqueue_entity() { 2)
0.105 us | update_curr(); 2) 0.175 us | account_entity_enqueue(); 2) 0.531 us | update_cfs_shares(); # Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20170224011251.14946-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 4 ++ tools/perf/builtin-ftrace.c | 91 ++++++++++++++++++++++---------- 2 files changed, 68 insertions(+), 27 deletions(-) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 2d96de6132a9..2d39397f3f30 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -30,6 +30,10 @@ OPTIONS --verbose=:: Verbosity level. +-p:: +--pid=:: + Trace on existing process id (comma separated list). + SEE ALSO -------- diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index c3e643666c72..85eee9c444ae 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -11,6 +11,7 @@ #include #include +#include #include "debug.h" #include @@ -50,11 +51,12 @@ static void ftrace__workload_exec_failed_signal(int signo __maybe_unused, done = true; } -static int write_tracing_file(const char *name, const char *val) +static int __write_tracing_file(const char *name, const char *val, bool append) { char *file; int fd, ret = -1; ssize_t size = strlen(val); + int flags = O_WRONLY; file = get_tracing_file(name); if (!file) { @@ -62,7 +64,12 @@ static int write_tracing_file(const char *name, const char *val) return -1; } - fd = open(file, O_WRONLY); + if (append) + flags |= O_APPEND; + else + flags |= O_TRUNC; + + fd = open(file, flags); if (fd < 0) { pr_debug("cannot open tracing file: %s\n", name); goto out; @@ -79,6 +86,16 @@ out: return ret; } +static int write_tracing_file(const char *name, const char *val) +{ + return __write_tracing_file(name, 
val, false); +} + +static int append_tracing_file(const char *name, const char *val) +{ + return __write_tracing_file(name, val, true); +} + static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) { if (write_tracing_file("tracing_on", "0") < 0) @@ -93,11 +110,27 @@ static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) return 0; } +static int set_tracing_pid(struct perf_ftrace *ftrace) +{ + int i; + char buf[16]; + + if (target__has_cpu(&ftrace->target)) + return 0; + + for (i = 0; i < thread_map__nr(ftrace->evlist->threads); i++) { + scnprintf(buf, sizeof(buf), "%d", + ftrace->evlist->threads->map[i]); + if (append_tracing_file("set_ftrace_pid", buf) < 0) + return -1; + } + return 0; +} + static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) { char *trace_file; int trace_fd; - char *trace_pid; char buf[4096]; struct pollfd pollfd = { .events = POLLIN, @@ -108,42 +141,37 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) return -1; } - if (argc < 1) - return -1; - signal(SIGINT, sig_handler); signal(SIGUSR1, sig_handler); signal(SIGCHLD, sig_handler); - reset_tracing_files(ftrace); + if (reset_tracing_files(ftrace) < 0) + goto out; /* reset ftrace buffer */ if (write_tracing_file("trace", "0") < 0) goto out; - if (perf_evlist__prepare_workload(ftrace->evlist, &ftrace->target, - argv, false, ftrace__workload_exec_failed_signal) < 0) - goto out; - - if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { - pr_err("failed to set current_tracer to %s\n", ftrace->tracer); + if (argc && perf_evlist__prepare_workload(ftrace->evlist, + &ftrace->target, argv, false, + ftrace__workload_exec_failed_signal) < 0) { goto out; } - if (asprintf(&trace_pid, "%d", thread_map__pid(ftrace->evlist->threads, 0)) < 0) { - pr_err("failed to allocate pid string\n"); - goto out; + if (set_tracing_pid(ftrace) < 0) { + pr_err("failed to set ftrace pid\n"); + goto out_reset; } - if 
(write_tracing_file("set_ftrace_pid", trace_pid) < 0) { - pr_err("failed to set pid: %s\n", trace_pid); - goto out_free_pid; + if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { + pr_err("failed to set current_tracer to %s\n", ftrace->tracer); + goto out_reset; } trace_file = get_tracing_file("trace_pipe"); if (!trace_file) { pr_err("failed to open trace_pipe\n"); - goto out_free_pid; + goto out_reset; } trace_fd = open(trace_file, O_RDONLY); @@ -152,7 +180,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) if (trace_fd < 0) { pr_err("failed to open trace_pipe\n"); - goto out_free_pid; + goto out_reset; } fcntl(trace_fd, F_SETFL, O_NONBLOCK); @@ -191,11 +219,9 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) out_close_fd: close(trace_fd); -out_free_pid: - free(trace_pid); -out: +out_reset: reset_tracing_files(ftrace); - +out: return done ? 0 : -1; } @@ -227,13 +253,15 @@ int cmd_ftrace(int argc, const char **argv, const char *prefix __maybe_unused) .target = { .uid = UINT_MAX, }, }; const char * const ftrace_usage[] = { - "perf ftrace [] ", + "perf ftrace [] []", "perf ftrace [] -- []", NULL }; const struct option ftrace_options[] = { OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", "tracer to use: function_graph(default) or function"), + OPT_STRING('p', "pid", &ftrace.target.pid, "pid", + "trace on existing process id"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_END() @@ -245,9 +273,18 @@ int cmd_ftrace(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options(argc, argv, ftrace_options, ftrace_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc) + if (!argc && target__none(&ftrace.target)) usage_with_options(ftrace_usage, ftrace_options); + ret = target__validate(&ftrace.target); + if (ret) { + char errbuf[512]; + + target__strerror(&ftrace.target, ret, errbuf, 512); + pr_err("%s\n", errbuf); + return -EINVAL; + } + ftrace.evlist = 
perf_evlist__new(); if (ftrace.evlist == NULL) return -ENOMEM; -- cgit v1.2.3 From 4400ac8a9a900318f8516dc0fb94075cb3fdb50d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 24 Feb 2017 10:12:49 +0900 Subject: perf cpumap: Introduce cpu_map__snprint_mask() The cpu_map__snprint_mask() generates a string representation of a cpumask bitmap. For cpu 0 to 11, it'll return "fff". Committer notes: Fix compiler warning on some toolchains: 19 fedora:24-x-ARC-uClibc: FAIL CC /tmp/build/perf/util/cpumap.o util/cpumap.c: In function 'hex_char': util/cpumap.c:679:2: error: comparison is always true due to limited range of data type [-Werror=type-limits] if (0 <= val && val <= 9) ^ cc1: all warnings being treated as errors Applying patch from Namhyung that makes function receive an 'unsigned char', that is what the callers are passing to this function. Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20170224011251.14946-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/cpumap.h | 1 + 2 files changed, 47 insertions(+) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 39ad2caccf56..061018b42393 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -673,3 +673,49 @@ size_t cpu_map__snprint(struct cpu_map *map, char *buf, size_t size) pr_debug("cpumask list: %s\n", buf); return ret; } + +static char hex_char(unsigned char val) +{ + if (val < 10) + return val + '0'; + if (val < 16) + return val - 10 + 'a'; + return '?'; +} + +size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size) +{ + int i, cpu; + char *ptr = buf; + unsigned char *bitmap; + int last_cpu = cpu_map__cpu(map, map->nr - 1); + + bitmap = zalloc((last_cpu + 7) / 8); + if (bitmap == NULL) { + buf[0] = '\0'; + return 0; + } + + for (i = 0; i < 
map->nr; i++) { + cpu = cpu_map__cpu(map, i); + bitmap[cpu / 8] |= 1 << (cpu % 8); + } + + for (cpu = last_cpu / 4 * 4; cpu >= 0; cpu -= 4) { + unsigned char bits = bitmap[cpu / 8]; + + if (cpu % 8) + bits >>= 4; + else + bits &= 0xf; + + *ptr++ = hex_char(bits); + if ((cpu % 32) == 0 && cpu > 0) + *ptr++ = ','; + } + *ptr = '\0'; + free(bitmap); + + buf[size - 1] = '\0'; + return ptr - buf; +} diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index e84491636c1b..6b8bff87481d 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -20,6 +20,7 @@ struct cpu_map *cpu_map__dummy_new(void); struct cpu_map *cpu_map__new_data(struct cpu_map_data *data); struct cpu_map *cpu_map__read(FILE *file); size_t cpu_map__snprint(struct cpu_map *map, char *buf, size_t size); +size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); int cpu_map__get_socket_id(int cpu); int cpu_map__get_socket(struct cpu_map *map, int idx, void *data); -- cgit v1.2.3 From dc23103278c5ad53c177a25e209ef687e6d5d293 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 24 Feb 2017 10:12:50 +0900 Subject: perf ftrace: Add support for -a and -C option The -a/--all-cpus and -C/--cpu option is for controlling tracing cpus. 
Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20170224011251.14946-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 14 ++++++++ tools/perf/builtin-ftrace.c | 60 ++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 2d39397f3f30..6e6a8b22c859 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -34,6 +34,20 @@ OPTIONS --pid=:: Trace on existing process id (comma separated list). +-a:: +--all-cpus:: + Force system-wide collection. Scripts run without a <command> + normally use -a by default, while scripts run with a <command> + normally don't - this option allows the latter to be run in + system-wide mode. + +-C:: +--cpu=:: + Only trace for the list of CPUs provided. Multiple CPUs can + be provided as a comma separated list with no space like: 0,1. + Ranges of CPUs are specified with -: 0-2. + Default is to trace on all online CPUs.
+ SEE ALSO -------- diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 85eee9c444ae..d5b566ed7178 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -17,6 +17,7 @@ #include #include "evlist.h" #include "target.h" +#include "cpumap.h" #include "thread_map.h" #include "util/config.h" @@ -96,6 +97,8 @@ static int append_tracing_file(const char *name, const char *val) return __write_tracing_file(name, val, true); } +static int reset_tracing_cpu(void); + static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) { if (write_tracing_file("tracing_on", "0") < 0) @@ -107,6 +110,9 @@ static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) if (write_tracing_file("set_ftrace_pid", " ") < 0) return -1; + if (reset_tracing_cpu() < 0) + return -1; + return 0; } @@ -127,6 +133,51 @@ static int set_tracing_pid(struct perf_ftrace *ftrace) return 0; } +static int set_tracing_cpumask(struct cpu_map *cpumap) +{ + char *cpumask; + size_t mask_size; + int ret; + int last_cpu; + + last_cpu = cpu_map__cpu(cpumap, cpumap->nr - 1); + mask_size = (last_cpu + 3) / 4 + 1; + mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */ + + cpumask = malloc(mask_size); + if (cpumask == NULL) { + pr_debug("failed to allocate cpu mask\n"); + return -1; + } + + cpu_map__snprint_mask(cpumap, cpumask, mask_size); + + ret = write_tracing_file("tracing_cpumask", cpumask); + + free(cpumask); + return ret; +} + +static int set_tracing_cpu(struct perf_ftrace *ftrace) +{ + struct cpu_map *cpumap = ftrace->evlist->cpus; + + if (!target__has_cpu(&ftrace->target)) + return 0; + + return set_tracing_cpumask(cpumap); +} + +static int reset_tracing_cpu(void) +{ + struct cpu_map *cpumap = cpu_map__new(NULL); + int ret; + + ret = set_tracing_cpumask(cpumap); + cpu_map__put(cpumap); + return ret; +} + static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) { char *trace_file; @@ -163,6 +214,11 @@ 
static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_reset; } + if (set_tracing_cpu(ftrace) < 0) { + pr_err("failed to set tracing cpumask\n"); + goto out_reset; + } + if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { pr_err("failed to set current_tracer to %s\n", ftrace->tracer); goto out_reset; @@ -264,6 +320,10 @@ int cmd_ftrace(int argc, const char **argv, const char *prefix __maybe_unused) "trace on existing process id"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), + OPT_BOOLEAN('a', "all-cpus", &ftrace.target.system_wide, + "system-wide collection from all CPUs"), + OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu", + "list of cpus to monitor"), OPT_END() }; -- cgit v1.2.3 From 583359646fde8526ea9456618cc24dc359b34094 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 24 Feb 2017 10:12:51 +0900 Subject: perf ftrace: Use pager for displaying result It's convenient to use the pager when seeing many lines of result. Note that setup_pager() should be called after perf_evlist__prepare_workload() since they can interfere with each other regarding shared stdio streams.
Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20170224011251.14946-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index d5b566ed7178..6087295f8827 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -195,6 +195,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) signal(SIGINT, sig_handler); signal(SIGUSR1, sig_handler); signal(SIGCHLD, sig_handler); + signal(SIGPIPE, sig_handler); if (reset_tracing_files(ftrace) < 0) goto out; @@ -247,6 +248,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_close_fd; } + setup_pager(); + perf_evlist__start_workload(ftrace->evlist); while (!done) { -- cgit v1.2.3 From 90ec5e89e393c76e19afc845d8f88a5dc8315919 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 22 Feb 2017 19:23:37 +0530 Subject: kretprobes: Ensure probe location is at function entry kretprobes can be registered by specifying an absolute address or by specifying offset to a symbol. However, we need to ensure this falls at function entry so as to be able to determine the return address. Validate the same during kretprobe registration. By default, there should not be any offset from a function entry, as determined through a kallsyms_lookup(). Introduce arch_function_offset_within_entry() as a way for architectures to override this. Signed-off-by: Naveen N. 
Rao Acked-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Cc: Steven Rostedt Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/f1583bc4839a3862cfc2acefcc56f9c8837fa2ba.1487770934.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/kprobes.h | 1 + kernel/kprobes.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index c328e4f7dcad..177bdf6c6aeb 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -267,6 +267,7 @@ extern int arch_init_kprobes(void); extern void show_registers(struct pt_regs *regs); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); +extern bool arch_function_offset_within_entry(unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 699c5bc51a92..448759d4a263 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1875,12 +1875,25 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) } NOKPROBE_SYMBOL(pre_handler_kretprobe); +bool __weak arch_function_offset_within_entry(unsigned long offset) +{ + return !offset; +} + int register_kretprobe(struct kretprobe *rp) { int ret = 0; struct kretprobe_instance *inst; int i; void *addr; + unsigned long offset; + + addr = kprobe_addr(&rp->kp); + if (!kallsyms_lookup_size_offset((unsigned long)addr, NULL, &offset)) + return -EINVAL; + + if (!arch_function_offset_within_entry(offset)) + return -EINVAL; if (kretprobe_blacklist_size) { addr = kprobe_addr(&rp->kp); -- cgit v1.2.3 From 35b6f55aa9ba65141f2def0997e23aab13715d3f Mon Sep 17 00:00:00 2001 From: "Naveen N. 
Rao" Date: Wed, 22 Feb 2017 19:23:39 +0530 Subject: trace/kprobes: Allow return probes with offsets and absolute addresses Since the kernel includes many non-global functions with same names, we will need to use offsets from other symbols (typically _text/_stext) or absolute addresses to place return probes on specific functions. Also, the core register_kretprobe() API never forbid use of offsets or absolute addresses with kretprobes. Allow its use with the trace infrastructure. To distinguish kernels that support this, update ftrace README to explicitly call this out. Signed-off-by: Naveen N. Rao Acked-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Cc: Steven Rostedt Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/183e7ce2921a08c9c755ee9a5da3134febc6695b.1487770934.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- kernel/trace/trace.c | 1 + kernel/trace/trace_kprobe.c | 8 -------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f35109514a01..0ed834d6beb0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4355,6 +4355,7 @@ static const char readme_msg[] = "\t -:[/]\n" #ifdef CONFIG_KPROBE_EVENTS "\t place: [:][+]|\n" + "place (kretprobe): [:][+]|\n" #endif #ifdef CONFIG_UPROBE_EVENTS "\t place: :\n" diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index eadd96ef772f..18775ef182f8 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -680,10 +680,6 @@ static int create_trace_kprobe(int argc, char **argv) return -EINVAL; } if (isdigit(argv[1][0])) { - if (is_return) { - pr_info("Return probe point must be a symbol.\n"); - return -EINVAL; - } /* an address specified */ ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr); if (ret) { @@ -699,10 +695,6 @@ static int create_trace_kprobe(int argc, char **argv) pr_info("Failed to parse symbol.\n"); return ret; } - if (offset && 
is_return) { - pr_info("Return probe must be used without offset.\n"); - return -EINVAL; - } } argc -= 2; argv += 2; -- cgit v1.2.3 From e491bc2f0dd9f1b4a23ba6f3da88f6b695c4a4c9 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 23 Feb 2017 17:07:23 +0530 Subject: perf probe: Generalize probe event file open routine Generalize probe event file open routine into a generic function for opening trace files. Signed-off-by: Naveen N. Rao Acked-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Cc: Steven Rostedt Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/b580465c7a4dcd5d3b40fdf8568e6be45d0a6333.1487849577.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-file.c | 20 +++++++++++--------- tools/perf/util/probe-file.h | 1 + 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 436b64731f65..1a62daceb028 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -70,7 +70,7 @@ static void print_both_open_warning(int kerr, int uerr) } } -static int open_probe_events(const char *trace_file, bool readwrite) +int open_trace_file(const char *trace_file, bool readwrite) { char buf[PATH_MAX]; int ret; @@ -92,12 +92,12 @@ static int open_probe_events(const char *trace_file, bool readwrite) static int open_kprobe_events(bool readwrite) { - return open_probe_events("kprobe_events", readwrite); + return open_trace_file("kprobe_events", readwrite); } static int open_uprobe_events(bool readwrite) { - return open_probe_events("uprobe_events", readwrite); + return open_trace_file("uprobe_events", readwrite); } int probe_file__open(int flag) @@ -899,6 +899,7 @@ bool probe_type_is_available(enum probe_type type) size_t len = 0; bool target_line = false; bool ret = probe_type_table[type].avail; + int fd; if (type >= PROBE_TYPE_END) return false; @@ -906,14 +907,16 @@ bool 
probe_type_is_available(enum probe_type type) if (ret || probe_type_table[type].checked) return ret; - if (asprintf(&buf, "%s/README", tracing_path) < 0) + fd = open_trace_file("README", false); + if (fd < 0) return ret; - fp = fopen(buf, "r"); - if (!fp) - goto end; + fp = fdopen(fd, "r"); + if (!fp) { + close(fd); + return ret; + } - zfree(&buf); while (getline(&buf, &len, fp) > 0 && !ret) { if (!target_line) { target_line = !!strstr(buf, " type: "); @@ -928,7 +931,6 @@ bool probe_type_is_available(enum probe_type type) probe_type_table[type].avail = ret; fclose(fp); -end: free(buf); return ret; diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index eba44c3e9dca..a17a82eff8a0 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -35,6 +35,7 @@ enum probe_type { /* probe-file.c depends on libelf */ #ifdef HAVE_LIBELF_SUPPORT +int open_trace_file(const char *trace_file, bool readwrite); int probe_file__open(int flag); int probe_file__open_both(int *kfd, int *ufd, int flag); struct strlist *probe_file__get_namelist(int fd); -- cgit v1.2.3 From f1c4d1ad39b5f7c617572f93658bb7159ec9c686 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 28 Feb 2017 12:02:36 +0200 Subject: perf intel-PT/BTS: Add missing initialization $ perf test decoder 57: x86 instruction decoder - new instructions : FAILED! $ Failed to decode 'rel' value (0xfffffffc vs expected 0): 0f 1b 80 78 56 34 12 bndstx %bnd0,0x12345678(%rax) Failed to decode 'rel' value (0xfffffffc vs expected 0): 0f 1b 85 78 56 34 12 bndstx %bnd0,0x12345678(%rbp) Failed to decode 'rel' value (0xfffffffc vs expected 0): 0f 1b 84 01 78 56 34 12 bndstx %bnd0,0x12345678(%rcx,%rax,1) Failed to decode 'rel' value (0xfffffffc vs expected 0): 0f 1b 84 05 78 56 34 12 bndstx %bnd0,0x12345678(%rbp,%rax,1) Failed to decode 'rel' value (0xfffffffc vs expected 0): 0f 1b 84 08 78 56 34 12 bndstx %bnd0,0x12345678(%rax,%rcx,1) There is missing initialization. 
It only affects the test because it is checking 'rel' even in cases where there is no value. Fix it. Reported-and-Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/08c6ad07-7994-3e56-b20e-d75727ca7765@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 7913363bde5c..55b6250350d7 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -39,6 +39,8 @@ static void intel_pt_insn_decoder(struct insn *insn, enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH; int ext; + intel_pt_insn->rel = 0; + if (insn_is_avx(insn)) { intel_pt_insn->op = INTEL_PT_OP_OTHER; intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH; -- cgit v1.2.3 From d0e02579c282ccf34c79818045ec2d2934b56c19 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 27 Feb 2017 11:52:04 -0500 Subject: trace/kprobes: Add back warning about offset in return probes Let's not remove the warning about offsets and return probes when the offset is invalid. Signed-off-by: Steven Rostedt Acked-by: Masami Hiramatsu Acked-by: Naveen N. 
Rao Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20170227115204.00f92846@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo --- kernel/trace/trace_kprobe.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 18775ef182f8..2b7d0dd938ba 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -695,6 +695,11 @@ static int create_trace_kprobe(int argc, char **argv) pr_info("Failed to parse symbol.\n"); return ret; } + if (offset && is_return && + !arch_function_offset_within_entry(offset)) { + pr_info("Given offset is not valid for return probe.\n"); + return -EINVAL; + } } argc -= 2; argv += 2; -- cgit v1.2.3 From e3ba76deef23064fc272424b86b506cd80b04fc5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 27 Feb 2017 10:48:18 +0100 Subject: perf tools: Force uncore events to system wide monitoring Make system wide (-a) the default option if no target was specified and one of following conditions is met: - there's no workload specified (current behaviour) - there is workload specified but all requested events are system wide ones Mixed events core/uncore with workload: $ perf stat -e 'uncore_cbox_0/clockticks/,cycles' sleep 1 Performance counter stats for 'sleep 1': uncore_cbox_0/clockticks/ 980,489 cycles 1.000897406 seconds time elapsed Uncore event with workload: $ perf stat -e 'uncore_cbox_0/clockticks/' sleep 1 Performance counter stats for 'system wide': 281,473,897,192,670 uncore_cbox_0/clockticks/ 1.000833784 seconds time elapsed Committer note: When testing I realized the default case for !root, i.e. 
no events passed via -e, was broke by v2 of this patch, reported and after a patch provided by Jiri it is back working: [acme@jouet linux]$ perf stat usleep 1 Performance counter stats for 'usleep 1': 0.401335 task-clock:u (msec) # 0.297 CPUs utilized 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 48 page-faults:u # 0.120 M/sec 458,146 cycles:u # 1.142 GHz 245,113 instructions:u # 0.54 insn per cycle 47,991 branches:u # 119.578 M/sec 4,022 branch-misses:u # 8.38% of all branches 0.001350029 seconds time elapsed [acme@jouet linux]$ Suggested-and-Tested-by: Borislav Petkov Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170227094818.GA12764@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 33 ++++++++++++++++++++++++++++++--- tools/perf/util/parse-events.c | 5 +++-- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f4f555a67e9b..f53f449d864d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2350,6 +2350,35 @@ static int __cmd_report(int argc, const char **argv) return 0; } +static void setup_system_wide(int forks) +{ + /* + * Make system wide (-a) the default target if + * no target was specified and one of following + * conditions is met: + * + * - there's no workload specified + * - there is workload specified but all requested + * events are system wide events + */ + if (!target__none(&target)) + return; + + if (!forks) + target.system_wide = true; + else { + struct perf_evsel *counter; + + evlist__for_each_entry(evsel_list, counter) { + if (!counter->system_wide) + return; + } + + if (evsel_list->nr_entries) + target.system_wide = true; + } +} + int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) { const char * const stat_usage[] = { @@ -2456,9 +2485,7 @@ int 
cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) } else if (big_num_opt == 0) /* User passed --no-big-num */ big_num = false; - /* Make system wide (-a) the default target. */ - if (!argc && target__none(&target)) - target.system_wide = true; + setup_system_wide(argc); if (run_count < 0) { pr_err("Run count must be a positive number\n"); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 67a8aebc67ab..54355d3caf09 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -316,8 +316,9 @@ __add_event(struct list_head *list, int *idx, return NULL; (*idx)++; - evsel->cpus = cpu_map__get(cpus); - evsel->own_cpus = cpu_map__get(cpus); + evsel->cpus = cpu_map__get(cpus); + evsel->own_cpus = cpu_map__get(cpus); + evsel->system_wide = !!cpus; if (name) evsel->name = strdup(name); -- cgit v1.2.3 From 120010cb1eea151d38a3e66f5ffc79a0c3110292 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 2 Mar 2017 12:55:49 -0300 Subject: tools build: Add test for sched_getcpu() Instead of trying to go on adding more ifdef conditions, do a feature test and define HAVE_SCHED_GETCPU_SUPPORT instead, then use it to provide the prototype. No need to change the stub, as it is already a __weak symbol. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-yge89er9g90sc0v6k0a0r5tr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 1 + tools/build/feature/Makefile | 6 +++++- tools/build/feature/test-all.c | 5 +++++ tools/build/feature/test-sched_getcpu.c | 7 +++++++ tools/perf/Makefile.config | 4 ++++ tools/perf/util/cloexec.h | 6 ------ tools/perf/util/util.h | 4 ++-- 7 files changed, 24 insertions(+), 9 deletions(-) create mode 100644 tools/build/feature/test-sched_getcpu.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index e3fb5ecbdcb6..523911f316ce 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -63,6 +63,7 @@ FEATURE_TESTS_BASIC := \ lzma \ get_cpuid \ bpf \ + sched_getcpu \ sdt # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index b564a2eea039..ab1e2bbc2e96 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -48,7 +48,8 @@ FILES= \ test-get_cpuid.bin \ test-sdt.bin \ test-cxx.bin \ - test-jvmti.bin + test-jvmti.bin \ + test-sched_getcpu.bin FILES := $(addprefix $(OUTPUT),$(FILES)) @@ -91,6 +92,9 @@ $(OUTPUT)test-libelf.bin: $(OUTPUT)test-glibc.bin: $(BUILD) +$(OUTPUT)test-sched_getcpu.bin: + $(BUILD) + DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 699e43627397..cc6c7c01f4ca 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -117,6 +117,10 @@ # include "test-pthread-attr-setaffinity-np.c" #undef main +#define main main_test_sched_getcpu +# include "test-sched_getcpu.c" +#undef main + # if 0 /* * Disable libbabeltrace check for test-all, because the requested @@ -182,6 +186,7 @@ int main(int argc, char *argv[]) 
main_test_get_cpuid(); main_test_bpf(); main_test_libcrypto(); + main_test_sched_getcpu(); main_test_sdt(); return 0; diff --git a/tools/build/feature/test-sched_getcpu.c b/tools/build/feature/test-sched_getcpu.c new file mode 100644 index 000000000000..c4a148dd7104 --- /dev/null +++ b/tools/build/feature/test-sched_getcpu.c @@ -0,0 +1,7 @@ +#define _GNU_SOURCE +#include + +int main(void) +{ + return sched_getcpu(); +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 27c9fbca7bd9..2b656de99495 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -317,6 +317,10 @@ ifdef NO_DWARF NO_LIBDW_DWARF_UNWIND := 1 endif +ifeq ($(feature-sched_getcpu), 1) + CFLAGS += -DHAVE_SCHED_GETCPU_SUPPORT +endif + ifndef NO_LIBELF CFLAGS += -DHAVE_LIBELF_SUPPORT EXTLIBS += -lelf diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h index d0d465953d36..94a5a7d829d5 100644 --- a/tools/perf/util/cloexec.h +++ b/tools/perf/util/cloexec.h @@ -3,10 +3,4 @@ unsigned long perf_event_open_cloexec_flag(void); -#ifdef __GLIBC_PREREQ -#if !__GLIBC_PREREQ(2, 6) && !defined(__UCLIBC__) -int sched_getcpu(void) __THROW; -#endif -#endif - #endif /* __PERF_CLOEXEC_H */ diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index c74708da8571..b2cfa47990dc 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -355,8 +355,8 @@ void print_binary(unsigned char *data, size_t len, size_t bytes_per_line, print_binary_t printer, void *extra); -#if !defined(__GLIBC__) && !defined(__ANDROID__) -extern int sched_getcpu(void); +#ifndef HAVE_SCHED_GETCPU_SUPPORT +int sched_getcpu(void); #endif int is_printable_array(char *p, unsigned int len); -- cgit v1.2.3 From b8d1fd7ec661f5ccb9facd57589a563f627df230 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 2 Mar 2017 15:31:43 -0300 Subject: perf bench futex: Use __maybe_unused Instead of attributing a variable to itself to silence the compiler, use the attribute designed 
for that, avoiding this: In file included from bench/futex-hash.c:24: bench/futex.h:95:7: error: explicitly assigning value of variable of type 'pthread_attr_t *' to itself [-Werror,-Wself-assign] attr = attr; ~~~~ ^ ~~~~ bench/futex.h:96:13: error: explicitly assigning value of variable of type 'size_t' (aka 'unsigned long') to itself [-Werror,-Wself-assign] cpusetsize = cpusetsize; ~~~~~~~~~~ ^ ~~~~~~~~~~ bench/futex.h:97:9: error: explicitly assigning value of variable of type 'cpu_set_t *' (aka 'struct cpu_set_t *') to itself [-Werror,-Wself-assign] cpuset = cpuset; ~~~~~~ ^ ~~~~~~ That is only triggered when HAVE_PTHREAD_ATTR_SETAFFINITY_NP isn't set. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-14ws1d1elj2d5ej8g7cwdqau@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index b2e06d1190d0..e44fd3239530 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -88,13 +88,11 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak #ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP #include -static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr, - size_t cpusetsize, - cpu_set_t *cpuset) +#include +static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr __maybe_unused, + size_t cpusetsize __maybe_unused, + cpu_set_t *cpuset __maybe_unused) { - attr = attr; - cpusetsize = cpusetsize; - cpuset = cpuset; return 0; } #endif -- cgit v1.2.3 From a0f213e14bb5cf4f190809b5811e1292bd614899 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 2 Mar 2017 15:44:19 -0300 Subject: perf bench futex: Fix build on musl + clang When building with clang on a musl libc system, Alpine Linux, we end up hitting a problem where memset() is used but its prototype is not present, add it to avoid 
this: bench/futex-wake.c:99:3: error: implicitly declaring library function 'memset' with type 'void *(void *, int, unsigned long)' [-Werror,-Wimplicit-function-declaration] CPU_ZERO(&cpu); ^ /usr/include/sched.h:127:23: note: expanded from macro 'CPU_ZERO' #define CPU_ZERO(set) CPU_ZERO_S(sizeof(cpu_set_t),set) ^ /usr/include/sched.h:110:30: note: expanded from macro 'CPU_ZERO_S' #define CPU_ZERO_S(size,set) memset(set,0,size) ^ bench/futex-wake.c:99:3: note: include the header or explicitly provide a declaration for 'memset' Found while updating my test build containers to build perf with clang in more systems. Cc: Adrian Hunter Cc: David Ahern Cc: Davidlohr Bueso Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-jh10vaz2r98zl6gm5iau8prr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-hash.c | 1 + tools/perf/bench/futex-lock-pi.c | 1 + tools/perf/bench/futex-requeue.c | 1 + tools/perf/bench/futex-wake-parallel.c | 1 + tools/perf/bench/futex-wake.c | 1 + 5 files changed, 5 insertions(+) diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index da04b8c5568a..2499e1b0c6fb 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -9,6 +9,7 @@ */ /* For the CLR_() macros */ +#include #include #include diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 91877777ec6e..a20814d94af1 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -3,6 +3,7 @@ */ /* For the CLR_() macros */ +#include #include #include diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 2b9705a8734c..9fad1e4fcd3e 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -9,6 +9,7 @@ */ /* For the CLR_() macros */ +#include #include #include diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 
2c8fa67ad537..40f5fcf1d120 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -8,6 +8,7 @@ */ /* For the CLR_() macros */ +#include #include #include diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index e246b1b8388a..789490281ae3 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -9,6 +9,7 @@ */ /* For the CLR_() macros */ +#include #include #include -- cgit v1.2.3 From c8c188679ccfc86d9c7bac57ecf4b8205a061a06 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 2 Mar 2017 16:00:26 -0300 Subject: tools build: Use the same CC for feature detection and actual build When building with: 'make CC=clang' we were not using that CC to do feature detection, which resulted in features being detected with gcc and then the actual tools being built with clang. Most of the time these compilers are compatible enough, so no problem was being noticed. As soon as a system with an old enough clang, one that lacks the cpuid.h header, is used together with a gcc that has it, the "get_cpuid" feature will be found available but then code that uses it can't be compiled.
Noticed with this combination: / $ gcc --version | head -1 gcc (Alpine 6.3.0) 6.3.0 / $ clang --version | head -1 clang version 3.8.1 (tags/RELEASE_381/final) / $ cat /etc/alpine-release 3.5.0 / $ Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-60q18nvlvgpyfv7e2qqgx4ou@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index ab1e2bbc2e96..09c9626ea666 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -53,8 +53,8 @@ FILES= \ FILES := $(addprefix $(OUTPUT),$(FILES)) -CC := $(CROSS_COMPILE)gcc -MD -CXX := $(CROSS_COMPILE)g++ -MD +CC ?= $(CROSS_COMPILE)gcc -MD +CXX ?= $(CROSS_COMPILE)g++ -MD PKG_CONFIG := $(CROSS_COMPILE)pkg-config LLVM_CONFIG ?= llvm-config -- cgit v1.2.3 From 001916b94a04809a94abb07daba6f9ace01906ba Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 5 Mar 2017 17:40:11 +0100 Subject: perf bench numa: Add more comment for -c option Adding more commentary for -c/--show_convergence option, to explain how the convergence is defined. Before: -c, --show_convergence show convergence details Now: -c, --show_convergence convergence is reached when each process \ (all its threads) is running on a single NUMA node. 
Suggested-by: Jiri Hladky Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Jiri Hladky Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1488732011-27384-1-git-send-email-jolsa@kernel.org [ Rephrased a bit based on an IRC conversation with Jiri ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 3083fc36282b..6bd0581de298 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -187,7 +187,8 @@ static const struct option options[] = { OPT_INCR ('d', "show_details" , &p0.show_details, "Show details"), OPT_INCR ('a', "all" , &p0.run_all, "Run all tests in the suite"), OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"), - OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"), + OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details, " + "convergence is reached when each process (all its threads) is running on a single NUMA node."), OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"), OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode"), OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"), -- cgit v1.2.3 From 8b53dbef2aeaad6bf532bd4db2f4b2beefd42e49 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 8 Mar 2017 00:08:29 +0900 Subject: perf report: Hide tip message when -q option is given The tip message at the end was printed regardless of the -q option. Originally, the message suggested only '-s comm,dso' option for higher level view when no sort option and parent option were given. Now it shows random help message regardless of the options so the condition can be simplified to honor the -q option. Committer notes: Before: $ perf report --stdio -q 42.77% ls ls [.] _init 13.21% ls ld-2.24.so [.] match_symbol 12.55% ls libc-2.24.so [.]
__strcoll_l 11.94% ls libc-2.24.so [.] _init # # (Tip: Show current config key-value pairs: perf config --list) # $ After: $ perf report --stdio -q 42.77% ls ls [.] _init 13.21% ls ld-2.24.so [.] match_symbol 12.55% ls libc-2.24.so [.] __strcoll_l 11.94% ls libc-2.24.so [.] _init $ We still have those two extra lines tho (that git commit insists in turning into one, or git commit --amend doesn't make me add), food for another patch... Reported-and-Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20170307150851.22304-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0a88670e56f3..f03a5eac2a62 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -394,8 +394,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, fprintf(stdout, "\n\n"); } - if (sort_order == NULL && - parent_pattern == default_parent_pattern) + if (!quiet) fprintf(stdout, "#\n# (%s)\n#\n", help); if (rep->show_threads) { -- cgit v1.2.3 From f75d2895e053efb2a69194d98754e4d5f4fa3a28 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 8 Mar 2017 00:08:32 +0900 Subject: perf c2c: Clarify help message of --stats option As it is not strictly asking for only stdio output, but will imply using it. 
Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Peter Zijlstra Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20170307150851.22304-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index e2b21723bbf8..3fac30ed92f1 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2536,7 +2536,7 @@ static int perf_c2c__report(int argc, const char **argv) OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"), #endif OPT_BOOLEAN(0, "stats", &c2c.stats_only, - "Use the stdio interface"), + "Display only statistic tables (implies --stdio)"), OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full, "Display full length of symbols"), OPT_BOOLEAN(0, "no-source", &no_source, -- cgit v1.2.3 From 1936feae54a6724a27a6ca1b948fd0e80371f7b0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 8 Mar 2017 00:08:33 +0900 Subject: perf c2c: Fix display bug when using pipe Currently 'perf c2c report' determines display mode using the --stdio option, but it could be a problem if stdout is not a tty since setup_browser falls back to stdio in this case. But perf c2c didn't know this and tried to use TUI browser anyway. It should check "use_browser" variable instead. For example, the following command showed nothing and broke terminal setting. Now it's fixed. $ perf c2c report | head ================================================= Trace Event Information ================================================= Total records : 136 Locked Load/Store Operations : 6 Load Operations : 62 Loads - uncacheable : 0 Loads - IO : 1 Loads - Miss : 7 Loads - no mapping : 2 Committer notes: When trying it without a proper perf.data file it results in a stuck terminal, just as Namhyung reported above: [acme@jouet ~]$ perf c2c report | head WARNING: no sample cpu value[acme@jouet ~]$ One has to kill it from some other xterm. 
Confirm that this patch fixes it: After: $ perf c2c report | head WARNING: no sample cpu value================================================= Trace Event Information ================================================= Total records : 14 Locked Load/Store Operations : 0 Load Operations : 0 Loads - uncacheable : 0 Loads - IO : 0 Loads - Miss : 0 Loads - no mapping : 0 $ Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Peter Zijlstra Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20170307150851.22304-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 3fac30ed92f1..5cd6d7a047b9 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2334,7 +2334,7 @@ out: static void perf_c2c_display(struct perf_session *session) { - if (c2c.use_stdio) + if (use_browser == 0) perf_c2c__hists_fprintf(stdout, session); else perf_c2c__hists_browse(&c2c.hists.hists); -- cgit v1.2.3 From f9c10cd645a3defc24110ac71f93e8d18a50d0d6 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 13 Mar 2017 16:28:45 +0800 Subject: perf tools: Missing c2c command in command-list Add the c2c command to command-list.txt so perf help can list this command. Committer notes: Before: # perf help | grep c2c # After: # perf help | grep c2c c2c Shared Data C2C/HITM Analyzer. 
# Signed-off-by: Changbin Du Tested-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170313082845.23373-1-changbin.du@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/command-list.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index ac3efd396a72..2d0caf20ff3a 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -9,6 +9,7 @@ perf-buildid-cache mainporcelain common perf-buildid-list mainporcelain common perf-data mainporcelain common perf-diff mainporcelain common +perf-c2c mainporcelain common perf-config mainporcelain common perf-evlist mainporcelain common perf-ftrace mainporcelain common -- cgit v1.2.3 From 12a601c64339d2d28af534d2324f87cf00ba8cd8 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 13 Mar 2017 16:30:26 +0800 Subject: perf tools: Ignore generated files pmu-events/{jevents,pmu-events.c} for git Ignore two files: pmu-events/{jevents,pmu-events.c} which are generated during the build. Committer notes: Testing it: $ make -C tools/perf/ $ git status On branch perf/core Untracked files: (use "git add ..." 
to include in what will be committed) tools/perf/pmu-events/jevents tools/perf/pmu-events/pmu-events.c nothing added to commit but untracked files present (use "git add" to track) $ After the patch: $ git status On branch perf/core nothing to commit, working tree clean $ Signed-off-by: Changbin Du Tested-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170313083026.23487-1-changbin.du@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 3db3db9278be..643cc4ba6872 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -31,3 +31,5 @@ config.mak.autogen .config-detected util/intel-pt-decoder/inat-tables.c arch/*/include/generated/ +pmu-events/pmu-events.c +pmu-events/jevents -- cgit v1.2.3 From 4b0b3aa6a2756e6115fdf275c521e4552a7082f3 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 13 Mar 2017 16:31:48 +0800 Subject: perf sort: Fix segfault with basic block 'cycles' sort dimension Skip the sample which doesn't have branch_info to avoid segmentation fault: The fault can be reproduced by: perf record -a perf report -F cycles Signed-off-by: Changbin Du Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Peter Zijlstra Fixes: 0e332f033a82 ("perf tools: Add support for cycles, weight branch_info field") Link: http://lkml.kernel.org/r/20170313083148.23568-1-changbin.du@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index f8f16c0e20b6..93f755ac60ca 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -846,6 +846,9 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf, static int64_t sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right) { + if (!left->branch_info || !right->branch_info) + return 
cmp_null(left->branch_info, right->branch_info); + return left->branch_info->flags.cycles - right->branch_info->flags.cycles; } @@ -853,6 +856,8 @@ sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right) static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { + if (!he->branch_info) + return scnprintf(bf, size, "%-.*s", width, "N/A"); if (he->branch_info->flags.cycles == 0) return repsep_snprintf(bf, size, "%-*s", width, "-"); return repsep_snprintf(bf, size, "%-*hd", width, -- cgit v1.2.3 From d35fa1e75fdb308ae8a1a3efffe7ddedbd6051e2 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 13 Mar 2017 16:32:52 +0800 Subject: perf report: Document +field style argument support for --field option Commit 2f3f9bcf000b ("perf tools: Add +field argument support for --field option") by Jiri Olsa introduced +field style argument support for --field option. This is useful, but the documentation was not updated. This adds a short description there. Signed-off-by: Changbin Du Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170313083252.23644-1-changbin.du@intel.com [ Slightly improved the phrase structure ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 33f91906f5dc..672b149aa80a 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -173,6 +173,9 @@ OPTIONS By default, every sort keys not specified in -F will be appended automatically. + If the keys starts with a prefix '+', then it will append the specified + field(s) to the default field order. For example: perf report -F +period,sample. + -p:: --parent=:: A regex filter to identify parent. 
The parent is a caller of this -- cgit v1.2.3 From 3ef5b4023c64c16c793a066a7a2ed6ea3767e0d7 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 13 Mar 2017 19:46:52 +0800 Subject: perf hists browser: Fix typo in function switch_data_file Should clear buf 'abs_path', not 'options'. Signed-off-by: Changbin Du Cc: Feng Tang Cc: Peter Zijlstra Fixes: 341487ab561f ("perf hists browser: Add option for runtime switching perf data file") Link: http://lkml.kernel.org/r/20170313114652.9207-1-changbin.du@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index fc4fb669ceee..2dc82bec10c0 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2308,7 +2308,7 @@ static int switch_data_file(void) return ret; memset(options, 0, sizeof(options)); - memset(options, 0, sizeof(abs_path)); + memset(abs_path, 0, sizeof(abs_path)); while ((dent = readdir(pwd_dir))) { char path[PATH_MAX]; -- cgit v1.2.3 From e422267322cd319e2695a535e47c5b1feeac45eb Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Wed, 8 Mar 2017 02:11:36 +0530 Subject: perf: Add PERF_RECORD_NAMESPACES to include namespaces related info With the advent of container technologies like docker, that depend on namespaces for isolation, there is a need for tracing support for namespaces. This patch introduces new PERF_RECORD_NAMESPACES event for recording namespaces related info. By recording info for every namespace, it is left to userspace to take a call on the definition of a container and trace containers by updating perf tool accordingly. Each namespace has a combination of device and inode numbers. Though every namespace has the same device number currently, that may change in future to avoid the need for a namespace of namespaces. Considering such possibility, record both device and inode numbers separately for each namespace. 
Signed-off-by: Hari Bathini Acked-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Ananth N Mavinakayanahalli Cc: Aravinda Prasad Cc: Brendan Gregg Cc: Daniel Borkmann Cc: Eric Biederman Cc: Sargun Dhillon Cc: Steven Rostedt Link: http://lkml.kernel.org/r/148891929686.25309.2827618988917007768.stgit@hbathini.in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 2 + include/uapi/linux/perf_event.h | 32 ++++++++- kernel/events/core.c | 139 ++++++++++++++++++++++++++++++++++++++++ kernel/fork.c | 2 + kernel/nsproxy.c | 3 + 5 files changed, 177 insertions(+), 1 deletion(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 000fdb211c7d..f19a82362851 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1112,6 +1112,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); +extern void perf_event_namespaces(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ @@ -1315,6 +1316,7 @@ static inline int perf_unregister_guest_info_callbacks static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } +static inline void perf_event_namespaces(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } static inline int perf_swevent_get_recursion_context(void) { return -1; } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c66a485a24ac..bec0aad0e15c 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -344,7 +344,8 @@ struct perf_event_attr { use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context 
switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ - __reserved_1 : 36; + namespaces : 1, /* include namespaces data */ + __reserved_1 : 35; union { __u32 wakeup_events; /* wakeup every n events */ @@ -610,6 +611,23 @@ struct perf_event_header { __u16 size; }; +struct perf_ns_link_info { + __u64 dev; + __u64 ino; +}; + +enum { + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ +}; + enum perf_event_type { /* @@ -862,6 +880,18 @@ enum perf_event_type { */ PERF_RECORD_SWITCH_CPU_WIDE = 15, + /* + * struct { + * struct perf_event_header header; + * u32 pid; + * u32 tid; + * u64 nr_namespaces; + * { u64 dev, inode; } [nr_namespaces]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_NAMESPACES = 16, + PERF_RECORD_MAX, /* non-ABI */ }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 6f41548f2e32..16c877a121c8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -48,6 +48,8 @@ #include #include #include +#include +#include #include "internal.h" @@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; +static atomic_t nr_namespaces_events __read_mostly; static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; @@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event) atomic_dec(&nr_mmap_events); if (event->attr.comm) atomic_dec(&nr_comm_events); + if (event->attr.namespaces) + atomic_dec(&nr_namespaces_events); if (event->attr.task) atomic_dec(&nr_task_events); if (event->attr.freq) @@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task, void perf_event_fork(struct task_struct *task) { perf_event_task(task, NULL, 1); + 
perf_event_namespaces(task); } /* @@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec) perf_event_comm_event(&comm_event); } +/* + * namespaces tracking + */ + +struct perf_namespaces_event { + struct task_struct *task; + + struct { + struct perf_event_header header; + + u32 pid; + u32 tid; + u64 nr_namespaces; + struct perf_ns_link_info link_info[NR_NAMESPACES]; + } event_id; +}; + +static int perf_event_namespaces_match(struct perf_event *event) +{ + return event->attr.namespaces; +} + +static void perf_event_namespaces_output(struct perf_event *event, + void *data) +{ + struct perf_namespaces_event *namespaces_event = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + int ret; + + if (!perf_event_namespaces_match(event)) + return; + + perf_event_header__init_id(&namespaces_event->event_id.header, + &sample, event); + ret = perf_output_begin(&handle, event, + namespaces_event->event_id.header.size); + if (ret) + return; + + namespaces_event->event_id.pid = perf_event_pid(event, + namespaces_event->task); + namespaces_event->event_id.tid = perf_event_tid(event, + namespaces_event->task); + + perf_output_put(&handle, namespaces_event->event_id); + + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + +static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info, + struct task_struct *task, + const struct proc_ns_operations *ns_ops) +{ + struct path ns_path; + struct inode *ns_inode; + void *error; + + error = ns_get_path(&ns_path, task, ns_ops); + if (!error) { + ns_inode = ns_path.dentry->d_inode; + ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev); + ns_link_info->ino = ns_inode->i_ino; + } +} + +void perf_event_namespaces(struct task_struct *task) +{ + struct perf_namespaces_event namespaces_event; + struct perf_ns_link_info *ns_link_info; + + if (!atomic_read(&nr_namespaces_events)) + return; + + namespaces_event = (struct perf_namespaces_event){ + 
.task = task, + .event_id = { + .header = { + .type = PERF_RECORD_NAMESPACES, + .misc = 0, + .size = sizeof(namespaces_event.event_id), + }, + /* .pid */ + /* .tid */ + .nr_namespaces = NR_NAMESPACES, + /* .link_info[NR_NAMESPACES] */ + }, + }; + + ns_link_info = namespaces_event.event_id.link_info; + + perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX], + task, &mntns_operations); + +#ifdef CONFIG_USER_NS + perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX], + task, &userns_operations); +#endif +#ifdef CONFIG_NET_NS + perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX], + task, &netns_operations); +#endif +#ifdef CONFIG_UTS_NS + perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX], + task, &utsns_operations); +#endif +#ifdef CONFIG_IPC_NS + perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX], + task, &ipcns_operations); +#endif +#ifdef CONFIG_PID_NS + perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX], + task, &pidns_operations); +#endif +#ifdef CONFIG_CGROUPS + perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX], + task, &cgroupns_operations); +#endif + + perf_iterate_sb(perf_event_namespaces_output, + &namespaces_event, + NULL); +} + /* * mmap tracking */ @@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event) atomic_inc(&nr_mmap_events); if (event->attr.comm) atomic_inc(&nr_comm_events); + if (event->attr.namespaces) + atomic_inc(&nr_namespaces_events); if (event->attr.task) atomic_inc(&nr_task_events); if (event->attr.freq) @@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open, return -EACCES; } + if (attr.namespaces) { + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + } + if (attr.freq) { if (attr.sample_freq > sysctl_perf_event_sample_rate) return -EINVAL; diff --git a/kernel/fork.c b/kernel/fork.c index 6c463c80e93d..afa2947286cd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) } } + perf_event_namespaces(current); + bad_unshare_cleanup_cred: if (new_cred) 
put_cred(new_cred); diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 782102e59eed..f6c5d330059a 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -26,6 +26,7 @@ #include #include #include +#include static struct kmem_cache *nsproxy_cachep; @@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) goto out; } switch_task_namespaces(tsk, new_nsproxy); + + perf_event_namespaces(tsk); out: fput(file); return err; -- cgit v1.2.3 From f3b3614a284deb124018155a618a7b19694c8b5c Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Wed, 8 Mar 2017 02:11:43 +0530 Subject: perf tools: Add PERF_RECORD_NAMESPACES to include namespaces related info Introduce a new option to record PERF_RECORD_NAMESPACES events emitted by the kernel when fork, clone, setns or unshare are invoked. And update perf-record documentation with the new option to record namespace events. Committer notes: Combined it with a later patch to allow printing it via 'perf report -D' and be able to test the feature introduced in this patch. Had to move here also perf_ns__name(), that was introduced in another later patch. Also used PRIu64 and PRIx64 to fix the build in some environments wrt: util/event.c:1129:39: error: format '%lx' expects argument of type 'long unsigned int', but argument 6 has type 'long long unsigned int' [-Werror=format=] ret += fprintf(fp, "%u/%s: %lu/0x%lx%s", idx ^ Testing it: # perf record --namespaces -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.083 MB perf.data (423 samples) ] # # perf report -D 3 2028902078892 0x115140 [0xa0]: PERF_RECORD_NAMESPACES 14783/14783 - nr_namespaces: 7 [0/net: 3/0xf0000081, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc, 4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb] 0x1151e0 [0x30]: event: 9 . . ... raw event: size 48 bytes . 0000: 09 00 00 00 02 00 30 00 c4 71 82 68 0c 7f 00 00 ......0..q.h.... . 
0010: a9 39 00 00 a9 39 00 00 94 28 fe 63 d8 01 00 00 .9...9...(.c.... . 0020: 03 00 00 00 00 00 00 00 ce c4 02 00 00 00 00 00 ................ NAMESPACES events: 1 # Signed-off-by: Hari Bathini Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Ananth N Mavinakayanahalli Cc: Aravinda Prasad Cc: Brendan Gregg Cc: Daniel Borkmann Cc: Eric Biederman Cc: Peter Zijlstra Cc: Sargun Dhillon Cc: Steven Rostedt Link: http://lkml.kernel.org/r/148891930386.25309.18412039920746995488.stgit@hbathini.in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 32 +++++++++++++++++- tools/perf/Documentation/perf-record.txt | 3 ++ tools/perf/builtin-annotate.c | 1 + tools/perf/builtin-diff.c | 1 + tools/perf/builtin-inject.c | 13 ++++++++ tools/perf/builtin-kmem.c | 1 + tools/perf/builtin-kvm.c | 2 ++ tools/perf/builtin-lock.c | 1 + tools/perf/builtin-mem.c | 1 + tools/perf/builtin-record.c | 6 ++++ tools/perf/builtin-report.c | 1 + tools/perf/builtin-sched.c | 1 + tools/perf/builtin-script.c | 1 + tools/perf/builtin-trace.c | 3 +- tools/perf/perf.h | 1 + tools/perf/util/Build | 1 + tools/perf/util/data-convert-bt.c | 1 + tools/perf/util/event.c | 56 ++++++++++++++++++++++++++++++++ tools/perf/util/event.h | 13 ++++++++ tools/perf/util/evsel.c | 3 ++ tools/perf/util/machine.c | 34 +++++++++++++++++++ tools/perf/util/machine.h | 3 ++ tools/perf/util/namespaces.c | 36 ++++++++++++++++++++ tools/perf/util/namespaces.h | 26 +++++++++++++++ tools/perf/util/session.c | 7 ++++ tools/perf/util/thread.c | 44 +++++++++++++++++++++++-- tools/perf/util/thread.h | 6 ++++ tools/perf/util/tool.h | 2 ++ 28 files changed, 296 insertions(+), 4 deletions(-) create mode 100644 tools/perf/util/namespaces.c create mode 100644 tools/perf/util/namespaces.h diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index c66a485a24ac..bec0aad0e15c 100644 --- 
a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -344,7 +344,8 @@ struct perf_event_attr { use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ - __reserved_1 : 36; + namespaces : 1, /* include namespaces data */ + __reserved_1 : 35; union { __u32 wakeup_events; /* wakeup every n events */ @@ -610,6 +611,23 @@ struct perf_event_header { __u16 size; }; +struct perf_ns_link_info { + __u64 dev; + __u64 ino; +}; + +enum { + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ +}; + enum perf_event_type { /* @@ -862,6 +880,18 @@ enum perf_event_type { */ PERF_RECORD_SWITCH_CPU_WIDE = 15, + /* + * struct { + * struct perf_event_header header; + * u32 pid; + * u32 tid; + * u64 nr_namespaces; + * { u64 dev, inode; } [nr_namespaces]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_NAMESPACES = 16, + PERF_RECORD_MAX, /* non-ABI */ }; diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index b16003ec14a7..ea3789d05e5e 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -347,6 +347,9 @@ Enable weightened sampling. An additional weight is recorded per sample and can displayed with the weight and local_weight sort keys. This currently works for TSX abort events and some memory events in precise mode on modern Intel CPUs. +--namespaces:: +Record events of type PERF_RECORD_NAMESPACES. + --transaction:: Record transaction flags for transaction related events. 
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 4f52d85f5ebc..e54b1f9fe1ee 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -393,6 +393,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) .comm = perf_event__process_comm, .exit = perf_event__process_exit, .fork = perf_event__process_fork, + .namespaces = perf_event__process_namespaces, .ordered_events = true, .ordering_requires_timestamps = true, }, diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 1b96a3122228..5e4803158672 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -364,6 +364,7 @@ static struct perf_tool tool = { .exit = perf_event__process_exit, .fork = perf_event__process_fork, .lost = perf_event__process_lost, + .namespaces = perf_event__process_namespaces, .ordered_events = true, .ordering_requires_timestamps = true, }; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index b9bc7e39833a..8d1d13b9bab6 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -333,6 +333,18 @@ static int perf_event__repipe_comm(struct perf_tool *tool, return err; } +static int perf_event__repipe_namespaces(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + int err = perf_event__process_namespaces(tool, event, sample, machine); + + perf_event__repipe(tool, event, sample, machine); + + return err; +} + static int perf_event__repipe_exit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -660,6 +672,7 @@ static int __cmd_inject(struct perf_inject *inject) session->itrace_synth_opts = &inject->itrace_synth_opts; inject->itrace_synth_opts.inject = true; inject->tool.comm = perf_event__repipe_comm; + inject->tool.namespaces = perf_event__repipe_namespaces; inject->tool.exit = perf_event__repipe_exit; inject->tool.id_index = 
perf_event__repipe_id_index; inject->tool.auxtrace_info = perf_event__process_auxtrace_info; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 6da8d083e4e5..d509e74bc6e8 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -964,6 +964,7 @@ static struct perf_tool perf_kmem = { .comm = perf_event__process_comm, .mmap = perf_event__process_mmap, .mmap2 = perf_event__process_mmap2, + .namespaces = perf_event__process_namespaces, .ordered_events = true, }; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 08fa88f62a24..18e6c38864bc 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1044,6 +1044,7 @@ static int read_events(struct perf_kvm_stat *kvm) struct perf_tool eops = { .sample = process_sample_event, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .ordered_events = true, }; struct perf_data_file file = { @@ -1348,6 +1349,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, kvm->tool.exit = perf_event__process_exit; kvm->tool.fork = perf_event__process_fork; kvm->tool.lost = process_lost_event; + kvm->tool.namespaces = perf_event__process_namespaces; kvm->tool.ordered_events = true; perf_tool__fill_defaults(&kvm->tool); diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index ce3bfb48b26f..d750ccaa978f 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -858,6 +858,7 @@ static int __cmd_report(bool display_info) struct perf_tool eops = { .sample = process_sample_event, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .ordered_events = true, }; struct perf_data_file file = { diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 6114e07ca613..030a6cfdda59 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -342,6 +342,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) .lost = perf_event__process_lost, .fork 
= perf_event__process_fork, .build_id = perf_event__process_build_id, + .namespaces = perf_event__process_namespaces, .ordered_events = true, }, .input_name = "perf.data", diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index bc84a375295d..99562c7242b6 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -876,6 +876,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGTERM, sig_handler); signal(SIGSEGV, sigsegv_handler); + if (rec->opts.record_namespaces) + tool->namespace_events = true; + if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { signal(SIGUSR2, snapshot_sig_handler); if (rec->opts.auxtrace_snapshot_mode) @@ -1497,6 +1500,7 @@ static struct record record = { .fork = perf_event__process_fork, .exit = perf_event__process_exit, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .mmap = perf_event__process_mmap, .mmap2 = perf_event__process_mmap2, .ordered_events = true, @@ -1611,6 +1615,8 @@ static struct option __record_options[] = { "opts", "AUX area tracing Snapshot Mode", ""), OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout, "per thread proc mmap processing timeout in ms"), + OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, + "Record namespaces events"), OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, "Record context switch events"), OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f03a5eac2a62..5ab8117c3bfd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -700,6 +700,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) .mmap = perf_event__process_mmap, .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .exit = perf_event__process_exit, .fork = 
perf_event__process_fork, .lost = perf_event__process_lost, diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index b94cf0de715a..16170e9b47e6 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3272,6 +3272,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) .tool = { .sample = perf_sched__process_tracepoint_sample, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .lost = perf_event__process_lost, .fork = perf_sched__process_fork_event, .ordered_events = true, diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c0783b4f7b6c..f1ce806a1f31 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2097,6 +2097,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) .mmap = perf_event__process_mmap, .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .exit = perf_event__process_exit, .fork = perf_event__process_fork, .attr = process_attr, diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 256f1fac6f7e..912fedc5b42d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2415,8 +2415,9 @@ static int trace__replay(struct trace *trace) trace->tool.exit = perf_event__process_exit; trace->tool.fork = perf_event__process_fork; trace->tool.attr = perf_event__process_attr; - trace->tool.tracing_data = perf_event__process_tracing_data; + trace->tool.tracing_data = perf_event__process_tracing_data; trace->tool.build_id = perf_event__process_build_id; + trace->tool.namespaces = perf_event__process_namespaces; trace->tool.ordered_events = true; trace->tool.ordering_requires_timestamps = true; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 1c27d947c2fe..806c216a1078 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -50,6 +50,7 @@ struct record_opts { bool running_time; bool 
full_auxtrace; bool auxtrace_snapshot_mode; + bool record_namespaces; bool record_switch_events; bool all_kernel; bool all_user; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 5da376bc1afc..2ea5ee179a3b 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -42,6 +42,7 @@ libperf-y += pstack.o libperf-y += session.o libperf-$(CONFIG_AUDIT) += syscalltbl.o libperf-y += ordered-events.o +libperf-y += namespaces.o libperf-y += comm.o libperf-y += thread.o libperf-y += thread_map.o diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 4e6cbc99f08e..89ece2445713 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1468,6 +1468,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, .lost = perf_event__process_lost, .tracing_data = perf_event__process_tracing_data, .build_id = perf_event__process_build_id, + .namespaces = perf_event__process_namespaces, .ordered_events = true, .ordering_requires_timestamps = true, }, diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 4ea7ce72ed9c..fb52819023c7 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -31,6 +31,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES", [PERF_RECORD_SWITCH] = "SWITCH", [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE", + [PERF_RECORD_NAMESPACES] = "NAMESPACES", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -49,6 +50,16 @@ static const char *perf_event__names[] = { [PERF_RECORD_TIME_CONV] = "TIME_CONV", }; +static const char *perf_ns__names[] = { + [NET_NS_INDEX] = "net", + [UTS_NS_INDEX] = "uts", + [IPC_NS_INDEX] = "ipc", + [PID_NS_INDEX] = "pid", + [USER_NS_INDEX] = "user", + [MNT_NS_INDEX] = "mnt", + [CGROUP_NS_INDEX] = "cgroup", +}; + const char *perf_event__name(unsigned int id) { if (id >= ARRAY_SIZE(perf_event__names)) 
@@ -58,6 +69,13 @@ const char *perf_event__name(unsigned int id) return perf_event__names[id]; } +static const char *perf_ns__name(unsigned int id) +{ + if (id >= ARRAY_SIZE(perf_ns__names)) + return "UNKNOWN"; + return perf_ns__names[id]; +} + static int perf_tool__process_synth_event(struct perf_tool *tool, union perf_event *event, struct machine *machine, @@ -1008,6 +1026,33 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp) return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid); } +size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp) +{ + size_t ret = 0; + struct perf_ns_link_info *ns_link_info; + u32 nr_namespaces, idx; + + ns_link_info = event->namespaces.link_info; + nr_namespaces = event->namespaces.nr_namespaces; + + ret += fprintf(fp, " %d/%d - nr_namespaces: %u\n\t\t[", + event->namespaces.pid, + event->namespaces.tid, + nr_namespaces); + + for (idx = 0; idx < nr_namespaces; idx++) { + if (idx && (idx % 4 == 0)) + ret += fprintf(fp, "\n\t\t "); + + ret += fprintf(fp, "%u/%s: %" PRIu64 "/%#" PRIx64 "%s", idx, + perf_ns__name(idx), (u64)ns_link_info[idx].dev, + (u64)ns_link_info[idx].ino, + ((idx + 1) != nr_namespaces) ? 
", " : "]\n"); + } + + return ret; +} + int perf_event__process_comm(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -1016,6 +1061,14 @@ int perf_event__process_comm(struct perf_tool *tool __maybe_unused, return machine__process_comm_event(machine, event, sample); } +int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + return machine__process_namespaces_event(machine, event, sample); +} + int perf_event__process_lost(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -1196,6 +1249,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_MMAP: ret += perf_event__fprintf_mmap(event, fp); break; + case PERF_RECORD_NAMESPACES: + ret += perf_event__fprintf_namespaces(event, fp); + break; case PERF_RECORD_MMAP2: ret += perf_event__fprintf_mmap2(event, fp); break; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c735c53a26f8..b39ff795b9a9 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -39,6 +39,13 @@ struct comm_event { char comm[16]; }; +struct namespaces_event { + struct perf_event_header header; + u32 pid, tid; + u64 nr_namespaces; + struct perf_ns_link_info link_info[]; +}; + struct fork_event { struct perf_event_header header; u32 pid, ppid; @@ -485,6 +492,7 @@ union perf_event { struct mmap_event mmap; struct mmap2_event mmap2; struct comm_event comm; + struct namespaces_event namespaces; struct fork_event fork; struct lost_event lost; struct lost_samples_event lost_samples; @@ -587,6 +595,10 @@ int perf_event__process_switch(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_namespaces(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int 
perf_event__process_mmap(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -653,6 +665,7 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp); size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); u64 kallsyms__get_function_start(const char *kallsyms_filename, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ac59710b79e0..175dc2305aa8 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -932,6 +932,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; + if (opts->record_namespaces) + attr->namespaces = track; + if (opts->record_switch_events) attr->context_switch = track; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b9974fe41bc1..dfc600446586 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -13,6 +13,7 @@ #include #include "unwind.h" #include "linux/hash.h" +#include "asm/bug.h" static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock); @@ -501,6 +502,37 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event return err; } +int machine__process_namespaces_event(struct machine *machine __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused) +{ + struct thread *thread = machine__findnew_thread(machine, + event->namespaces.pid, + event->namespaces.tid); + int err = 0; + + WARN_ONCE(event->namespaces.nr_namespaces > NR_NAMESPACES, + "\nWARNING: kernel seems to support more namespaces than perf" + " tool.\nTry updating the perf tool..\n\n"); + 
+ WARN_ONCE(event->namespaces.nr_namespaces < NR_NAMESPACES, + "\nWARNING: perf tool seems to support more namespaces than" + " the kernel.\nTry updating the kernel..\n\n"); + + if (dump_trace) + perf_event__fprintf_namespaces(event, stdout); + + if (thread == NULL || + thread__set_namespaces(thread, sample->time, &event->namespaces)) { + dump_printf("problem processing PERF_RECORD_NAMESPACES, skipping event.\n"); + err = -1; + } + + thread__put(thread); + + return err; +} + int machine__process_lost_event(struct machine *machine __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused) { @@ -1538,6 +1570,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_comm_event(machine, event, sample); break; case PERF_RECORD_MMAP: ret = machine__process_mmap_event(machine, event, sample); break; + case PERF_RECORD_NAMESPACES: + ret = machine__process_namespaces_event(machine, event, sample); break; case PERF_RECORD_MMAP2: ret = machine__process_mmap2_event(machine, event, sample); break; case PERF_RECORD_FORK: diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index a28305029711..3cdb1340f917 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -97,6 +97,9 @@ int machine__process_itrace_start_event(struct machine *machine, union perf_event *event); int machine__process_switch_event(struct machine *machine, union perf_event *event); +int machine__process_namespaces_event(struct machine *machine, + union perf_event *event, + struct perf_sample *sample); int machine__process_mmap_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); int machine__process_mmap2_event(struct machine *machine, union perf_event *event, diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c new file mode 100644 index 000000000000..2de8da64d90c --- /dev/null +++ b/tools/perf/util/namespaces.c @@ -0,0 +1,36 @@ +/* + * This program is free 
software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * Copyright (C) 2017 Hari Bathini, IBM Corporation + */ + +#include "namespaces.h" +#include "util.h" +#include "event.h" +#include +#include + +struct namespaces *namespaces__new(struct namespaces_event *event) +{ + struct namespaces *namespaces; + u64 link_info_size = ((event ? event->nr_namespaces : NR_NAMESPACES) * + sizeof(struct perf_ns_link_info)); + + namespaces = zalloc(sizeof(struct namespaces) + link_info_size); + if (!namespaces) + return NULL; + + namespaces->end_time = -1; + + if (event) + memcpy(namespaces->link_info, event->link_info, link_info_size); + + return namespaces; +} + +void namespaces__free(struct namespaces *namespaces) +{ + free(namespaces); +} diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h new file mode 100644 index 000000000000..468f1e9a1484 --- /dev/null +++ b/tools/perf/util/namespaces.h @@ -0,0 +1,26 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. 
+ * + * Copyright (C) 2017 Hari Bathini, IBM Corporation + */ + +#ifndef __PERF_NAMESPACES_H +#define __PERF_NAMESPACES_H + +#include "../perf.h" +#include + +struct namespaces_event; + +struct namespaces { + struct list_head list; + u64 end_time; + struct perf_ns_link_info link_info[]; +}; + +struct namespaces *namespaces__new(struct namespaces_event *event); +void namespaces__free(struct namespaces *namespaces); + +#endif /* __PERF_NAMESPACES_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1dd617d116b5..ae42e742d461 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1239,6 +1239,8 @@ static int machines__deliver_event(struct machines *machines, return tool->mmap2(tool, event, sample, machine); case PERF_RECORD_COMM: return tool->comm(tool, event, sample, machine); + case PERF_RECORD_NAMESPACES: + return tool->namespaces(tool, event, sample, machine); case PERF_RECORD_FORK: return tool->fork(tool, event, sample, machine); case PERF_RECORD_EXIT: @@ -1494,6 +1496,11 @@ int perf_session__register_idle_thread(struct perf_session *session) err = -1; } + if (thread == NULL || thread__set_namespaces(thread, 0, NULL)) { + pr_err("problem inserting idle task.\n"); + err = -1; + } + /* machine__findnew_thread() got the thread, so put it */ thread__put(thread); return err; diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 74e79d26b421..dcdb87a5d0a1 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -7,6 +7,7 @@ #include "thread-stack.h" #include "util.h" #include "debug.h" +#include "namespaces.h" #include "comm.h" #include "unwind.h" @@ -40,6 +41,7 @@ struct thread *thread__new(pid_t pid, pid_t tid) thread->tid = tid; thread->ppid = -1; thread->cpu = -1; + INIT_LIST_HEAD(&thread->namespaces_list); INIT_LIST_HEAD(&thread->comm_list); comm_str = malloc(32); @@ -66,7 +68,8 @@ err_thread: void thread__delete(struct thread *thread) { - struct comm *comm, *tmp; + struct namespaces 
*namespaces, *tmp_namespaces; + struct comm *comm, *tmp_comm; BUG_ON(!RB_EMPTY_NODE(&thread->rb_node)); @@ -76,7 +79,12 @@ void thread__delete(struct thread *thread) map_groups__put(thread->mg); thread->mg = NULL; } - list_for_each_entry_safe(comm, tmp, &thread->comm_list, list) { + list_for_each_entry_safe(namespaces, tmp_namespaces, + &thread->namespaces_list, list) { + list_del(&namespaces->list); + namespaces__free(namespaces); + } + list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) { list_del(&comm->list); comm__free(comm); } @@ -104,6 +112,38 @@ void thread__put(struct thread *thread) } } +struct namespaces *thread__namespaces(const struct thread *thread) +{ + if (list_empty(&thread->namespaces_list)) + return NULL; + + return list_first_entry(&thread->namespaces_list, struct namespaces, list); +} + +int thread__set_namespaces(struct thread *thread, u64 timestamp, + struct namespaces_event *event) +{ + struct namespaces *new, *curr = thread__namespaces(thread); + + new = namespaces__new(event); + if (!new) + return -ENOMEM; + + list_add(&new->list, &thread->namespaces_list); + + if (timestamp && curr) { + /* + * setns syscall must have changed few or all the namespaces + * of this thread. Update end time for the namespaces + * previously used. 
+ */ + curr = list_next_entry(new, list); + curr->end_time = timestamp; + } + + return 0; +} + struct comm *thread__comm(const struct thread *thread) { if (list_empty(&thread->comm_list)) diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index e57188546465..4eb849e9098f 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -28,6 +28,7 @@ struct thread { bool comm_set; int comm_len; bool dead; /* if set thread has exited */ + struct list_head namespaces_list; struct list_head comm_list; u64 db_id; @@ -40,6 +41,7 @@ struct thread { }; struct machine; +struct namespaces; struct comm; struct thread *thread__new(pid_t pid, pid_t tid); @@ -62,6 +64,10 @@ static inline void thread__exited(struct thread *thread) thread->dead = true; } +struct namespaces *thread__namespaces(const struct thread *thread); +int thread__set_namespaces(struct thread *thread, u64 timestamp, + struct namespaces_event *event); + int __thread__set_comm(struct thread *thread, const char *comm, u64 timestamp, bool exec); static inline int thread__set_comm(struct thread *thread, const char *comm, diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index ac2590a3de2d..829471a1c6d7 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -40,6 +40,7 @@ struct perf_tool { event_op mmap, mmap2, comm, + namespaces, fork, exit, lost, @@ -66,6 +67,7 @@ struct perf_tool { event_op3 auxtrace; bool ordered_events; bool ordering_requires_timestamps; + bool namespace_events; }; #endif /* __PERF_TOOL_H */ -- cgit v1.2.3 From e907caf3a07ee42ef08ba689a436fd1eb99fbf62 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Wed, 8 Mar 2017 02:11:51 +0530 Subject: perf record: Synthesize namespace events for current processes Synthesize PERF_RECORD_NAMESPACES events for processes that were running prior to invocation of perf record. The data for this is taken from /proc/$PID/ns. These changes make way for analyzing events with regard to namespaces. 
Committer notes: Check if 'tool' is NULL in perf_event__synthesize_namespaces(), as in the test__mmap_thread_lookup case, i.e. 'perf test "Lookup mmap thread"'. Testing it: # ps axH > /tmp/allthreads # perf record -a --namespaces usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.169 MB perf.data (8 samples) ] # perf report -D | grep PERF_RECORD_NAMESPACES | wc -l 602 # wc -l /tmp/allthreads 601 /tmp/allthreads # tail /tmp/allthreads 16951 pts/4 T 0:00 git rebase -i a033bf1bfacdaa25642e6bcc857a7d0f67cc3c92^ 16952 pts/4 T 0:00 /bin/sh /usr/libexec/git-core/git-rebase -i a033bf1bfacdaa25642e6bcc857a7d0f67cc3c92^ 17176 pts/4 T 0:00 git commit --amend --no-post-rewrite 17204 pts/4 T 0:00 vim /home/acme/git/linux/.git/COMMIT_EDITMSG 18939 ? S 0:00 [kworker/2:1] 18947 ? S 0:00 [kworker/3:0] 18974 ? S 0:00 [kworker/1:0] 19047 ? S 0:00 [kworker/0:1] 19152 pts/6 S+ 0:00 weechat 19153 pts/7 R+ 0:00 ps axH # perf report -D | grep PERF_RECORD_NAMESPACES | tail 0 0 0x125068 [0xa0]: PERF_RECORD_NAMESPACES 17176/17176 - nr_namespaces: 7 0 0 0x1255b8 [0xa0]: PERF_RECORD_NAMESPACES 17204/17204 - nr_namespaces: 7 0 0 0x125df0 [0xa0]: PERF_RECORD_NAMESPACES 18939/18939 - nr_namespaces: 7 0 0 0x125f00 [0xa0]: PERF_RECORD_NAMESPACES 18947/18947 - nr_namespaces: 7 0 0 0x126010 [0xa0]: PERF_RECORD_NAMESPACES 18974/18974 - nr_namespaces: 7 0 0 0x126120 [0xa0]: PERF_RECORD_NAMESPACES 19047/19047 - nr_namespaces: 7 0 0 0x126230 [0xa0]: PERF_RECORD_NAMESPACES 19152/19152 - nr_namespaces: 7 0 0 0x129330 [0xa0]: PERF_RECORD_NAMESPACES 19154/19154 - nr_namespaces: 7 0 0 0x12a1f8 [0xa0]: PERF_RECORD_NAMESPACES 19155/19155 - nr_namespaces: 7 0 0 0x12b0b8 [0xa0]: PERF_RECORD_NAMESPACES 19155/19155 - nr_namespaces: 7 # Humm, investigate why we got two records for the 19155 pid/tid...
Signed-off-by: Hari Bathini Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Ananth N Mavinakayanahalli Cc: Aravinda Prasad Cc: Brendan Gregg Cc: Daniel Borkmann Cc: Eric Biederman Cc: Peter Zijlstra Cc: Sargun Dhillon Cc: Steven Rostedt Link: http://lkml.kernel.org/r/148891931111.25309.11073854609798681633.stgit@hbathini.in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 29 ++++++++++++-- tools/perf/util/event.c | 94 ++++++++++++++++++++++++++++++++++++++++++--- tools/perf/util/event.h | 6 +++ 3 files changed, 119 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 99562c7242b6..04faef79a548 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -986,6 +986,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) */ if (forks) { union perf_event *event; + pid_t tgid; event = malloc(sizeof(event->comm) + machine->id_hdr_size); if (event == NULL) { @@ -999,10 +1000,30 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) * cannot see a correct process name for those events. * Synthesize COMM event to prevent it. */ - perf_event__synthesize_comm(tool, event, - rec->evlist->workload.pid, - process_synthesized_event, - machine); + tgid = perf_event__synthesize_comm(tool, event, + rec->evlist->workload.pid, + process_synthesized_event, + machine); + free(event); + + if (tgid == -1) + goto out_child; + + event = malloc(sizeof(event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size); + if (event == NULL) { + err = -ENOMEM; + goto out_child; + } + + /* + * Synthesize NAMESPACES event for the command specified. 
+ */ + perf_event__synthesize_namespaces(tool, event, + rec->evlist->workload.pid, + tgid, process_synthesized_event, + machine); free(event); perf_evlist__start_workload(rec->evlist); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index fb52819023c7..d082cb70445d 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -221,6 +221,58 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool, return tgid; } +static void perf_event__get_ns_link_info(pid_t pid, const char *ns, + struct perf_ns_link_info *ns_link_info) +{ + struct stat64 st; + char proc_ns[128]; + + sprintf(proc_ns, "/proc/%u/ns/%s", pid, ns); + if (stat64(proc_ns, &st) == 0) { + ns_link_info->dev = st.st_dev; + ns_link_info->ino = st.st_ino; + } +} + +int perf_event__synthesize_namespaces(struct perf_tool *tool, + union perf_event *event, + pid_t pid, pid_t tgid, + perf_event__handler_t process, + struct machine *machine) +{ + u32 idx; + struct perf_ns_link_info *ns_link_info; + + if (!tool || !tool->namespace_events) + return 0; + + memset(&event->namespaces, 0, (sizeof(event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size)); + + event->namespaces.pid = tgid; + event->namespaces.tid = pid; + + event->namespaces.nr_namespaces = NR_NAMESPACES; + + ns_link_info = event->namespaces.link_info; + + for (idx = 0; idx < event->namespaces.nr_namespaces; idx++) + perf_event__get_ns_link_info(pid, perf_ns__name(idx), + &ns_link_info[idx]); + + event->namespaces.header.type = PERF_RECORD_NAMESPACES; + + event->namespaces.header.size = (sizeof(event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size); + + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) + return -1; + + return 0; +} + static int perf_event__synthesize_fork(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, pid_t ppid, @@ -452,8 +504,9 @@ int perf_event__synthesize_modules(struct 
perf_tool *tool, static int __event__synthesize_thread(union perf_event *comm_event, union perf_event *mmap_event, union perf_event *fork_event, + union perf_event *namespaces_event, pid_t pid, int full, - perf_event__handler_t process, + perf_event__handler_t process, struct perf_tool *tool, struct machine *machine, bool mmap_data, @@ -473,6 +526,11 @@ static int __event__synthesize_thread(union perf_event *comm_event, if (tgid == -1) return -1; + if (perf_event__synthesize_namespaces(tool, namespaces_event, pid, + tgid, process, machine) < 0) + return -1; + + return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, process, machine, mmap_data, proc_map_timeout); @@ -506,6 +564,11 @@ static int __event__synthesize_thread(union perf_event *comm_event, if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid, ppid, process, machine) < 0) break; + + if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid, + tgid, process, machine) < 0) + break; + /* * Send the prepared comm event */ @@ -534,6 +597,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, unsigned int proc_map_timeout) { union perf_event *comm_event, *mmap_event, *fork_event; + union perf_event *namespaces_event; int err = -1, thread, j; comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); @@ -548,10 +612,16 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, if (fork_event == NULL) goto out_free_mmap; + namespaces_event = malloc(sizeof(namespaces_event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size); + if (namespaces_event == NULL) + goto out_free_fork; + err = 0; for (thread = 0; thread < threads->nr; ++thread) { if (__event__synthesize_thread(comm_event, mmap_event, - fork_event, + fork_event, namespaces_event, thread_map__pid(threads, thread), 0, process, tool, machine, mmap_data, proc_map_timeout)) { @@ -577,7 +647,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, /* 
if not, generate events for it */ if (need_leader && __event__synthesize_thread(comm_event, mmap_event, - fork_event, + fork_event, namespaces_event, comm_event->comm.pid, 0, process, tool, machine, mmap_data, proc_map_timeout)) { @@ -586,6 +656,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, } } } + free(namespaces_event); +out_free_fork: free(fork_event); out_free_mmap: free(mmap_event); @@ -605,6 +677,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, char proc_path[PATH_MAX]; struct dirent *dirent; union perf_event *comm_event, *mmap_event, *fork_event; + union perf_event *namespaces_event; int err = -1; if (machine__is_default_guest(machine)) @@ -622,11 +695,17 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (fork_event == NULL) goto out_free_mmap; + namespaces_event = malloc(sizeof(namespaces_event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size); + if (namespaces_event == NULL) + goto out_free_fork; + snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); proc = opendir(proc_path); if (proc == NULL) - goto out_free_fork; + goto out_free_namespaces; while ((dirent = readdir(proc)) != NULL) { char *end; @@ -638,13 +717,16 @@ int perf_event__synthesize_threads(struct perf_tool *tool, * We may race with exiting thread, so don't stop just because * one thread couldn't be synthesized. 
*/ - __event__synthesize_thread(comm_event, mmap_event, fork_event, pid, - 1, process, tool, machine, mmap_data, + __event__synthesize_thread(comm_event, mmap_event, fork_event, + namespaces_event, pid, 1, process, + tool, machine, mmap_data, proc_map_timeout); } err = 0; closedir(proc); +out_free_namespaces: + free(namespaces_event); out_free_fork: free(fork_event); out_free_mmap: diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index b39ff795b9a9..e1d8166ebbd5 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -648,6 +648,12 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_namespaces(struct perf_tool *tool, + union perf_event *event, + pid_t pid, pid_t tgid, + perf_event__handler_t process, + struct machine *machine); + int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, -- cgit v1.2.3 From 96a44bbccdd8ff263829d42fe934e6094ea5bb20 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Wed, 8 Mar 2017 02:12:06 +0530 Subject: perf script: Add script print support for namespace events Introduce a new option to display events of type PERF_RECORD_NAMESPACES and update perf-script documentation accordingly. Shown below is output (trimmed) of perf script command with the newly introduced option, on perf.data generated with perf record command using --namespaces option. 
$ perf script --show-namespace-events swapper 0 [000] 0.000000: PERF_RECORD_NAMESPACES 1/1 - nr_namespaces: 7 [0/net: 3/0xf000001c, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc, 4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb] swapper 0 [000] 0.000000: PERF_RECORD_NAMESPACES 2/2 - nr_namespaces: 7 [0/net: 3/0xf000001c, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc, 4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb] Committer notes: Testing it: Investigating that double PERF_RECORD_NAMESPACES for the 19155 pid/tid... It's more than that, there are two PERF_RECORD_COMM as well, and with zeroed timestamps, so probably a synthesizing artifact... # perf script --show-task --show-namespace perf 0 [000] 0.000000: PERF_RECORD_COMM: perf:19154/19154 perf 0 [000] 0.000000: PERF_RECORD_FORK(19155:19155):(19154:19154) perf 0 [000] 0.000000: PERF_RECORD_NAMESPACES 19155/19155 - nr_namespaces: 7 [0/net: 3/0xf0000081, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc, 4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb] perf 0 [000] 0.000000: PERF_RECORD_COMM: perf:19155/19155 perf 0 [000] 0.000000: PERF_RECORD_COMM: perf:19155/19155 perf 0 [000] 0.000000: PERF_RECORD_NAMESPACES 19155/19155 - nr_namespaces: 7 [0/net: 3/0xf0000081, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc, 4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb] swapper 0 [000] 3110.881834: 1 cycles: ffffffffa7060bf6 native_write_msr (/lib/modules/4.11.0-rc1+/build/vmlinux) Signed-off-by: Hari Bathini Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Ananth N Mavinakayanahalli Cc: Aravinda Prasad Cc: Brendan Gregg Cc: Daniel Borkmann Cc: Eric Biederman Cc: Peter Zijlstra Cc: Sargun Dhillon Cc: Steven Rostedt Link: http://lkml.kernel.org/r/148891932627.25309.1941587059154176221.stgit@hbathini.in.ibm.com Signed-off-by: Arnaldo Carvalho de
Melo --- tools/perf/Documentation/perf-script.txt | 3 +++ tools/perf/builtin-script.c | 40 ++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 4ed5f239ba7d..62c9b0c77a3a 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -248,6 +248,9 @@ OPTIONS --show-mmap-events Display mmap related events (e.g. MMAP, MMAP2). +--show-namespace-events + Display namespace events i.e. events of type PERF_RECORD_NAMESPACES. + --show-switch-events Display context switch events i.e. events of type PERF_RECORD_SWITCH or PERF_RECORD_SWITCH_CPU_WIDE. diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index f1ce806a1f31..66d62c98dff9 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -830,6 +830,7 @@ struct perf_script { bool show_task_events; bool show_mmap_events; bool show_switch_events; + bool show_namespace_events; bool allocated; struct cpu_map *cpus; struct thread_map *threads; @@ -1118,6 +1119,41 @@ out: return ret; } +static int process_namespaces_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct thread *thread; + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); + int ret = -1; + + thread = machine__findnew_thread(machine, event->namespaces.pid, + event->namespaces.tid); + if (thread == NULL) { + pr_debug("problem processing NAMESPACES event, skipping it.\n"); + return -1; + } + + if (perf_event__process_namespaces(tool, event, sample, machine) < 0) + goto out; + + if (!evsel->attr.sample_id_all) { + sample->cpu = 0; + sample->time = 0; + sample->tid = event->namespaces.tid; + sample->pid = event->namespaces.pid; + } + 
print_sample_start(sample, thread, evsel); + perf_event__fprintf(event, stdout); + ret = 0; +out: + thread__put(thread); + return ret; +} + static int process_fork_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -1293,6 +1329,8 @@ static int __cmd_script(struct perf_script *script) } if (script->show_switch_events) script->tool.context_switch = process_switch_event; + if (script->show_namespace_events) + script->tool.namespaces = process_namespaces_event; ret = perf_session__process_events(script->session); @@ -2181,6 +2219,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Show the mmap events"), OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events, "Show context switch events (if recorded)"), + OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, + "Show namespace events (if recorded)"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), OPT_BOOLEAN(0, "ns", &nanosecs, "Use 9 decimal places when displaying time"), -- cgit v1.2.3 From d890a98c9217892575761d0c1311c41612844c4d Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Wed, 8 Mar 2017 02:12:13 +0530 Subject: perf tools: Add 'cgroup_id' sort order keyword This patch introduces a cgroup identifier entry field in perf report to identify or distinguish data of different cgroups. It uses the device number and inode number of cgroup namespace, included in perf data with the new PERF_RECORD_NAMESPACES event, as cgroup identifier. With the assumption that each container is created with its own cgroup namespace, this allows assessment/analysis of multiple containers at once. A simple test for this would be to clone a few processes passing SIGCHLD & CLONE_NEWCGROUP flags to each of them, execute shell and run different workloads on each of those contexts, while running perf record command with --namespaces option.
Shown below is the output of perf report, sorted with cgroup identifier, on perf.data generated with the above test scenario, clearly indicating one context's considerable use of kernel memory in comparison with others: $ perf report -s cgroup_id,sample --stdio # # Total Lost Samples: 0 # # Samples: 5K of event 'kmem:kmalloc' # Event count (approx.): 5965 # # Overhead cgroup id (dev/inode) Samples # ........ ..................... ............ # 81.27% 3/0xeffffffb 4848 16.24% 3/0xf00000d0 969 1.16% 3/0xf00000ce 69 0.82% 3/0xf00000cf 49 0.50% 0/0x0 30 While this is a start, there is further scope of improving this. For example, instead of cgroup namespace's device and inode numbers, dev and inode numbers of some or all namespaces may be used to distinguish which processes are running in a given container context. Also, scripts to map device and inode info to containers sounds plausible for better tracing of containers. Signed-off-by: Hari Bathini Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Ananth N Mavinakayanahalli Cc: Aravinda Prasad Cc: Brendan Gregg Cc: Daniel Borkmann Cc: Eric Biederman Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Sargun Dhillon Cc: Steven Rostedt Link: http://lkml.kernel.org/r/148891933338.25309.756882900782042645.stgit@hbathini.in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 4 +++- tools/perf/util/hist.c | 7 ++++++ tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 41 ++++++++++++++++++++++++++++++++ tools/perf/util/sort.h | 7 ++++++ 5 files changed, 59 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 672b149aa80a..e9a61f5485eb 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -72,7 +72,8 @@ OPTIONS --sort=:: Sort histogram entries by given key(s) - multiple keys can be specified in CSV format. 
Following sort keys are available: - pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, local_weight. + pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, + local_weight, cgroup_id. Each key has following meaning: @@ -92,6 +93,7 @@ OPTIONS - weight: Event specific weight, e.g. memory latency or transaction abort cost. This is the global weight. - local_weight: Local weight version of the weight above. + - cgroup_id: ID derived from cgroup namespace device and inode numbers. - transaction: Transaction abort flags. - overhead: Overhead percentage of sample - overhead_sys: Overhead percentage of sample running in system mode diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index eaf72a938fb4..e3b38f629504 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -3,6 +3,7 @@ #include "hist.h" #include "map.h" #include "session.h" +#include "namespaces.h" #include "sort.h" #include "evlist.h" #include "evsel.h" @@ -169,6 +170,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO); } + hists__new_col_len(hists, HISTC_CGROUP_ID, 20); hists__new_col_len(hists, HISTC_CPU, 3); hists__new_col_len(hists, HISTC_SOCKET, 6); hists__new_col_len(hists, HISTC_MEM_LOCKED, 6); @@ -574,9 +576,14 @@ __hists__add_entry(struct hists *hists, bool sample_self, struct hist_entry_ops *ops) { + struct namespaces *ns = thread__namespaces(al->thread); struct hist_entry entry = { .thread = al->thread, .comm = thread__comm(al->thread), + .cgroup_id = { + .dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0, + .ino = ns ? 
ns->link_info[CGROUP_NS_INDEX].ino : 0, + }, .ms = { .map = al->map, .sym = al->sym, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 2e839bf40bdd..ee3670a388df 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -30,6 +30,7 @@ enum hist_column { HISTC_DSO, HISTC_THREAD, HISTC_COMM, + HISTC_CGROUP_ID, HISTC_PARENT, HISTC_CPU, HISTC_SOCKET, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 93f755ac60ca..8b0d4e39f640 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -536,6 +536,46 @@ struct sort_entry sort_cpu = { .se_width_idx = HISTC_CPU, }; +/* --sort cgroup_id */ + +static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev) +{ + return (int64_t)(right_dev - left_dev); +} + +static int64_t _sort__cgroup_inode_cmp(u64 left_ino, u64 right_ino) +{ + return (int64_t)(right_ino - left_ino); +} + +static int64_t +sort__cgroup_id_cmp(struct hist_entry *left, struct hist_entry *right) +{ + int64_t ret; + + ret = _sort__cgroup_dev_cmp(right->cgroup_id.dev, left->cgroup_id.dev); + if (ret != 0) + return ret; + + return _sort__cgroup_inode_cmp(right->cgroup_id.ino, + left->cgroup_id.ino); +} + +static int hist_entry__cgroup_id_snprintf(struct hist_entry *he, + char *bf, size_t size, + unsigned int width __maybe_unused) +{ + return repsep_snprintf(bf, size, "%lu/0x%lx", he->cgroup_id.dev, + he->cgroup_id.ino); +} + +struct sort_entry sort_cgroup_id = { + .se_header = "cgroup id (dev/inode)", + .se_cmp = sort__cgroup_id_cmp, + .se_snprintf = hist_entry__cgroup_id_snprintf, + .se_width_idx = HISTC_CGROUP_ID, +}; + /* --sort socket */ static int64_t @@ -1464,6 +1504,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_TRANSACTION, "transaction", sort_transaction), DIM(SORT_TRACE, "trace", sort_trace), DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size), + DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 
f583325a3743..baf20a399f34 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -54,6 +54,11 @@ struct he_stat { u32 nr_events; }; +struct namespace_id { + u64 dev; + u64 ino; +}; + struct hist_entry_diff { bool computed; union { @@ -91,6 +96,7 @@ struct hist_entry { struct map_symbol ms; struct thread *thread; struct comm *comm; + struct namespace_id cgroup_id; u64 ip; u64 transaction; s32 socket; @@ -212,6 +218,7 @@ enum sort_type { SORT_TRANSACTION, SORT_TRACE, SORT_SYM_SIZE, + SORT_CGROUP_ID, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, -- cgit v1.2.3 From 292c4a8f985b35b3738d5900fe256c4fed4cd3f5 Mon Sep 17 00:00:00 2001 From: Brendan Gregg Date: Tue, 14 Mar 2017 01:56:29 +0000 Subject: perf sched timehist: Add --next option The --next option shows the next task for each context switch, providing more context for the sequence of scheduler events. $ perf sched timehist --next | head Samples do not have callchains. time cpu task name waittime schdelay run time [tid/pid] (msec) (msec) (msec) ---------- --- ---------- --------- ------ ----- 374.793792 [0] 0.000 0.000 0.000 next: rngd[1524] 374.793801 [0] rngd[1524] 0.000 0.000 0.009 next: swapper/0[0] 374.794048 [7] 0.000 0.000 0.000 next: yes[30884] 374.794066 [7] yes[30884] 0.000 0.000 0.018 next: swapper/7[0] 374.794126 [2] 0.000 0.000 0.000 next: rngd[1524] 374.794140 [2] rngd[1524] 0.325 0.006 0.013 next: swapper/2[0] 374.794281 [3] 0.000 0.000 0.000 next: perf[31070] Signed-off-by: Brendan Gregg Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1489456589-32555-1-git-send-email-bgregg@netflix.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-sched.txt | 4 ++++ tools/perf/builtin-sched.c | 25 ++++++++++++++++++++----- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 
d33deddb0146..a092a2499e8f 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -132,6 +132,10 @@ OPTIONS for 'perf sched timehist' --migrations:: Show migration events. +-n:: +--next:: + Show next task. + -I:: --idle-hist:: Show idle-related events only. diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 16170e9b47e6..b92c4d97192c 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -221,6 +221,7 @@ struct perf_sched { unsigned int max_stack; bool show_cpu_visual; bool show_wakeups; + bool show_next; bool show_migrations; bool show_state; u64 skipped_samples; @@ -1897,14 +1898,18 @@ static char task_state_char(struct thread *thread, int state) } static void timehist_print_sample(struct perf_sched *sched, + struct perf_evsel *evsel, struct perf_sample *sample, struct addr_location *al, struct thread *thread, u64 t, int state) { struct thread_runtime *tr = thread__priv(thread); + const char *next_comm = perf_evsel__strval(evsel, sample, "next_comm"); + const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); u32 max_cpus = sched->max_cpu + 1; char tstr[64]; + char nstr[30]; u64 wait_time; timestamp__scnprintf_usec(t, tstr, sizeof(tstr)); @@ -1937,7 +1942,12 @@ static void timehist_print_sample(struct perf_sched *sched, if (sched->show_state) printf(" %5c ", task_state_char(thread, state)); - if (sched->show_wakeups) + if (sched->show_next) { + snprintf(nstr, sizeof(nstr), "next: %s[%d]", next_comm, next_pid); + printf(" %-*s", comm_width, nstr); + } + + if (sched->show_wakeups && !sched->show_next) printf(" %-*s", comm_width, ""); if (thread->tid == 0) @@ -2531,7 +2541,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, } if (!sched->summary_only) - timehist_print_sample(sched, sample, &al, thread, t, state); + timehist_print_sample(sched, evsel, sample, &al, thread, t, state); out: if (sched->hist_time.start == 0 && t >= ptime->start) @@ 
-3341,6 +3351,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('S', "with-summary", &sched.summary, "Show all syscalls and summary with statistics"), OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), + OPT_BOOLEAN('n', "next", &sched.show_next, "Show next task"), OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"), OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"), @@ -3438,10 +3449,14 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) if (argc) usage_with_options(timehist_usage, timehist_options); } - if (sched.show_wakeups && sched.summary_only) { - pr_err(" Error: -s and -w are mutually exclusive.\n"); + if ((sched.show_wakeups || sched.show_next) && + sched.summary_only) { + pr_err(" Error: -s and -[n|w] are mutually exclusive.\n"); parse_options_usage(timehist_usage, timehist_options, "s", true); - parse_options_usage(NULL, timehist_options, "w", true); + if (sched.show_wakeups) + parse_options_usage(NULL, timehist_options, "w", true); + if (sched.show_next) + parse_options_usage(NULL, timehist_options, "n", true); return -EINVAL; } -- cgit v1.2.3 From 3da3ea7a8e205edc24b9491a459b46527c70b5b1 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 8 Mar 2017 13:56:08 +0530 Subject: perf probe: Factor out the ftrace README scanning Simplify and separate out the ftrace README scanning logic into a separate helper. This is used subsequently to scan for all patterns of interest and to cache the result. Since we are only interested in availability of probe argument type x, we will only scan for that. Signed-off-by: Naveen N. 
Rao Acked-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Cc: Steven Rostedt Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/6dc30edc747ba82a236593be6cf3a046fa9453b5.1488961018.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-file.c | 70 +++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 1a62daceb028..8a219cd831b7 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -877,35 +877,31 @@ int probe_cache__show_all_caches(struct strfilter *filter) return 0; } +enum ftrace_readme { + FTRACE_README_PROBE_TYPE_X = 0, + FTRACE_README_END, +}; + static struct { const char *pattern; - bool avail; - bool checked; -} probe_type_table[] = { -#define DEFINE_TYPE(idx, pat, def_avail) \ - [idx] = {.pattern = pat, .avail = (def_avail)} - DEFINE_TYPE(PROBE_TYPE_U, "* u8/16/32/64,*", true), - DEFINE_TYPE(PROBE_TYPE_S, "* s8/16/32/64,*", true), - DEFINE_TYPE(PROBE_TYPE_X, "* x8/16/32/64,*", false), - DEFINE_TYPE(PROBE_TYPE_STRING, "* string,*", true), - DEFINE_TYPE(PROBE_TYPE_BITFIELD, - "* b@/", true), + bool avail; +} ftrace_readme_table[] = { +#define DEFINE_TYPE(idx, pat) \ + [idx] = {.pattern = pat, .avail = false} + DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"), }; -bool probe_type_is_available(enum probe_type type) +static bool scan_ftrace_readme(enum ftrace_readme type) { + int fd; FILE *fp; char *buf = NULL; size_t len = 0; - bool target_line = false; - bool ret = probe_type_table[type].avail; - int fd; + bool ret = false; + static bool scanned = false; - if (type >= PROBE_TYPE_END) - return false; - /* We don't have to check the type which supported by default */ - if (ret || probe_type_table[type].checked) - return ret; + if (scanned) + goto result; fd = open_trace_file("README", false); if (fd < 0) @@ -917,21 
+913,29 @@ bool probe_type_is_available(enum probe_type type) return ret; } - while (getline(&buf, &len, fp) > 0 && !ret) { - if (!target_line) { - target_line = !!strstr(buf, " type: "); - if (!target_line) - continue; - } else if (strstr(buf, "\t ") != buf) - break; - ret = strglobmatch(buf, probe_type_table[type].pattern); - } - /* Cache the result */ - probe_type_table[type].checked = true; - probe_type_table[type].avail = ret; + while (getline(&buf, &len, fp) > 0) + for (enum ftrace_readme i = 0; i < FTRACE_README_END; i++) + if (!ftrace_readme_table[i].avail) + ftrace_readme_table[i].avail = + strglobmatch(buf, ftrace_readme_table[i].pattern); + scanned = true; fclose(fp); free(buf); - return ret; +result: + if (type >= FTRACE_README_END) + return false; + + return ftrace_readme_table[type].avail; +} + +bool probe_type_is_available(enum probe_type type) +{ + if (type >= PROBE_TYPE_END) + return false; + else if (type == PROBE_TYPE_X) + return scan_ftrace_readme(FTRACE_README_PROBE_TYPE_X); + + return true; } -- cgit v1.2.3 From 7ab31d94bff96f4f80b38dc5147622b9f3889ac6 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 8 Mar 2017 13:56:09 +0530 Subject: perf kretprobes: Offset from reloc_sym if kernel supports it We indicate support for accepting sym+offset with kretprobes through a line in ftrace README. Parse the same to identify support and choose the appropriate format for kprobe_events. As an example, without this perf patch, but with the ftrace changes: naveen@ubuntu:~/linux/tools/perf$ sudo cat /sys/kernel/debug/tracing/README | grep kretprobe place (kretprobe): [:][+]| naveen@ubuntu:~/linux/tools/perf$ naveen@ubuntu:~/linux/tools/perf$ sudo ./perf probe -v do_open%return probe-definition(0): do_open%return symbol:do_open file:(null) line:0 offset:0 return:1 lazy:(null) 0 arguments Looking at the vmlinux_path (8 entries long) Using /boot/vmlinux for symbols Open Debuginfo file: /boot/vmlinux Try to find probe point from debuginfo. 
Matched function: do_open [2d0c7d8] Probe point found: do_open+0 Matched function: do_open [35d76b5] found inline addr: 0xc0000000004ba984 Failed to find "do_open%return", because do_open is an inlined function and has no return point. An error occurred in debuginfo analysis (-22). Trying to use symbols. Opening /sys/kernel/debug/tracing//kprobe_events write=1 Writing event: r:probe/do_open do_open+0 Writing event: r:probe/do_open_1 do_open+0 Added new events: probe:do_open (on do_open%return) probe:do_open_1 (on do_open%return) You can now use it in all perf tools, such as: perf record -e probe:do_open_1 -aR sleep 1 naveen@ubuntu:~/linux/tools/perf$ sudo cat /sys/kernel/debug/kprobes/list c000000000041370 k kretprobe_trampoline+0x0 [OPTIMIZED] c0000000004433d0 r do_open+0x0 [DISABLED] c0000000004433d0 r do_open+0x0 [DISABLED] And after this patch (and the subsequent powerpc patch): naveen@ubuntu:~/linux/tools/perf$ sudo ./perf probe -v do_open%return probe-definition(0): do_open%return symbol:do_open file:(null) line:0 offset:0 return:1 lazy:(null) 0 arguments Looking at the vmlinux_path (8 entries long) Using /boot/vmlinux for symbols Open Debuginfo file: /boot/vmlinux Try to find probe point from debuginfo. Matched function: do_open [2d0c7d8] Probe point found: do_open+0 Matched function: do_open [35d76b5] found inline addr: 0xc0000000004ba984 Failed to find "do_open%return", because do_open is an inlined function and has no return point. An error occurred in debuginfo analysis (-22). Trying to use symbols. 
Opening /sys/kernel/debug/tracing//README write=0 Opening /sys/kernel/debug/tracing//kprobe_events write=1 Writing event: r:probe/do_open _text+4469712 Writing event: r:probe/do_open_1 _text+4956248 Added new events: probe:do_open (on do_open%return) probe:do_open_1 (on do_open%return) You can now use it in all perf tools, such as: perf record -e probe:do_open_1 -aR sleep 1 naveen@ubuntu:~/linux/tools/perf$ sudo cat /sys/kernel/debug/kprobes/list c000000000041370 k kretprobe_trampoline+0x0 [OPTIMIZED] c0000000004433d0 r do_open+0x0 [DISABLED] c0000000004ba058 r do_open+0x8 [DISABLED] Signed-off-by: Naveen N. Rao Acked-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Cc: Steven Rostedt Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/496ef9f33c1ab16286ece9dd62aa672807aef91c.1488961018.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 12 +++++------- tools/perf/util/probe-file.c | 7 +++++++ tools/perf/util/probe-file.h | 1 + 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 28fb62c32678..c9bdc9ded0c3 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -757,7 +757,9 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs, } for (i = 0; i < ntevs; i++) { - if (!tevs[i].point.address || tevs[i].point.retprobe) + if (!tevs[i].point.address) + continue; + if (tevs[i].point.retprobe && !kretprobe_offset_is_supported()) continue; /* If we found a wrong one, mark it by NULL symbol */ if (kprobe_warn_out_range(tevs[i].point.symbol, @@ -1528,11 +1530,6 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) return -EINVAL; } - if (pp->retprobe && !pp->function) { - semantic_error("Return probe requires an entry function.\n"); - return -EINVAL; - } - if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) { 
semantic_error("Offset/Line/Lazy pattern can't be used with " "return probe.\n"); @@ -2841,7 +2838,8 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, } /* Note that the symbols in the kmodule are not relocated */ - if (!pev->uprobes && !pp->retprobe && !pev->target) { + if (!pev->uprobes && !pev->target && + (!pp->retprobe || kretprobe_offset_is_supported())) { reloc_sym = kernel_get_ref_reloc_sym(); if (!reloc_sym) { pr_warning("Relocated base symbol is not found!\n"); diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 8a219cd831b7..1542cd0d6799 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -879,6 +879,7 @@ int probe_cache__show_all_caches(struct strfilter *filter) enum ftrace_readme { FTRACE_README_PROBE_TYPE_X = 0, + FTRACE_README_KRETPROBE_OFFSET, FTRACE_README_END, }; @@ -889,6 +890,7 @@ static struct { #define DEFINE_TYPE(idx, pat) \ [idx] = {.pattern = pat, .avail = false} DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"), + DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"), }; static bool scan_ftrace_readme(enum ftrace_readme type) @@ -939,3 +941,8 @@ bool probe_type_is_available(enum probe_type type) return true; } + +bool kretprobe_offset_is_supported(void) +{ + return scan_ftrace_readme(FTRACE_README_KRETPROBE_OFFSET); +} diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index a17a82eff8a0..dbf95a00864a 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -65,6 +65,7 @@ struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache, const char *group, const char *event); int probe_cache__show_all_caches(struct strfilter *filter); bool probe_type_is_available(enum probe_type type); +bool kretprobe_offset_is_supported(void); #else /* ! 
HAVE_LIBELF_SUPPORT */ static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused) { -- cgit v1.2.3 From 44ca9341f65295c56e904cce4c84f5778f5c8537 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 8 Mar 2017 13:56:10 +0530 Subject: perf powerpc: Choose local entry point with kretprobes perf now uses an offset from _text/_stext for kretprobes if the kernel supports it, rather than the actual function name. As such, let's choose the LEP for powerpc ABIv2 so as to ensure the probe gets hit. Do it only if the kernel supports specifying offsets with kretprobes. Signed-off-by: Naveen N. Rao Acked-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Cc: Steven Rostedt Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/7445b5334673ef5404ac1d12609bad4d73d2b567.1488961018.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/sym-handling.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 1030a6e504bb..39dbe512b9fc 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -10,6 +10,7 @@ #include "symbol.h" #include "map.h" #include "probe-event.h" +#include "probe-file.h" #ifdef HAVE_LIBELF_SUPPORT bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) @@ -79,13 +80,18 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev, * However, if the user specifies an offset, we fall back to using the * GEP since all userspace applications (objdump/readelf) show function * disassembly with offsets from the GEP. - * - * In addition, we shouldn't specify an offset for kretprobes. 
*/ - if (pev->point.offset || (!pev->uprobes && pev->point.retprobe) || - !map || !sym) + if (pev->point.offset || !map || !sym) return; + /* For kretprobes, add an offset only if the kernel supports it */ + if (!pev->uprobes && pev->point.retprobe) { +#ifdef HAVE_LIBELF_SUPPORT + if (!kretprobe_offset_is_supported()) +#endif + return; + } + lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym); if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) -- cgit v1.2.3 From c1ac094d5061e757624a47217d2195ba24a75450 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 8 Mar 2017 22:34:14 +0530 Subject: doc: trace/kprobes: add information about NOKPROBE_SYMBOL Update kprobe tracer documentation to also mention that NOKPROBE_SYMBOL() and nokprobe_inline add symbols to the kprobes blacklist. Signed-off-by: Naveen N. Rao Acked-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Link: http://lkml.kernel.org/r/d924e20de099579ace4286e610304f054cd798db.1488991670.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- Documentation/trace/kprobetrace.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 41ef9d8efe95..5ea85059db3b 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -8,8 +8,9 @@ Overview -------- These events are similar to tracepoint based events. Instead of Tracepoint, this is based on kprobes (kprobe and kretprobe). So it can probe wherever -kprobes can probe (this means, all functions body except for __kprobes -functions). Unlike the Tracepoint based event, this can be added and removed +kprobes can probe (this means, all functions except those with +__kprobes/nokprobe_inline annotation and those marked NOKPROBE_SYMBOL). +Unlike the Tracepoint based event, this can be added and removed dynamically, on the fly. To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y. 
-- cgit v1.2.3 From 5f6bee34707973ea7879a7857fd63ddccc92fff3 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 8 Mar 2017 22:34:15 +0530 Subject: kprobes: Convert kprobe_exceptions_notify to use NOKPROBE_SYMBOL commit fc62d0207ae0 ("kprobes: Introduce weak variant of kprobe_exceptions_notify()") used the __kprobes annotation to exclude kprobe_exceptions_notify from being probed. Since NOKPROBE_SYMBOL() is a better way to do this enabling the symbol to be discovered as being blacklisted, change over to using NOKPROBE_SYMBOL(). Signed-off-by: Naveen N. Rao Acked-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Link: http://lkml.kernel.org/r/3f25bf400da5c222cd9b10eec6ded2d6b58209f8.1488991670.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- kernel/kprobes.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 448759d4a263..4780ec236035 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1740,11 +1740,12 @@ void unregister_kprobes(struct kprobe **kps, int num) } EXPORT_SYMBOL_GPL(unregister_kprobes); -int __weak __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) +int __weak kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { return NOTIFY_DONE; } +NOKPROBE_SYMBOL(kprobe_exceptions_notify); static struct notifier_block kprobe_exceptions_nb = { .notifier_call = kprobe_exceptions_notify, -- cgit v1.2.3 From af9100ad149cf31a1ab1160f71bb4025443dbdb6 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 14 Mar 2017 20:36:52 +0530 Subject: perf probe: Introduce util func is_sdt_event() Factor out the SDT event name checking routine as is_sdt_event(). Signed-off-by: Ravi Bangoria Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Brendan Gregg Cc: He Kuang Cc: Hemant Kumar Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mathieu Poirier Cc: Naveen N. 
Rao Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Cc: Taeung Song Cc: Wang Nan Link: http://lkml.kernel.org/r/20170314150658.7065-2-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.h | 20 ++++++++++++++++++++ tools/perf/util/probe-event.c | 9 +-------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 1af6a267c21b..8c72b0ff7fcb 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -8,6 +8,7 @@ #include #include #include +#include struct list_head; struct perf_evsel; @@ -196,4 +197,23 @@ int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); char *parse_events_formats_error_string(char *additional_terms); +#ifdef HAVE_LIBELF_SUPPORT +/* + * If the probe point starts with '%', + * or starts with "sdt_" and has a ':' but no '=', + * then it should be a SDT/cached probe point. + */ +static inline bool is_sdt_event(char *str) +{ + return (str[0] == '%' || + (!strncmp(str, "sdt_", 4) && + !!strchr(str, ':') && !strchr(str, '='))); +} +#else +static inline bool is_sdt_event(char *str __maybe_unused) +{ + return false; +} +#endif /* HAVE_LIBELF_SUPPORT */ + #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index c9bdc9ded0c3..b19d17801beb 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1341,14 +1341,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (!arg) return -EINVAL; - /* - * If the probe point starts with '%', - * or starts with "sdt_" and has a ':' but no '=', - * then it should be a SDT/cached probe point. 
- */ - if (arg[0] == '%' || - (!strncmp(arg, "sdt_", 4) && - !!strchr(arg, ':') && !strchr(arg, '='))) { + if (is_sdt_event(arg)) { pev->sdt = true; if (arg[0] == '%') arg++; -- cgit v1.2.3 From 1d585e70905e03e8c19c9aaf523ec246ae6b18a1 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 8 Mar 2017 13:56:06 +0530 Subject: trace/kprobes: Fix check for kretprobe offset within function entry perf specifies an offset from _text and since this offset is fed directly into the arch-specific helper, kprobes tracer rejects installation of kretprobes through perf. Fix this by looking up the actual offset from a function for the specified sym+offset. Refactor and reuse existing routines to limit code duplication -- we repurpose kprobe_addr() for determining final kprobe address and we split out the function entry offset determination into a separate generic helper. Before patch: naveen@ubuntu:~/linux/tools/perf$ sudo ./perf probe -v do_open%return probe-definition(0): do_open%return symbol:do_open file:(null) line:0 offset:0 return:1 lazy:(null) 0 arguments Looking at the vmlinux_path (8 entries long) Using /boot/vmlinux for symbols Open Debuginfo file: /boot/vmlinux Try to find probe point from debuginfo. Matched function: do_open [2d0c7ff] Probe point found: do_open+0 Matched function: do_open [35d76dc] found inline addr: 0xc0000000004ba9c4 Failed to find "do_open%return", because do_open is an inlined function and has no return point. An error occurred in debuginfo analysis (-22). Trying to use symbols. Opening /sys/kernel/debug/tracing//README write=0 Opening /sys/kernel/debug/tracing//kprobe_events write=1 Writing event: r:probe/do_open _text+4469776 Failed to write event: Invalid argument Error: Failed to add events. Reason: Invalid argument (Code: -22) naveen@ubuntu:~/linux/tools/perf$ dmesg | tail [ 33.568656] Given offset is not valid for return probe. 
After patch: naveen@ubuntu:~/linux/tools/perf$ sudo ./perf probe -v do_open%return probe-definition(0): do_open%return symbol:do_open file:(null) line:0 offset:0 return:1 lazy:(null) 0 arguments Looking at the vmlinux_path (8 entries long) Using /boot/vmlinux for symbols Open Debuginfo file: /boot/vmlinux Try to find probe point from debuginfo. Matched function: do_open [2d0c7d6] Probe point found: do_open+0 Matched function: do_open [35d76b3] found inline addr: 0xc0000000004ba9e4 Failed to find "do_open%return", because do_open is an inlined function and has no return point. An error occurred in debuginfo analysis (-22). Trying to use symbols. Opening /sys/kernel/debug/tracing//README write=0 Opening /sys/kernel/debug/tracing//kprobe_events write=1 Writing event: r:probe/do_open _text+4469808 Writing event: r:probe/do_open_1 _text+4956344 Added new events: probe:do_open (on do_open%return) probe:do_open_1 (on do_open%return) You can now use it in all perf tools, such as: perf record -e probe:do_open_1 -aR sleep 1 naveen@ubuntu:~/linux/tools/perf$ sudo cat /sys/kernel/debug/kprobes/list c000000000041370 k kretprobe_trampoline+0x0 [OPTIMIZED] c0000000004ba0b8 r do_open+0x8 [DISABLED] c000000000443430 r do_open+0x0 [DISABLED] Signed-off-by: Naveen N. 
Rao Acked-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Cc: Steven Rostedt Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/d8cd1ef420ec22e3643ac332fdabcffc77319a42.1488961018.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/kprobes.h | 1 + kernel/kprobes.c | 40 ++++++++++++++++++++++++++-------------- kernel/trace/trace_kprobe.c | 2 +- 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 177bdf6c6aeb..47e4da5b4fa2 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -268,6 +268,7 @@ extern void show_registers(struct pt_regs *regs); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); extern bool arch_function_offset_within_entry(unsigned long offset); +extern bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 4780ec236035..d733479a10ee 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1391,21 +1391,19 @@ bool within_kprobe_blacklist(unsigned long addr) * This returns encoded errors if it fails to look up symbol or invalid * combination of parameters. 
*/ -static kprobe_opcode_t *kprobe_addr(struct kprobe *p) +static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr, + const char *symbol_name, unsigned int offset) { - kprobe_opcode_t *addr = p->addr; - - if ((p->symbol_name && p->addr) || - (!p->symbol_name && !p->addr)) + if ((symbol_name && addr) || (!symbol_name && !addr)) goto invalid; - if (p->symbol_name) { - kprobe_lookup_name(p->symbol_name, addr); + if (symbol_name) { + kprobe_lookup_name(symbol_name, addr); if (!addr) return ERR_PTR(-ENOENT); } - addr = (kprobe_opcode_t *)(((char *)addr) + p->offset); + addr = (kprobe_opcode_t *)(((char *)addr) + offset); if (addr) return addr; @@ -1413,6 +1411,11 @@ invalid: return ERR_PTR(-EINVAL); } +static kprobe_opcode_t *kprobe_addr(struct kprobe *p) +{ + return _kprobe_addr(p->addr, p->symbol_name, p->offset); +} + /* Check passed kprobe is valid and return kprobe in kprobe_table. */ static struct kprobe *__get_valid_kprobe(struct kprobe *p) { @@ -1881,19 +1884,28 @@ bool __weak arch_function_offset_within_entry(unsigned long offset) return !offset; } +bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset) +{ + kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset); + + if (IS_ERR(kp_addr)) + return false; + + if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) || + !arch_function_offset_within_entry(offset)) + return false; + + return true; +} + int register_kretprobe(struct kretprobe *rp) { int ret = 0; struct kretprobe_instance *inst; int i; void *addr; - unsigned long offset; - - addr = kprobe_addr(&rp->kp); - if (!kallsyms_lookup_size_offset((unsigned long)addr, NULL, &offset)) - return -EINVAL; - if (!arch_function_offset_within_entry(offset)) + if (!function_offset_within_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset)) return -EINVAL; if (kretprobe_blacklist_size) { diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 12fb540da0e5..013f4e7146d4 100644 --- 
a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -697,7 +697,7 @@ static int create_trace_kprobe(int argc, char **argv) return ret; } if (offset && is_return && - !arch_function_offset_within_entry(offset)) { + !function_offset_within_entry(NULL, symbol, offset)) { pr_info("Given offset is not valid for return probe.\n"); return -EINVAL; } -- cgit v1.2.3 From 88b897a30c525c2eee6e7f16e1e8d0f18830845e Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 15 Mar 2017 10:17:13 -0700 Subject: perf tools: Make perf_event__synthesize_mmap_events() scale This patch significantly improves the execution time of perf_event__synthesize_mmap_events() when running perf record on systems where processes have lots of threads. It just happens that cat /proc/pid/maps support uses an O(N^2) algorithm to generate each map line in the maps file. If you have 1000 threads, then you have necessarily 1000 stacks. For each vma, you need to check if it corresponds to a thread's stack. With a large number of threads, this can take a very long time. I have seen latencies >> 10mn. As of today, perf does not use the fact that a mapping is a stack, therefore we can work around the issue by using /proc/pid/task/pid/maps. This entry does not try to map a vma to stack and is thus much faster with no loss of functionality. The proc-map-timeout logic is kept in case users still want some upper limit. In V2, we fix the file path from /proc/pid/tasks/pid/maps to actual /proc/pid/task/pid/maps, tasks -> task. Thanks Arnaldo for catching this. Committer note: This problem seems to have been eliminated in the kernel since commit : b18cb64ead40 ("fs/proc: Stop trying to report thread stacks"). 
Signed-off-by: Stephane Eranian Acked-by: Jiri Olsa Cc: Andy Lutomirski Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170315135059.GC2177@redhat.com Link: http://lkml.kernel.org/r/1489598233-25586-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index d082cb70445d..33fc2e9c0b0c 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -325,8 +325,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, if (machine__is_default_guest(machine)) return 0; - snprintf(filename, sizeof(filename), "%s/proc/%d/maps", - machine->root_dir, pid); + snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps", + machine->root_dir, pid, pid); fp = fopen(filename, "r"); if (fp == NULL) { -- cgit v1.2.3 From ed827adb009490673c9c63e0b716e0fa36afbcc1 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 10 Feb 2017 02:23:58 -0500 Subject: perf/x86: Add Top Down events to Intel Goldmont Goldmont supports full Top Down level 1 metrics (Frontend Bound, Retiring, Backend Bound and Bad Speculation). It has a 3-wide pipeline. 
Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1486711438-80058-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index eb1484c86bb4..4244bed77824 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1553,6 +1553,27 @@ static __initconst const u64 slm_hw_cache_event_ids }, }; +EVENT_ATTR_STR(topdown-total-slots, td_total_slots_glm, "event=0x3c"); +EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_glm, "3"); +/* UOPS_NOT_DELIVERED.ANY */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_glm, "event=0x9c"); +/* ISSUE_SLOTS_NOT_CONSUMED.RECOVERY */ +EVENT_ATTR_STR(topdown-recovery-bubbles, td_recovery_bubbles_glm, "event=0xca,umask=0x02"); +/* UOPS_RETIRED.ANY */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_glm, "event=0xc2"); +/* UOPS_ISSUED.ANY */ +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_glm, "event=0x0e"); + +static struct attribute *glm_events_attrs[] = { + EVENT_PTR(td_total_slots_glm), + EVENT_PTR(td_total_slots_scale_glm), + EVENT_PTR(td_fetch_bubbles_glm), + EVENT_PTR(td_recovery_bubbles_glm), + EVENT_PTR(td_slots_issued_glm), + EVENT_PTR(td_slots_retired_glm), + NULL +}; + static struct extra_reg intel_glm_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0), @@ -3750,6 +3771,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_prec_dist = true; x86_pmu.lbr_pt_coexist = true; x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.cpu_events = glm_events_attrs; pr_cont("Goldmont events, "); break; -- cgit 
v1.2.3 From f4c0b0aa58d9b7e30ab0a95e33da84d53b3d764a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 20 Feb 2017 15:33:50 +0200 Subject: perf/core: Keep AUX flags in the output handle In preparation for adding more flags to perf AUX records, introduce a separate API for setting the flags for a session, rather than appending more bool arguments to perf_aux_output_end. This allows to set each flag at the time a corresponding condition is detected, instead of tracking it in each driver's private state. Signed-off-by: Will Deacon Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Poirier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/20170220133352.17995-3-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 16 +++++------ arch/x86/events/intel/pt.c | 17 ++++++------ arch/x86/events/intel/pt.h | 1 - drivers/hwtracing/coresight/coresight-etb10.c | 9 +++---- drivers/hwtracing/coresight/coresight-etm-perf.c | 9 +++---- drivers/hwtracing/coresight/coresight-priv.h | 2 -- drivers/hwtracing/coresight/coresight-tmc-etf.c | 7 +++-- include/linux/coresight.h | 2 +- include/linux/perf_event.h | 8 +++--- kernel/events/ring_buffer.c | 34 ++++++++++++++++-------- 10 files changed, 56 insertions(+), 49 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 982c9e31daca..8ae8c5ce3a1f 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -63,7 +63,6 @@ struct bts_buffer { unsigned int cur_buf; bool snapshot; local_t data_size; - local_t lost; local_t head; unsigned long end; void **data_pages; @@ -199,7 +198,8 @@ static void bts_update(struct bts_ctx *bts) return; if (ds->bts_index >= ds->bts_absolute_maximum) - local_inc(&buf->lost); + perf_aux_output_flag(&bts->handle, + 
PERF_AUX_FLAG_TRUNCATED); /* * old and head are always in the same physical buffer, so we @@ -276,7 +276,7 @@ static void bts_event_start(struct perf_event *event, int flags) return; fail_end_stop: - perf_aux_output_end(&bts->handle, 0, false); + perf_aux_output_end(&bts->handle, 0); fail_stop: event->hw.state = PERF_HES_STOPPED; @@ -319,9 +319,8 @@ static void bts_event_stop(struct perf_event *event, int flags) bts->handle.head = local_xchg(&buf->data_size, buf->nr_pages << PAGE_SHIFT); - - perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), - !!local_xchg(&buf->lost, 0)); + perf_aux_output_end(&bts->handle, + local_xchg(&buf->data_size, 0)); } cpuc->ds->bts_index = bts->ds_back.bts_buffer_base; @@ -484,8 +483,7 @@ int intel_bts_interrupt(void) if (old_head == local_read(&buf->head)) return handled; - perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), - !!local_xchg(&buf->lost, 0)); + perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0)); buf = perf_aux_output_begin(&bts->handle, event); if (buf) @@ -500,7 +498,7 @@ int intel_bts_interrupt(void) * cleared handle::event */ barrier(); - perf_aux_output_end(&bts->handle, 0, false); + perf_aux_output_end(&bts->handle, 0); } } diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 5900471ee508..0218728be37a 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -753,7 +753,8 @@ static void pt_handle_status(struct pt *pt) */ if (!pt_cap_get(PT_CAP_topa_multiple_entries) || buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) { - local_inc(&buf->lost); + perf_aux_output_flag(&pt->handle, + PERF_AUX_FLAG_TRUNCATED); advance++; } } @@ -846,8 +847,10 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf, /* can't stop in the middle of an output region */ if (buf->output_off + handle->size + 1 < - sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) + sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) { + 
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); return -EINVAL; + } /* single entry ToPA is handled by marking all regions STOP=1 INT=1 */ @@ -1192,8 +1195,7 @@ void intel_pt_interrupt(void) pt_update_head(pt); - perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0), - local_xchg(&buf->lost, 0)); + perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0)); if (!event->hw.state) { int ret; @@ -1208,7 +1210,7 @@ void intel_pt_interrupt(void) /* snapshot counters don't use PMI, so it's safe */ ret = pt_buffer_reset_markers(buf, &pt->handle); if (ret) { - perf_aux_output_end(&pt->handle, 0, true); + perf_aux_output_end(&pt->handle, 0); return; } @@ -1280,7 +1282,7 @@ static void pt_event_start(struct perf_event *event, int mode) return; fail_end_stop: - perf_aux_output_end(&pt->handle, 0, true); + perf_aux_output_end(&pt->handle, 0); fail_stop: hwc->state = PERF_HES_STOPPED; } @@ -1321,8 +1323,7 @@ static void pt_event_stop(struct perf_event *event, int mode) pt->handle.head = local_xchg(&buf->data_size, buf->nr_pages << PAGE_SHIFT); - perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0), - local_xchg(&buf->lost, 0)); + perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0)); } } diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h index 53473c21b554..b528e8f373e4 100644 --- a/arch/x86/events/intel/pt.h +++ b/arch/x86/events/intel/pt.h @@ -143,7 +143,6 @@ struct pt_buffer { size_t output_off; unsigned long nr_pages; local_t data_size; - local_t lost; local64_t head; bool snapshot; unsigned long stop_pos, intr_pos; diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index d7325c6534ad..979ea6ec7902 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -321,7 +321,7 @@ static int etb_set_buffer(struct coresight_device *csdev, static unsigned long etb_reset_buffer(struct coresight_device *csdev, 
struct perf_output_handle *handle, - void *sink_config, bool *lost) + void *sink_config) { unsigned long size = 0; struct cs_buffers *buf = sink_config; @@ -343,7 +343,6 @@ static unsigned long etb_reset_buffer(struct coresight_device *csdev, * resetting parameters here and squaring off with the ring * buffer API in the tracer PMU is fine. */ - *lost = !!local_xchg(&buf->lost, 0); size = local_xchg(&buf->data_size, 0); } @@ -385,7 +384,7 @@ static void etb_update_buffer(struct coresight_device *csdev, (unsigned long)write_ptr); write_ptr &= ~(ETB_FRAME_SIZE_WORDS - 1); - local_inc(&buf->lost); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); } /* @@ -396,7 +395,7 @@ static void etb_update_buffer(struct coresight_device *csdev, */ status = readl_relaxed(drvdata->base + ETB_STATUS_REG); if (status & ETB_STATUS_RAM_FULL) { - local_inc(&buf->lost); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); to_read = capacity; read_ptr = write_ptr; } else { @@ -429,7 +428,7 @@ static void etb_update_buffer(struct coresight_device *csdev, if (read_ptr > (drvdata->buffer_depth - 1)) read_ptr -= drvdata->buffer_depth; /* let the decoder know we've skipped ahead */ - local_inc(&buf->lost); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); } /* finally tell HW where we want to start reading from */ diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 26cfac3e6de7..288a423c1b27 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -302,7 +302,8 @@ out: return; fail_end_stop: - perf_aux_output_end(handle, 0, true); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); + perf_aux_output_end(handle, 0); fail: event->hw.state = PERF_HES_STOPPED; goto out; @@ -310,7 +311,6 @@ fail: static void etm_event_stop(struct perf_event *event, int mode) { - bool lost; int cpu = smp_processor_id(); unsigned long size; struct coresight_device 
*sink, *csdev = per_cpu(csdev_src, cpu); @@ -348,10 +348,9 @@ static void etm_event_stop(struct perf_event *event, int mode) return; size = sink_ops(sink)->reset_buffer(sink, handle, - event_data->snk_config, - &lost); + event_data->snk_config); - perf_aux_output_end(handle, size, lost); + perf_aux_output_end(handle, size); } /* Disabling the path make its elements available to other sessions */ diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index ef9d8e93e3b2..5f662d82052c 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -76,7 +76,6 @@ enum cs_mode { * @nr_pages: max number of pages granted to us * @offset: offset within the current buffer * @data_size: how much we collected in this run - * @lost: other than zero if we had a HW buffer wrap around * @snapshot: is this run in snapshot mode * @data_pages: a handle the ring buffer */ @@ -85,7 +84,6 @@ struct cs_buffers { unsigned int nr_pages; unsigned long offset; local_t data_size; - local_t lost; bool snapshot; void **data_pages; }; diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 1549436e2492..aec61a6d5c63 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -329,7 +329,7 @@ static int tmc_set_etf_buffer(struct coresight_device *csdev, static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev, struct perf_output_handle *handle, - void *sink_config, bool *lost) + void *sink_config) { long size = 0; struct cs_buffers *buf = sink_config; @@ -350,7 +350,6 @@ static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev, * resetting parameters here and squaring off with the ring * buffer API in the tracer PMU is fine. 
*/ - *lost = !!local_xchg(&buf->lost, 0); size = local_xchg(&buf->data_size, 0); } @@ -389,7 +388,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, */ status = readl_relaxed(drvdata->base + TMC_STS); if (status & TMC_STS_FULL) { - local_inc(&buf->lost); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); to_read = drvdata->size; } else { to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size); @@ -434,7 +433,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, read_ptr -= drvdata->size; /* Tell the HW */ writel_relaxed(read_ptr, drvdata->base + TMC_RRP); - local_inc(&buf->lost); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); } cur = buf->cur; diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 2a5982c37dfb..035c16c9a505 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -201,7 +201,7 @@ struct coresight_ops_sink { void *sink_config); unsigned long (*reset_buffer)(struct coresight_device *csdev, struct perf_output_handle *handle, - void *sink_config, bool *lost); + void *sink_config); void (*update_buffer)(struct coresight_device *csdev, struct perf_output_handle *handle, void *sink_config); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f19a82362851..b6e75c9d4791 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -801,6 +801,7 @@ struct perf_output_handle { struct ring_buffer *rb; unsigned long wakeup; unsigned long size; + u64 aux_flags; union { void *addr; unsigned long head; @@ -849,10 +850,11 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx) extern void *perf_aux_output_begin(struct perf_output_handle *handle, struct perf_event *event); extern void perf_aux_output_end(struct perf_output_handle *handle, - unsigned long size, bool truncated); + unsigned long size); extern int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size); extern void *perf_get_aux(struct 
perf_output_handle *handle); +extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags); extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); extern void perf_pmu_unregister(struct pmu *pmu); @@ -1268,8 +1270,8 @@ static inline void * perf_aux_output_begin(struct perf_output_handle *handle, struct perf_event *event) { return NULL; } static inline void -perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, - bool truncated) { } +perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) + { } static inline int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) { return -EINVAL; } diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 257fa460b846..9654e55c38d6 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -297,6 +297,19 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) rb->paused = 1; } +void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags) +{ + /* + * OVERWRITE is determined by perf_aux_output_end() and can't + * be passed in directly. + */ + if (WARN_ON_ONCE(flags & PERF_AUX_FLAG_OVERWRITE)) + return; + + handle->aux_flags |= flags; +} +EXPORT_SYMBOL_GPL(perf_aux_output_flag); + /* * This is called before hardware starts writing to the AUX area to * obtain an output handle and make sure there's room in the buffer. @@ -360,6 +373,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, handle->event = event; handle->head = aux_head; handle->size = 0; + handle->aux_flags = 0; /* * In overwrite mode, AUX data stores do not depend on aux_tail, @@ -408,34 +422,32 @@ err: * of the AUX buffer management code is that after pmu::stop(), the AUX * transaction must be stopped and therefore drop the AUX reference count. 
*/ -void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, - bool truncated) +void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) { struct ring_buffer *rb = handle->rb; - bool wakeup = truncated; + bool wakeup = !!handle->aux_flags; unsigned long aux_head; - u64 flags = 0; - - if (truncated) - flags |= PERF_AUX_FLAG_TRUNCATED; /* in overwrite mode, driver provides aux_head via handle */ if (rb->aux_overwrite) { - flags |= PERF_AUX_FLAG_OVERWRITE; + handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE; aux_head = handle->head; local_set(&rb->aux_head, aux_head); } else { + handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE; + aux_head = local_read(&rb->aux_head); local_add(size, &rb->aux_head); } - if (size || flags) { + if (size || handle->aux_flags) { /* * Only send RECORD_AUX if we have something useful to communicate */ - perf_event_aux_event(handle->event, aux_head, size, flags); + perf_event_aux_event(handle->event, aux_head, size, + handle->aux_flags); } aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); @@ -446,7 +458,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, } if (wakeup) { - if (truncated) + if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) handle->event->pending_disable = 1; perf_output_wakeup(handle); } -- cgit v1.2.3 From ae0c2d995d648d5165545d5e05e2869642009b38 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 20 Feb 2017 15:33:51 +0200 Subject: perf/core: Add a flag for partial AUX records The Intel PT driver needs to be able to communicate partial AUX transactions, that is, transactions with gaps in data for reasons other than no room left in the buffer (i.e. truncated transactions). Therefore, this condition does not imply a wakeup for the consumer. To this end, add a new "partial" AUX flag. 
Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Poirier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/20170220133352.17995-4-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 1 + kernel/events/ring_buffer.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index bec0aad0e15c..d09a9cd021b1 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -915,6 +915,7 @@ enum perf_callchain_context { */ #define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ #define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 9654e55c38d6..2831480c63a2 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -424,8 +424,8 @@ err: */ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) { + bool wakeup = !!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED); struct ring_buffer *rb = handle->rb; - bool wakeup = !!handle->aux_flags; unsigned long aux_head; /* in overwrite mode, driver provides aux_head via handle */ -- cgit v1.2.3 From ee368428aac96d94a9804b9109a81355451c3cd9 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 20 Feb 2017 15:33:52 +0200 Subject: perf/x86/intel/pt: Handle VMX better Since commit: 1c5ac21a0e ("perf/x86/intel/pt: Don't die on VMXON") ... PT events depend on re-scheduling to get enabled after a VMX session has taken place. 
This is, in particular, a problem for CPU context events, which don't normally get re-scheduled, unless there is a reason. This patch changes the VMX handling so that PT event gets re-enabled when VMX root mode exits. Also, notify the user when there's a gap in PT data due to VMX root mode by flagging AUX records as partial. In combination with vmm_exclusive=0 parameter of the kvm_intel driver, this will result in trace gaps only for the duration of the guest's timeslices. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/20170220133352.17995-5-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/pt.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 0218728be37a..354e9ff2978c 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -411,6 +411,7 @@ static u64 pt_config_filters(struct perf_event *event) static void pt_config(struct perf_event *event) { + struct pt *pt = this_cpu_ptr(&pt_ctx); u64 reg; if (!event->hw.itrace_started) { @@ -429,11 +430,15 @@ static void pt_config(struct perf_event *event) reg |= (event->attr.config & PT_CONFIG_MASK); event->hw.config = reg; - wrmsrl(MSR_IA32_RTIT_CTL, reg); + if (READ_ONCE(pt->vmx_on)) + perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL); + else + wrmsrl(MSR_IA32_RTIT_CTL, reg); } static void pt_config_stop(struct perf_event *event) { + struct pt *pt = this_cpu_ptr(&pt_ctx); u64 ctl = READ_ONCE(event->hw.config); /* may be already stopped by a PMI */ @@ -441,7 +446,8 @@ static void pt_config_stop(struct perf_event *event) return; ctl &= ~RTIT_CTL_TRACEEN; - wrmsrl(MSR_IA32_RTIT_CTL, ctl); + if 
(!READ_ONCE(pt->vmx_on)) + wrmsrl(MSR_IA32_RTIT_CTL, ctl); WRITE_ONCE(event->hw.config, ctl); @@ -1174,12 +1180,6 @@ void intel_pt_interrupt(void) if (!READ_ONCE(pt->handle_nmi)) return; - /* - * If VMX is on and PT does not support it, don't touch anything. - */ - if (READ_ONCE(pt->vmx_on)) - return; - if (!event) return; @@ -1239,12 +1239,19 @@ void intel_pt_handle_vmx(int on) local_irq_save(flags); WRITE_ONCE(pt->vmx_on, on); - if (on) { - /* prevent pt_config_stop() from writing RTIT_CTL */ - event = pt->handle.event; - if (event) - event->hw.config = 0; - } + /* + * If an AUX transaction is in progress, it will contain + * gap(s), so flag it PARTIAL to inform the user. + */ + event = pt->handle.event; + if (event) + perf_aux_output_flag(&pt->handle, + PERF_AUX_FLAG_PARTIAL); + + /* Turn PTs back on */ + if (!on && event) + wrmsrl(MSR_IA32_RTIT_CTL, event->hw.config); + local_irq_restore(flags); } EXPORT_SYMBOL_GPL(intel_pt_handle_vmx); @@ -1259,9 +1266,6 @@ static void pt_event_start(struct perf_event *event, int mode) struct pt *pt = this_cpu_ptr(&pt_ctx); struct pt_buffer *buf; - if (READ_ONCE(pt->vmx_on)) - return; - buf = perf_aux_output_begin(&pt->handle, event); if (!buf) goto fail_stop; -- cgit v1.2.3 From 74beb09a80ca248ba9d54dede80aebc2557ccb30 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 15 Mar 2017 17:40:19 -0300 Subject: tools headers: Sync {tools/,}arch/x86/include/asm/cpufeatures.h We use those in tools/arch/x86/lib/mem{cpy,set}_64.S, in turn used in the 'perf bench mem' benchmarks. 
The changes in the following csets are not relevant for this use case, but let's sync it to silence the diff detector in the tools build system: 78d1b296843a ("x86/cpu: Add X86_FEATURE_CPUID") 3bba73b1b7a8 ("x86/cpufeature: Move RING3MWAIT feature to avoid conflicts") Cc: Borislav Petkov Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Gleixner Link: http://lkml.kernel.org/n/tip-nsqxpyzcv4ywesikhhhrgfgc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 293149a1c6a1..4e7772387c6e 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -100,7 +100,7 @@ #define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ #define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ +#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ @@ -186,7 +186,7 @@ * * Reuse free bits when adding new feature flags! 
*/ - +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ @@ -321,5 +321,4 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ - #endif /* _ASM_X86_CPUFEATURES_H */ -- cgit v1.2.3 From 48d02a1d5c137d362defd11a5d57d0af4a75a983 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 23 Feb 2017 15:46:34 -0800 Subject: perf script: Add 'brstackinsn' for branch stacks Implement printing instruction sequences as hex dump for branch stacks. This relies on the x86 instruction decoder used by the PT decoder to find the lengths of instructions to dump them individually. This is good enough for pattern matching. This allows studying hot paths for individual samples, together with branch misprediction and cycle count / IPC information if available (on Skylake systems). % perf record -b ... % perf script -F brstackinsn ... read_hpet+67: ffffffff9905b843 insn: 74 ea # PRED ffffffff9905b82f insn: 85 c9 ffffffff9905b831 insn: 74 12 ffffffff9905b833 insn: f3 90 ffffffff9905b835 insn: 48 8b 0f ffffffff9905b838 insn: 48 89 ca ffffffff9905b83b insn: 48 c1 ea 20 ffffffff9905b83f insn: 39 f2 ffffffff9905b841 insn: 89 d0 ffffffff9905b843 insn: 74 ea # PRED Only works when no special branch filters are specified. Occasionally the path does not reach up to the sample IP, as the LBRs may be frozen before executing a final jump. In this case we print a special message. The instruction dumper piggybacks on the existing infrastructure from the Intel PT decoder. An earlier iteration of this patch relied on a disassembler, but this version only uses the existing instruction decoder. 
Committer note: Added hint about how to get suitable perf.data files for use with '-F brstackinsn': $ perf record usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.018 MB perf.data (8 samples) ] $ $ perf script -F brstackinsn Display of branch stack assembler requested, but non all-branch filter set Hint: run 'perf record -b ...' $ Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Michael Ellerman Link: http://lkml.kernel.org/r/20170223234634.583-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 13 +- tools/perf/builtin-script.c | 264 ++++++++++++++++++++- tools/perf/util/Build | 1 + tools/perf/util/dump-insn.c | 14 ++ tools/perf/util/dump-insn.h | 22 ++ .../util/intel-pt-decoder/intel-pt-insn-decoder.c | 24 ++ 6 files changed, 327 insertions(+), 11 deletions(-) create mode 100644 tools/perf/util/dump-insn.c create mode 100644 tools/perf/util/dump-insn.h diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 62c9b0c77a3a..cb0eda3925e6 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -116,7 +116,7 @@ OPTIONS --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, iregs, brstack, brstacksym, flags, bpf-output, + srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -189,15 +189,20 @@ OPTIONS i.e., -F "" is not allowed. 
The brstack output includes branch related information with raw addresses using the - /v/v/v/v/ syntax in the following order: + /v/v/v/v/cycles syntax in the following order: FROM: branch source instruction TO : branch target instruction M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported X/- : X=branch inside a transactional region, -=not in transaction region or not supported A/- : A=TSX abort entry, -=not aborted region or not supported + cycles The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible. + When brstackinsn is specified the full assembler sequences of branch sequences for each sample + are printed. This is the full execution path leading to the sample. This is only supported when the + sample was recorded with perf record -b or -j any. + -k:: --vmlinux=:: vmlinux pathname @@ -302,6 +307,10 @@ include::itrace.txt[] stop time is not given (i.e, time string is 'x.y,') then analysis goes to end of file. +--max-blocks:: + Set the maximum number of program blocks to print with brstackinsn for + each sample. 
+ SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 66d62c98dff9..c98e16689b57 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -28,6 +28,7 @@ #include #include "asm/bug.h" #include "util/mem-events.h" +#include "util/dump-insn.h" static char const *script_name; static char const *generate_script_lang; @@ -42,6 +43,7 @@ static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); static struct perf_stat_config stat_config; +static int max_blocks; unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; @@ -69,6 +71,7 @@ enum perf_output_field { PERF_OUTPUT_CALLINDENT = 1U << 20, PERF_OUTPUT_INSN = 1U << 21, PERF_OUTPUT_INSNLEN = 1U << 22, + PERF_OUTPUT_BRSTACKINSN = 1U << 23, }; struct output_option { @@ -98,6 +101,7 @@ struct output_option { {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT}, {.str = "insn", .field = PERF_OUTPUT_INSN}, {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN}, + {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN}, }; /* default set to maintain compatibility with current format */ @@ -292,7 +296,13 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, "selected. 
Hence, no address to lookup the source line number.\n"); return -EINVAL; } - + if (PRINT_FIELD(BRSTACKINSN) && + !(perf_evlist__combined_branch_type(session->evlist) & + PERF_SAMPLE_BRANCH_ANY)) { + pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" + "Hint: run 'perf record -b ...'\n"); + return -EINVAL; + } if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", PERF_OUTPUT_TID|PERF_OUTPUT_PID)) @@ -546,6 +556,233 @@ static void print_sample_brstacksym(struct perf_sample *sample, } } +#define MAXBB 16384UL + +static int grab_bb(u8 *buffer, u64 start, u64 end, + struct machine *machine, struct thread *thread, + bool *is64bit, u8 *cpumode, bool last) +{ + long offset, len; + struct addr_location al; + bool kernel; + + if (!start || !end) + return 0; + + kernel = machine__kernel_ip(machine, start); + if (kernel) + *cpumode = PERF_RECORD_MISC_KERNEL; + else + *cpumode = PERF_RECORD_MISC_USER; + + /* + * Block overlaps between kernel and user. + * This can happen due to ring filtering + * On Intel CPUs the entry into the kernel is filtered, + * but the exit is not. Let the caller patch it up. 
+ */ + if (kernel != machine__kernel_ip(machine, end)) { + printf("\tblock %" PRIx64 "-%" PRIx64 " transfers between kernel and user\n", + start, end); + return -ENXIO; + } + + memset(&al, 0, sizeof(al)); + if (end - start > MAXBB - MAXINSN) { + if (last) + printf("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n", start, end); + else + printf("\tblock %" PRIx64 "-%" PRIx64 " (%" PRIu64 ") too long to dump\n", start, end, end - start); + return 0; + } + + thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al); + if (!al.map || !al.map->dso) { + printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); + return 0; + } + if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR) { + printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); + return 0; + } + + /* Load maps to ensure dso->is_64_bit has been updated */ + map__load(al.map); + + offset = al.map->map_ip(al.map, start); + len = dso__data_read_offset(al.map->dso, machine, offset, (u8 *)buffer, + end - start + MAXINSN); + + *is64bit = al.map->dso->is_64_bit; + if (len <= 0) + printf("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n", + start, end); + return len; +} + +static void print_jump(uint64_t ip, struct branch_entry *en, + struct perf_insn *x, u8 *inbuf, int len, + int insn) +{ + printf("\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", + ip, + dump_insn(x, ip, inbuf, len, NULL), + en->flags.predicted ? " PRED" : "", + en->flags.mispred ? " MISPRED" : "", + en->flags.in_tx ? " INTX" : "", + en->flags.abort ? 
" ABORT" : ""); + if (en->flags.cycles) { + printf(" %d cycles", en->flags.cycles); + if (insn) + printf(" %.2f IPC", (float)insn / en->flags.cycles); + } + putchar('\n'); +} + +static void print_ip_sym(struct thread *thread, u8 cpumode, int cpu, + uint64_t addr, struct symbol **lastsym, + struct perf_event_attr *attr) +{ + struct addr_location al; + int off; + + memset(&al, 0, sizeof(al)); + + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); + if (!al.map) + thread__find_addr_map(thread, cpumode, MAP__VARIABLE, + addr, &al); + if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end) + return; + + al.cpu = cpu; + al.sym = NULL; + if (al.map) + al.sym = map__find_symbol(al.map, al.addr); + + if (!al.sym) + return; + + if (al.addr < al.sym->end) + off = al.addr - al.sym->start; + else + off = al.addr - al.map->start - al.sym->start; + printf("\t%s", al.sym->name); + if (off) + printf("%+d", off); + putchar(':'); + if (PRINT_FIELD(SRCLINE)) + map__fprintf_srcline(al.map, al.addr, "\t", stdout); + putchar('\n'); + *lastsym = al.sym; +} + +static void print_sample_brstackinsn(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr, + struct machine *machine) +{ + struct branch_stack *br = sample->branch_stack; + u64 start, end; + int i, insn, len, nr, ilen; + struct perf_insn x; + u8 buffer[MAXBB]; + unsigned off; + struct symbol *lastsym = NULL; + + if (!(br && br->nr)) + return; + nr = br->nr; + if (max_blocks && nr > max_blocks + 1) + nr = max_blocks + 1; + + x.thread = thread; + x.cpu = sample->cpu; + + putchar('\n'); + + /* Handle first from jump, of which we don't know the entry. 
*/ + len = grab_bb(buffer, br->entries[nr-1].from, + br->entries[nr-1].from, + machine, thread, &x.is64bit, &x.cpumode, false); + if (len > 0) { + print_ip_sym(thread, x.cpumode, x.cpu, + br->entries[nr - 1].from, &lastsym, attr); + print_jump(br->entries[nr - 1].from, &br->entries[nr - 1], + &x, buffer, len, 0); + } + + /* Print all blocks */ + for (i = nr - 2; i >= 0; i--) { + if (br->entries[i].from || br->entries[i].to) + pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i, + br->entries[i].from, + br->entries[i].to); + start = br->entries[i + 1].to; + end = br->entries[i].from; + + len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); + /* Patch up missing kernel transfers due to ring filters */ + if (len == -ENXIO && i > 0) { + end = br->entries[--i].from; + pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end); + len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); + } + if (len <= 0) + continue; + + insn = 0; + for (off = 0;; off += ilen) { + uint64_t ip = start + off; + + print_ip_sym(thread, x.cpumode, x.cpu, ip, &lastsym, attr); + if (ip == end) { + print_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn); + break; + } else { + printf("\t%016" PRIx64 "\t%s\n", ip, + dump_insn(&x, ip, buffer + off, len - off, &ilen)); + if (ilen == 0) + break; + insn++; + } + } + } + + /* + * Hit the branch? In this case we are already done, and the target + * has not been executed yet. 
+ */ + if (br->entries[0].from == sample->ip) + return; + if (br->entries[0].flags.abort) + return; + + /* + * Print final block upto sample + */ + start = br->entries[0].to; + end = sample->ip; + len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true); + print_ip_sym(thread, x.cpumode, x.cpu, start, &lastsym, attr); + if (len <= 0) { + /* Print at least last IP if basic block did not work */ + len = grab_bb(buffer, sample->ip, sample->ip, + machine, thread, &x.is64bit, &x.cpumode, false); + if (len <= 0) + return; + + printf("\t%016" PRIx64 "\t%s\n", sample->ip, + dump_insn(&x, sample->ip, buffer, len, NULL)); + return; + } + for (off = 0; off <= end - start; off += ilen) { + printf("\t%016" PRIx64 "\t%s\n", start + off, + dump_insn(&x, start + off, buffer + off, len - off, &ilen)); + if (ilen == 0) + break; + } +} static void print_sample_addr(struct perf_sample *sample, struct thread *thread, @@ -632,7 +869,9 @@ static void print_sample_callindent(struct perf_sample *sample, } static void print_insn(struct perf_sample *sample, - struct perf_event_attr *attr) + struct perf_event_attr *attr, + struct thread *thread, + struct machine *machine) { if (PRINT_FIELD(INSNLEN)) printf(" ilen: %d", sample->insn_len); @@ -643,12 +882,15 @@ static void print_insn(struct perf_sample *sample, for (i = 0; i < sample->insn_len; i++) printf(" %02x", (unsigned char)sample->insn[i]); } + if (PRINT_FIELD(BRSTACKINSN)) + print_sample_brstackinsn(sample, thread, attr, machine); } static void print_sample_bts(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, - struct addr_location *al) + struct addr_location *al, + struct machine *machine) { struct perf_event_attr *attr = &evsel->attr; bool print_srcline_last = false; @@ -689,7 +931,7 @@ static void print_sample_bts(struct perf_sample *sample, if (print_srcline_last) map__fprintf_srcline(al->map, al->addr, "\n ", stdout); - print_insn(sample, attr); + print_insn(sample, attr, thread, 
machine); printf("\n"); } @@ -872,7 +1114,8 @@ static size_t data_src__printf(u64 data_src) static void process_event(struct perf_script *script, struct perf_sample *sample, struct perf_evsel *evsel, - struct addr_location *al) + struct addr_location *al, + struct machine *machine) { struct thread *thread = al->thread; struct perf_event_attr *attr = &evsel->attr; @@ -899,7 +1142,7 @@ static void process_event(struct perf_script *script, print_sample_flags(sample->flags); if (is_bts_event(attr)) { - print_sample_bts(sample, evsel, thread, al); + print_sample_bts(sample, evsel, thread, al, machine); return; } @@ -937,7 +1180,7 @@ static void process_event(struct perf_script *script, if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) print_sample_bpf_output(sample); - print_insn(sample, attr); + print_insn(sample, attr, thread, machine); printf("\n"); } @@ -1047,7 +1290,7 @@ static int process_sample_event(struct perf_tool *tool, if (scripting_ops) scripting_ops->process_event(event, sample, evsel, &al); else - process_event(scr, sample, evsel, &al); + process_event(scr, sample, evsel, &al, machine); out_put: addr_location__put(&al); @@ -2191,7 +2434,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Valid types: hw,sw,trace,raw. 
" "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "addr,symoff,period,iregs,brstack,brstacksym,flags," - "bpf-output,callindent,insn,insnlen", parse_output_fields), + "bpf-output,callindent,insn,insnlen,brstackinsn", + parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", @@ -2222,6 +2466,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, "Show namespace events (if recorded)"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), + OPT_INTEGER(0, "max-blocks", &max_blocks, + "Maximum number of code blocks to dump with brstackinsn"), OPT_BOOLEAN(0, "ns", &nanosecs, "Use 9 decimal places when displaying time"), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 2ea5ee179a3b..fb4f42f1bb38 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -82,6 +82,7 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-$(CONFIG_AUXTRACE) += intel-bts.o libperf-y += parse-branch-options.o +libperf-y += dump-insn.o libperf-y += parse-regs-options.o libperf-y += term.o libperf-y += help-unknown-cmd.o diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c new file mode 100644 index 000000000000..ffbdb19f05d0 --- /dev/null +++ b/tools/perf/util/dump-insn.c @@ -0,0 +1,14 @@ +#include +#include "dump-insn.h" + +/* Fallback code */ + +__weak +const char *dump_insn(struct perf_insn *x __maybe_unused, + u64 ip __maybe_unused, u8 *inbuf __maybe_unused, + int inlen __maybe_unused, int *lenp) +{ + if (lenp) + *lenp = 0; + return "?"; +} diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h new file mode 100644 index 000000000000..90fb115981cf --- /dev/null +++ 
b/tools/perf/util/dump-insn.h @@ -0,0 +1,22 @@ +#ifndef __PERF_DUMP_INSN_H +#define __PERF_DUMP_INSN_H 1 + +#define MAXINSN 15 + +#include + +struct thread; + +struct perf_insn { + /* Initialized by callers: */ + struct thread *thread; + u8 cpumode; + bool is64bit; + int cpu; + /* Temporary */ + char out[256]; +}; + +const char *dump_insn(struct perf_insn *x, u64 ip, + u8 *inbuf, int inlen, int *lenp); +#endif diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 55b6250350d7..a5f35b21172f 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -26,6 +26,7 @@ #include "insn.c" #include "intel-pt-insn-decoder.h" +#include "dump-insn.h" #if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN #error Instruction buffer size too small @@ -179,6 +180,29 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, return 0; } +const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused, + u8 *inbuf, int inlen, int *lenp) +{ + struct insn insn; + int n, i; + int left; + + insn_init(&insn, inbuf, inlen, x->is64bit); + insn_get_length(&insn); + if (!insn_complete(&insn) || insn.length > inlen) + return ""; + if (lenp) + *lenp = insn.length; + left = sizeof(x->out); + n = snprintf(x->out, left, "insn: "); + left -= n; + for (i = 0; i < insn.length; i++) { + n += snprintf(x->out + n, left, "%02x ", inbuf[i]); + left -= n; + } + return x->out; +} + const char *branch_name[] = { [INTEL_PT_OP_OTHER] = "Other", [INTEL_PT_OP_CALL] = "Call", -- cgit v1.2.3 From 61f35d750683b21e9e3836e309195c79c1daed74 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 16 Mar 2017 12:42:02 -0300 Subject: uprobes: Default UPROBES_EVENTS to Y As it is already turned on by most distros, so just flip the default to Y. 
Suggested-by: Alexei Starovoitov Acked-by: David Ahern Acked-by: Ingo Molnar Acked-by: Masami Hiramatsu Acked-by: Namhyung Kim Acked-by: Srikar Dronamraju Acked-by: Wang Nan Cc: Alexander Shishkin Cc: Anton Blanchard Cc: David Miller Cc: Hemant Kumar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170316005817.GA6805@ast-mbp.thefacebook.com Signed-off-by: Arnaldo Carvalho de Melo --- kernel/trace/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d4a06e714645..9619b5768e4b 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -455,7 +455,7 @@ config UPROBE_EVENTS select UPROBES select PROBE_EVENTS select TRACING - default n + default y help This allows the user to add tracing events on top of userspace dynamic events (similar to tracepoints) on the fly via the trace -- cgit v1.2.3 From 249eed53152167c64c6dc66fa269a1d8b415a7b4 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 17 Mar 2017 13:53:42 +0800 Subject: perf lock: Subcommands should include common options When I use -i option for report subcommand, it doesn't accept it. We need to add common options using the OPT_PARENT macro. perf lock report -i lock_perf.data Error: unknown switch `i' Usage: perf lock report [] -f, --force don't complain, do it -k, --key key for sorting ...
Signed-off-by: Changbin Du Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170317055342.8284-1-changbin.du@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index d750ccaa978f..4ce815bb360d 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -948,27 +948,30 @@ static int __cmd_record(int argc, const char **argv) int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) { + const struct option lock_options[] = { + OPT_STRING('i', "input", &input_name, "file", "input file name"), + OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), + OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), + OPT_END() + }; + const struct option info_options[] = { OPT_BOOLEAN('t', "threads", &info_threads, "dump thread list in perf.data"), OPT_BOOLEAN('m', "map", &info_map, "map of lock instances (address:name table)"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), - OPT_END() - }; - const struct option lock_options[] = { - OPT_STRING('i', "input", &input_name, "file", "input file name"), - OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), - OPT_END() + OPT_PARENT(lock_options) }; + const struct option report_options[] = { OPT_STRING('k', "key", &sort_key, "acquired", "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), /* TODO: type */ - OPT_END() + OPT_PARENT(lock_options) }; + const char * const info_usage[] = { "perf lock info []", NULL -- cgit v1.2.3 From b40e36121e23031f1e8916a70110ffc841230670 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 17 Mar 2017 11:16:02 -0300 Subject: perf 
lock: Make 'f' part of the common 'lock_options' All options need the -f/--force option, so move it to the array referenced via OPT_PARENT. Cc: Adrian Hunter Cc: Changbin Du Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-unbeionpi58rioh4e9w8kp4n@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 4ce815bb360d..e992e7206993 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -952,6 +952,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) OPT_STRING('i', "input", &input_name, "file", "input file name"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), + OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_END() }; @@ -960,14 +961,12 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) "dump thread list in perf.data"), OPT_BOOLEAN('m', "map", &info_map, "map of lock instances (address:name table)"), - OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_PARENT(lock_options) }; const struct option report_options[] = { OPT_STRING('k', "key", &sort_key, "acquired", "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), - OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), /* TODO: type */ OPT_PARENT(lock_options) }; -- cgit v1.2.3 From f371594a514a4d3bc8fca6913ce9b5d9e325095d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 17 Mar 2017 11:23:21 -0300 Subject: perf timechart: Use OPT_PARENT for common options Move -T/--tasks-only and -P/--power-only options to a separate options array that then gets referenced via OPT_PARENT from the 'perf timechart' and 'perf timechart record' option arrays. 
Cc: Adrian Hunter Cc: Changbin Du Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-j80lol9wj1i6556ibh48iebe@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-timechart.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index e7eaa298d34a..fbd7c6c695b8 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1933,6 +1933,11 @@ int cmd_timechart(int argc, const char **argv, .merge_dist = 1000, }; const char *output_name = "output.svg"; + const struct option timechart_common_options[] = { + OPT_BOOLEAN('P', "power-only", &tchart.power_only, "output power data only"), + OPT_BOOLEAN('T', "tasks-only", &tchart.tasks_only, "output processes data only"), + OPT_END() + }; const struct option timechart_options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), OPT_STRING('o', "output", &output_name, "file", "output file name"), @@ -1940,9 +1945,6 @@ int cmd_timechart(int argc, const char **argv, OPT_CALLBACK(0, "highlight", NULL, "duration or task name", "highlight tasks. Pass duration in ns or process name.", parse_highlight), - OPT_BOOLEAN('P', "power-only", &tchart.power_only, "output power data only"), - OPT_BOOLEAN('T', "tasks-only", &tchart.tasks_only, - "output processes data only"), OPT_CALLBACK('p', "process", NULL, "process", "process selector. 
Pass a pid or process name.", parse_process), @@ -1962,22 +1964,18 @@ int cmd_timechart(int argc, const char **argv, "merge events that are merge-dist us apart", parse_time), OPT_BOOLEAN('f', "force", &tchart.force, "don't complain, do it"), - OPT_END() + OPT_PARENT(timechart_common_options), }; const char * const timechart_subcommands[] = { "record", NULL }; const char *timechart_usage[] = { "perf timechart [] {record}", NULL }; - const struct option timechart_record_options[] = { - OPT_BOOLEAN('P', "power-only", &tchart.power_only, "output power data only"), - OPT_BOOLEAN('T', "tasks-only", &tchart.tasks_only, - "output processes data only"), OPT_BOOLEAN('I', "io-only", &tchart.io_only, "record only IO data"), OPT_BOOLEAN('g', "callchain", &tchart.with_backtrace, "record callchain"), - OPT_END() + OPT_PARENT(timechart_common_options), }; const char * const timechart_record_usage[] = { "perf timechart record []", -- cgit v1.2.3 From b9835a90084bd3cc45d7ab80c37f282046bc13d3 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 16 Mar 2017 18:41:59 +0200 Subject: tools lib api fs: Introduce sysfs__read_bool Will be used in an upcoming patch warning about PERF_RECORD_AUX data gaps, reading the "module/kvm_intel/parameters/vmm_exclusive" sysfs entry.
Signed-off-by: Alexander Shishkin Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Vince Weaver Link: http://lkml.kernel.org/r/8760j941ig.fsf@ashishki-desk.ger.corp.intel.com [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/fs.c | 29 +++++++++++++++++++++++++++++ tools/lib/api/fs/fs.h | 1 + 2 files changed, 30 insertions(+) diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 4b6bfc43cccf..809c7721cd24 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -439,6 +439,35 @@ int sysfs__read_str(const char *entry, char **buf, size_t *sizep) return filename__read_str(path, buf, sizep); } +int sysfs__read_bool(const char *entry, bool *value) +{ + char *buf; + size_t size; + int ret; + + ret = sysfs__read_str(entry, &buf, &size); + if (ret < 0) + return ret; + + switch (buf[0]) { + case '1': + case 'y': + case 'Y': + *value = true; + break; + case '0': + case 'n': + case 'N': + *value = false; + break; + default: + ret = -1; + } + + free(buf); + + return ret; +} int sysctl__read_int(const char *sysctl, int *value) { char path[PATH_MAX]; diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index 6b332dc74498..956c21127d1e 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -37,4 +37,5 @@ int sysctl__read_int(const char *sysctl, int *value); int sysfs__read_int(const char *entry, int *value); int sysfs__read_ull(const char *entry, unsigned long long *value); int sysfs__read_str(const char *entry, char **buf, size_t *sizep); +int sysfs__read_bool(const char *entry, bool *value); #endif /* __API_FS__ */ -- cgit v1.2.3 From 38a33f07122f6e6194bf5402c0cd057d1cc50be8 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 16 Mar 2017 18:41:59 +0200 Subject: tools include: Sync {,tools/}include/uapi/linux/perf_event.h To get PERF_AUX_FLAG_PARTIAL, introduced in: ae0c2d995d64 ("perf/core: Add a flag for partial AUX records") and that will be 
used to warn the user about gaps in AUX records due to VMX being used in KVM guests. Silences the kernel/tools file copy detector: Warning: include/uapi/linux/perf_event.h differs from kernel Signed-off-by: Alexander Shishkin Cc: Adrian Hunter Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Vince Weaver Link: http://lkml.kernel.org/r/8760j941ig.fsf@ashishki-desk.ger.corp.intel.com [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index bec0aad0e15c..d09a9cd021b1 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -915,6 +915,7 @@ enum perf_callchain_context { */ #define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ #define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) -- cgit v1.2.3 From 05a1f47ed47a83736aca117aeee96e926cc0dfd0 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 16 Mar 2017 18:41:59 +0200 Subject: perf tools: Handle partial AUX records and print a warning This patch decodes the 'partial' flag in AUX records and prints a warning to the user, so that they don't have to guess why their PT traces contain gaps (or missing altogether): Warning: AUX data had gaps in it 8 times out of 8! Are you running a KVM guest in the background? Trying to be even more helpful, we will detect if the user's kvm driver sets up exclusive VMX root mode for the entire lifespan of the kvm process: Reloading kvm_intel module with vmm_exclusive=0 will reduce the gaps to only guest's timeslices. Note however, that you'll still have gaps in cpu-wide traces even with vmm_exclusive=0, but the number of gaps will be below 100% (as opposed to the above example). 
Currently this is the only reason for partial records. Signed-off-by: Alexander Shishkin Cc: Adrian Hunter Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Vince Weaver Link: http://lkml.kernel.org/r/8760j941ig.fsf@ashishki-desk.ger.corp.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 5 +++-- tools/perf/util/event.h | 1 + tools/perf/util/session.c | 27 ++++++++++++++++++++++++--- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 33fc2e9c0b0c..76b9c6bc8369 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1288,11 +1288,12 @@ int perf_event__process_exit(struct perf_tool *tool __maybe_unused, size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp) { - return fprintf(fp, " offset: %#"PRIx64" size: %#"PRIx64" flags: %#"PRIx64" [%s%s]\n", + return fprintf(fp, " offset: %#"PRIx64" size: %#"PRIx64" flags: %#"PRIx64" [%s%s%s]\n", event->aux.aux_offset, event->aux.aux_size, event->aux.flags, event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "", - event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : ""); + event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "", + event->aux.flags & PERF_AUX_FLAG_PARTIAL ? 
"P" : ""); } size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index e1d8166ebbd5..eb7a7b200737 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -276,6 +276,7 @@ struct events_stats { u64 total_lost; u64 total_lost_samples; u64 total_aux_lost; + u64 total_aux_partial; u64 total_invalid_chains; u32 nr_events[PERF_RECORD_HEADER_MAX]; u32 nr_non_filtered_samples; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index ae42e742d461..24259bc2c598 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -1260,9 +1261,12 @@ static int machines__deliver_event(struct machines *machines, case PERF_RECORD_UNTHROTTLE: return tool->unthrottle(tool, event, sample, machine); case PERF_RECORD_AUX: - if (tool->aux == perf_event__process_aux && - (event->aux.flags & PERF_AUX_FLAG_TRUNCATED)) - evlist->stats.total_aux_lost += 1; + if (tool->aux == perf_event__process_aux) { + if (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) + evlist->stats.total_aux_lost += 1; + if (event->aux.flags & PERF_AUX_FLAG_PARTIAL) + evlist->stats.total_aux_partial += 1; + } return tool->aux(tool, event, sample, machine); case PERF_RECORD_ITRACE_START: return tool->itrace_start(tool, event, sample, machine); @@ -1555,6 +1559,23 @@ static void perf_session__warn_about_errors(const struct perf_session *session) stats->nr_events[PERF_RECORD_AUX]); } + if (session->tool->aux == perf_event__process_aux && + stats->total_aux_partial != 0) { + bool vmm_exclusive = false; + + (void)sysfs__read_bool("module/kvm_intel/parameters/vmm_exclusive", + &vmm_exclusive); + + ui__warning("AUX data had gaps in it %" PRIu64 " times out of %u!\n\n" + "Are you running a KVM guest in the background?%s\n\n", + stats->total_aux_partial, + stats->nr_events[PERF_RECORD_AUX], + vmm_exclusive ? 
+ "\nReloading kvm_intel module with vmm_exclusive=0\n" + "will reduce the gaps to only guest's timeslices." : + ""); + } + if (stats->nr_unknown_events != 0) { ui__warning("Found %u unknown events!\n\n" "Is this an older tool processing a perf.data " -- cgit v1.2.3 From f2ba3ee08eb9af666c6f948ccbb866ae6d3c9107 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 15 Mar 2017 17:40:19 -0300 Subject: tools headers: Sync {tools/,}arch/x86/include/asm/cpufeatures.h We use those in tools/arch/x86/lib/mem{cpy,set}_64.S, in turn used in the 'perf bench mem' benchmarks. The changes in the following csets are not relevant for this usecase, but lets sync it to silence the diff detector in the tools build system: 6fb895692a03 ("x86/cpufeature: Add 5-level paging detection") Link: http://lkml.kernel.org/n/tip-nsqxpyzcv4ywesikhhhrgfgc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 4e7772387c6e..b04bb6dfed7f 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -289,7 +289,8 @@ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ -- cgit v1.2.3 From eedb3c44313ae0785e1dc62c6910557953887388 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 15 Mar 2017 17:40:19 -0300 Subject: tools headers: Sync 
{tools/,}arch/arm{64}/include/uapi/asm/kvm.h The changes in the following csets are not relevant for 'perf kvm' usage but lets sync it to silence the diff detector in the tools build system: e96a006cb066 ("KVM: arm/arm64: vgic: Implement KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO ioctl") d017d7b0bd7a ("KVM: arm/arm64: vgic: Implement VGICv3 CPU interface access") 94574c9488e2 ("KVM: arm/arm64: vgic: Add distributor and redistributor access") Cc: Hemant Kumar Cc: Marc Zyngier Cc: Vijaya Kumar K Cc: Yunlong Song Link: http://lkml.kernel.org/n/tip-nsqxpyzcv4ywesikhhhrgfgc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm/include/uapi/asm/kvm.h | 13 +++++++++++++ tools/arch/arm64/include/uapi/asm/kvm.h | 13 +++++++++++++ 2 files changed, 26 insertions(+) diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index af05f8e0903e..6ebd3e6a1fd1 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -181,10 +181,23 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32 +#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \ + (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 +#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ + (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff +#define VGIC_LEVEL_INFO_LINE_LEVEL 0 + #define 
KVM_DEV_ARM_VGIC_CTRL_INIT 0 /* KVM_IRQ_LINE irq field index values */ diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 3051f86a9b5f..c2860358ae3e 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -201,10 +201,23 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32 +#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \ + (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 +#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ + (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff +#define VGIC_LEVEL_INFO_LINE_LEVEL 0 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 /* Device Control API on vcpu fd */ -- cgit v1.2.3 From efc9c05681c589ed942bf8c55a774853938ca8d4 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 20 Mar 2017 18:07:18 +0530 Subject: perf stat: Correct --no-aggr description Description of --no-aggr in perf-stat man page is outdated. --no-aggr can also be used while profiling specific set of cpus. 
For ex, $ perf stat -e cycles,instructions -C 1-2 --no-aggr -- sleep 1 Performance counter stats for 'CPU(s) 1-2': CPU1 5,94,92,795 cycles CPU2 2,69,72,403 cycles CPU1 2,02,08,327 instructions # 0.34 insn per cycle CPU2 73,17,123 instructions # 0.12 insn per cycle 1.000989132 seconds time elapsed Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1490013438-5713-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index aecf2a87e7d6..978548138624 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -94,8 +94,7 @@ to activate system-wide monitoring. Default is to count on all CPUs. -A:: --no-aggr:: -Do not aggregate counts across all monitored CPUs in system-wide mode (-a). -This option is only valid in system-wide mode. +Do not aggregate counts across all monitored CPUs. -n:: --null:: -- cgit v1.2.3 From f0a30dca5f84fe8048271799b56677ac2279de66 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 8 Mar 2017 12:29:07 +0530 Subject: perf probe: Fix concat_probe_trace_events '*ntevs' contains number of elements present in 'tevs' array. If there are no elements in array, 'tevs2' can be directly assigned to 'tevs' without allocating more space. So the condition should be '*ntevs == 0' not 'ntevs == 0'. 
Signed-off-by: Ravi Bangoria Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Peter Zijlstra Fixes: 42bba263eb58 ("perf probe: Allow wildcard for cached events") Link: http://lkml.kernel.org/r/20170308065908.4128-1-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index b19d17801beb..6740d6812691 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -3048,7 +3048,7 @@ concat_probe_trace_events(struct probe_trace_event **tevs, int *ntevs, struct probe_trace_event *new_tevs; int ret = 0; - if (ntevs == 0) { + if (*ntevs == 0) { *tevs = *tevs2; *ntevs = ntevs2; *tevs2 = NULL; -- cgit v1.2.3 From affa6c169bae8dc9cb1a2d070c7cd2fe1939c5b8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 15 Mar 2017 17:40:19 -0300 Subject: tools headers: Sync {tools/,}arch/powerpc/include/uapi/asm/kvm.h The changes in the following csets are not relevant for what is used in tools/perf/arch/powerpc/util/kvm-stat.c, but lets sync it to silence the diff detector in the tools build system: c92701322711 ("KVM: PPC: Book3S HV: Add userspace interfaces for POWER9 MMU") 17d48610ae0f ("KVM: PPC: Book 3S: XICS: Implement ICS P/Q states") Cc: Alexander Yarygin Cc: David Ahern Cc: Li Zhong Cc: Michael Ellerman Cc: Naveen N. 
Rao Cc: Paul Mackerras Cc: Scott Wood Cc: Srikar Dronamraju Link: http://lkml.kernel.org/n/tip-nsqxpyzcv4ywesikhhhrgfgc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/powerpc/include/uapi/asm/kvm.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 3603b6f51b11..4edbe4bb0e8b 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -413,6 +413,26 @@ struct kvm_get_htab_header { __u16 n_invalid; }; +/* For KVM_PPC_CONFIGURE_V3_MMU */ +struct kvm_ppc_mmuv3_cfg { + __u64 flags; + __u64 process_table; /* second doubleword of partition table entry */ +}; + +/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */ +#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */ +#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. */ + +/* For KVM_PPC_GET_RMMU_INFO */ +struct kvm_ppc_rmmu_info { + struct kvm_ppc_radix_geom { + __u8 page_shift; + __u8 level_bits[4]; + __u8 pad[3]; + } geometries[8]; + __u32 ap_encodings[8]; +}; + /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) @@ -613,5 +633,7 @@ struct kvm_get_htab_header { #define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) #define KVM_XICS_MASKED (1ULL << 41) #define KVM_XICS_PENDING (1ULL << 42) +#define KVM_XICS_PRESENTED (1ULL << 43) +#define KVM_XICS_QUEUED (1ULL << 44) #endif /* __LINUX_KVM_POWERPC_H */ -- cgit v1.2.3 From 2e1f8f7895731a8592d483a7364a23855843af17 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 7 Feb 2017 11:15:47 +0530 Subject: perf probe: Change MAX_CMDLEN There are many SDT markers in powerpc whose uprobe definition goes beyond current MAX_CMDLEN, especially when target filename is long and sdt marker has long list of arguments. 
For example, definition of sdt marker method__compile__end: 8@17 8@9 8@10 -4@8 8@7 -4@6 8@5 -4@4 1@37(28) from file /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.91-2.b14.fc22.ppc64/jre/lib/ppc64/server/libjvm.so is p:sdt_hotspot/method__compile__end /usr/lib/jvm/java-1.8.0-openjdk-\ 1.8.0.91-2.b14.fc22.ppc64/jre/lib/ppc64/server/libjvm.so:0x4c4e00\ arg1=%gpr17:u64 arg2=%gpr9:u64 arg3=%gpr10:u64 arg4=%gpr8:s32\ arg5=%gpr7:u64 arg6=%gpr6:s32 arg7=%gpr5:u64 arg8=%gpr4:s32\ arg9=+37(%gpr28):u8 'perf probe' fails with segfault for such markers. As the uprobe_events file accepts definitions up to 4094 characters (4096 - 2 (\n\0)), increase the value of MAX_CMDLEN to match that. Signed-off-by: Ravi Bangoria Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Alexis Berlemont Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Naveen N. Rao Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170207054547.3690-1-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 1 - tools/perf/util/probe-file.c | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 6740d6812691..e4b889444447 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -47,7 +47,6 @@ #include "probe-file.h" #include "session.h" -#define MAX_CMDLEN 256 #define PERFPROBE_GROUP "probe" bool probe_event_dry_run; /* Dry run flag */ diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 1542cd0d6799..c3c287125be5 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -28,7 +28,8 @@ #include "probe-file.h" #include "session.h" -#define MAX_CMDLEN 256 +/* 4096 - 2 ('\n' + '\0') */ +#define MAX_CMDLEN 4094 static void print_open_warning(int err, bool uprobe) { -- cgit v1.2.3 From 70946723eeb859466f026274b29c6196e39149c4 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 17 Mar 2017 16:16:32 +0800 Subject: perf probe: 
Return errno when not hitting any event On old perf, when using 'perf probe -d' to delete an inexistent event, it returns errno, eg, -bash-4.3# perf probe -d xxx || echo $? Info: Event "*:xxx" does not exist. Error: Failed to delete events. 255 But now perf_del_probe_events() will always set ret = 0, different from previous del_perf_probe_events(). After this, it returns errno again, eg, -bash-4.3# ./perf probe -d xxx || echo $? "xxx" does not hit any event. Error: Failed to delete events. 254 And it is more appropriate to return -ENOENT instead of -EPERM. Signed-off-by: Kefeng Wang Acked-by: Masami Hiramatsu Cc: Hanjun Guo Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Wang Nan Fixes: dddc7ee32fa1 ("perf probe: Fix an error when deleting probes successfully") Link: http://lkml.kernel.org/r/1489738592-61011-1-git-send-email-wangkefeng.wang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-probe.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 1fcebc31a508..51cdc230f6ca 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -442,9 +442,9 @@ static int perf_del_probe_events(struct strfilter *filter) } if (ret == -ENOENT && ret2 == -ENOENT) - pr_debug("\"%s\" does not hit any event.\n", str); - /* Note that this is silently ignored */ - ret = 0; + pr_warning("\"%s\" does not hit any event.\n", str); + else + ret = 0; error: if (kfd >= 0) -- cgit v1.2.3 From be88184b1c7054719296387c6063748fb48fa645 Mon Sep 17 00:00:00 2001 From: Alexis Berlemont Date: Wed, 14 Dec 2016 01:07:31 +0100 Subject: perf sdt: Add scanning of sdt probes arguments During a "perf buildid-cache --add" command, the section ".note.stapsdt" of the "added" binary is scanned in order to list the available SDT markers available in a binary. The parts containing the probes arguments were left unscanned. The whole section is now parsed; the probe arguments are extracted for later use. 
Signed-off-by: Alexis Berlemont Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Hemant Kumar Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lkml.kernel.org/r/20161214000732.1710-2-alexis.berlemont@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 25 +++++++++++++++++++++++-- tools/perf/util/symbol.h | 1 + 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 4e59ddeb4eda..0e660dba58ad 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1828,7 +1828,7 @@ void kcore_extract__delete(struct kcore_extract *kce) static int populate_sdt_note(Elf **elf, const char *data, size_t len, struct list_head *sdt_notes) { - const char *provider, *name; + const char *provider, *name, *args; struct sdt_note *tmp = NULL; GElf_Ehdr ehdr; GElf_Addr base_off = 0; @@ -1887,6 +1887,25 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len, goto out_free_prov; } + args = memchr(name, '\0', data + len - name); + + /* + * There is no argument if: + * - We reached the end of the note; + * - There is not enough room to hold a potential string; + * - The argument string is empty or just contains ':'. 
+ */ + if (args == NULL || data + len - args < 2 || + args[1] == ':' || args[1] == '\0') + tmp->args = NULL; + else { + tmp->args = strdup(++args); + if (!tmp->args) { + ret = -ENOMEM; + goto out_free_name; + } + } + if (gelf_getclass(*elf) == ELFCLASS32) { memcpy(&tmp->addr, &buf, 3 * sizeof(Elf32_Addr)); tmp->bit32 = true; @@ -1898,7 +1917,7 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len, if (!gelf_getehdr(*elf, &ehdr)) { pr_debug("%s : cannot get elf header.\n", __func__); ret = -EBADF; - goto out_free_name; + goto out_free_args; } /* Adjust the prelink effect : @@ -1923,6 +1942,8 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len, list_add_tail(&tmp->note_list, sdt_notes); return 0; +out_free_args: + free(tmp->args); out_free_name: free(tmp->name); out_free_prov: diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 6c358b7ed336..9222c7e702f3 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -351,6 +351,7 @@ int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb); struct sdt_note { char *name; /* name of the note*/ char *provider; /* provider name */ + char *args; bool bit32; /* whether the location is 32 bits? */ union { /* location, base and semaphore addrs */ Elf64_Addr a64[3]; -- cgit v1.2.3 From 3b1f8311f6963cd11a7d1efbcd2fd900d472ba5c Mon Sep 17 00:00:00 2001 From: Alexis Berlemont Date: Wed, 14 Dec 2016 01:07:32 +0100 Subject: perf probe: Add sdt probes arguments into the uprobe cmd string An sdt probe can be associated with arguments but they were not passed to the user probe tracing interface (uprobe_events); this patch adapts the sdt argument descriptors according to the uprobe input format. As the uprobe parser does not support scaled address mode, perf will skip arguments which cannot be adapted to the uprobe format. 
Here are the results: $ perf buildid-cache -v --add test_sdt $ perf probe -x test_sdt sdt_libfoo:table_frob $ perf probe -x test_sdt sdt_libfoo:table_diddle $ perf record -e sdt_libfoo:table_frob -e sdt_libfoo:table_diddle test_sdt $ perf script test_sdt ... 666.255678: sdt_libfoo:table_frob: (4004d7) arg0=0 arg1=0 test_sdt ... 666.255683: sdt_libfoo:table_diddle: (40051a) arg0=0 arg1=0 test_sdt ... 666.255686: sdt_libfoo:table_frob: (4004d7) arg0=1 arg1=2 test_sdt ... 666.255689: sdt_libfoo:table_diddle: (40051a) arg0=3 arg1=4 test_sdt ... 666.255692: sdt_libfoo:table_frob: (4004d7) arg0=2 arg1=4 test_sdt ... 666.255694: sdt_libfoo:table_diddle: (40051a) arg0=6 arg1=8 Signed-off-by: Alexis Berlemont Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Hemant Kumar Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lkml.kernel.org/r/20161214000732.1710-3-alexis.berlemont@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/perf_regs.c | 83 +++++++++++++++++ tools/perf/util/perf_regs.c | 6 ++ tools/perf/util/perf_regs.h | 6 ++ tools/perf/util/probe-file.c | 170 ++++++++++++++++++++++++++++++++++- 4 files changed, 261 insertions(+), 4 deletions(-) diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index c5db14f36cc7..09a7f556dc7c 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -1,4 +1,7 @@ +#include + #include "../../perf.h" +#include "../../util/util.h" #include "../../util/perf_regs.h" const struct sample_reg sample_reg_masks[] = { @@ -26,3 +29,83 @@ const struct sample_reg sample_reg_masks[] = { #endif SMPL_REG_END }; + +struct sdt_name_reg { + const char *sdt_name; + const char *uprobe_name; +}; +#define SDT_NAME_REG(n, m) {.sdt_name = "%" #n, .uprobe_name = "%" #m} +#define SDT_NAME_REG_END {.sdt_name = NULL, .uprobe_name = NULL} + +static const struct sdt_name_reg sdt_reg_renamings[] = { + SDT_NAME_REG(eax, ax), + SDT_NAME_REG(rax, ax), + 
SDT_NAME_REG(ebx, bx), + SDT_NAME_REG(rbx, bx), + SDT_NAME_REG(ecx, cx), + SDT_NAME_REG(rcx, cx), + SDT_NAME_REG(edx, dx), + SDT_NAME_REG(rdx, dx), + SDT_NAME_REG(esi, si), + SDT_NAME_REG(rsi, si), + SDT_NAME_REG(edi, di), + SDT_NAME_REG(rdi, di), + SDT_NAME_REG(ebp, bp), + SDT_NAME_REG(rbp, bp), + SDT_NAME_REG_END, +}; + +int sdt_rename_register(char **pdesc, char *old_name) +{ + const struct sdt_name_reg *rnames = sdt_reg_renamings; + char *new_desc, *old_desc = *pdesc; + size_t prefix_len, sdt_len, uprobe_len, old_desc_len, offset; + int ret = -1; + + while (ret != 0 && rnames->sdt_name != NULL) { + sdt_len = strlen(rnames->sdt_name); + ret = strncmp(old_name, rnames->sdt_name, sdt_len); + rnames += !!ret; + } + + if (rnames->sdt_name == NULL) + return 0; + + sdt_len = strlen(rnames->sdt_name); + uprobe_len = strlen(rnames->uprobe_name); + old_desc_len = strlen(old_desc) + 1; + + new_desc = zalloc(old_desc_len + uprobe_len - sdt_len); + if (new_desc == NULL) + return -1; + + /* Copy the chars before the register name (at least '%') */ + prefix_len = old_name - old_desc; + memcpy(new_desc, old_desc, prefix_len); + + /* Copy the new register name */ + memcpy(new_desc + prefix_len, rnames->uprobe_name, uprobe_len); + + /* Copy the chars after the register name (if need be) */ + offset = prefix_len + sdt_len; + if (offset < old_desc_len) { + /* + * The orginal register name can be suffixed by 'b', + * 'w' or 'd' to indicate its size; so, we need to + * skip this char if we met one. 
+ */ + char sfx = old_desc[offset]; + + if (sfx == 'b' || sfx == 'w' || sfx == 'd') + offset++; + } + + if (offset < old_desc_len) + memcpy(new_desc + prefix_len + uprobe_len, + old_desc + offset, old_desc_len - offset); + + free(old_desc); + *pdesc = new_desc; + + return 0; +} diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index c4023f22f287..a37e5934aa2a 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -6,6 +6,12 @@ const struct sample_reg __weak sample_reg_masks[] = { SMPL_REG_END }; +int __weak sdt_rename_register(char **pdesc __maybe_unused, + char *old_name __maybe_unused) +{ + return 0; +} + #ifdef HAVE_PERF_REGS_SUPPORT int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) { diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 679d6e493962..7544a157e159 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -15,6 +15,12 @@ struct sample_reg { extern const struct sample_reg sample_reg_masks[]; +/* + * The table sdt_reg_renamings is used for adjusting gcc/gas-generated + * registers before filling the uprobe tracer interface. 
+ */ +int sdt_rename_register(char **pdesc, char *old_name); + #ifdef HAVE_PERF_REGS_SUPPORT #include diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index c3c287125be5..d741634cbfc0 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -27,6 +27,7 @@ #include "probe-event.h" #include "probe-file.h" #include "session.h" +#include "perf_regs.h" /* 4096 - 2 ('\n' + '\0') */ #define MAX_CMDLEN 4094 @@ -688,6 +689,166 @@ static unsigned long long sdt_note__get_addr(struct sdt_note *note) : (unsigned long long)note->addr.a64[0]; } +static const char * const type_to_suffix[] = { + ":s64", "", "", "", ":s32", "", ":s16", ":s8", + "", ":u8", ":u16", "", ":u32", "", "", "", ":u64" +}; + +static int synthesize_sdt_probe_arg(struct strbuf *buf, int i, const char *arg) +{ + char *tmp, *desc = strdup(arg); + const char *prefix = "", *suffix = ""; + int ret = -1; + + if (desc == NULL) { + pr_debug4("Allocation error\n"); + return ret; + } + + tmp = strchr(desc, '@'); + if (tmp) { + long type_idx; + /* + * Isolate the string number and convert it into a + * binary value; this will be an index to get suffix + * of the uprobe name (defining the type) + */ + tmp[0] = '\0'; + type_idx = strtol(desc, NULL, 10); + /* Check that the conversion went OK */ + if (type_idx == LONG_MIN || type_idx == LONG_MAX) { + pr_debug4("Failed to parse sdt type\n"); + goto error; + } + /* Check that the converted value is OK */ + if (type_idx < -8 || type_idx > 8) { + pr_debug4("Failed to get a valid sdt type\n"); + goto error; + } + suffix = type_to_suffix[type_idx + 8]; + /* Get rid of the sdt prefix which is now useless */ + tmp++; + memmove(desc, tmp, strlen(tmp) + 1); + } + + /* + * The uprobe tracer format does not support all the + * addressing modes (notably: in x86 the scaled mode); so, we + * detect ',' characters, if there is just one, there is no + * use converting the sdt arg into a uprobe one. 
+ */ + if (strchr(desc, ',')) { + pr_debug4("Skipping unsupported SDT argument; %s\n", desc); + goto out; + } + + /* + * If the argument addressing mode is indirect, we must check + * a few things... + */ + tmp = strchr(desc, '('); + if (tmp) { + int j; + + /* + * ...if the addressing mode is indirect with a + * positive offset (ex.: "1608(%ax)"), we need to add + * a '+' prefix so as to be compliant with uprobe + * format. + */ + if (desc[0] != '+' && desc[0] != '-') + prefix = "+"; + + /* + * ...or if the addressing mode is indirect with a symbol + * as offset, the argument will not be supported by + * the uprobe tracer format; so, let's skip this one. + */ + for (j = 0; j < tmp - desc; j++) { + if (desc[j] != '+' && desc[j] != '-' && + !isdigit(desc[j])) { + pr_debug4("Skipping unsupported SDT argument; " + "%s\n", desc); + goto out; + } + } + } + + /* + * The uprobe tracer format does not support constants; if we + * find one in the current argument, let's skip the argument. + */ + if (strchr(desc, '$')) { + pr_debug4("Skipping unsupported SDT argument; %s\n", desc); + goto out; + } + + /* + * The uprobe parser does not support all gas register names; + * so, we have to replace them (ex. for x86_64: %rax -> %ax); + * the loop below looks for the register names (starting with + * a '%' and tries to perform the needed renamings. 
+ */ + tmp = strchr(desc, '%'); + while (tmp) { + size_t offset = tmp - desc; + + ret = sdt_rename_register(&desc, desc + offset); + if (ret < 0) + goto error; + + /* + * The desc pointer might have changed; so, let's not + * try to reuse tmp for next lookup + */ + tmp = strchr(desc + offset + 1, '%'); + } + + if (strbuf_addf(buf, " arg%d=%s%s%s", i + 1, prefix, desc, suffix) < 0) + goto error; + +out: + ret = 0; +error: + free(desc); + return ret; +} + +static char *synthesize_sdt_probe_command(struct sdt_note *note, + const char *pathname, + const char *sdtgrp) +{ + struct strbuf buf; + char *ret = NULL, **args; + int i, args_count; + + if (strbuf_init(&buf, 32) < 0) + return NULL; + + if (strbuf_addf(&buf, "p:%s/%s %s:0x%llx", + sdtgrp, note->name, pathname, + sdt_note__get_addr(note)) < 0) + goto error; + + if (!note->args) + goto out; + + if (note->args) { + args = argv_split(note->args, &args_count); + + for (i = 0; i < args_count; ++i) { + if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0) + goto error; + } + } + +out: + ret = strbuf_detach(&buf, NULL); +error: + strbuf_release(&buf); + return ret; +} + int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname) { struct probe_cache_entry *entry = NULL; @@ -724,11 +885,12 @@ int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname) entry->pev.group = strdup(sdtgrp); list_add_tail(&entry->node, &pcache->entries); } - ret = asprintf(&buf, "p:%s/%s %s:0x%llx", - sdtgrp, note->name, pathname, - sdt_note__get_addr(note)); - if (ret < 0) + buf = synthesize_sdt_probe_command(note, pathname, sdtgrp); + if (!buf) { + ret = -ENOMEM; break; + } + strlist__add(entry->tevlist, buf); free(buf); entry = NULL; -- cgit v1.2.3 From 8544d24c3204f94c0ba9788d3113b7a83d5edc0d Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 2 Feb 2017 16:41:40 +0530 Subject: perf sdt x86: Add renaming logic for rNN and other registers 'perf probe' is failing for sdt markers whose arguments has rNN (with 
postfix b/w/d), %rsp, %esp, %sil etc. registers. Add renaming logic for these registers. Signed-off-by: Ravi Bangoria Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Alexis Berlemont Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Naveen N. Rao Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170202111143.14319-3-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/perf_regs.c | 44 ++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index 09a7f556dc7c..d8a8dcf761f7 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -48,10 +48,42 @@ static const struct sdt_name_reg sdt_reg_renamings[] = { SDT_NAME_REG(rdx, dx), SDT_NAME_REG(esi, si), SDT_NAME_REG(rsi, si), + SDT_NAME_REG(sil, si), SDT_NAME_REG(edi, di), SDT_NAME_REG(rdi, di), + SDT_NAME_REG(dil, di), SDT_NAME_REG(ebp, bp), SDT_NAME_REG(rbp, bp), + SDT_NAME_REG(bpl, bp), + SDT_NAME_REG(rsp, sp), + SDT_NAME_REG(esp, sp), + SDT_NAME_REG(spl, sp), + + /* rNN registers */ + SDT_NAME_REG(r8b, r8), + SDT_NAME_REG(r8w, r8), + SDT_NAME_REG(r8d, r8), + SDT_NAME_REG(r9b, r9), + SDT_NAME_REG(r9w, r9), + SDT_NAME_REG(r9d, r9), + SDT_NAME_REG(r10b, r10), + SDT_NAME_REG(r10w, r10), + SDT_NAME_REG(r10d, r10), + SDT_NAME_REG(r11b, r11), + SDT_NAME_REG(r11w, r11), + SDT_NAME_REG(r11d, r11), + SDT_NAME_REG(r12b, r12), + SDT_NAME_REG(r12w, r12), + SDT_NAME_REG(r12d, r12), + SDT_NAME_REG(r13b, r13), + SDT_NAME_REG(r13w, r13), + SDT_NAME_REG(r13d, r13), + SDT_NAME_REG(r14b, r14), + SDT_NAME_REG(r14w, r14), + SDT_NAME_REG(r14d, r14), + SDT_NAME_REG(r15b, r15), + SDT_NAME_REG(r15w, r15), + SDT_NAME_REG(r15d, r15), SDT_NAME_REG_END, }; @@ -88,18 +120,6 @@ int sdt_rename_register(char **pdesc, char *old_name) /* Copy the chars after the register name (if need be) */ offset = prefix_len + sdt_len; - if (offset < 
old_desc_len) { - /* - * The orginal register name can be suffixed by 'b', - * 'w' or 'd' to indicate its size; so, we need to - * skip this char if we met one. - */ - char sfx = old_desc[offset]; - - if (sfx == 'b' || sfx == 'w' || sfx == 'd') - offset++; - } - if (offset < old_desc_len) memcpy(new_desc + prefix_len + uprobe_len, old_desc + offset, old_desc_len - offset); -- cgit v1.2.3 From e7cb9de211ebb2924d87fdeb77e50d74c2e673d1 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Mon, 20 Mar 2017 11:56:57 +0900 Subject: perf annotate: More exactly grep -v of the objdump command The 'grep -v "filename"' applied to the objdump command output cause a side effect eliminating filename:linenr of output of 'objdump -l' if the object file name and source file name are the same, fix it. E.g. the output of the following objdump command in symbol__disassemble(): $ objdump -l -d -S -C /home/taeung/hello --start-address=... /home/taeung/hello: file format elf64-x86-64 Disassembly of section .text: 0000000000400526
: main(): /home/taeung/hello.c:4 void main() { 400526: 55 push %rbp 400527: 48 89 e5 mov %rsp,%rbp /home/taeung/hello.c:5 ... But it uses grep -v "filename" e.g. "/home/taeung/hello" in the objdump command to remove the first line containing file name and file format ("/home/taeung/hello: file format elf64-x86-64"): Before: $ objdump -l -d -S -C /home/taeung/hello | grep -v /home/taeung/hello But this causes a side effect, removing filename:linenr too, because the object file and source file have the same name e.g. "/home/taeung/hello", "/home/taeung/hello.c". So do a better match by using grep -v as below to correctly remove that first line: "/home/taeung/hello: file format elf64-x86-64" After: $ objdump -l -d -S -C /home/taeung/hello | grep -v /home/taeung/hello: Signed-off-by: Taeung Song Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1489978617-31396-5-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 273f21fa32b5..4d325cdcb732 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1435,7 +1435,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na snprintf(command, sizeof(command), "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 - " -l -d %s %s -C %s 2>/dev/null|grep -v %s|expand", + " -l -d %s %s -C %s 2>/dev/null|grep -v %s:|expand", objdump_path ? objdump_path : "objdump", disassembler_style ? "-M " : "", disassembler_style ? 
disassembler_style : "", -- cgit v1.2.3 From ed7b339fb570749042332169e62541b208fc4296 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 21 Mar 2017 16:00:50 -0300 Subject: perf annotate: Add comment clarifying how the source code line is parsed The source code line number (lineno) needs to be kept across calls to symbol__parse_objdump_line() when parsing the output of 'objdump -l -dS', so that it can associate it with the instructions till the next line. See disasm_line__new() and struct disasm_line::line_nr. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Taeung Song Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-7hpx8f8ybdpiujceysaj229w@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 4d325cdcb732..22cd1dbe724b 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1482,6 +1482,12 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na nline = 0; while (!feof(file)) { + /* + * The source code line number (lineno) needs to be kept in + * accross calls to symbol__parse_objdump_line(), so that it + * can associate it with the instructions till the next one. + * See disasm_line__new() and struct disasm_line::line_nr. + */ if (symbol__parse_objdump_line(sym, map, arch, file, privsize, &lineno) < 0) break; -- cgit v1.2.3 From fbe51fba82901fd15d3e0a068388fcd7d02dc047 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 20 Mar 2017 13:16:59 -0700 Subject: perf stat: Factor out callback for collecting event values To be used in next patch to support automatic summing of alias events. v2: Move check for bad results to next patch v3: Remove trivial addition. 
v4: Use perf_evsel__cpus instead of evsel->cpus Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170320201711.14142-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 103 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 80 insertions(+), 23 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f53f449d864d..5c13a0f40adc 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1182,11 +1182,46 @@ static void aggr_update_shadow(void) } } +static void collect_data(struct perf_evsel *counter, + void (*cb)(struct perf_evsel *counter, void *data, + bool first), + void *data) +{ + cb(counter, data, true); +} + +struct aggr_data { + u64 ena, run, val; + int id; + int nr; + int cpu; +}; + +static void aggr_cb(struct perf_evsel *counter, void *data, bool first) +{ + struct aggr_data *ad = data; + int cpu, s2; + + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + struct perf_counts_values *counts; + + s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); + if (s2 != ad->id) + continue; + if (first) + ad->nr++; + counts = perf_counts(counter->counts, cpu, 0); + ad->val += counts->val; + ad->ena += counts->ena; + ad->run += counts->run; + } +} + static void print_aggr(char *prefix) { FILE *output = stat_config.output; struct perf_evsel *counter; - int cpu, s, s2, id, nr; + int s, id, nr; double uval; u64 ena, run, val; bool first; @@ -1201,23 +1236,20 @@ static void print_aggr(char *prefix) * Without each counter has its own line. 
*/ for (s = 0; s < aggr_map->nr; s++) { + struct aggr_data ad; if (prefix && metric_only) fprintf(output, "%s", prefix); - id = aggr_map->map[s]; + ad.id = id = aggr_map->map[s]; first = true; evlist__for_each_entry(evsel_list, counter) { - val = ena = run = 0; - nr = 0; - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); - if (s2 != id) - continue; - val += perf_counts(counter->counts, cpu, 0)->val; - ena += perf_counts(counter->counts, cpu, 0)->ena; - run += perf_counts(counter->counts, cpu, 0)->run; - nr++; - } + ad.val = ad.ena = ad.run = 0; + ad.nr = 0; + collect_data(counter, aggr_cb, &ad); + nr = ad.nr; + ena = ad.ena; + run = ad.run; + val = ad.val; if (first && metric_only) { first = false; aggr_printout(counter, id, nr); @@ -1261,6 +1293,21 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) } } +struct caggr_data { + double avg, avg_enabled, avg_running; +}; + +static void counter_aggr_cb(struct perf_evsel *counter, void *data, + bool first __maybe_unused) +{ + struct caggr_data *cd = data; + struct perf_stat_evsel *ps = counter->priv; + + cd->avg += avg_stats(&ps->res_stats[0]); + cd->avg_enabled += avg_stats(&ps->res_stats[1]); + cd->avg_running += avg_stats(&ps->res_stats[2]); +} + /* * Print out the results of a single counter: * aggregated counts in system-wide mode @@ -1268,23 +1315,30 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) static void print_counter_aggr(struct perf_evsel *counter, char *prefix) { FILE *output = stat_config.output; - struct perf_stat_evsel *ps = counter->priv; - double avg = avg_stats(&ps->res_stats[0]); double uval; - double avg_enabled, avg_running; + struct caggr_data cd = { .avg = 0.0 }; - avg_enabled = avg_stats(&ps->res_stats[1]); - avg_running = avg_stats(&ps->res_stats[2]); + collect_data(counter, counter_aggr_cb, &cd); if (prefix && !metric_only) fprintf(output, "%s", prefix); - uval = avg * 
counter->scale; - printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg); + uval = cd.avg * counter->scale; + printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg); if (!metric_only) fprintf(output, "\n"); } +static void counter_cb(struct perf_evsel *counter, void *data, + bool first __maybe_unused) +{ + struct aggr_data *ad = data; + + ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; + ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; + ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; +} + /* * Print out the results of a single counter: * does not use aggregated count in system-wide @@ -1297,9 +1351,12 @@ static void print_counter(struct perf_evsel *counter, char *prefix) int cpu; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - val = perf_counts(counter->counts, cpu, 0)->val; - ena = perf_counts(counter->counts, cpu, 0)->ena; - run = perf_counts(counter->counts, cpu, 0)->run; + struct aggr_data ad = { .cpu = cpu }; + + collect_data(counter, counter_cb, &ad); + val = ad.val; + ena = ad.ena; + run = ad.run; if (prefix) fprintf(output, "%s", prefix); -- cgit v1.2.3 From 430daf2dc7aff16096a137347e6fd03d4af609e9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 20 Mar 2017 13:17:00 -0700 Subject: perf stat: Collapse identically named events The uncore PMU has a lot of duplicated PMUs for different subsystems. When expanding an uncore alias we usually end up with a large number of identically named aliases, which makes perf stat output difficult to read. Automatically sum them up in perf stat, unless --no-merge is specified. This can be default because only the uncores generally have duplicated aliases. Other PMUs have unique names. 
Before: % perf stat --no-merge -a -e unc_c_llc_lookup.any sleep 1 Performance counter stats for 'system wide': 694,976 Bytes unc_c_llc_lookup.any 706,304 Bytes unc_c_llc_lookup.any 956,608 Bytes unc_c_llc_lookup.any 782,720 Bytes unc_c_llc_lookup.any 605,696 Bytes unc_c_llc_lookup.any 442,816 Bytes unc_c_llc_lookup.any 659,328 Bytes unc_c_llc_lookup.any 509,312 Bytes unc_c_llc_lookup.any 263,936 Bytes unc_c_llc_lookup.any 592,448 Bytes unc_c_llc_lookup.any 672,448 Bytes unc_c_llc_lookup.any 608,640 Bytes unc_c_llc_lookup.any 641,024 Bytes unc_c_llc_lookup.any 856,896 Bytes unc_c_llc_lookup.any 808,832 Bytes unc_c_llc_lookup.any 684,864 Bytes unc_c_llc_lookup.any 710,464 Bytes unc_c_llc_lookup.any 538,304 Bytes unc_c_llc_lookup.any 1.002577660 seconds time elapsed After: % perf stat -a -e unc_c_llc_lookup.any sleep 1 Performance counter stats for 'system wide': 2,685,120 Bytes unc_c_llc_lookup.any 1.002648032 seconds time elapsed v2: Split collect_aliases. Rename alias flag. v3: Make sure unsupported/not counted is always printed. v4: Factor out callback change into separate patch. v5: Move check for bad results here Move merged check into collect_data Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170320201711.14142-3-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 3 +++ tools/perf/builtin-stat.c | 38 ++++++++++++++++++++++++++++++---- tools/perf/util/evsel.h | 1 + 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 978548138624..bd0e4417f2be 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -236,6 +236,9 @@ To interpret the results it is usually needed to know on which CPUs the workload runs on. If needed the CPUs can be forced using taskset. +--no-merge:: +Do not merge results from same PMUs. 
+ EXAMPLES -------- diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5c13a0f40adc..a4da10a506dd 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -140,6 +140,7 @@ static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static bool metric_only = false; static bool force_metric_only = false; +static bool no_merge = false; static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; @@ -1182,12 +1183,37 @@ static void aggr_update_shadow(void) } } -static void collect_data(struct perf_evsel *counter, +static void collect_all_aliases(struct perf_evsel *counter, void (*cb)(struct perf_evsel *counter, void *data, bool first), void *data) { + struct perf_evsel *alias; + + alias = list_prepare_entry(counter, &(evsel_list->entries), node); + list_for_each_entry_continue (alias, &evsel_list->entries, node) { + if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || + alias->scale != counter->scale || + alias->cgrp != counter->cgrp || + strcmp(alias->unit, counter->unit) || + nsec_counter(alias) != nsec_counter(counter)) + break; + alias->merged_stat = true; + cb(alias, data, false); + } +} + +static bool collect_data(struct perf_evsel *counter, + void (*cb)(struct perf_evsel *counter, void *data, + bool first), + void *data) +{ + if (counter->merged_stat) + return false; cb(counter, data, true); + if (!no_merge) + collect_all_aliases(counter, cb, data); + return true; } struct aggr_data { @@ -1245,7 +1271,8 @@ static void print_aggr(char *prefix) evlist__for_each_entry(evsel_list, counter) { ad.val = ad.ena = ad.run = 0; ad.nr = 0; - collect_data(counter, aggr_cb, &ad); + if (!collect_data(counter, aggr_cb, &ad)) + continue; nr = ad.nr; ena = ad.ena; run = ad.run; @@ -1318,7 +1345,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) double uval; struct caggr_data cd = { .avg = 0.0 }; - collect_data(counter, counter_aggr_cb, 
&cd); + if (!collect_data(counter, counter_aggr_cb, &cd)) + return; if (prefix && !metric_only) fprintf(output, "%s", prefix); @@ -1353,7 +1381,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix) for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { struct aggr_data ad = { .cpu = cpu }; - collect_data(counter, counter_cb, &ad); + if (!collect_data(counter, counter_cb, &ad)) + return; val = ad.val; ena = ad.ena; run = ad.run; @@ -1701,6 +1730,7 @@ static const struct option stat_options[] = { "list of cpus to monitor in system-wide"), OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, "disable CPU count aggregation", AGGR_NONE), + OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"), OPT_STRING('x', "field-separator", &csv_sep, "separator", "print counts with custom separator"), OPT_CALLBACK('G', "cgroup", &evsel_list, "name", diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 06ef6f29efa1..bd2e9b112d49 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -131,6 +131,7 @@ struct perf_evsel { bool cmdline_group_boundary; struct list_head config_terms; int bpf_fd; + bool merged_stat; }; union u64_swap { -- cgit v1.2.3 From b4229e9d4cac2295f8f04ec26acd571a391c6c37 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 20 Mar 2017 13:17:01 -0700 Subject: perf stat: Handle partially bad results with merging When any result that is being merged is bad, mark them all bad to give consistent output in interval mode. 
No before/after, because the issue was only found in theoretical review and it is hard to reproduce Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170320201711.14142-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a4da10a506dd..cd7dc3b648ca 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1237,6 +1237,16 @@ static void aggr_cb(struct perf_evsel *counter, void *data, bool first) if (first) ad->nr++; counts = perf_counts(counter->counts, cpu, 0); + /* + * When any result is bad, make them all to give + * consistent output in interval mode. + */ + if (counts->ena == 0 || counts->run == 0 || + counter->counts->scaled == -1) { + ad->ena = 0; + ad->run = 0; + break; + } ad->val += counts->val; ad->ena += counts->ena; ad->run += counts->run; -- cgit v1.2.3 From 2073ad3326b7e4577af3d6789edd03df79519d21 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 20 Mar 2017 13:17:02 -0700 Subject: perf tools: Factor out PMU matching in parser Factor out the PMU name matching in the event parser into a separate function, to use the same code for other grammar rules later. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170320201711.14142-5-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 46 ++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/parse-events.h | 5 +++++ tools/perf/util/parse-events.y | 30 +-------------------------- 3 files changed, 52 insertions(+), 29 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 54355d3caf09..c3edb373ed86 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1260,6 +1260,52 @@ int parse_events_add_pmu(struct parse_events_evlist *data, return evsel ? 
0 : -ENOMEM; } +int parse_events_multi_pmu_add(struct parse_events_evlist *data, + char *str, struct list_head **listp) +{ + struct list_head *head; + struct parse_events_term *term; + struct list_head *list; + struct perf_pmu *pmu = NULL; + int ok = 0; + + *listp = NULL; + /* Add it for all PMUs that support the alias */ + list = malloc(sizeof(struct list_head)); + if (!list) + return -1; + INIT_LIST_HEAD(list); + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + struct perf_pmu_alias *alias; + + list_for_each_entry(alias, &pmu->aliases, list) { + if (!strcasecmp(alias->name, str)) { + head = malloc(sizeof(struct list_head)); + if (!head) + return -1; + INIT_LIST_HEAD(head); + if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, + str, 1, false, &str, NULL) < 0) + return -1; + list_add_tail(&term->list, head); + + if (!parse_events_add_pmu(data, list, + pmu->name, head)) { + pr_debug("%s -> %s/%s/\n", str, + pmu->name, alias->str); + ok++; + } + + parse_events_terms__delete(head); + } + } + } + if (!ok) + return -1; + *listp = list; + return 0; +} + int parse_events__modifier_group(struct list_head *list, char *event_mod) { diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 8c72b0ff7fcb..deca9ce965a7 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -167,6 +167,11 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, int parse_events_add_pmu(struct parse_events_evlist *data, struct list_head *list, char *name, struct list_head *head_config); + +int parse_events_multi_pmu_add(struct parse_events_evlist *data, + char *str, + struct list_head **listp); + enum perf_pmu_event_symbol_type perf_pmu__parse_check(const char *name); void parse_events__set_leader(char *name, struct list_head *list); diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 30f018ea1370..36af02f95243 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y 
@@ -236,37 +236,9 @@ PE_NAME opt_event_config | PE_KERNEL_PMU_EVENT sep_dc { - struct parse_events_evlist *data = _data; - struct list_head *head; - struct parse_events_term *term; struct list_head *list; - struct perf_pmu *pmu = NULL; - int ok = 0; - /* Add it for all PMUs that support the alias */ - ALLOC_LIST(list); - while ((pmu = perf_pmu__scan(pmu)) != NULL) { - struct perf_pmu_alias *alias; - - list_for_each_entry(alias, &pmu->aliases, list) { - if (!strcasecmp(alias->name, $1)) { - ALLOC_LIST(head); - ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, 1, false, &@1, NULL)); - list_add_tail(&term->list, head); - - if (!parse_events_add_pmu(data, list, - pmu->name, head)) { - pr_debug("%s -> %s/%s/\n", $1, - pmu->name, alias->str); - ok++; - } - - parse_events_terms__delete(head); - } - } - } - if (!ok) + if (parse_events_multi_pmu_add(_data, $1, &list) < 0) YYABORT; $$ = list; } -- cgit v1.2.3 From 8255718f4bedbfb3558fba10ff40a70934f2117d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 20 Mar 2017 13:17:03 -0700 Subject: perf pmu: Expand PMU events by prefix match When the user specifies a pmu directly, expand it automatically with a prefix match for all available PMUs, similar as we do for the normal aliases now. This allows to specify attributes for duplicated boxes quickly. For example uncore_cbox_{0,6}/.../ can be now specified as uncore_cbox/.../ and it gets automatically expanded for all boxes. This generally makes it more concise to write uncore specifications, and also avoids the need to know the exact topology of the system. 
Before: % perf stat -a -e uncore_cbox_0/event=0x35,umask=0x1,filter_opc=0x19C/,\ uncore_cbox_1/event=0x35,umask=0x1,filter_opc=0x19C/,\ uncore_cbox_2/event=0x35,umask=0x1,filter_opc=0x19C/,\ uncore_cbox_3/event=0x35,umask=0x1,filter_opc=0x19C/,\ uncore_cbox_4/event=0x35,umask=0x1,filter_opc=0x19C/,\ uncore_cbox_5/event=0x35,umask=0x1,filter_opc=0x19C/ sleep 1 After: % perf stat -a -e uncore_cbox/event=0x35,umask=0x1,filter_opc=0x19C/ sleep 1 v2: Handle all bison rules. Move multi add code to separate function. Handle uncore_ prefix correctly. v3: Move parse_events_multi_pmu_add to separate patch. Move uncore prefix check to separate patch. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170320201711.14142-6-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 25 +++++++++++++++++++++++++ tools/perf/util/parse-events.h | 3 +++ tools/perf/util/parse-events.y | 40 ++++++++++++++++++++++++++-------------- 3 files changed, 54 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index c3edb373ed86..e594c974c93e 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2462,6 +2462,31 @@ int parse_events_term__clone(struct parse_events_term **new, return new_term(new, &temp, term->val.str, term->val.num); } +int parse_events_copy_term_list(struct list_head *old, + struct list_head **new) +{ + struct parse_events_term *term, *n; + int ret; + + if (!old) { + *new = NULL; + return 0; + } + + *new = malloc(sizeof(struct list_head)); + if (!*new) + return -ENOMEM; + INIT_LIST_HEAD(*new); + + list_for_each_entry (term, old, list) { + ret = parse_events_term__clone(&n, term); + if (ret) + return ret; + list_add_tail(&n->list, *new); + } + return 0; +} + void parse_events_terms__purge(struct list_head *terms) { struct parse_events_term *term, *h; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h 
index deca9ce965a7..f38086b8dbea 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -172,6 +172,9 @@ int parse_events_multi_pmu_add(struct parse_events_evlist *data, char *str, struct list_head **listp); +int parse_events_copy_term_list(struct list_head *old, + struct list_head **new); + enum perf_pmu_event_symbol_type perf_pmu__parse_check(const char *name); void parse_events__set_leader(char *name, struct list_head *list); diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 36af02f95243..20935b17753d 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -226,11 +226,32 @@ event_pmu: PE_NAME opt_event_config { struct parse_events_evlist *data = _data; - struct list_head *list; + struct list_head *list, *orig_terms, *terms; + + if (parse_events_copy_term_list($2, &orig_terms)) + YYABORT; ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(data, list, $1, $2)); + if (parse_events_add_pmu(data, list, $1, $2)) { + struct perf_pmu *pmu = NULL; + int ok = 0; + + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + char *name = pmu->name; + + if (!strncmp($1, name, strlen($1))) { + if (parse_events_copy_term_list(orig_terms, &terms)) + YYABORT; + if (!parse_events_add_pmu(data, list, pmu->name, terms)) + ok++; + parse_events_terms__delete(terms); + } + } + if (!ok) + YYABORT; + } parse_events_terms__delete($2); + parse_events_terms__delete(orig_terms); $$ = list; } | @@ -245,21 +266,12 @@ PE_KERNEL_PMU_EVENT sep_dc | PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc { - struct parse_events_evlist *data = _data; - struct list_head *head; - struct parse_events_term *term; struct list_head *list; char pmu_name[128]; - snprintf(&pmu_name, 128, "%s-%s", $1, $3); - ALLOC_LIST(head); - ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - &pmu_name, 1, false, &@1, NULL)); - list_add_tail(&term->list, head); - - ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(data, list, "cpu", 
head)); - parse_events_terms__delete(head); + snprintf(&pmu_name, 128, "%s-%s", $1, $3); + if (parse_events_multi_pmu_add(_data, pmu_name, &list) < 0) + YYABORT; $$ = list; } -- cgit v1.2.3 From a820e33547aee9fd0460106c1fc577a125c23975 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 20 Mar 2017 13:17:04 -0700 Subject: perf pmu: Special case uncore_ prefix Special case uncore_ prefix in PMU match, to allow for shorter event uncore specifications. Before: perf stat -a -e uncore_cbox/event=0x35,umask=0x1,filter_opc=0x19C/ sleep 1 After perf stat -a -e cbox/event=0x35,umask=0x1,filter_opc=0x19C/ sleep 1 Committer tests: # perf list uncore List of pre-defined events (to be used in -e): uncore_cbox_0/clockticks/ [Kernel PMU event] uncore_cbox_1/clockticks/ [Kernel PMU event] uncore_imc/data_reads/ [Kernel PMU event] uncore_imc/data_writes/ [Kernel PMU event] # perf stat -a -e cbox_0/clockticks/ sleep 1 Performance counter stats for 'system wide': 281,474,976,653,084 cbox_0/clockticks/ 1.000870129 seconds time elapsed # Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/20170320201711.14142-7-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 20935b17753d..04fd8c9af9f9 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -239,6 +239,9 @@ PE_NAME opt_event_config while ((pmu = perf_pmu__scan(pmu)) != NULL) { char *name = pmu->name; + if (!strncmp(name, "uncore_", 7) && + strncmp($1, "uncore_", 7)) + name += 7; if (!strncmp($1, name, strlen($1))) { if (parse_events_copy_term_list(orig_terms, &terms)) YYABORT; -- cgit v1.2.3 From 075167363f8b53ade702cd83f5818eb47119b659 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 20 Mar 2017 13:17:05 -0700 Subject: perf tools: Add a simple expression parser for JSON Add a 
simple expression parser good enough to parse JSON relation expressions. The parser is implemented using bison. This is just intended as an simple parser for internal usage in the event lists, not the beginning of a "perf scripting language" v2: Use expr__ prefix instead of expr_ Support multiple free variables for parser Committer note: The v2 patch had: %define api.pure full In expr.y, that is a feature introduced in bison 2.7, to have reentrant parsers, not using global variables, which would make tools/perf stop building with the bison version shipped in older distros, so Andi realised that the other parsers (e.g. parse-events.y) were using: %pure-parser Which is present in older versions of bison and fits the bill. I added: CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w To finally make it build, copying what was there for pmu-bison.o, another parser. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170320201711.14142-8-andi@firstfloor.org [ stdlib.h is needed in tests/expr.c for free() fixing build in systems such as ubuntu:16.04-x-s390 ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 4 + tools/perf/tests/expr.c | 56 +++++++++++++ tools/perf/tests/tests.h | 1 + tools/perf/util/Build | 6 ++ tools/perf/util/expr.h | 25 ++++++ tools/perf/util/expr.y | 173 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 266 insertions(+) create mode 100644 tools/perf/tests/expr.c create mode 100644 tools/perf/util/expr.h create mode 100644 tools/perf/util/expr.y diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 1cb3d9b540e9..af58ebc243ef 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -38,6 +38,7 @@ perf-y += cpumap.o perf-y += stat.o perf-y += event_update.o perf-y += event-times.o +perf-y += expr.o perf-y += backward-ring-buffer.o perf-y += sdt.o perf-y += is_printable_array.o diff --git a/tools/perf/tests/builtin-test.c 
b/tools/perf/tests/builtin-test.c index 83c4669cbc5b..86822969e8a8 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -43,6 +43,10 @@ static struct test generic_tests[] = { .desc = "Parse event definition strings", .func = test__parse_events, }, + { + .desc = "Simple expression parser", + .func = test__expr, + }, { .desc = "PERF_RECORD_* events & perf_sample fields", .func = test__PERF_RECORD, diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c new file mode 100644 index 000000000000..6c6a3749aaf6 --- /dev/null +++ b/tools/perf/tests/expr.c @@ -0,0 +1,56 @@ +#include "util/debug.h" +#include "util/expr.h" +#include "tests.h" +#include + +static int test(struct parse_ctx *ctx, const char *e, double val2) +{ + double val; + + if (expr__parse(&val, ctx, &e)) + TEST_ASSERT_VAL("parse test failed", 0); + TEST_ASSERT_VAL("unexpected value", val == val2); + return 0; +} + +int test__expr(int subtest __maybe_unused) +{ + const char *p; + const char **other; + double val; + int ret; + struct parse_ctx ctx; + int num_other; + + expr__ctx_init(&ctx); + expr__add_id(&ctx, "FOO", 1); + expr__add_id(&ctx, "BAR", 2); + + ret = test(&ctx, "1+1", 2); + ret |= test(&ctx, "FOO+BAR", 3); + ret |= test(&ctx, "(BAR/2)%2", 1); + ret |= test(&ctx, "1 - -4", 5); + ret |= test(&ctx, "(FOO-1)*2 + (BAR/2)%2 - -4", 5); + + if (ret) + return ret; + + p = "FOO/0"; + ret = expr__parse(&val, &ctx, &p); + TEST_ASSERT_VAL("division by zero", ret == 1); + + p = "BAR/"; + ret = expr__parse(&val, &ctx, &p); + TEST_ASSERT_VAL("missing operand", ret == 1); + + TEST_ASSERT_VAL("find other", + expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0); + TEST_ASSERT_VAL("find other", num_other == 3); + TEST_ASSERT_VAL("find other", !strcmp(other[0], "BAR")); + TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ")); + TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO")); + TEST_ASSERT_VAL("find other", other[3] == NULL); + free((void 
*)other); + + return 0; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 1fa9b9d83aa5..631859629403 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -62,6 +62,7 @@ int test__sample_parsing(int subtest); int test__keep_tracking(int subtest); int test__parse_no_sample_id_all(int subtest); int test__dwarf_unwind(int subtest); +int test__expr(int subtest); int test__hists_filter(int subtest); int test__mmap_thread_lookup(int subtest); int test__thread_mg_share(int subtest); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index fb4f42f1bb38..2ae92da613dd 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -90,6 +90,7 @@ libperf-y += mem-events.o libperf-y += vsprintf.o libperf-y += drv_configs.o libperf-y += time-utils.o +libperf-y += expr-bison.o libperf-$(CONFIG_LIBBPF) += bpf-loader.o libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o @@ -142,6 +143,10 @@ $(OUTPUT)util/parse-events-bison.c: util/parse-events.y $(call rule_mkdir) $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ +$(OUTPUT)util/expr-bison.c: util/expr.y + $(call rule_mkdir) + $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr__ + $(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c $(call rule_mkdir) $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l @@ -154,6 +159,7 @@ CFLAGS_parse-events-flex.o += -w CFLAGS_pmu-flex.o += -w CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w +CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h new file mode 100644 index 000000000000..9c2760a1a96e --- 
/dev/null +++ b/tools/perf/util/expr.h @@ -0,0 +1,25 @@ +#ifndef PARSE_CTX_H +#define PARSE_CTX_H 1 + +#define EXPR_MAX_OTHER 8 +#define MAX_PARSE_ID EXPR_MAX_OTHER + +struct parse_id { + const char *name; + double val; +}; + +struct parse_ctx { + int num_ids; + struct parse_id ids[MAX_PARSE_ID]; +}; + +void expr__ctx_init(struct parse_ctx *ctx); +void expr__add_id(struct parse_ctx *ctx, const char *id, double val); +#ifndef IN_EXPR_Y +int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp); +#endif +int expr__find_other(const char *p, const char *one, const char ***other, + int *num_other); + +#endif diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y new file mode 100644 index 000000000000..954556bea36e --- /dev/null +++ b/tools/perf/util/expr.y @@ -0,0 +1,173 @@ +/* Simple expression parser */ +%{ +#include "util.h" +#include "util/debug.h" +#define IN_EXPR_Y 1 +#include "expr.h" +#include + +#define MAXIDLEN 256 +%} + +%pure-parser +%parse-param { double *final_val } +%parse-param { struct parse_ctx *ctx } +%parse-param { const char **pp } +%lex-param { const char **pp } + +%union { + double num; + char id[MAXIDLEN+1]; +} + +%token NUMBER +%token ID +%left '|' +%left '^' +%left '&' +%left '-' '+' +%left '*' '/' '%' +%left NEG NOT +%type expr + +%{ +static int expr__lex(YYSTYPE *res, const char **pp); + +static void expr__error(double *final_val __maybe_unused, + struct parse_ctx *ctx __maybe_unused, + const char **pp __maybe_unused, + const char *s) +{ + pr_debug("%s\n", s); +} + +static int lookup_id(struct parse_ctx *ctx, char *id, double *val) +{ + int i; + + for (i = 0; i < ctx->num_ids; i++) { + if (!strcasecmp(ctx->ids[i].name, id)) { + *val = ctx->ids[i].val; + return 0; + } + } + return -1; +} + +%} +%% + +all_expr: expr { *final_val = $1; } + ; + +expr: NUMBER + | ID { if (lookup_id(ctx, $1, &$$) < 0) { + pr_debug("%s not found", $1); + YYABORT; + } + } + | expr '+' expr { $$ = $1 + $3; } + | expr '-' expr { $$ = $1 - $3; } 
+ | expr '*' expr { $$ = $1 * $3; } + | expr '/' expr { if ($3 == 0) YYABORT; $$ = $1 / $3; } + | expr '%' expr { if ((long)$3 == 0) YYABORT; $$ = (long)$1 % (long)$3; } + | '-' expr %prec NEG { $$ = -$2; } + | '(' expr ')' { $$ = $2; } + ; + +%% + +static int expr__symbol(YYSTYPE *res, const char *p, const char **pp) +{ + char *dst = res->id; + const char *s = p; + + while (isalnum(*p) || *p == '_' || *p == '.') { + if (p - s >= MAXIDLEN) + return -1; + *dst++ = *p++; + } + *dst = 0; + *pp = p; + return ID; +} + +static int expr__lex(YYSTYPE *res, const char **pp) +{ + int tok; + const char *s; + const char *p = *pp; + + while (isspace(*p)) + p++; + s = p; + switch (*p++) { + case 'a' ... 'z': + case 'A' ... 'Z': + return expr__symbol(res, p - 1, pp); + case '0' ... '9': case '.': + res->num = strtod(s, (char **)&p); + tok = NUMBER; + break; + default: + tok = *s; + break; + } + *pp = p; + return tok; +} + +/* Caller must make sure id is allocated */ +void expr__add_id(struct parse_ctx *ctx, const char *name, double val) +{ + int idx; + assert(ctx->num_ids < MAX_PARSE_ID); + idx = ctx->num_ids++; + ctx->ids[idx].name = name; + ctx->ids[idx].val = val; +} + +void expr__ctx_init(struct parse_ctx *ctx) +{ + ctx->num_ids = 0; +} + +int expr__find_other(const char *p, const char *one, const char ***other, + int *num_otherp) +{ + const char *orig = p; + int err = -1; + int num_other; + + *other = malloc((EXPR_MAX_OTHER + 1) * sizeof(char *)); + if (!*other) + return -1; + + num_other = 0; + for (;;) { + YYSTYPE val; + int tok = expr__lex(&val, &p); + if (tok == 0) { + err = 0; + break; + } + if (tok == ID && strcasecmp(one, val.id)) { + if (num_other >= EXPR_MAX_OTHER - 1) { + pr_debug("Too many extra events in %s\n", orig); + break; + } + (*other)[num_other] = strdup(val.id); + if (!(*other)[num_other]) + return -1; + num_other++; + } + } + (*other)[num_other] = NULL; + *num_otherp = num_other; + if (err) { + *num_otherp = 0; + free(*other); + *other = NULL; + } + 
return err; +} -- cgit v1.2.3 From b90b3e9c11050e09279d2b9a318189e155910b20 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 20 Mar 2017 13:17:06 -0700 Subject: perf vendor events intel: Update Intel uncore JSON event files - Add MetricName to describe Metric - Remove redundant "derived from" in descriptions - Rename UNC_M_CAS_COUNT to LLC_MISSES.READ Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170320201711.14142-9-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/broadwellde/uncore-cache.json | 28 ++++++------ .../arch/x86/broadwellde/uncore-memory.json | 26 +++++------ .../arch/x86/broadwellde/uncore-power.json | 26 +++++++---- .../arch/x86/broadwellx/uncore-cache.json | 28 ++++++------ .../arch/x86/broadwellx/uncore-interconnect.json | 6 +-- .../arch/x86/broadwellx/uncore-memory.json | 21 +++++---- .../arch/x86/broadwellx/uncore-power.json | 26 +++++++---- .../pmu-events/arch/x86/haswellx/uncore-cache.json | 28 ++++++------ .../arch/x86/haswellx/uncore-interconnect.json | 6 +-- .../arch/x86/haswellx/uncore-memory.json | 21 +++++---- .../pmu-events/arch/x86/haswellx/uncore-power.json | 26 +++++++---- .../pmu-events/arch/x86/ivytown/uncore-cache.json | 22 ++++----- .../arch/x86/ivytown/uncore-interconnect.json | 12 +++-- .../pmu-events/arch/x86/ivytown/uncore-memory.json | 19 ++++---- .../pmu-events/arch/x86/ivytown/uncore-power.json | 53 ++++++++++++++++------ .../pmu-events/arch/x86/jaketown/uncore-cache.json | 13 +++--- .../arch/x86/jaketown/uncore-interconnect.json | 12 +++-- .../arch/x86/jaketown/uncore-memory.json | 21 +++++---- .../pmu-events/arch/x86/jaketown/uncore-power.json | 53 ++++++++++++++++------ 19 files changed, 267 insertions(+), 180 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json index 076459c51d4e..58ed6d33d1f4 100644 --- 
a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json @@ -1,13 +1,13 @@ [ { - "BriefDescription": "Uncore cache clock ticks. Derived from unc_c_clockticks", + "BriefDescription": "Uncore cache clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_C_CLOCKTICKS", "PerPkg": "1", "Unit": "CBO" }, { - "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch). Derived from unc_c_llc_lookup.any", + "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)", "Counter": "0,1,2,3", "EventCode": "0x34", "EventName": "UNC_C_LLC_LOOKUP.ANY", @@ -18,7 +18,7 @@ "Unit": "CBO" }, { - "BriefDescription": "M line evictions from LLC (writebacks to memory). Derived from unc_c_llc_victims.m_state", + "BriefDescription": "M line evictions from LLC (writebacks to memory)", "Counter": "0,1,2,3", "EventCode": "0x37", "EventName": "UNC_C_LLC_VICTIMS.M_STATE", @@ -212,7 +212,7 @@ "Unit": "CBO" }, { - "BriefDescription": "read requests to home agent. Derived from unc_h_requests.reads", + "BriefDescription": "read requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS", @@ -221,7 +221,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to local home agent. Derived from unc_h_requests.reads_local", + "BriefDescription": "read requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_LOCAL", @@ -230,7 +230,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to remote home agent. Derived from unc_h_requests.reads_remote", + "BriefDescription": "read requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_REMOTE", @@ -239,7 +239,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to home agent. 
Derived from unc_h_requests.writes", + "BriefDescription": "write requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES", @@ -248,7 +248,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to local home agent. Derived from unc_h_requests.writes_local", + "BriefDescription": "write requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_LOCAL", @@ -257,7 +257,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to remote home agent. Derived from unc_h_requests.writes_remote", + "BriefDescription": "write requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_REMOTE", @@ -266,7 +266,7 @@ "Unit": "HA" }, { - "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously). Derived from unc_h_snoop_resp.rspcnflct", + "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously)", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPCNFLCT", @@ -275,7 +275,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache along with writeback to memory. Derived from unc_h_snoop_resp.rsp_fwd_wb", + "BriefDescription": "M line forwarded from remote cache along with writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB", @@ -285,7 +285,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache with no writeback to memory. Derived from unc_h_snoop_resp.rspifwd", + "BriefDescription": "M line forwarded from remote cache with no writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPIFWD", @@ -295,7 +295,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line response from remote cache. 
Derived from unc_h_snoop_resp.rsps", + "BriefDescription": "Shared line response from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPS", @@ -305,7 +305,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line forwarded from remote cache. Derived from unc_h_snoop_resp.rspsfwd", + "BriefDescription": "Shared line forwarded from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPSFWD", diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json index d17dc235f734..fa09e12018ce 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json @@ -3,7 +3,7 @@ "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.RD", + "EventName": "LLC_MISSES.MEM_READ", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0x3", @@ -13,48 +13,44 @@ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.WR", + "EventName": "LLC_MISSES.MEM_WRITE", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0xC", "Unit": "iMC" }, { - "BriefDescription": "Memory controller clock ticks. Derived from unc_m_clockticks", - "Counter": "0,1,2,3", - "EventName": "UNC_M_CLOCKTICKS", - "PerPkg": "1", - "Unit": "iMC" - }, - { - "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode. 
Derived from unc_m_power_channel_ppd", + "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", "Counter": "0,1,2,3", "EventCode": "0x85", "EventName": "UNC_M_POWER_CHANNEL_PPD", "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_channel_ppd %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles all ranks are in critical thermal throttle. Derived from unc_m_power_critical_throttle_cycles", + "BriefDescription": "Cycles all ranks are in critical thermal throttle", "Counter": "0,1,2,3", "EventCode": "0x86", "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_critical_throttle_cycles %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles Memory is in self refresh power mode. Derived from unc_m_power_self_refresh", + "BriefDescription": "Cycles Memory is in self refresh power mode", "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "UNC_M_POWER_SELF_REFRESH", "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_self_refresh %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Pre-charges due to page misses. Derived from unc_m_pre_count.page_miss", + "BriefDescription": "Pre-charges due to page misses", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.PAGE_MISS", @@ -63,7 +59,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for reads. Derived from unc_m_pre_count.rd", + "BriefDescription": "Pre-charge for reads", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.RD", @@ -72,7 +68,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for writes. 
Derived from unc_m_pre_count.wr", + "BriefDescription": "Pre-charge for writes", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.WR", diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json index b44d43088bbb..dd1b95655d1d 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json @@ -1,83 +1,91 @@ [ { - "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events. Derived from unc_p_clockticks", + "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events", "Counter": "0,1,2,3", "EventName": "UNC_P_CLOCKTICKS", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C0 and C1. Derived from unc_p_power_state_occupancy.cores_c0", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0", "Filter": "occ_sel=1", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c0 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C3. Derived from unc_p_power_state_occupancy.cores_c3", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. 
It can be used by itself to get the average number of cores in C3, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3", "Filter": "occ_sel=2", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c3 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C6 and C7. Derived from unc_p_power_state_occupancy.cores_c6", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C6, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6", "Filter": "occ_sel=3", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c6 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "External Prochot. Derived from unc_p_prochot_external_cycles", + "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip", "Counter": "0,1,2,3", "EventCode": "0xA", "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES", "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "prochot_external_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Thermal Strongest Upper Limit Cycles. 
Derived from unc_p_freq_max_limit_thermal_cycles", + "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_limit_thermal_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "OS Strongest Upper Limit Cycles. Derived from unc_p_freq_max_os_cycles", + "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x6", "EventName": "UNC_P_FREQ_MAX_OS_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_os_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Power Strongest Upper Limit Cycles. Derived from unc_p_freq_max_power_cycles", + "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x5", "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_power_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Cycles spent changing Frequency. 
Derived from unc_p_freq_trans_cycles", + "BriefDescription": "Counts the number of cycles when the system is changing frequency", "Counter": "0,1,2,3", "EventCode": "0x74", "EventName": "UNC_P_FREQ_TRANS_CYCLES", "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_trans_cycles %", "PerPkg": "1", "Unit": "PCU" } diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json index 076459c51d4e..58ed6d33d1f4 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json @@ -1,13 +1,13 @@ [ { - "BriefDescription": "Uncore cache clock ticks. Derived from unc_c_clockticks", + "BriefDescription": "Uncore cache clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_C_CLOCKTICKS", "PerPkg": "1", "Unit": "CBO" }, { - "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch). Derived from unc_c_llc_lookup.any", + "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)", "Counter": "0,1,2,3", "EventCode": "0x34", "EventName": "UNC_C_LLC_LOOKUP.ANY", @@ -18,7 +18,7 @@ "Unit": "CBO" }, { - "BriefDescription": "M line evictions from LLC (writebacks to memory). Derived from unc_c_llc_victims.m_state", + "BriefDescription": "M line evictions from LLC (writebacks to memory)", "Counter": "0,1,2,3", "EventCode": "0x37", "EventName": "UNC_C_LLC_VICTIMS.M_STATE", @@ -212,7 +212,7 @@ "Unit": "CBO" }, { - "BriefDescription": "read requests to home agent. Derived from unc_h_requests.reads", + "BriefDescription": "read requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS", @@ -221,7 +221,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to local home agent. 
Derived from unc_h_requests.reads_local", + "BriefDescription": "read requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_LOCAL", @@ -230,7 +230,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to remote home agent. Derived from unc_h_requests.reads_remote", + "BriefDescription": "read requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_REMOTE", @@ -239,7 +239,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to home agent. Derived from unc_h_requests.writes", + "BriefDescription": "write requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES", @@ -248,7 +248,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to local home agent. Derived from unc_h_requests.writes_local", + "BriefDescription": "write requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_LOCAL", @@ -257,7 +257,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to remote home agent. Derived from unc_h_requests.writes_remote", + "BriefDescription": "write requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_REMOTE", @@ -266,7 +266,7 @@ "Unit": "HA" }, { - "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously). Derived from unc_h_snoop_resp.rspcnflct", + "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously)", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPCNFLCT", @@ -275,7 +275,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache along with writeback to memory. 
Derived from unc_h_snoop_resp.rsp_fwd_wb", + "BriefDescription": "M line forwarded from remote cache along with writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB", @@ -285,7 +285,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache with no writeback to memory. Derived from unc_h_snoop_resp.rspifwd", + "BriefDescription": "M line forwarded from remote cache with no writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPIFWD", @@ -295,7 +295,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line response from remote cache. Derived from unc_h_snoop_resp.rsps", + "BriefDescription": "Shared line response from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPS", @@ -305,7 +305,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line forwarded from remote cache. Derived from unc_h_snoop_resp.rspsfwd", + "BriefDescription": "Shared line forwarded from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPSFWD", diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json index 39387f7909b2..824961318c1e 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "QPI clock ticks. Derived from unc_q_clockticks", + "BriefDescription": "QPI clock ticks", "Counter": "0,1,2,3", "EventCode": "0x14", "EventName": "UNC_Q_CLOCKTICKS", @@ -10,7 +10,7 @@ { "BriefDescription": "Number of data flits transmitted . 
Derived from unc_q_txl_flits_g0.data", "Counter": "0,1,2,3", - "EventName": "UNC_Q_TxL_FLITS_G0.DATA", + "EventName": "QPI_DATA_BANDWIDTH_TX", "PerPkg": "1", "ScaleUnit": "8Bytes", "UMask": "0x2", @@ -19,7 +19,7 @@ { "BriefDescription": "Number of non data (control) flits transmitted . Derived from unc_q_txl_flits_g0.non_data", "Counter": "0,1,2,3", - "EventName": "UNC_Q_TxL_FLITS_G0.NON_DATA", + "EventName": "QPI_CTL_BANDWIDTH_TX", "PerPkg": "1", "ScaleUnit": "8Bytes", "UMask": "0x4", diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json index d17dc235f734..66eed399724c 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json @@ -3,7 +3,7 @@ "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.RD", + "EventName": "LLC_MISSES.MEM_READ", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0x3", @@ -13,48 +13,51 @@ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.WR", + "EventName": "LLC_MISSES.MEM_WRITE", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0xC", "Unit": "iMC" }, { - "BriefDescription": "Memory controller clock ticks. Derived from unc_m_clockticks", + "BriefDescription": "Memory controller clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_M_CLOCKTICKS", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode. 
Derived from unc_m_power_channel_ppd", + "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", "Counter": "0,1,2,3", "EventCode": "0x85", "EventName": "UNC_M_POWER_CHANNEL_PPD", "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_channel_ppd %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles all ranks are in critical thermal throttle. Derived from unc_m_power_critical_throttle_cycles", + "BriefDescription": "Cycles all ranks are in critical thermal throttle", "Counter": "0,1,2,3", "EventCode": "0x86", "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_critical_throttle_cycles %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles Memory is in self refresh power mode. Derived from unc_m_power_self_refresh", + "BriefDescription": "Cycles Memory is in self refresh power mode", "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "UNC_M_POWER_SELF_REFRESH", "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_self_refresh %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Pre-charges due to page misses. Derived from unc_m_pre_count.page_miss", + "BriefDescription": "Pre-charges due to page misses", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.PAGE_MISS", @@ -63,7 +66,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for reads. Derived from unc_m_pre_count.rd", + "BriefDescription": "Pre-charge for reads", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.RD", @@ -72,7 +75,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for writes. 
Derived from unc_m_pre_count.wr", + "BriefDescription": "Pre-charge for writes", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.WR", diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json index b44d43088bbb..dd1b95655d1d 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json @@ -1,83 +1,91 @@ [ { - "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events. Derived from unc_p_clockticks", + "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events", "Counter": "0,1,2,3", "EventName": "UNC_P_CLOCKTICKS", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C0 and C1. Derived from unc_p_power_state_occupancy.cores_c0", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0", "Filter": "occ_sel=1", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c0 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C3. Derived from unc_p_power_state_occupancy.cores_c3", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. 
It can be used by itself to get the average number of cores in C3, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3", "Filter": "occ_sel=2", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c3 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C6 and C7. Derived from unc_p_power_state_occupancy.cores_c6", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C6, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6", "Filter": "occ_sel=3", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c6 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "External Prochot. Derived from unc_p_prochot_external_cycles", + "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip", "Counter": "0,1,2,3", "EventCode": "0xA", "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES", "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "prochot_external_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Thermal Strongest Upper Limit Cycles. 
Derived from unc_p_freq_max_limit_thermal_cycles", + "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_limit_thermal_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "OS Strongest Upper Limit Cycles. Derived from unc_p_freq_max_os_cycles", + "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x6", "EventName": "UNC_P_FREQ_MAX_OS_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_os_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Power Strongest Upper Limit Cycles. Derived from unc_p_freq_max_power_cycles", + "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x5", "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_power_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Cycles spent changing Frequency. 
Derived from unc_p_freq_trans_cycles", + "BriefDescription": "Counts the number of cycles when the system is changing frequency", "Counter": "0,1,2,3", "EventCode": "0x74", "EventName": "UNC_P_FREQ_TRANS_CYCLES", "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_trans_cycles %", "PerPkg": "1", "Unit": "PCU" } diff --git a/tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json b/tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json index 076459c51d4e..58ed6d33d1f4 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json @@ -1,13 +1,13 @@ [ { - "BriefDescription": "Uncore cache clock ticks. Derived from unc_c_clockticks", + "BriefDescription": "Uncore cache clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_C_CLOCKTICKS", "PerPkg": "1", "Unit": "CBO" }, { - "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch). Derived from unc_c_llc_lookup.any", + "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)", "Counter": "0,1,2,3", "EventCode": "0x34", "EventName": "UNC_C_LLC_LOOKUP.ANY", @@ -18,7 +18,7 @@ "Unit": "CBO" }, { - "BriefDescription": "M line evictions from LLC (writebacks to memory). Derived from unc_c_llc_victims.m_state", + "BriefDescription": "M line evictions from LLC (writebacks to memory)", "Counter": "0,1,2,3", "EventCode": "0x37", "EventName": "UNC_C_LLC_VICTIMS.M_STATE", @@ -212,7 +212,7 @@ "Unit": "CBO" }, { - "BriefDescription": "read requests to home agent. Derived from unc_h_requests.reads", + "BriefDescription": "read requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS", @@ -221,7 +221,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to local home agent. 
Derived from unc_h_requests.reads_local", + "BriefDescription": "read requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_LOCAL", @@ -230,7 +230,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to remote home agent. Derived from unc_h_requests.reads_remote", + "BriefDescription": "read requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_REMOTE", @@ -239,7 +239,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to home agent. Derived from unc_h_requests.writes", + "BriefDescription": "write requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES", @@ -248,7 +248,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to local home agent. Derived from unc_h_requests.writes_local", + "BriefDescription": "write requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_LOCAL", @@ -257,7 +257,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to remote home agent. Derived from unc_h_requests.writes_remote", + "BriefDescription": "write requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_REMOTE", @@ -266,7 +266,7 @@ "Unit": "HA" }, { - "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously). Derived from unc_h_snoop_resp.rspcnflct", + "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously)", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPCNFLCT", @@ -275,7 +275,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache along with writeback to memory. 
Derived from unc_h_snoop_resp.rsp_fwd_wb", + "BriefDescription": "M line forwarded from remote cache along with writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB", @@ -285,7 +285,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache with no writeback to memory. Derived from unc_h_snoop_resp.rspifwd", + "BriefDescription": "M line forwarded from remote cache with no writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPIFWD", @@ -295,7 +295,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line response from remote cache. Derived from unc_h_snoop_resp.rsps", + "BriefDescription": "Shared line response from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPS", @@ -305,7 +305,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line forwarded from remote cache. Derived from unc_h_snoop_resp.rspsfwd", + "BriefDescription": "Shared line forwarded from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPSFWD", diff --git a/tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json index 39387f7909b2..824961318c1e 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "QPI clock ticks. Derived from unc_q_clockticks", + "BriefDescription": "QPI clock ticks", "Counter": "0,1,2,3", "EventCode": "0x14", "EventName": "UNC_Q_CLOCKTICKS", @@ -10,7 +10,7 @@ { "BriefDescription": "Number of data flits transmitted . 
Derived from unc_q_txl_flits_g0.data", "Counter": "0,1,2,3", - "EventName": "UNC_Q_TxL_FLITS_G0.DATA", + "EventName": "QPI_DATA_BANDWIDTH_TX", "PerPkg": "1", "ScaleUnit": "8Bytes", "UMask": "0x2", @@ -19,7 +19,7 @@ { "BriefDescription": "Number of non data (control) flits transmitted . Derived from unc_q_txl_flits_g0.non_data", "Counter": "0,1,2,3", - "EventName": "UNC_Q_TxL_FLITS_G0.NON_DATA", + "EventName": "QPI_CTL_BANDWIDTH_TX", "PerPkg": "1", "ScaleUnit": "8Bytes", "UMask": "0x4", diff --git a/tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json b/tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json index d17dc235f734..66eed399724c 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json @@ -3,7 +3,7 @@ "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.RD", + "EventName": "LLC_MISSES.MEM_READ", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0x3", @@ -13,48 +13,51 @@ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.WR", + "EventName": "LLC_MISSES.MEM_WRITE", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0xC", "Unit": "iMC" }, { - "BriefDescription": "Memory controller clock ticks. Derived from unc_m_clockticks", + "BriefDescription": "Memory controller clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_M_CLOCKTICKS", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode. 
Derived from unc_m_power_channel_ppd", + "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", "Counter": "0,1,2,3", "EventCode": "0x85", "EventName": "UNC_M_POWER_CHANNEL_PPD", "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_channel_ppd %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles all ranks are in critical thermal throttle. Derived from unc_m_power_critical_throttle_cycles", + "BriefDescription": "Cycles all ranks are in critical thermal throttle", "Counter": "0,1,2,3", "EventCode": "0x86", "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_critical_throttle_cycles %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles Memory is in self refresh power mode. Derived from unc_m_power_self_refresh", + "BriefDescription": "Cycles Memory is in self refresh power mode", "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "UNC_M_POWER_SELF_REFRESH", "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_self_refresh %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Pre-charges due to page misses. Derived from unc_m_pre_count.page_miss", + "BriefDescription": "Pre-charges due to page misses", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.PAGE_MISS", @@ -63,7 +66,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for reads. Derived from unc_m_pre_count.rd", + "BriefDescription": "Pre-charge for reads", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.RD", @@ -72,7 +75,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for writes. 
Derived from unc_m_pre_count.wr", + "BriefDescription": "Pre-charge for writes", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.WR", diff --git a/tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json b/tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json index b44d43088bbb..dd1b95655d1d 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json @@ -1,83 +1,91 @@ [ { - "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events. Derived from unc_p_clockticks", + "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events", "Counter": "0,1,2,3", "EventName": "UNC_P_CLOCKTICKS", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C0 and C1. Derived from unc_p_power_state_occupancy.cores_c0", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0", "Filter": "occ_sel=1", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c0 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C3. Derived from unc_p_power_state_occupancy.cores_c3", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. 
It can be used by itself to get the average number of cores in C3, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3", "Filter": "occ_sel=2", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c3 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C6 and C7. Derived from unc_p_power_state_occupancy.cores_c6", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C6, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6", "Filter": "occ_sel=3", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c6 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "External Prochot. Derived from unc_p_prochot_external_cycles", + "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip", "Counter": "0,1,2,3", "EventCode": "0xA", "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES", "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "prochot_external_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Thermal Strongest Upper Limit Cycles. 
Derived from unc_p_freq_max_limit_thermal_cycles", + "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_limit_thermal_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "OS Strongest Upper Limit Cycles. Derived from unc_p_freq_max_os_cycles", + "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x6", "EventName": "UNC_P_FREQ_MAX_OS_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_os_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Power Strongest Upper Limit Cycles. Derived from unc_p_freq_max_power_cycles", + "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x5", "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_power_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Cycles spent changing Frequency. 
Derived from unc_p_freq_trans_cycles", + "BriefDescription": "Cycles spent changing Frequency", "Counter": "0,1,2,3", "EventCode": "0x74", "EventName": "UNC_P_FREQ_TRANS_CYCLES", "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_trans_cycles %", "PerPkg": "1", "Unit": "PCU" } diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json index 2efdc6772e0b..267410594833 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json @@ -1,13 +1,13 @@ [ { - "BriefDescription": "Uncore cache clock ticks. Derived from unc_c_clockticks", + "BriefDescription": "Uncore cache clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_C_CLOCKTICKS", "PerPkg": "1", "Unit": "CBO" }, { - "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch). Derived from unc_c_llc_lookup.any", + "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)", "Counter": "0,1", "EventCode": "0x34", "EventName": "UNC_C_LLC_LOOKUP.ANY", @@ -18,7 +18,7 @@ "Unit": "CBO" }, { - "BriefDescription": "M line evictions from LLC (writebacks to memory). Derived from unc_c_llc_victims.m_state", + "BriefDescription": "M line evictions from LLC (writebacks to memory)", "Counter": "0,1", "EventCode": "0x37", "EventName": "UNC_C_LLC_VICTIMS.M_STATE", @@ -237,7 +237,7 @@ "Unit": "CBO" }, { - "BriefDescription": "Occupancy for all LLC misses that are addressed to local memory. Derived from unc_c_tor_occupancy.miss_local", + "BriefDescription": "Occupancy for all LLC misses that are addressed to local memory", "EventCode": "0x36", "EventName": "UNC_C_TOR_OCCUPANCY.MISS_LOCAL", "PerPkg": "1", @@ -254,7 +254,7 @@ "Unit": "CBO" }, { - "BriefDescription": "Occupancy for all LLC misses that are addressed to remote memory. 
Derived from unc_c_tor_occupancy.miss_remote", + "BriefDescription": "Occupancy for all LLC misses that are addressed to remote memory", "EventCode": "0x36", "EventName": "UNC_C_TOR_OCCUPANCY.MISS_REMOTE", "PerPkg": "1", @@ -262,7 +262,7 @@ "Unit": "CBO" }, { - "BriefDescription": "Read requests to home agent. Derived from unc_h_requests.reads", + "BriefDescription": "Read requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS", @@ -271,7 +271,7 @@ "Unit": "HA" }, { - "BriefDescription": "Write requests to home agent. Derived from unc_h_requests.writes", + "BriefDescription": "Write requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES", @@ -280,7 +280,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache along with writeback to memory. Derived from unc_h_snoop_resp.rsp_fwd_wb", + "BriefDescription": "M line forwarded from remote cache along with writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB", @@ -290,7 +290,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache with no writeback to memory. Derived from unc_h_snoop_resp.rspifwd", + "BriefDescription": "M line forwarded from remote cache with no writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPIFWD", @@ -300,7 +300,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line response from remote cache. Derived from unc_h_snoop_resp.rsps", + "BriefDescription": "Shared line response from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPS", @@ -310,7 +310,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line forwarded from remote cache. 
Derived from unc_h_snoop_resp.rspsfwd", + "BriefDescription": "Shared line forwarded from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPSFWD", diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json index d7e2fda1d695..b798a860bc81 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "QPI clock ticks. Use to get percentages for QPI cycles events. Derived from unc_q_clockticks", + "BriefDescription": "QPI clock ticks. Use to get percentages for QPI cycles events", "Counter": "0,1,2,3", "EventCode": "0x14", "EventName": "UNC_Q_CLOCKTICKS", @@ -8,25 +8,27 @@ "Unit": "QPI LL" }, { - "BriefDescription": "Cycles where receiving QPI link is in half-width mode. Derived from unc_q_rxl0p_power_cycles", + "BriefDescription": "Cycles where receiving QPI link is in half-width mode", "Counter": "0,1,2,3", "EventCode": "0x10", "EventName": "UNC_Q_RxL0P_POWER_CYCLES", "MetricExpr": "(UNC_Q_RxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.", + "MetricName": "rxl0p_power_cycles %", "PerPkg": "1", "Unit": "QPI LL" }, { - "BriefDescription": "Cycles where transmitting QPI link is in half-width mode. Derived from unc_q_txl0p_power_cycles", + "BriefDescription": "Cycles where transmitting QPI link is in half-width mode", "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_Q_TxL0P_POWER_CYCLES", "MetricExpr": "(UNC_Q_TxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.", + "MetricName": "txl0p_power_cycles %", "PerPkg": "1", "Unit": "QPI LL" }, { - "BriefDescription": "Number of data flits transmitted . 
Derived from unc_q_txl_flits_g0.data", + "BriefDescription": "Number of data flits transmitted ", "Counter": "0,1,2,3", "EventName": "UNC_Q_TxL_FLITS_G0.DATA", "PerPkg": "1", @@ -35,7 +37,7 @@ "Unit": "QPI LL" }, { - "BriefDescription": "Number of non data (control) flits transmitted . Derived from unc_q_txl_flits_g0.non_data", + "BriefDescription": "Number of non data (control) flits transmitted ", "Counter": "0,1,2,3", "EventName": "UNC_Q_TxL_FLITS_G0.NON_DATA", "PerPkg": "1", diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json index ac4ad4d6357b..df4b43294fa0 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "Memory page activates for reads and writes. Derived from unc_m_act_count.rd", + "BriefDescription": "Memory page activates for reads and writes", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_M_ACT_COUNT.RD", @@ -13,7 +13,7 @@ "BriefDescription": "Read requests to memory controller. Derived from unc_m_cas_count.rd", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.RD", + "EventName": "LLC_MISSES.MEM_READ", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0x3", @@ -23,48 +23,51 @@ "BriefDescription": "Write requests to memory controller. Derived from unc_m_cas_count.wr", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.WR", + "EventName": "LLC_MISSES.MEM_WRITE", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0xC", "Unit": "iMC" }, { - "BriefDescription": "Memory controller clock ticks. Use to generate percentages for memory controller CYCLES events. Derived from unc_m_clockticks", + "BriefDescription": "Memory controller clock ticks. 
Use to generate percentages for memory controller CYCLES events", "Counter": "0,1,2,3", "EventName": "UNC_M_CLOCKTICKS", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode. Derived from unc_m_power_channel_ppd", + "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", "Counter": "0,1,2,3", "EventCode": "0x85", "EventName": "UNC_M_POWER_CHANNEL_PPD", "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_channel_ppd %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles all ranks are in critical thermal throttle. Derived from unc_m_power_critical_throttle_cycles", + "BriefDescription": "Cycles all ranks are in critical thermal throttle", "Counter": "0,1,2,3", "EventCode": "0x86", "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_critical_throttle_cycles %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles Memory is in self refresh power mode. Derived from unc_m_power_self_refresh", + "BriefDescription": "Cycles Memory is in self refresh power mode", "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "UNC_M_POWER_SELF_REFRESH", "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_self_refresh %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Memory page conflicts. 
Derived from unc_m_pre_count.page_miss", + "BriefDescription": "Memory page conflicts", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.PAGE_MISS", diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json index dc2586db0dfc..d40498f2cb1e 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json @@ -1,44 +1,48 @@ [ { - "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events. Derived from unc_p_clockticks", + "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events", "Counter": "0,1,2,3", "EventName": "UNC_P_CLOCKTICKS", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band0=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band0_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band0=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_BAND0_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band0_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band1=XXX, with XXX in 100Mhz units). 
One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band1_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band1=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_BAND1_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band1_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band2=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band2_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band2=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_BAND2_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band2_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. 
Derived from unc_p_freq_band3_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_BAND3_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band3_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -49,6 +53,7 @@ "EventName": "UNC_P_FREQ_BAND0_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band0_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -59,6 +64,7 @@ "EventName": "UNC_P_FREQ_BAND1_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band1_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -69,6 +75,7 @@ "EventName": "UNC_P_FREQ_BAND2_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band2_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -79,90 +86,100 @@ "EventName": "UNC_P_FREQ_BAND3_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band3_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details. Derived from unc_p_power_state_occupancy.cores_c0", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. 
It can be used by itself to get the average number of cores in C0, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0", "Filter": "occ_sel=1", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c0 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details. Derived from unc_p_power_state_occupancy.cores_c3", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C3, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3", "Filter": "occ_sel=2", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c3 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details. Derived from unc_p_power_state_occupancy.cores_c6", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. 
It can be used by itself to get the average number of cores in C6, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6", "Filter": "occ_sel=3", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c6 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip. Derived from unc_p_prochot_external_cycles", + "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip", "Counter": "0,1,2,3", "EventCode": "0xa", "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES", "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "prochot_external_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when thermal conditions are the upper limit on frequency. This is related to the THERMAL_THROTTLE CYCLES_ABOVE_TEMP event, which always counts cycles when we are above the thermal temperature. This event (STRONGEST_UPPER_LIMIT) is sampled at the output of the algorithm that determines the actual frequency, while THERMAL_THROTTLE looks at the input. 
Derived from unc_p_freq_max_limit_thermal_cycles", + "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_limit_thermal_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency. Derived from unc_p_freq_max_os_cycles", + "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x6", "EventName": "UNC_P_FREQ_MAX_OS_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_os_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency. Derived from unc_p_freq_max_power_cycles", + "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x5", "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_power_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency. Derived from unc_p_freq_max_current_cycles", + "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x7", "EventName": "UNC_P_FREQ_MAX_CURRENT_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_CURRENT_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_current_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when the system is changing frequency. This can not be filtered by thread ID. 
One can also use it with the occupancy counter that monitors number of threads in C0 to estimate the performance impact that frequency transitions had on the system. Derived from unc_p_freq_trans_cycles", + "BriefDescription": "Cycles spent changing Frequency", "Counter": "0,1,2,3", "EventCode": "0x60", "EventName": "UNC_P_FREQ_TRANS_CYCLES", "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_trans_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -173,6 +190,7 @@ "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", "Filter": "filter_band0=1200", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -183,6 +201,7 @@ "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", "Filter": "filter_band1=2000", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -193,6 +212,7 @@ "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", "Filter": "filter_band2=3000", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -203,6 +223,7 @@ "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", "Filter": "filter_band3=4000", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -213,6 +234,7 @@ "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", "Filter": "edge=1,filter_band0=1200", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -223,6 +245,7 @@ "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band1=2000", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -233,6 
+256,7 @@ "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band2=4000", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -243,6 +267,7 @@ "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band3=4000", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" } diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json index 2f23cf0129e7..3fa61d962607 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json @@ -1,13 +1,13 @@ [ { - "BriefDescription": "Uncore cache clock ticks. Derived from unc_c_clockticks", + "BriefDescription": "Uncore cache clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_C_CLOCKTICKS", "PerPkg": "1", "Unit": "CBO" }, { - "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch). Derived from unc_c_llc_lookup.any", + "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)", "Counter": "0,1", "EventCode": "0x34", "EventName": "UNC_C_LLC_LOOKUP.ANY", @@ -18,7 +18,7 @@ "Unit": "CBO" }, { - "BriefDescription": "M line evictions from LLC (writebacks to memory). Derived from unc_c_llc_victims.m_state", + "BriefDescription": "M line evictions from LLC (writebacks to memory)", "Counter": "0,1", "EventCode": "0x37", "EventName": "UNC_C_LLC_VICTIMS.M_STATE", @@ -171,11 +171,12 @@ "Unit": "CBO" }, { - "BriefDescription": "Occupancy counter for all LLC misses; we divide this by UNC_C_CLOCKTICKS to get average Q depth. 
Derived from unc_c_tor_occupancy.miss_all", + "BriefDescription": "Occupancy counter for all LLC misses; we divide this by UNC_C_CLOCKTICKS to get average Q depth", "EventCode": "0x36", "EventName": "UNC_C_TOR_OCCUPANCY.MISS_ALL", "Filter": "filter_opc=0x182", "MetricExpr": "(UNC_C_TOR_OCCUPANCY.MISS_ALL / UNC_C_CLOCKTICKS) * 100.", + "MetricName": "tor_occupancy.miss_all %", "PerPkg": "1", "UMask": "0xa", "Unit": "CBO" @@ -189,7 +190,7 @@ "Unit": "CBO" }, { - "BriefDescription": "read requests to home agent. Derived from unc_h_requests.reads", + "BriefDescription": "read requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS", @@ -198,7 +199,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to home agent. Derived from unc_h_requests.writes", + "BriefDescription": "write requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json index 63351876eb57..1b53c0e609e3 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "QPI clock ticks. Used to get percentages of QPI cycles events. Derived from unc_q_clockticks", + "BriefDescription": "QPI clock ticks. Used to get percentages of QPI cycles events", "Counter": "0,1,2,3", "EventCode": "0x14", "EventName": "UNC_Q_CLOCKTICKS", @@ -8,25 +8,27 @@ "Unit": "QPI LL" }, { - "BriefDescription": "Cycles where receiving QPI link is in half-width mode. 
Derived from unc_q_rxl0p_power_cycles", + "BriefDescription": "Cycles where receiving QPI link is in half-width mode", "Counter": "0,1,2,3", "EventCode": "0x10", "EventName": "UNC_Q_RxL0P_POWER_CYCLES", "MetricExpr": "(UNC_Q_RxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.", + "MetricName": "rxl0p_power_cycles %", "PerPkg": "1", "Unit": "QPI LL" }, { - "BriefDescription": "Cycles where transmitting QPI link is in half-width mode. Derived from unc_q_txl0p_power_cycles", + "BriefDescription": "Cycles where transmitting QPI link is in half-width mode", "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_Q_TxL0P_POWER_CYCLES", "MetricExpr": "(UNC_Q_TxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.", + "MetricName": "txl0p_power_cycles %", "PerPkg": "1", "Unit": "QPI LL" }, { - "BriefDescription": "Number of data flits transmitted . Derived from unc_q_txl_flits_g0.data", + "BriefDescription": "Number of data flits transmitted ", "Counter": "0,1,2,3", "EventName": "UNC_Q_TxL_FLITS_G0.DATA", "PerPkg": "1", @@ -35,7 +37,7 @@ "Unit": "QPI LL" }, { - "BriefDescription": "Number of non data (control) flits transmitted . Derived from unc_q_txl_flits_g0.non_data", + "BriefDescription": "Number of non data (control) flits transmitted ", "Counter": "0,1,2,3", "EventName": "UNC_Q_TxL_FLITS_G0.NON_DATA", "PerPkg": "1", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json index e2cf6daa7b37..8551cebeba23 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "Memory page activates. Derived from unc_m_act_count", + "BriefDescription": "Memory page activates", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_M_ACT_COUNT", @@ -11,7 +11,7 @@ "BriefDescription": "read requests to memory controller. 
Derived from unc_m_cas_count.rd", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.RD", + "EventName": "LLC_MISSES.MEM_READ", "PerPkg": "1", "UMask": "0x3", "Unit": "iMC" @@ -20,47 +20,50 @@ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.WR", + "EventName": "LLC_MISSES.MEM_WRITE", "PerPkg": "1", "UMask": "0xc", "Unit": "iMC" }, { - "BriefDescription": "Memory controller clock ticks. Used to get percentages of memory controller cycles events. Derived from unc_m_clockticks", + "BriefDescription": "Memory controller clock ticks. Used to get percentages of memory controller cycles events", "Counter": "0,1,2,3", "EventName": "UNC_M_CLOCKTICKS", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode. Derived from unc_m_power_channel_ppd", + "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", "Counter": "0,1,2,3", "EventCode": "0x85", "EventName": "UNC_M_POWER_CHANNEL_PPD", "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_channel_ppd %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles all ranks are in critical thermal throttle. Derived from unc_m_power_critical_throttle_cycles", + "BriefDescription": "Cycles all ranks are in critical thermal throttle", "Counter": "0,1,2,3", "EventCode": "0x86", "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_critical_throttle_cycles %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles Memory is in self refresh power mode. 
Derived from unc_m_power_self_refresh", + "BriefDescription": "Cycles Memory is in self refresh power mode", "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "UNC_M_POWER_SELF_REFRESH", "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_self_refresh %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Memory page conflicts. Derived from unc_m_pre_count.page_miss", + "BriefDescription": "Memory page conflicts", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.PAGE_MISS", @@ -69,7 +72,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Occupancy counter for memory read queue. Derived from unc_m_rpq_occupancy", + "BriefDescription": "Occupancy counter for memory read queue", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_M_RPQ_OCCUPANCY", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json index bbe36d547386..16034bfd06dd 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json @@ -1,44 +1,48 @@ [ { - "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events. Derived from unc_p_clockticks", + "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events", "Counter": "0,1,2,3", "EventName": "UNC_P_CLOCKTICKS", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band0=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band0_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. 
(filter_band0=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_BAND0_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band0_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band1=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band1_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band1=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_BAND1_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band1_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band2=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band2_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band2=XXX with XXX in 100Mhz units). 
One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_BAND2_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band2_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band3_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_BAND3_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band3_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -49,6 +53,7 @@ "EventName": "UNC_P_FREQ_BAND0_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band0_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -59,6 +64,7 @@ "EventName": "UNC_P_FREQ_BAND1_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band1_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -69,6 +75,7 @@ "EventName": "UNC_P_FREQ_BAND2_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band2_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -79,89 +86,99 @@ "EventName": "UNC_P_FREQ_BAND3_TRANSITIONS", "Filter": "edge=1", 
"MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band3_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details. Derived from unc_p_power_state_occupancy.cores_c0", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0", "Filter": "occ_sel=1", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c0 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details. Derived from unc_p_power_state_occupancy.cores_c3", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. 
It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3", "Filter": "occ_sel=2", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c3 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events . Derived from unc_p_power_state_occupancy.cores_c6", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events ", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6", "Filter": "occ_sel=3", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c6 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip. Derived from unc_p_prochot_external_cycles", + "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. 
This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip", "Counter": "0,1,2,3", "EventCode": "0xa", "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES", "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "prochot_external_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency. Derived from unc_p_freq_max_limit_thermal_cycles", + "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_limit_thermal_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency. Derived from unc_p_freq_max_os_cycles", + "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x6", "EventName": "UNC_P_FREQ_MAX_OS_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_os_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency. Derived from unc_p_freq_max_power_cycles", + "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x5", "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_power_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency. 
Derived from unc_p_freq_max_current_cycles", + "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x7", "EventName": "UNC_P_FREQ_MAX_CURRENT_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_CURRENT_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_current_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Cycles spent changing Frequency. Derived from unc_p_freq_trans_cycles", + "BriefDescription": "Cycles spent changing Frequency", "Counter": "0,1,2,3", "EventName": "UNC_P_FREQ_TRANS_CYCLES", "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_trans_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -172,6 +189,7 @@ "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", "Filter": "filter_band0=1200", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -182,6 +200,7 @@ "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", "Filter": "filter_band1=2000", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -192,6 +211,7 @@ "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", "Filter": "filter_band2=3000", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -202,6 +222,7 @@ "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", "Filter": "filter_band3=4000", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -212,6 +233,7 @@ "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", "Filter": "edge=1,filter_band0=1200", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -222,6 +244,7 
@@ "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band1=2000", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -232,6 +255,7 @@ "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band2=4000", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -242,6 +266,7 @@ "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band3=4000", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" } -- cgit v1.2.3 From 00636c3b48e8acac2acd2601274c6eab4ecf8201 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 20 Mar 2017 13:17:07 -0700 Subject: perf pmu: Support MetricExpr header in JSON event list Add support for parsing the MetricExpr header in the JSON event lists and storing them in the alias structure. Used in the next patch. 
v2: Change DividedBy to MetricExpr v3: Really catch all uses of DividedBy Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170320201711.14142-10-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 18 ++++++++++++++---- tools/perf/pmu-events/jevents.h | 2 +- tools/perf/pmu-events/pmu-events.h | 1 + tools/perf/util/pmu.c | 9 ++++++--- tools/perf/util/pmu.h | 1 + 5 files changed, 23 insertions(+), 8 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index eed09346a72a..0735dc2a167a 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -291,7 +291,8 @@ static void print_events_table_prefix(FILE *fp, const char *tblname) static int print_events_table_entry(void *data, char *name, char *event, char *desc, char *long_desc, - char *pmu, char *unit, char *perpkg) + char *pmu, char *unit, char *perpkg, + char *metric_expr) { struct perf_entry_data *pd = data; FILE *outfp = pd->outfp; @@ -315,6 +316,8 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, "\t.unit = \"%s\",\n", unit); if (perpkg) fprintf(outfp, "\t.perpkg = \"%s\",\n", perpkg); + if (metric_expr) + fprintf(outfp, "\t.metric_expr = \"%s\",\n", metric_expr); fprintf(outfp, "},\n"); return 0; @@ -362,7 +365,8 @@ static char *real_event(const char *name, char *event) int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, char *long_desc, - char *pmu, char *unit, char *perpkg), + char *pmu, char *unit, char *perpkg, + char *metric_expr), void *data) { int err = -EIO; @@ -388,6 +392,7 @@ int json_events(const char *fn, char *filter = NULL; char *perpkg = NULL; char *unit = NULL; + char *metric_expr = NULL; unsigned long long eventcode = 0; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; @@ -398,6 +403,7 @@ int json_events(const char *fn, for (j = 0; j < obj->size; j += 2) { jsmntok_t *field, 
*val; int nz; + char *s; field = tok + j; EXPECT(field->type == JSMN_STRING, tok + j, @@ -444,7 +450,6 @@ int json_events(const char *fn, NULL); } else if (json_streq(map, field, "Unit")) { const char *ppmu; - char *s; ppmu = field_to_perf(unit_to_pmu, map, val); if (ppmu) { @@ -464,6 +469,10 @@ int json_events(const char *fn, addfield(map, &unit, "", "", val); } else if (json_streq(map, field, "PerPkg")) { addfield(map, &perpkg, "", "", val); + } else if (json_streq(map, field, "MetricExpr")) { + addfield(map, &metric_expr, "", "", val); + for (s = metric_expr; *s; s++) + *s = tolower(*s); } /* ignore unknown fields */ } @@ -488,7 +497,7 @@ int json_events(const char *fn, fixname(name); err = func(data, name, real_event(name, event), desc, long_desc, - pmu, unit, perpkg); + pmu, unit, perpkg, metric_expr); free(event); free(desc); free(name); @@ -498,6 +507,7 @@ int json_events(const char *fn, free(filter); free(perpkg); free(unit); + free(metric_expr); if (err) break; tok += j; diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h index 71e13de31092..57e111bf2168 100644 --- a/tools/perf/pmu-events/jevents.h +++ b/tools/perf/pmu-events/jevents.h @@ -5,7 +5,7 @@ int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, char *long_desc, char *pmu, - char *unit, char *perpkg), + char *unit, char *perpkg, char *metric_expr), void *data); char *get_cpu_str(void); diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index c669a3cdb9f0..d046e3a4ce46 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -13,6 +13,7 @@ struct pmu_event { const char *pmu; const char *unit; const char *perpkg; + const char *metric_expr; }; /* diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 12f84dd2ac5d..c0d487b3b925 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -231,7 +231,8 @@ static int perf_pmu__parse_snapshot(struct 
perf_pmu_alias *alias, static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, char *desc, char *val, char *long_desc, char *topic, - char *unit, char *perpkg) + char *unit, char *perpkg, + char *metric_expr) { struct perf_pmu_alias *alias; int ret; @@ -265,6 +266,7 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, perf_pmu__parse_snapshot(alias, dir, name); } + alias->metric_expr = metric_expr ? strdup(metric_expr) : NULL; alias->desc = desc ? strdup(desc) : NULL; alias->long_desc = long_desc ? strdup(long_desc) : desc ? strdup(desc) : NULL; @@ -294,7 +296,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI buf[ret] = 0; return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL, - NULL); + NULL, NULL); } static inline bool pmu_alias_info_file(char *name) @@ -564,7 +566,8 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name) __perf_pmu__new_alias(head, NULL, (char *)pe->name, (char *)pe->desc, (char *)pe->event, (char *)pe->long_desc, (char *)pe->topic, - (char *)pe->unit, (char *)pe->perpkg); + (char *)pe->unit, (char *)pe->perpkg, + (char *)pe->metric_expr); } out: diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 00852ddc7741..3dccb15f29e9 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -50,6 +50,7 @@ struct perf_pmu_alias { double scale; bool per_pkg; bool snapshot; + char *metric_expr; }; struct perf_pmu *perf_pmu__find(const char *name); -- cgit v1.2.3 From 37932c188ef1b471eae29249df045c8e567772d0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 20 Mar 2017 13:17:08 -0700 Subject: perf stat: Output JSON MetricExpr metric Add generic infrastructure to perf stat to output ratios for "MetricExpr" entries in the event lists. Many events are more useful as ratios than in raw form, typically some count in relation to total ticks. Transfer the MetricExpr information from the alias to the evsel. 
We mark the events that need to be collected for MetricExpr, and also link the events using them with a pointer. The code is careful to always prefer the right event in the same group to minimize multiplexing errors. At the moment only a single relation is supported. Then add a rblist to the stat shadow code that remembers stats based on the cpu and context. Then finally update and retrieve and print these values similarly to the existing hardcoded perf metrics. We use the simple expression parser added earlier to evaluate the expression. Normally we just output the result without further commentary, but for --metric-only this would lead to empty columns. So for this case use the original event as description. There is no attempt to automatically add the MetricExpr event, if it is missing, however we suggest it to the user, because the user tool doesn't have enough information to reliably construct a group that is guaranteed to schedule. So we leave that to the user. % perf stat -a -I 1000 -e '{unc_p_clockticks,unc_p_freq_max_os_cycles}' 1.000147889 800,085,181 unc_p_clockticks 1.000147889 93,126,241 unc_p_freq_max_os_cycles # 11.6 2.000448381 800,218,217 unc_p_clockticks 2.000448381 142,516,095 unc_p_freq_max_os_cycles # 17.8 3.000639852 800,243,057 unc_p_clockticks 3.000639852 162,292,689 unc_p_freq_max_os_cycles # 20.3 % perf stat -a -I 1000 -e '{unc_p_clockticks,unc_p_freq_max_os_cycles}' --metric-only # time freq_max_os_cycles % 1.000127077 0.9 2.000301436 0.7 3.000456379 0.0 v2: Change from DivideBy to MetricExpr v3: Use expr__ prefix. Support more than one other event. v4: Update description v5: Only print warning message once for multiple PMUs. 
Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170320201711.14142-11-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 3 + tools/perf/util/evsel.c | 3 + tools/perf/util/evsel.h | 3 + tools/perf/util/parse-events.c | 1 + tools/perf/util/pmu.c | 2 + tools/perf/util/pmu.h | 1 + tools/perf/util/stat-shadow.c | 195 +++++++++++++++++++++++++++++++++++++++++ tools/perf/util/stat.h | 2 + 8 files changed, 210 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index cd7dc3b648ca..01b589e3c3a6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1145,6 +1145,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, out.print_metric = pm; out.new_line = nl; out.ctx = &os; + out.force_header = false; if (csv_output && !metric_only) { print_noise(counter, noise); @@ -1480,6 +1481,7 @@ static void print_metric_headers(const char *prefix, bool no_indent) out.ctx = &os; out.print_metric = print_metric_header; out.new_line = new_line_metric; + out.force_header = true; os.evsel = counter; perf_stat__print_shadow_stats(counter, 0, 0, @@ -2498,6 +2500,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, (const char **) stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); + perf_stat__collect_metric_expr(evsel_list); perf_stat__init_shadow_stats(); if (csv_sep) { diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 175dc2305aa8..ef2a31f6dd06 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -236,6 +236,9 @@ void perf_evsel__init(struct perf_evsel *evsel, evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); perf_evsel__calc_id_pos(evsel); evsel->cmdline_group_boundary = false; + evsel->metric_expr = NULL; + evsel->metric_events = NULL; + evsel->collect_stat = false; } struct perf_evsel 
*perf_evsel__new_idx(struct perf_event_attr *attr, int idx) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index bd2e9b112d49..8f1f61826fdf 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -132,6 +132,9 @@ struct perf_evsel { struct list_head config_terms; int bpf_fd; bool merged_stat; + const char * metric_expr; + struct perf_evsel **metric_events; + bool collect_stat; }; union u64_swap { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index e594c974c93e..91b8e83e307d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1255,6 +1255,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot; + evsel->metric_expr = info.metric_expr; } return evsel ? 0 : -ENOMEM; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index c0d487b3b925..0f1133aa3253 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -994,6 +994,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, info->unit = NULL; info->scale = 0.0; info->snapshot = false; + info->metric_expr = NULL; list_for_each_entry_safe(term, h, head_terms, list) { alias = pmu_find_alias(pmu, term); @@ -1009,6 +1010,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, if (alias->per_pkg) info->per_pkg = true; + info->metric_expr = alias->metric_expr; list_del(&term->list); free(term); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 3dccb15f29e9..27f078ccc594 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -31,6 +31,7 @@ struct perf_pmu { struct perf_pmu_info { const char *unit; + const char *metric_expr; double scale; bool per_pkg; bool snapshot; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 8a2bbd2a4d82..c323cce97d98 100644 --- a/tools/perf/util/stat-shadow.c +++ 
b/tools/perf/util/stat-shadow.c @@ -3,6 +3,9 @@ #include "stat.h" #include "color.h" #include "pmu.h" +#include "rblist.h" +#include "evlist.h" +#include "expr.h" enum { CTX_BIT_USER = 1 << 0, @@ -41,13 +44,73 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; +static struct rblist runtime_saved_values; static bool have_frontend_stalled; struct stats walltime_nsecs_stats; +struct saved_value { + struct rb_node rb_node; + struct perf_evsel *evsel; + int cpu; + int ctx; + struct stats stats; +}; + +static int saved_value_cmp(struct rb_node *rb_node, const void *entry) +{ + struct saved_value *a = container_of(rb_node, + struct saved_value, + rb_node); + const struct saved_value *b = entry; + + if (a->ctx != b->ctx) + return a->ctx - b->ctx; + if (a->cpu != b->cpu) + return a->cpu - b->cpu; + return a->evsel - b->evsel; +} + +static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, + const void *entry) +{ + struct saved_value *nd = malloc(sizeof(struct saved_value)); + + if (!nd) + return NULL; + memcpy(nd, entry, sizeof(struct saved_value)); + return &nd->rb_node; +} + +static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, + int cpu, int ctx, + bool create) +{ + struct rb_node *nd; + struct saved_value dm = { + .cpu = cpu, + .ctx = ctx, + .evsel = evsel, + }; + nd = rblist__find(&runtime_saved_values, &dm); + if (nd) + return container_of(nd, struct saved_value, rb_node); + if (create) { + rblist__add_node(&runtime_saved_values, &dm); + nd = rblist__find(&runtime_saved_values, &dm); + if (nd) + return container_of(nd, struct saved_value, rb_node); + } + return NULL; +} + void perf_stat__init_shadow_stats(void) { have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); + 
rblist__init(&runtime_saved_values); + runtime_saved_values.node_cmp = saved_value_cmp; + runtime_saved_values.node_new = saved_value_new; + /* No delete for now */ } static int evsel_context(struct perf_evsel *evsel) @@ -70,6 +133,8 @@ static int evsel_context(struct perf_evsel *evsel) void perf_stat__reset_shadow_stats(void) { + struct rb_node *pos, *next; + memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); @@ -92,6 +157,15 @@ void perf_stat__reset_shadow_stats(void) memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); + + next = rb_first(&runtime_saved_values.entries); + while (next) { + pos = next; + next = rb_next(pos); + memset(&container_of(pos, struct saved_value, rb_node)->stats, + 0, + sizeof(struct stats)); + } } /* @@ -143,6 +217,12 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); + + if (counter->collect_stat) { + struct saved_value *v = saved_value_lookup(counter, cpu, ctx, + true); + update_stats(&v->stats, count[0]); + } } /* used for get_ratio_color() */ @@ -172,6 +252,95 @@ static const char *get_ratio_color(enum grc_type type, double ratio) return color; } +static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list, + const char *name) +{ + struct perf_evsel *c2; + + evlist__for_each_entry (evsel_list, c2) { + if (!strcasecmp(c2->name, name)) + return c2; + } + return NULL; +} + +/* Mark MetricExpr target events and link events using them to them. 
*/ +void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list) +{ + struct perf_evsel *counter, *leader, **metric_events, *oc; + bool found; + const char **metric_names; + int i; + int num_metric_names; + + evlist__for_each_entry(evsel_list, counter) { + bool invalid = false; + + leader = counter->leader; + if (!counter->metric_expr) + continue; + metric_events = counter->metric_events; + if (!metric_events) { + if (expr__find_other(counter->metric_expr, counter->name, + &metric_names, &num_metric_names) < 0) + continue; + + metric_events = calloc(sizeof(struct perf_evsel *), + num_metric_names + 1); + if (!metric_events) + return; + counter->metric_events = metric_events; + } + + for (i = 0; i < num_metric_names; i++) { + found = false; + if (leader) { + /* Search in group */ + for_each_group_member (oc, leader) { + if (!strcasecmp(oc->name, metric_names[i])) { + found = true; + break; + } + } + } + if (!found) { + /* Search ignoring groups */ + oc = perf_stat__find_event(evsel_list, metric_names[i]); + } + if (!oc) { + /* Deduping one is good enough to handle duplicated PMUs. */ + static char *printed; + + /* + * Adding events automatically would be difficult, because + * it would risk creating groups that are not schedulable. + * perf stat doesn't understand all the scheduling constraints + * of events. So we ask the user instead to add the missing + * events. 
+ */ + if (!printed || strcasecmp(printed, metric_names[i])) { + fprintf(stderr, + "Add %s event to groups to get metric expression for %s\n", + metric_names[i], + counter->name); + printed = strdup(metric_names[i]); + } + invalid = true; + continue; + } + metric_events[i] = oc; + oc->collect_stat = true; + } + metric_events[i] = NULL; + free(metric_names); + if (invalid) { + free(metric_events); + counter->metric_events = NULL; + counter->metric_expr = NULL; + } + } +} + static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) @@ -614,6 +783,32 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, be_bound * 100.); else print_metric(ctxp, NULL, NULL, name, 0); + } else if (evsel->metric_expr) { + struct parse_ctx pctx; + int i; + + expr__ctx_init(&pctx); + expr__add_id(&pctx, evsel->name, avg); + for (i = 0; evsel->metric_events[i]; i++) { + struct saved_value *v; + + v = saved_value_lookup(evsel->metric_events[i], cpu, ctx, false); + if (!v) + break; + expr__add_id(&pctx, evsel->metric_events[i]->name, + avg_stats(&v->stats)); + } + if (!evsel->metric_events[i]) { + const char *p = evsel->metric_expr; + + if (expr__parse(&ratio, &pctx, &p) == 0) + print_metric(ctxp, NULL, "%8.1f", + out->force_header ? 
evsel->name : "", + ratio); + else + print_metric(ctxp, NULL, NULL, "", 0); + } else + print_metric(ctxp, NULL, NULL, "", 0); } else if (runtime_nsecs_stats[cpu].n != 0) { char unit = 'M'; char unit_buf[10]; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index c29bb94c48a4..0a65ae23f495 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -85,11 +85,13 @@ struct perf_stat_output_ctx { void *ctx; print_metric_t print_metric; new_line_t new_line; + bool force_header; }; void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out); +void perf_stat__collect_metric_expr(struct perf_evlist *); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct perf_evlist *evlist); -- cgit v1.2.3 From 7f372a636d92e21d6fa41aebd6986ef590aefbfc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 20 Mar 2017 13:17:09 -0700 Subject: perf list: Support printing MetricExpr with --debug Output the metric expr in perf list when --debug is specified, so that the user can check the formula. Before: % perf list ... unc_m_power_channel_ppd [Cycles where DRAM ranks are in power down (CKE) mode. Derived from unc_m_power_channel_ppd. Unit: uncore_imc] uncore_imc_2/event=0x85/ After: % perf list --debug ... unc_m_power_channel_ppd [Cycles where DRAM ranks are in power down (CKE) mode. Derived from unc_m_power_channel_ppd. Unit: uncore_imc] Perf: uncore_imc_2/event=0x85/ MetricExpr: (unc_m_power_channel_ppd / unc_m_clockticks) * 100. 
Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170320201711.14142-12-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 0f1133aa3253..f819ad162b7c 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1105,6 +1105,7 @@ struct sevent { char *topic; char *str; char *pmu; + char *metric_expr; }; static int cmp_sevent(const void *a, const void *b) @@ -1203,6 +1204,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, aliases[j].topic = alias->topic; aliases[j].str = alias->str; aliases[j].pmu = pmu->name; + aliases[j].metric_expr = alias->metric_expr; j++; } if (pmu->selectable && @@ -1237,8 +1239,12 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, printf("%*s", 8, "["); wordwrap(aliases[j].desc, 8, columns, 0); printf("]\n"); - if (verbose > 0) - printf("%*s%s/%s/\n", 8, "", aliases[j].pmu, aliases[j].str); + if (verbose > 0) { + printf("%*s%s/%s/ ", 8, "", aliases[j].pmu, aliases[j].str); + if (aliases[j].metric_expr) + printf(" MetricExpr: %s", aliases[j].metric_expr); + putchar('\n'); + } } else printf(" %-50s [Kernel PMU event]\n", aliases[j].name); printed++; -- cgit v1.2.3 From 962848142335e8b35d522be78f58f2011d976b17 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 20 Mar 2017 13:17:10 -0700 Subject: perf pmu: Add support for MetricName JSON attribute Add support for a new JSON event attribute to name MetricExpr for better output in perf stat. If the event has no MetricName it uses the normal event name instead to describe the metric. 
Before % perf stat -a -I 1000 -e '{unc_p_clockticks,unc_p_freq_max_os_cycles}' --metric-only time unc_p_freq_max_os_cycles 1.000149775 15.7 2.000344807 19.3 3.000502544 16.7 4.000640656 6.6 5.000779955 9.9 After % perf stat -a -I 1000 -e '{unc_p_clockticks,unc_p_freq_max_os_cycles}' --metric-only time freq_max_os_cycles % 1.000149775 15.7 2.000344807 19.3 3.000502544 16.7 4.000640656 6.6 5.000779955 9.9 Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170320201711.14142-13-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 14 +++++++++++--- tools/perf/pmu-events/jevents.h | 3 ++- tools/perf/pmu-events/pmu-events.h | 1 + tools/perf/util/evsel.c | 1 + tools/perf/util/evsel.h | 1 + tools/perf/util/parse-events.c | 1 + tools/perf/util/pmu.c | 15 ++++++++++++--- tools/perf/util/pmu.h | 2 ++ tools/perf/util/stat-shadow.c | 4 +++- 9 files changed, 34 insertions(+), 8 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 0735dc2a167a..81f2ef3b15cf 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -292,7 +292,8 @@ static void print_events_table_prefix(FILE *fp, const char *tblname) static int print_events_table_entry(void *data, char *name, char *event, char *desc, char *long_desc, char *pmu, char *unit, char *perpkg, - char *metric_expr) + char *metric_expr, + char *metric_name) { struct perf_entry_data *pd = data; FILE *outfp = pd->outfp; @@ -318,6 +319,8 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, "\t.perpkg = \"%s\",\n", perpkg); if (metric_expr) fprintf(outfp, "\t.metric_expr = \"%s\",\n", metric_expr); + if (metric_name) + fprintf(outfp, "\t.metric_name = \"%s\",\n", metric_name); fprintf(outfp, "},\n"); return 0; @@ -366,7 +369,8 @@ int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, char *long_desc, char *pmu, char *unit, 
char *perpkg, - char *metric_expr), + char *metric_expr, + char *metric_name), void *data) { int err = -EIO; @@ -393,6 +397,7 @@ int json_events(const char *fn, char *perpkg = NULL; char *unit = NULL; char *metric_expr = NULL; + char *metric_name = NULL; unsigned long long eventcode = 0; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; @@ -469,6 +474,8 @@ int json_events(const char *fn, addfield(map, &unit, "", "", val); } else if (json_streq(map, field, "PerPkg")) { addfield(map, &perpkg, "", "", val); + } else if (json_streq(map, field, "MetricName")) { + addfield(map, &metric_name, "", "", val); } else if (json_streq(map, field, "MetricExpr")) { addfield(map, &metric_expr, "", "", val); for (s = metric_expr; *s; s++) @@ -497,7 +504,7 @@ int json_events(const char *fn, fixname(name); err = func(data, name, real_event(name, event), desc, long_desc, - pmu, unit, perpkg, metric_expr); + pmu, unit, perpkg, metric_expr, metric_name); free(event); free(desc); free(name); @@ -508,6 +515,7 @@ int json_events(const char *fn, free(perpkg); free(unit); free(metric_expr); + free(metric_name); if (err) break; tok += j; diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h index 57e111bf2168..611fac01913d 100644 --- a/tools/perf/pmu-events/jevents.h +++ b/tools/perf/pmu-events/jevents.h @@ -5,7 +5,8 @@ int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, char *long_desc, char *pmu, - char *unit, char *perpkg, char *metric_expr), + char *unit, char *perpkg, char *metric_expr, + char *metric_name), void *data); char *get_cpu_str(void); diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index d046e3a4ce46..569eab3688dd 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -14,6 +14,7 @@ struct pmu_event { const char *unit; const char *perpkg; const char *metric_expr; + const char *metric_name; }; /* diff --git a/tools/perf/util/evsel.c 
b/tools/perf/util/evsel.c index ef2a31f6dd06..9dc7e2d6e48a 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -237,6 +237,7 @@ void perf_evsel__init(struct perf_evsel *evsel, perf_evsel__calc_id_pos(evsel); evsel->cmdline_group_boundary = false; evsel->metric_expr = NULL; + evsel->metric_name = NULL; evsel->metric_events = NULL; evsel->collect_stat = false; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8f1f61826fdf..d101695c482c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -133,6 +133,7 @@ struct perf_evsel { int bpf_fd; bool merged_stat; const char * metric_expr; + const char * metric_name; struct perf_evsel **metric_events; bool collect_stat; }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 91b8e83e307d..119eb0b65876 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1256,6 +1256,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot; evsel->metric_expr = info.metric_expr; + evsel->metric_name = info.metric_name; } return evsel ? 0 : -ENOMEM; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index f819ad162b7c..bcf752fa345b 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -232,7 +232,8 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, char *desc, char *val, char *long_desc, char *topic, char *unit, char *perpkg, - char *metric_expr) + char *metric_expr, + char *metric_name) { struct perf_pmu_alias *alias; int ret; @@ -267,6 +268,7 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, } alias->metric_expr = metric_expr ? strdup(metric_expr) : NULL; + alias->metric_name = metric_name ? strdup(metric_name): NULL; alias->desc = desc ? strdup(desc) : NULL; alias->long_desc = long_desc ? strdup(long_desc) : desc ? 
strdup(desc) : NULL; @@ -296,7 +298,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI buf[ret] = 0; return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL, - NULL, NULL); + NULL, NULL, NULL); } static inline bool pmu_alias_info_file(char *name) @@ -567,7 +569,8 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name) (char *)pe->desc, (char *)pe->event, (char *)pe->long_desc, (char *)pe->topic, (char *)pe->unit, (char *)pe->perpkg, - (char *)pe->metric_expr); + (char *)pe->metric_expr, + (char *)pe->metric_name); } out: @@ -995,6 +998,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, info->scale = 0.0; info->snapshot = false; info->metric_expr = NULL; + info->metric_name = NULL; list_for_each_entry_safe(term, h, head_terms, list) { alias = pmu_find_alias(pmu, term); @@ -1011,6 +1015,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, if (alias->per_pkg) info->per_pkg = true; info->metric_expr = alias->metric_expr; + info->metric_name = alias->metric_name; list_del(&term->list); free(term); @@ -1106,6 +1111,7 @@ struct sevent { char *str; char *pmu; char *metric_expr; + char *metric_name; }; static int cmp_sevent(const void *a, const void *b) @@ -1205,6 +1211,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, aliases[j].str = alias->str; aliases[j].pmu = pmu->name; aliases[j].metric_expr = alias->metric_expr; + aliases[j].metric_name = alias->metric_name; j++; } if (pmu->selectable && @@ -1241,6 +1248,8 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, printf("]\n"); if (verbose > 0) { printf("%*s%s/%s/ ", 8, "", aliases[j].pmu, aliases[j].str); + if (aliases[j].metric_name) + printf(" MetricName: %s", aliases[j].metric_name); if (aliases[j].metric_expr) printf(" MetricExpr: %s", aliases[j].metric_expr); putchar('\n'); diff --git a/tools/perf/util/pmu.h 
b/tools/perf/util/pmu.h index 27f078ccc594..3d4b703f5d89 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -32,6 +32,7 @@ struct perf_pmu { struct perf_pmu_info { const char *unit; const char *metric_expr; + const char *metric_name; double scale; bool per_pkg; bool snapshot; @@ -52,6 +53,7 @@ struct perf_pmu_alias { bool per_pkg; bool snapshot; char *metric_expr; + char *metric_name; }; struct perf_pmu *perf_pmu__find(const char *name); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index c323cce97d98..ac10cc675d39 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -803,7 +803,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (expr__parse(&ratio, &pctx, &p) == 0) print_metric(ctxp, NULL, "%8.1f", - out->force_header ? evsel->name : "", + evsel->metric_name ? + evsel->metric_name : + out->force_header ? evsel->name : "", ratio); else print_metric(ctxp, NULL, NULL, "", 0); -- cgit v1.2.3 From bf874fcf9f2fed58510dc83abcee388cee2b427e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 20 Mar 2017 13:17:11 -0700 Subject: perf list: Move extra details printing to new option Move the printing of perf expressions and internal events to a new clearer --details flag, instead of lumping it together with other debug options in --debug. This makes it clearer to use. Before perf list --debug ... unc_m_power_critical_throttle_cycles [Cycles all ranks are in critical thermal throttle. Unit: uncore_imc] uncore_imc_2/event=0x86/ MetricName: power_critical_throttle_cycles % MetricExpr: (unc_m_power_critical_throttle_cycles / unc_m_clockticks) * 100. after perf list --details ... unc_m_power_critical_throttle_cycles [Cycles all ranks are in critical thermal throttle. Unit: uncore_imc] uncore_imc_2/event=0x86/ MetricName: power_critical_throttle_cycles % MetricExpr: (unc_m_power_critical_throttle_cycles / unc_m_clockticks) * 100. 
Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/20170320201711.14142-14-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 4 ++++ tools/perf/builtin-list.c | 14 ++++++++++---- tools/perf/util/parse-events.c | 5 +++-- tools/perf/util/parse-events.h | 2 +- tools/perf/util/pmu.c | 4 ++-- tools/perf/util/pmu.h | 2 +- 6 files changed, 21 insertions(+), 10 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 41857cce5e86..143d98df2df9 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -24,6 +24,10 @@ Don't print descriptions. --long-desc:: Print longer event descriptions. +--details:: +Print how named events are resolved internally into perf events, and also +any extra expressions computed by perf stat. + [[EVENT_MODIFIERS]] EVENT MODIFIERS diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 3b9d98b5feef..be9195e95c78 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -18,6 +18,7 @@ #include static bool desc_flag = true; +static bool details_flag; int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { @@ -30,6 +31,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) "Print extra event descriptions. 
--no-desc to not print."), OPT_BOOLEAN('v', "long-desc", &long_desc_flag, "Print longer event descriptions."), + OPT_BOOLEAN(0, "details", &details_flag, + "Print information on the perf event names and expressions used internally by events."), OPT_INCR(0, "debug", &verbose, "Enable debugging output"), OPT_END() @@ -50,7 +53,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) printf("\nList of pre-defined events (to be used in -e):\n\n"); if (argc == 0) { - print_events(NULL, raw_dump, !desc_flag, long_desc_flag); + print_events(NULL, raw_dump, !desc_flag, long_desc_flag, + details_flag); return 0; } @@ -72,7 +76,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) print_pmu_events(NULL, raw_dump, !desc_flag, - long_desc_flag); + long_desc_flag, details_flag); else if (strcmp(argv[i], "sdt") == 0) print_sdt_events(NULL, NULL, raw_dump); else if ((sep = strchr(argv[i], ':')) != NULL) { @@ -80,7 +84,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) if (sep == NULL) { print_events(argv[i], raw_dump, !desc_flag, - long_desc_flag); + long_desc_flag, + details_flag); continue; } sep_idx = sep - argv[i]; @@ -103,7 +108,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); print_hwcache_events(s, raw_dump); print_pmu_events(s, raw_dump, !desc_flag, - long_desc_flag); + long_desc_flag, + details_flag); print_tracepoint_events(NULL, s, raw_dump); print_sdt_events(NULL, s, raw_dump); free(s); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 119eb0b65876..6b498aea9fde 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2325,7 +2325,7 @@ out_enomem: * Print the help text for the event symbols: */ void print_events(const char *event_glob, bool name_only, bool quiet_flag, - 
bool long_desc) + bool long_desc, bool details_flag) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -2335,7 +2335,8 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_hwcache_events(event_glob, name_only); - print_pmu_events(event_glob, name_only, quiet_flag, long_desc); + print_pmu_events(event_glob, name_only, quiet_flag, long_desc, + details_flag); if (event_glob != NULL) return; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index f38086b8dbea..a235f4d6d5e5 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -184,7 +184,7 @@ void parse_events_evlist_error(struct parse_events_evlist *data, int idx, const char *str); void print_events(const char *event_glob, bool name_only, bool quiet, - bool long_desc); + bool long_desc, bool details_flag); struct event_symbol { const char *symbol; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index bcf752fa345b..362051ea7f3d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1154,7 +1154,7 @@ static void wordwrap(char *s, int start, int max, int corr) } void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc) + bool long_desc, bool details_flag) { struct perf_pmu *pmu; struct perf_pmu_alias *alias; @@ -1246,7 +1246,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, printf("%*s", 8, "["); wordwrap(aliases[j].desc, 8, columns, 0); printf("]\n"); - if (verbose > 0) { + if (details_flag) { printf("%*s%s/%s/ ", 8, "", aliases[j].pmu, aliases[j].str); if (aliases[j].metric_name) printf(" MetricName: %s", aliases[j].metric_name); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 3d4b703f5d89..ea7f450dc609 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -80,7 +80,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head); struct perf_pmu 
*perf_pmu__scan(struct perf_pmu *pmu); void print_pmu_events(const char *event_glob, bool name_only, bool quiet, - bool long_desc); + bool long_desc, bool details_flag); bool pmu_have_event(const char *pname, const char *name); int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, -- cgit v1.2.3 From 39f0e7a825cfc971dc9ad40b0770c22f6f4f89b8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 24 Mar 2017 14:51:28 -0300 Subject: perf trace: Check for vfs_getname.pathname length It shouldn't be zero, but if the 'perf probe' on getname_flags() (or elsewhere in the future we need to probe to catch the pathname for syscalls like 'open' being copied from userspace to the kernel) is misplaced somehow, then we will end up not allocating space and trying to copy the "" empty string to ttrace->filename.name, causing a segfault, fix it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-c4f1t6sx1nczuzop19r5si5s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 912fedc5b42d..33c657c15d5e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1656,6 +1656,8 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, goto out; filename_len = strlen(filename); + if (filename_len == 0) + goto out; if (ttrace->filename.namelen < filename_len) { char *f = realloc(ttrace->filename.name, filename_len + 1); -- cgit v1.2.3 From c04dfafa6033ca2eddc56fe188017d9ae50414c9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 24 Mar 2017 14:54:06 -0300 Subject: perf trace: Fix up error path indentation Trivial fix removing a tab in an error path. 
Link: http://lkml.kernel.org/n/tip-c14mk6cqaiby8gf5rpft3d9r@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 33c657c15d5e..2425605461fe 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1663,7 +1663,7 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, char *f = realloc(ttrace->filename.name, filename_len + 1); if (f == NULL) - goto out; + goto out; ttrace->filename.namelen = filename_len; ttrace->filename.name = f; -- cgit v1.2.3 From ef65e96e0762cb98d9abeb6737c721ca840f8092 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 24 Mar 2017 15:03:19 -0300 Subject: perf trace: Fixup thread refcounting In trace__vfs_getname() and when checking if a thread is filtered in trace__process_sample() we were not dropping the reference obtained via machine__findnew_thread(), fix it. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-9gc470phavxwxv5d9w7ck8ev@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 2425605461fe..60053d49539b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1653,17 +1653,17 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, ttrace = thread__priv(thread); if (!ttrace) - goto out; + goto out_put; filename_len = strlen(filename); if (filename_len == 0) - goto out; + goto out_put; if (ttrace->filename.namelen < filename_len) { char *f = realloc(ttrace->filename.name, filename_len + 1); if (f == NULL) - goto out; + goto out_put; ttrace->filename.namelen = filename_len; ttrace->filename.name = f; @@ -1673,12 +1673,12 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, ttrace->filename.pending_open = true; if (!ttrace->filename.ptr) - goto out; + goto out_put; entry_str_len = strlen(ttrace->entry_str); remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */ if (remaining_space <= 0) - goto out; + goto out_put; if (filename_len > (size_t)remaining_space) { filename += filename_len - remaining_space; @@ -1692,6 +1692,8 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, ttrace->filename.ptr = 0; ttrace->filename.entry_str_pos = 0; +out_put: + thread__put(thread); out: return 0; } @@ -1712,6 +1714,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs ttrace->runtime_ms += runtime_ms; trace->runtime_ms += runtime_ms; +out_put: thread__put(thread); return 0; @@ -1722,8 +1725,7 @@ out_dump: (pid_t)perf_evsel__intval(evsel, sample, "pid"), runtime, perf_evsel__intval(evsel, sample, "vruntime")); - thread__put(thread); - return 0; + 
goto out_put; } static void bpf_output__printer(enum binary_printer_ops op, @@ -1922,7 +1924,7 @@ static int trace__process_sample(struct perf_tool *tool, thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); if (thread && thread__is_filtered(thread)) - return 0; + goto out; trace__set_base_time(trace, evsel, sample); @@ -1930,7 +1932,8 @@ static int trace__process_sample(struct perf_tool *tool, ++trace->nr_events; handler(trace, evsel, event, sample); } - +out: + thread__put(thread); return err; } -- cgit v1.2.3 From c3a0bbc7ad7598dec5a204868bdf8a2b1b51df14 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 24 Mar 2017 14:15:52 +0200 Subject: perf auxtrace: Fix no_size logic in addr_filter__resolve_kernel_syms() Address filtering with kernel symbols incorrectly resulted in the error "Cannot determine size of symbol" because the no_size logic was the wrong way around. Signed-off-by: Adrian Hunter Tested-by: Andi Kleen Cc: stable@vger.kernel.org # v4.9+ Link: http://lkml.kernel.org/r/1490357752-27942-1-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index c5a6e0b12452..78bd632f144d 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1826,7 +1826,7 @@ static int addr_filter__resolve_kernel_syms(struct addr_filter *filt) filt->addr = start; if (filt->range && !filt->size && !filt->sym_to) { filt->size = size; - no_size = !!size; + no_size = !size; } } @@ -1840,7 +1840,7 @@ static int addr_filter__resolve_kernel_syms(struct addr_filter *filt) if (err) return err; filt->size = start + size - filt->addr; - no_size = !!size; + no_size = !size; } /* The very last symbol in kallsyms does not imply a particular size */ -- cgit v1.2.3 From 6963d3c387ec123753788838785e68928cb87c1b Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 27 Mar 
2017 08:25:38 +0530 Subject: perf list sdt: Show option in man page Commit 40218daea1db ("perf list: Show SDT and pre-cached events") added sdt support in perf list, but it missed to update documentation. Show sdt option in man perf-list. Signed-off-by: Ravi Bangoria Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/20170327025538.1753-1-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 143d98df2df9..f709de54707b 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -8,7 +8,7 @@ perf-list - List all symbolic event types SYNOPSIS -------- [verse] -'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|event_glob] +'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|sdt|event_glob] DESCRIPTION ----------- @@ -244,6 +244,8 @@ To limit the list use: . 'pmu' to print the kernel supplied PMU events. +. 'sdt' to list all Statically Defined Tracepoint events. + . If none of the above is matched, it will apply the supplied glob to all events, printing the ones that match. -- cgit v1.2.3 From b0ad8ea66445d64a469df0c710947f4cdb8ef16b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 27 Mar 2017 11:47:20 -0300 Subject: perf tools: Remove unused 'prefix' from builtin functions We got it from the git sources but never used it for anything, with the place where this would be somehow used remaining: static int run_builtin(struct cmd_struct *p, int argc, const char **argv) { prefix = NULL; if (p->option & RUN_SETUP) prefix = NULL; /* setup_perf_directory(); */ Ditch it. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-uw5swz05vol0qpr32c5lpvus@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/bench.h | 20 ++++++------ tools/perf/bench/futex-hash.c | 3 +- tools/perf/bench/futex-lock-pi.c | 3 +- tools/perf/bench/futex-requeue.c | 3 +- tools/perf/bench/futex-wake-parallel.c | 3 +- tools/perf/bench/futex-wake.c | 3 +- tools/perf/bench/mem-functions.c | 4 +-- tools/perf/bench/numa.c | 2 +- tools/perf/bench/sched-messaging.c | 3 +- tools/perf/bench/sched-pipe.c | 2 +- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-bench.c | 12 +++---- tools/perf/builtin-buildid-cache.c | 3 +- tools/perf/builtin-buildid-list.c | 3 +- tools/perf/builtin-c2c.c | 4 +-- tools/perf/builtin-config.c | 2 +- tools/perf/builtin-data.c | 9 +++--- tools/perf/builtin-diff.c | 2 +- tools/perf/builtin-evlist.c | 2 +- tools/perf/builtin-ftrace.c | 2 +- tools/perf/builtin-help.c | 2 +- tools/perf/builtin-inject.c | 2 +- tools/perf/builtin-kallsyms.c | 2 +- tools/perf/builtin-kmem.c | 4 +-- tools/perf/builtin-kvm.c | 16 +++++----- tools/perf/builtin-list.c | 2 +- tools/perf/builtin-lock.c | 6 ++-- tools/perf/builtin-mem.c | 6 ++-- tools/perf/builtin-probe.c | 6 ++-- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-sched.c | 6 ++-- tools/perf/builtin-script.c | 4 +-- tools/perf/builtin-stat.c | 2 +- tools/perf/builtin-timechart.c | 7 ++-- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 4 +-- tools/perf/builtin-version.c | 3 +- tools/perf/builtin.h | 58 +++++++++++++++++----------------- tools/perf/perf.c | 11 ++----- tools/perf/tests/builtin-test.c | 2 +- 41 files changed, 110 insertions(+), 126 deletions(-) diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 579a592990dd..842ab2781cdc 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -25,17 +25,17 @@ # endif #endif -int 
bench_numa(int argc, const char **argv, const char *prefix); -int bench_sched_messaging(int argc, const char **argv, const char *prefix); -int bench_sched_pipe(int argc, const char **argv, const char *prefix); -int bench_mem_memcpy(int argc, const char **argv, const char *prefix); -int bench_mem_memset(int argc, const char **argv, const char *prefix); -int bench_futex_hash(int argc, const char **argv, const char *prefix); -int bench_futex_wake(int argc, const char **argv, const char *prefix); -int bench_futex_wake_parallel(int argc, const char **argv, const char *prefix); -int bench_futex_requeue(int argc, const char **argv, const char *prefix); +int bench_numa(int argc, const char **argv); +int bench_sched_messaging(int argc, const char **argv); +int bench_sched_pipe(int argc, const char **argv); +int bench_mem_memcpy(int argc, const char **argv); +int bench_mem_memset(int argc, const char **argv); +int bench_futex_hash(int argc, const char **argv); +int bench_futex_wake(int argc, const char **argv); +int bench_futex_wake_parallel(int argc, const char **argv); +int bench_futex_requeue(int argc, const char **argv); /* pi futexes */ -int bench_futex_lock_pi(int argc, const char **argv, const char *prefix); +int bench_futex_lock_pi(int argc, const char **argv); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 2499e1b0c6fb..fe16b310097f 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -114,8 +114,7 @@ static void print_summary(void) (int) runtime.tv_sec); } -int bench_futex_hash(int argc, const char **argv, - const char *prefix __maybe_unused) +int bench_futex_hash(int argc, const char **argv) { int ret = 0; cpu_set_t cpu; diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index a20814d94af1..73a1c44ea63c 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -140,8 
+140,7 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr) } } -int bench_futex_lock_pi(int argc, const char **argv, - const char *prefix __maybe_unused) +int bench_futex_lock_pi(int argc, const char **argv) { int ret = 0; unsigned int i; diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 9fad1e4fcd3e..41786cbea24c 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -109,8 +109,7 @@ static void toggle_done(int sig __maybe_unused, done = true; } -int bench_futex_requeue(int argc, const char **argv, - const char *prefix __maybe_unused) +int bench_futex_requeue(int argc, const char **argv) { int ret = 0; unsigned int i, j; diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 40f5fcf1d120..4ab12c8e016a 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -197,8 +197,7 @@ static void toggle_done(int sig __maybe_unused, done = true; } -int bench_futex_wake_parallel(int argc, const char **argv, - const char *prefix __maybe_unused) +int bench_futex_wake_parallel(int argc, const char **argv) { int ret = 0; unsigned int i, j; diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 789490281ae3..2fa49222ef8d 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -115,8 +115,7 @@ static void toggle_done(int sig __maybe_unused, done = true; } -int bench_futex_wake(int argc, const char **argv, - const char *prefix __maybe_unused) +int bench_futex_wake(int argc, const char **argv) { int ret = 0; unsigned int i, j; diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 52504a83b5a1..d1dea33dcfcf 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -284,7 +284,7 @@ static const char * const bench_mem_memcpy_usage[] = { NULL }; -int bench_mem_memcpy(int argc, const char **argv, 
const char *prefix __maybe_unused) +int bench_mem_memcpy(int argc, const char **argv) { struct bench_mem_info info = { .functions = memcpy_functions, @@ -358,7 +358,7 @@ static const struct function memset_functions[] = { { .name = NULL, } }; -int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused) +int bench_mem_memset(int argc, const char **argv) { struct bench_mem_info info = { .functions = memset_functions, diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 6bd0581de298..1fe43bd5a012 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1767,7 +1767,7 @@ static int bench_all(void) return 0; } -int bench_numa(int argc, const char **argv, const char *prefix __maybe_unused) +int bench_numa(int argc, const char **argv) { init_params(&p0, "main,", argc, argv); argc = parse_options(argc, argv, options, bench_numa_usage, 0); diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index 6a111e775210..4f961e74535b 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -260,8 +260,7 @@ static const char * const bench_sched_message_usage[] = { NULL }; -int bench_sched_messaging(int argc, const char **argv, - const char *prefix __maybe_unused) +int bench_sched_messaging(int argc, const char **argv) { unsigned int i, total_children; struct timeval start, stop, diff; diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 2243f0150d76..a152737370c5 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -76,7 +76,7 @@ static void *worker_thread(void *__tdata) return NULL; } -int bench_sched_pipe(int argc, const char **argv, const char *prefix __maybe_unused) +int bench_sched_pipe(int argc, const char **argv) { struct thread_data threads[2], *td; int pipe_1[2], pipe_2[2]; diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index e54b1f9fe1ee..56a7c8d210b9 100644 --- 
a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -383,7 +383,7 @@ static const char * const annotate_usage[] = { NULL }; -int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_annotate(int argc, const char **argv) { struct perf_annotate annotate = { .tool = { diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index a1cddc6bbf0f..445e62881254 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -25,7 +25,7 @@ #include #include -typedef int (*bench_fn_t)(int argc, const char **argv, const char *prefix); +typedef int (*bench_fn_t)(int argc, const char **argv); struct bench { const char *name; @@ -155,7 +155,7 @@ static int bench_str2int(const char *str) * to something meaningful: */ static int run_bench(const char *coll_name, const char *bench_name, bench_fn_t fn, - int argc, const char **argv, const char *prefix) + int argc, const char **argv) { int size; char *name; @@ -171,7 +171,7 @@ static int run_bench(const char *coll_name, const char *bench_name, bench_fn_t f prctl(PR_SET_NAME, name); argv[0] = name; - ret = fn(argc, argv, prefix); + ret = fn(argc, argv); free(name); @@ -198,7 +198,7 @@ static void run_collection(struct collection *coll) fflush(stdout); argv[1] = bench->name; - run_bench(coll->name, bench->name, bench->fn, 1, argv, NULL); + run_bench(coll->name, bench->name, bench->fn, 1, argv); printf("\n"); } } @@ -211,7 +211,7 @@ static void run_all_collections(void) run_collection(coll); } -int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_bench(int argc, const char **argv) { struct collection *coll; int ret = 0; @@ -270,7 +270,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused) if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name); fflush(stdout); - ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1, prefix); + ret = 
run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1); goto end; } diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 30e2b2cb2421..94b55eee0d9b 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -276,8 +276,7 @@ static int build_id_cache__update_file(const char *filename) return err; } -int cmd_buildid_cache(int argc, const char **argv, - const char *prefix __maybe_unused) +int cmd_buildid_cache(int argc, const char **argv) { struct strlist *list; struct str_node *pos; diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 5e914ee79eb3..26f4e608207f 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -87,8 +87,7 @@ out: return 0; } -int cmd_buildid_list(int argc, const char **argv, - const char *prefix __maybe_unused) +int cmd_buildid_list(int argc, const char **argv) { bool show_kernel = false; bool with_hits = false; diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 5cd6d7a047b9..70c2c773a2b8 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2755,12 +2755,12 @@ static int perf_c2c__record(int argc, const char **argv) pr_debug("\n"); } - ret = cmd_record(i, rec_argv, NULL); + ret = cmd_record(i, rec_argv); free(rec_argv); return ret; } -int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_c2c(int argc, const char **argv) { argc = parse_options(argc, argv, c2c_options, c2c_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index 8c0d93b7c2f0..55f04f85b049 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -154,7 +154,7 @@ static int parse_config_arg(char *arg, char **var, char **value) return 0; } -int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_config(int argc, const char **argv) { int i, ret = 0; struct 
perf_config_set *set; diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c index 7ad6e17ac6b3..0adb5f82335a 100644 --- a/tools/perf/builtin-data.c +++ b/tools/perf/builtin-data.c @@ -6,7 +6,7 @@ #include "data-convert.h" #include "data-convert-bt.h" -typedef int (*data_cmd_fn_t)(int argc, const char **argv, const char *prefix); +typedef int (*data_cmd_fn_t)(int argc, const char **argv); struct data_cmd { const char *name; @@ -50,8 +50,7 @@ static const char * const data_convert_usage[] = { NULL }; -static int cmd_data_convert(int argc, const char **argv, - const char *prefix __maybe_unused) +static int cmd_data_convert(int argc, const char **argv) { const char *to_ctf = NULL; struct perf_data_convert_opts opts = { @@ -98,7 +97,7 @@ static struct data_cmd data_cmds[] = { { .name = NULL, }, }; -int cmd_data(int argc, const char **argv, const char *prefix) +int cmd_data(int argc, const char **argv) { struct data_cmd *cmd; const char *cmdstr; @@ -118,7 +117,7 @@ int cmd_data(int argc, const char **argv, const char *prefix) if (strcmp(cmd->name, cmdstr)) continue; - return cmd->fn(argc, argv, prefix); + return cmd->fn(argc, argv); } pr_err("Unknown command: %s\n", cmdstr); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 5e4803158672..cd2605d86984 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1321,7 +1321,7 @@ static int diff__config(const char *var, const char *value, return 0; } -int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_diff(int argc, const char **argv) { int ret = hists__init(); diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index e09c4287fe87..6d210e40d611 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -46,7 +46,7 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details return 0; } -int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused) +int 
cmd_evlist(int argc, const char **argv) { struct perf_attr_details details = { .verbose = false, }; const struct option options[] = { diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 6087295f8827..f80fb60b00b0 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -304,7 +304,7 @@ static int perf_ftrace_config(const char *var, const char *value, void *cb) return -1; } -int cmd_ftrace(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_ftrace(int argc, const char **argv) { int ret; struct perf_ftrace ftrace = { diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index aed0d844e8c2..7ae238929e95 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -418,7 +418,7 @@ static int show_html_page(const char *perf_cmd) return 0; } -int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_help(int argc, const char **argv) { bool show_all = false; enum help_format help_format = HELP_FORMAT_MAN; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 8d1d13b9bab6..42dff0b1375a 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -738,7 +738,7 @@ static int __cmd_inject(struct perf_inject *inject) return ret; } -int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_inject(int argc, const char **argv) { struct perf_inject inject = { .tool = { diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c index 224bfc454b4a..8ff38c4eb2c0 100644 --- a/tools/perf/builtin-kallsyms.c +++ b/tools/perf/builtin-kallsyms.c @@ -43,7 +43,7 @@ static int __cmd_kallsyms(int argc, const char **argv) return 0; } -int cmd_kallsyms(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_kallsyms(int argc, const char **argv) { const struct option options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), diff --git 
a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index d509e74bc6e8..515587825af4 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1866,7 +1866,7 @@ static int __cmd_record(int argc, const char **argv) for (j = 1; j < (unsigned int)argc; j++, i++) rec_argv[i] = argv[j]; - return cmd_record(i, rec_argv, NULL); + return cmd_record(i, rec_argv); } static int kmem_config(const char *var, const char *value, void *cb __maybe_unused) @@ -1885,7 +1885,7 @@ static int kmem_config(const char *var, const char *value, void *cb __maybe_unus return 0; } -int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_kmem(int argc, const char **argv) { const char * const default_slab_sort = "frag,hit,bytes"; const char * const default_page_sort = "bytes,hit"; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 18e6c38864bc..38b409173693 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1209,7 +1209,7 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv) set_option_flag(record_options, 0, "transaction", PARSE_OPT_DISABLED); record_usage = kvm_stat_record_usage; - return cmd_record(i, rec_argv, NULL); + return cmd_record(i, rec_argv); } static int @@ -1477,7 +1477,7 @@ static int kvm_cmd_stat(const char *file_name, int argc, const char **argv) #endif perf_stat: - return cmd_stat(argc, argv, NULL); + return cmd_stat(argc, argv); } #endif /* HAVE_KVM_STAT_SUPPORT */ @@ -1496,7 +1496,7 @@ static int __cmd_record(const char *file_name, int argc, const char **argv) BUG_ON(i != rec_argc); - return cmd_record(i, rec_argv, NULL); + return cmd_record(i, rec_argv); } static int __cmd_report(const char *file_name, int argc, const char **argv) @@ -1514,7 +1514,7 @@ static int __cmd_report(const char *file_name, int argc, const char **argv) BUG_ON(i != rec_argc); - return cmd_report(i, rec_argv, NULL); + return cmd_report(i, rec_argv); } static int @@ -1533,10 +1533,10 @@ 
__cmd_buildid_list(const char *file_name, int argc, const char **argv) BUG_ON(i != rec_argc); - return cmd_buildid_list(i, rec_argv, NULL); + return cmd_buildid_list(i, rec_argv); } -int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_kvm(int argc, const char **argv) { const char *file_name = NULL; const struct option kvm_options[] = { @@ -1591,9 +1591,9 @@ int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused) else if (!strncmp(argv[0], "rep", 3)) return __cmd_report(file_name, argc, argv); else if (!strncmp(argv[0], "diff", 4)) - return cmd_diff(argc, argv, NULL); + return cmd_diff(argc, argv); else if (!strncmp(argv[0], "top", 3)) - return cmd_top(argc, argv, NULL); + return cmd_top(argc, argv); else if (!strncmp(argv[0], "buildid-list", 12)) return __cmd_buildid_list(file_name, argc, argv); #ifdef HAVE_KVM_STAT_SUPPORT diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index be9195e95c78..4bf2cb4d25aa 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -20,7 +20,7 @@ static bool desc_flag = true; static bool details_flag; -int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_list(int argc, const char **argv) { int i; bool raw_dump = false; diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index e992e7206993..b686fb6759da 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -941,12 +941,12 @@ static int __cmd_record(int argc, const char **argv) BUG_ON(i != rec_argc); - ret = cmd_record(i, rec_argv, NULL); + ret = cmd_record(i, rec_argv); free(rec_argv); return ret; } -int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_lock(int argc, const char **argv) { const struct option lock_options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), @@ -1009,7 +1009,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) rc = 
__cmd_report(false); } else if (!strcmp(argv[0], "script")) { /* Aliased to 'perf script' */ - return cmd_script(argc, argv, prefix); + return cmd_script(argc, argv); } else if (!strcmp(argv[0], "info")) { if (argc) { argc = parse_options(argc, argv, diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 030a6cfdda59..643f4faac0d0 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -129,7 +129,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) pr_debug("\n"); } - ret = cmd_record(i, rec_argv, NULL); + ret = cmd_record(i, rec_argv); free(rec_argv); return ret; } @@ -256,7 +256,7 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) for (j = 1; j < argc; j++, i++) rep_argv[i] = argv[j]; - ret = cmd_report(i, rep_argv, NULL); + ret = cmd_report(i, rep_argv); free(rep_argv); return ret; } @@ -330,7 +330,7 @@ error: return ret; } -int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_mem(int argc, const char **argv) { struct stat st; struct perf_mem mem = { diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 51cdc230f6ca..d7360c2bda13 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -468,7 +468,7 @@ out: static int -__cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) +__cmd_probe(int argc, const char **argv) { const char * const probe_usage[] = { "perf probe [] 'PROBEDEF' ['PROBEDEF' ...]", @@ -687,13 +687,13 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) return 0; } -int cmd_probe(int argc, const char **argv, const char *prefix) +int cmd_probe(int argc, const char **argv) { int ret; ret = init_params(); if (!ret) { - ret = __cmd_probe(argc, argv, prefix); + ret = __cmd_probe(argc, argv); cleanup_params(); } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 04faef79a548..3191ab063852 100644 --- 
a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1667,7 +1667,7 @@ static struct option __record_options[] = { struct option *record_options = __record_options; -int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_record(int argc, const char **argv) { int err; struct record *rec = &record; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5ab8117c3bfd..3c8885a1c452 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -681,7 +681,7 @@ const char report_callchain_help[] = "Display call graph (stack chain/backtrace) CALLCHAIN_REPORT_HELP "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT; -int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_report(int argc, const char **argv) { struct perf_session *session; struct itrace_synth_opts itrace_synth_opts = { .set = 0, }; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index b92c4d97192c..79833e226789 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3272,10 +3272,10 @@ static int __cmd_record(int argc, const char **argv) BUG_ON(i != rec_argc); - return cmd_record(i, rec_argv, NULL); + return cmd_record(i, rec_argv); } -int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_sched(int argc, const char **argv) { const char default_sort_order[] = "avg, max, switch, runtime"; struct perf_sched sched = { @@ -3412,7 +3412,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) * Aliased to 'perf script' for now: */ if (!strcmp(argv[0], "script")) - return cmd_script(argc, argv, prefix); + return cmd_script(argc, argv); if (!strncmp(argv[0], "rec", 3)) { return __cmd_record(argc, argv); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c98e16689b57..46acc8ece41f 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2359,7 +2359,7 @@ int 
process_cpu_map_event(struct perf_tool *tool __maybe_unused, return set_maps(script); } -int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_script(int argc, const char **argv) { bool show_full_info = false; bool header = false; @@ -2504,7 +2504,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { rec_script_path = get_script_path(argv[1], RECORD_SUFFIX); if (!rec_script_path) - return cmd_record(argc, argv, NULL); + return cmd_record(argc, argv); } if (argc > 1 && !strncmp(argv[0], "rep", strlen("rep"))) { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 01b589e3c3a6..2158ea14da57 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2478,7 +2478,7 @@ static void setup_system_wide(int forks) } } -int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_stat(int argc, const char **argv) { const char * const stat_usage[] = { "perf stat [] []", diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index fbd7c6c695b8..fafdb44b8bcb 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1773,7 +1773,7 @@ static int timechart__io_record(int argc, const char **argv) for (i = 0; i < (unsigned int)argc; i++) *p++ = argv[i]; - return cmd_record(rec_argc, rec_argv, NULL); + return cmd_record(rec_argc, rec_argv); } @@ -1864,7 +1864,7 @@ static int timechart__record(struct timechart *tchart, int argc, const char **ar for (j = 0; j < (unsigned int)argc; j++) *p++ = argv[j]; - return cmd_record(rec_argc, rec_argv, NULL); + return cmd_record(rec_argc, rec_argv); } static int @@ -1917,8 +1917,7 @@ parse_time(const struct option *opt, const char *arg, int __maybe_unused unset) return 0; } -int cmd_timechart(int argc, const char **argv, - const char *prefix __maybe_unused) +int cmd_timechart(int argc, const char **argv) { struct 
timechart tchart = { .tool = { diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ab9077915763..a0c97c70ec81 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1075,7 +1075,7 @@ parse_percent_limit(const struct option *opt, const char *arg, const char top_callchain_help[] = CALLCHAIN_RECORD_HELP CALLCHAIN_REPORT_HELP "\n\t\t\t\tDefault: fp,graph,0.5,caller,function"; -int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_top(int argc, const char **argv) { char errbuf[BUFSIZ]; struct perf_top top = { diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 60053d49539b..c88f9f215e6f 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1993,7 +1993,7 @@ static int trace__record(struct trace *trace, int argc, const char **argv) for (i = 0; i < (unsigned int)argc; i++) rec_argv[j++] = argv[i]; - return cmd_record(j, rec_argv, NULL); + return cmd_record(j, rec_argv); } static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); @@ -2791,7 +2791,7 @@ out: return err; } -int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_trace(int argc, const char **argv) { const char *trace_usage[] = { "perf trace [] []", diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index 9b10cda6b6dc..b9a095b1db99 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -2,8 +2,7 @@ #include "builtin.h" #include "perf.h" -int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused, - const char *prefix __maybe_unused) +int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused) { printf("perf version %s\n", perf_version_string); return 0; diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 036e1e35b1a8..26669bf9129c 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -13,35 +13,35 @@ void prune_packed_objects(int); int 
read_line_with_nul(char *buf, int size, FILE *file); int check_pager_config(const char *cmd); -int cmd_annotate(int argc, const char **argv, const char *prefix); -int cmd_bench(int argc, const char **argv, const char *prefix); -int cmd_buildid_cache(int argc, const char **argv, const char *prefix); -int cmd_buildid_list(int argc, const char **argv, const char *prefix); -int cmd_config(int argc, const char **argv, const char *prefix); -int cmd_c2c(int argc, const char **argv, const char *prefix); -int cmd_diff(int argc, const char **argv, const char *prefix); -int cmd_evlist(int argc, const char **argv, const char *prefix); -int cmd_help(int argc, const char **argv, const char *prefix); -int cmd_sched(int argc, const char **argv, const char *prefix); -int cmd_kallsyms(int argc, const char **argv, const char *prefix); -int cmd_list(int argc, const char **argv, const char *prefix); -int cmd_record(int argc, const char **argv, const char *prefix); -int cmd_report(int argc, const char **argv, const char *prefix); -int cmd_stat(int argc, const char **argv, const char *prefix); -int cmd_timechart(int argc, const char **argv, const char *prefix); -int cmd_top(int argc, const char **argv, const char *prefix); -int cmd_script(int argc, const char **argv, const char *prefix); -int cmd_version(int argc, const char **argv, const char *prefix); -int cmd_probe(int argc, const char **argv, const char *prefix); -int cmd_kmem(int argc, const char **argv, const char *prefix); -int cmd_lock(int argc, const char **argv, const char *prefix); -int cmd_kvm(int argc, const char **argv, const char *prefix); -int cmd_test(int argc, const char **argv, const char *prefix); -int cmd_trace(int argc, const char **argv, const char *prefix); -int cmd_inject(int argc, const char **argv, const char *prefix); -int cmd_mem(int argc, const char **argv, const char *prefix); -int cmd_data(int argc, const char **argv, const char *prefix); -int cmd_ftrace(int argc, const char **argv, const char *prefix); 
+int cmd_annotate(int argc, const char **argv); +int cmd_bench(int argc, const char **argv); +int cmd_buildid_cache(int argc, const char **argv); +int cmd_buildid_list(int argc, const char **argv); +int cmd_config(int argc, const char **argv); +int cmd_c2c(int argc, const char **argv); +int cmd_diff(int argc, const char **argv); +int cmd_evlist(int argc, const char **argv); +int cmd_help(int argc, const char **argv); +int cmd_sched(int argc, const char **argv); +int cmd_kallsyms(int argc, const char **argv); +int cmd_list(int argc, const char **argv); +int cmd_record(int argc, const char **argv); +int cmd_report(int argc, const char **argv); +int cmd_stat(int argc, const char **argv); +int cmd_timechart(int argc, const char **argv); +int cmd_top(int argc, const char **argv); +int cmd_script(int argc, const char **argv); +int cmd_version(int argc, const char **argv); +int cmd_probe(int argc, const char **argv); +int cmd_kmem(int argc, const char **argv); +int cmd_lock(int argc, const char **argv); +int cmd_kvm(int argc, const char **argv); +int cmd_test(int argc, const char **argv); +int cmd_trace(int argc, const char **argv); +int cmd_inject(int argc, const char **argv); +int cmd_mem(int argc, const char **argv); +int cmd_data(int argc, const char **argv); +int cmd_ftrace(int argc, const char **argv); int find_scripts(char **scripts_array, char **scripts_path_array); #endif diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 6d5479e03e0d..4b283d18e158 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -34,7 +34,7 @@ const char *input_name; struct cmd_struct { const char *cmd; - int (*fn)(int, const char **, const char *); + int (*fn)(int, const char **); int option; }; @@ -339,13 +339,8 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) { int status; struct stat st; - const char *prefix; char sbuf[STRERR_BUFSIZE]; - prefix = NULL; - if (p->option & RUN_SETUP) - prefix = NULL; /* setup_perf_directory(); */ - if (use_browser == 
-1) use_browser = check_browser_config(p->cmd); @@ -356,7 +351,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) commit_pager_choice(); perf_env__set_cmdline(&perf_env, argc, argv); - status = p->fn(argc, argv, prefix); + status = p->fn(argc, argv); perf_config__exit(); exit_browser(status); perf_env__exit(&perf_env); @@ -566,7 +561,7 @@ int main(int argc, const char **argv) #ifdef HAVE_LIBAUDIT_SUPPORT setup_path(); argv[0] = "trace"; - return cmd_trace(argc, argv, NULL); + return cmd_trace(argc, argv); #else fprintf(stderr, "trace command not available: missing audit-libs devel package at build time.\n"); diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 86822969e8a8..e6d7876c94c2 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -464,7 +464,7 @@ static int perf_test__list(int argc, const char **argv) return 0; } -int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_test(int argc, const char **argv) { const char *test_usage[] = { "perf test [] [{list |[|]}]", -- cgit v1.2.3 From 5580338d0f207921bc1fef5b668cd564adcc3419 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sun, 26 Mar 2017 04:34:25 +0800 Subject: perf report: Refactor common code in srcline.c Introduce dso__name() and filename_split() out of existing code because these codes will be used in several places in next patch. For filename_split(), it may also solve a potential memory leak in existing code. In existing addr2line(), sep = strchr(filename, ':'); if (sep) { *sep++ = '\0'; *file = filename; *line_nr = strtoul(sep, NULL, 0); ret = 1; } out: pclose(fp); return ret; If sep is NULL, filename is not freed or returned via file. 
Signed-off-by: Yao Jin Tested-by: Milian Wolff Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1490474069-15823-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/srcline.c | 68 +++++++++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index b4db3f48e3b0..2953c9fecb30 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -12,6 +12,24 @@ bool srcline_full_filename; +static const char *dso__name(struct dso *dso) +{ + const char *dso_name; + + if (dso->symsrc_filename) + dso_name = dso->symsrc_filename; + else + dso_name = dso->long_name; + + if (dso_name[0] == '[') + return NULL; + + if (!strncmp(dso_name, "/tmp/perf-", 10)) + return NULL; + + return dso_name; +} + #ifdef HAVE_LIBBFD_SUPPORT /* @@ -207,6 +225,27 @@ void dso__free_a2l(struct dso *dso) #else /* HAVE_LIBBFD_SUPPORT */ +static int filename_split(char *filename, unsigned int *line_nr) +{ + char *sep; + + sep = strchr(filename, '\n'); + if (sep) + *sep = '\0'; + + if (!strcmp(filename, "??:0")) + return 0; + + sep = strchr(filename, ':'); + if (sep) { + *sep++ = '\0'; + *line_nr = strtoul(sep, NULL, 0); + return 1; + } + + return 0; +} + static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line_nr, struct dso *dso __maybe_unused, @@ -216,7 +255,6 @@ static int addr2line(const char *dso_name, u64 addr, char cmd[PATH_MAX]; char *filename = NULL; size_t len; - char *sep; int ret = 0; scnprintf(cmd, sizeof(cmd), "addr2line -e %s %016"PRIx64, @@ -233,23 +271,14 @@ static int addr2line(const char *dso_name, u64 addr, goto out; } - sep = strchr(filename, '\n'); - if (sep) - *sep = '\0'; - - if (!strcmp(filename, "??:0")) { - pr_debug("no debugging info in %s\n", dso_name); + ret = filename_split(filename, line_nr); + if (ret != 1) { free(filename); goto out; } - sep = strchr(filename, 
':'); - if (sep) { - *sep++ = '\0'; - *file = filename; - *line_nr = strtoul(sep, NULL, 0); - ret = 1; - } + *file = filename; + out: pclose(fp); return ret; @@ -278,15 +307,8 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, if (!dso->has_srcline) goto out; - if (dso->symsrc_filename) - dso_name = dso->symsrc_filename; - else - dso_name = dso->long_name; - - if (dso_name[0] == '[') - goto out; - - if (!strncmp(dso_name, "/tmp/perf-", 10)) + dso_name = dso__name(dso); + if (dso_name == NULL) goto out; if (!addr2line(dso_name, addr, &file, &line, dso, unwind_inlines)) -- cgit v1.2.3 From a64489c56c307bf0955f0489158c5ecf6aa10fe2 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sun, 26 Mar 2017 04:34:26 +0800 Subject: perf report: Find the inline stack for a given address It would be useful for perf to support a mode to query the inline stack for a given callgraph address. This would simplify finding the right code in code that does a lot of inlining. The srcline.c has contained the code which supports to translate the address to filename:line_nr. This patch just extends the function to let it support getting the inline stacks. It introduces the inline_list which will store the inline function result (filename:line_nr and funcname). If BFD lib is not supported, the result is only filename:line_nr. 
Signed-off-by: Yao Jin Tested-by: Milian Wolff Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1490474069-15823-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/srcline.c | 167 +++++++++++++++++++++++++++++++++++++-- tools/perf/util/symbol-elf.c | 5 ++ tools/perf/util/symbol-minimal.c | 7 ++ tools/perf/util/symbol.h | 2 + tools/perf/util/util.h | 16 ++++ 5 files changed, 192 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 2953c9fecb30..3ce28f702b36 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -7,6 +7,7 @@ #include "util/dso.h" #include "util/util.h" #include "util/debug.h" +#include "util/callchain.h" #include "symbol.h" @@ -30,6 +31,34 @@ static const char *dso__name(struct dso *dso) return dso_name; } +static int inline_list__append(char *filename, char *funcname, int line_nr, + struct inline_node *node, struct dso *dso) +{ + struct inline_list *ilist; + char *demangled; + + ilist = zalloc(sizeof(*ilist)); + if (ilist == NULL) + return -1; + + ilist->filename = filename; + ilist->line_nr = line_nr; + + if (dso != NULL) { + demangled = dso__demangle_sym(dso, 0, funcname); + if (demangled == NULL) { + ilist->funcname = funcname; + } else { + ilist->funcname = demangled; + free(funcname); + } + } + + list_add_tail(&ilist->list, &node->val); + + return 0; +} + #ifdef HAVE_LIBBFD_SUPPORT /* @@ -169,9 +198,17 @@ static void addr2line_cleanup(struct a2l_data *a2l) #define MAX_INLINE_NEST 1024 +static void inline_list__reverse(struct inline_node *node) +{ + struct inline_list *ilist, *n; + + list_for_each_entry_safe_reverse(ilist, n, &node->val, list) + list_move_tail(&ilist->list, &node->val); +} + static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line, struct dso *dso, - bool unwind_inlines) + bool unwind_inlines, struct inline_node *node) { int ret = 0; struct a2l_data *a2l = 
dso->a2l; @@ -196,8 +233,21 @@ static int addr2line(const char *dso_name, u64 addr, while (bfd_find_inliner_info(a2l->abfd, &a2l->filename, &a2l->funcname, &a2l->line) && - cnt++ < MAX_INLINE_NEST) - ; + cnt++ < MAX_INLINE_NEST) { + + if (node != NULL) { + if (inline_list__append(strdup(a2l->filename), + strdup(a2l->funcname), + a2l->line, node, + dso) != 0) + return 0; + } + } + + if ((node != NULL) && + (callchain_param.order != ORDER_CALLEE)) { + inline_list__reverse(node); + } } if (a2l->found && a2l->filename) { @@ -223,6 +273,35 @@ void dso__free_a2l(struct dso *dso) dso->a2l = NULL; } +static struct inline_node *addr2inlines(const char *dso_name, u64 addr, + struct dso *dso) +{ + char *file = NULL; + unsigned int line = 0; + struct inline_node *node; + + node = zalloc(sizeof(*node)); + if (node == NULL) { + perror("not enough memory for the inline node"); + return NULL; + } + + INIT_LIST_HEAD(&node->val); + node->addr = addr; + + if (!addr2line(dso_name, addr, &file, &line, dso, TRUE, node)) + goto out_free_inline_node; + + if (list_empty(&node->val)) + goto out_free_inline_node; + + return node; + +out_free_inline_node: + inline_node__delete(node); + return NULL; +} + #else /* HAVE_LIBBFD_SUPPORT */ static int filename_split(char *filename, unsigned int *line_nr) @@ -249,7 +328,8 @@ static int filename_split(char *filename, unsigned int *line_nr) static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line_nr, struct dso *dso __maybe_unused, - bool unwind_inlines __maybe_unused) + bool unwind_inlines __maybe_unused, + struct inline_node *node __maybe_unused) { FILE *fp; char cmd[PATH_MAX]; @@ -288,6 +368,58 @@ void dso__free_a2l(struct dso *dso __maybe_unused) { } +static struct inline_node *addr2inlines(const char *dso_name, u64 addr, + struct dso *dso __maybe_unused) +{ + FILE *fp; + char cmd[PATH_MAX]; + struct inline_node *node; + char *filename = NULL; + size_t len; + unsigned int line_nr = 0; + + scnprintf(cmd, sizeof(cmd), 
"addr2line -e %s -i %016"PRIx64, + dso_name, addr); + + fp = popen(cmd, "r"); + if (fp == NULL) { + pr_err("popen failed for %s\n", dso_name); + return NULL; + } + + node = zalloc(sizeof(*node)); + if (node == NULL) { + perror("not enough memory for the inline node"); + goto out; + } + + INIT_LIST_HEAD(&node->val); + node->addr = addr; + + while (getline(&filename, &len, fp) != -1) { + if (filename_split(filename, &line_nr) != 1) { + free(filename); + goto out; + } + + if (inline_list__append(filename, NULL, line_nr, node, + NULL) != 0) + goto out; + + filename = NULL; + } + +out: + pclose(fp); + + if (list_empty(&node->val)) { + inline_node__delete(node); + return NULL; + } + + return node; +} + #endif /* HAVE_LIBBFD_SUPPORT */ /* @@ -311,7 +443,7 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, if (dso_name == NULL) goto out; - if (!addr2line(dso_name, addr, &file, &line, dso, unwind_inlines)) + if (!addr2line(dso_name, addr, &file, &line, dso, unwind_inlines, NULL)) goto out; if (asprintf(&srcline, "%s:%u", @@ -351,3 +483,28 @@ char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, { return __get_srcline(dso, addr, sym, show_sym, false); } + +struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr) +{ + const char *dso_name; + + dso_name = dso__name(dso); + if (dso_name == NULL) + return NULL; + + return addr2inlines(dso_name, addr, dso); +} + +void inline_node__delete(struct inline_node *node) +{ + struct inline_list *ilist, *tmp; + + list_for_each_entry_safe(ilist, tmp, &node->val, list) { + list_del_init(&ilist->list); + zfree(&ilist->filename); + zfree(&ilist->funcname); + free(ilist); + } + + free(node); +} diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 0e660dba58ad..d1a40bb642ff 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -390,6 +390,11 @@ out_elf_end: return 0; } +char *dso__demangle_sym(struct dso *dso, int kmodule, char *elf_name) +{ + return 
demangle_sym(dso, kmodule, elf_name); +} + /* * Align offset to 4 bytes as needed for note name and descriptor data. */ diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 11cdde980545..870ef0f0659c 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -373,3 +373,10 @@ int kcore_copy(const char *from_dir __maybe_unused, void symbol__elf_init(void) { } + +char *dso__demangle_sym(struct dso *dso __maybe_unused, + int kmodule __maybe_unused, + char *elf_name __maybe_unused) +{ + return NULL; +} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 9222c7e702f3..e36213ccfcf7 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -305,6 +305,8 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map); +char *dso__demangle_sym(struct dso *dso, int kmodule, char *elf_name); + void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel); void symbols__insert(struct rb_root *symbols, struct symbol *sym); void symbols__fixup_duplicate(struct rb_root *symbols); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b2cfa47990dc..cc0700d6fef0 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -364,4 +364,20 @@ int is_printable_array(char *p, unsigned int len); int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); int unit_number__scnprintf(char *buf, size_t size, u64 n); + +struct inline_list { + char *filename; + char *funcname; + unsigned int line_nr; + struct list_head list; +}; + +struct inline_node { + u64 addr; + struct list_head val; +}; + +struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr); +void inline_node__delete(struct inline_node *node); + #endif /* GIT_COMPAT_UTIL_H */ -- cgit v1.2.3 From f3a60646cc3e0524d8f1083db1da7532a1590b40 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: 
Sun, 26 Mar 2017 04:34:27 +0800 Subject: perf report: Introduce --inline option Looking up the inline stack for callgraph addresses takes some time, so this patch provides a new option "--inline" to let the user decide whether to enable this feature. --inline: If a callgraph address belongs to an inlined function, the inline stack will be printed. Each entry is the inline function name or file/line. Signed-off-by: Yao Jin Tested-by: Milian Wolff Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1490474069-15823-4-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 4 ++++ tools/perf/builtin-report.c | 2 ++ tools/perf/util/symbol.h | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index e9a61f5485eb..248bba434b53 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -430,6 +430,10 @@ include::itrace.txt[] --hierarchy:: Enable hierarchical output. +--inline:: + If a callgraph address belongs to an inlined function, the inline stack + will be printed. Each entry is function name or file/line. 
+ include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3c8885a1c452..c18158b83eb1 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -845,6 +845,8 @@ int cmd_report(int argc, const char **argv) stdio__config_color, "always"), OPT_STRING(0, "time", &report.time_str, "str", "Time span of interest (start,stop)"), + OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name, + "Show inline function"), OPT_END() }; struct perf_data_file file = { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index e36213ccfcf7..5245d2fb1a0a 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -118,7 +118,8 @@ struct symbol_conf { show_ref_callgraph, hide_unresolved, raw_trace, - report_hierarchy; + report_hierarchy, + inline_name; const char *vmlinux_name, *kallsyms_name, *source_prefix, -- cgit v1.2.3 From 0db64dd060f7fd77921be8f10fa9f7a5f49a3a43 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sun, 26 Mar 2017 04:34:28 +0800 Subject: perf report: Show inline stack for stdio mode If the address belongs to an inlined function, the source information back to the first non-inlined function will be printed. For example: 1. Show inlined function name perf report --stdio -g function --inline 0.69% 0.00% inline ld-2.23.so [.] dl_main | ---dl_main | --0.56%--_dl_relocate_object _dl_relocate_object (inline) elf_dynamic_do_Rela (inline) 2. Show the file/line information perf report --stdio -g address --inline 0.69% 0.00% inline ld-2.23.so [.] 
_dl_start_user | ---_dl_start_user .:0 _dl_start rtld.c:307 /build/glibc-GKVZIf/glibc-2.23/elf/rtld.c:413 (inline) _dl_sysdep_start dl-sysdep.c:250 | --0.56%--dl_main rtld.c:2076 Committer tests: # perf record --call-graph dwarf ~/bin/perf stat usleep 1 Performance counter stats for 'usleep 1': 0.443020 task-clock (msec) # 0.449 CPUs utilized 1 context-switches # 0.002 M/sec 0 cpu-migrations # 0.000 K/sec 52 page-faults # 0.117 M/sec 1,049,423 cycles # 2.369 GHz 801,456 instructions # 0.76 insn per cycle 155,609 branches # 351.246 M/sec 7,026 branch-misses # 4.52% of all branches 0.000987570 seconds time elapsed [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.553 MB perf.data (66 samples) ] # perf report --stdio --inline fs__get_mountpoint 1.73% 0.00% perf perf [.] fs__get_mountpoint | ---fs__get_mountpoint fs__get_mountpoint (inline) fs__check_mounts (inline) __statfs entry_SYSCALL_64 sys_statfs SYSC_statfs user_statfs user_path_at_empty filename_lookup path_lookupat link_path_walk inode_permission __inode_permission kernfs_iop_permission kernfs_refresh_inode security_inode_notifysecctx selinux_inode_notifysecctx selinux_inode_setsecurity security_context_to_sid security_context_to_sid_core string_to_context_struct symcmp Signed-off-by: Yao Jin Tested-by: Arnaldo Carvalho de Melo Tested-by: Milian Wolff Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1490474069-15823-5-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/stdio/hist.c | 85 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 668f4aecf2e6..6128f485a3c5 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -17,6 +17,66 @@ static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) return ret; } +static size_t inline__fprintf(struct map *map, u64 
ip, int left_margin, + int depth, int depth_mask, FILE *fp) +{ + struct dso *dso; + struct inline_node *node; + struct inline_list *ilist; + int ret = 0, i; + + if (map == NULL) + return 0; + + dso = map->dso; + if (dso == NULL) + return 0; + + if (dso->kernel != DSO_TYPE_USER) + return 0; + + node = dso__parse_addr_inlines(dso, + map__rip_2objdump(map, ip)); + if (node == NULL) + return 0; + + list_for_each_entry(ilist, &node->val, list) { + if ((ilist->filename != NULL) || (ilist->funcname != NULL)) { + ret += callchain__fprintf_left_margin(fp, left_margin); + + for (i = 0; i < depth; i++) { + if (depth_mask & (1 << i)) + ret += fprintf(fp, "|"); + else + ret += fprintf(fp, " "); + ret += fprintf(fp, " "); + } + + if (callchain_param.key == CCKEY_ADDRESS) { + if (ilist->filename != NULL) + ret += fprintf(fp, "%s:%d (inline)", + ilist->filename, + ilist->line_nr); + else + ret += fprintf(fp, "??"); + } else if (ilist->funcname != NULL) + ret += fprintf(fp, "%s (inline)", + ilist->funcname); + else if (ilist->filename != NULL) + ret += fprintf(fp, "%s:%d (inline)", + ilist->filename, + ilist->line_nr); + else + ret += fprintf(fp, "??"); + + ret += fprintf(fp, "\n"); + } + } + + inline_node__delete(node); + return ret; +} + static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask, int left_margin) { @@ -78,6 +138,10 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node, fputs(str, fp); fputc('\n', fp); free(alloc_str); + + if (symbol_conf.inline_name) + ret += inline__fprintf(chain->ms.map, chain->ip, + left_margin, depth, depth_mask, fp); return ret; } @@ -229,6 +293,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, if (!i++ && field_order == NULL && sort_order && !prefixcmp(sort_order, "sym")) continue; + if (!printed) { ret += callchain__fprintf_left_margin(fp, left_margin); ret += fprintf(fp, "|\n"); @@ -251,6 +316,13 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, 
if (++entries_printed == callchain_param.print_limit) break; + + if (symbol_conf.inline_name) + ret += inline__fprintf(chain->ms.map, + chain->ip, + left_margin, + 0, 0, + fp); } root = &cnode->rb_root; } @@ -529,6 +601,8 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, bool use_callchain) { int ret; + int callchain_ret = 0; + int inline_ret = 0; struct perf_hpp hpp = { .buf = bf, .size = size, @@ -547,7 +621,16 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, ret = fprintf(fp, "%s\n", bf); if (use_callchain) - ret += hist_entry_callchain__fprintf(he, total_period, 0, fp); + callchain_ret = hist_entry_callchain__fprintf(he, total_period, + 0, fp); + + if (callchain_ret == 0 && symbol_conf.inline_name) { + inline_ret = inline__fprintf(he->ms.map, he->ip, 0, 0, 0, fp); + ret += inline_ret; + if (inline_ret > 0) + ret += fprintf(fp, "\n"); + } else + ret += callchain_ret; return ret; } -- cgit v1.2.3 From 0d3eb0b7783f1ee6d3314f101b9cbfb988020222 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sun, 26 Mar 2017 04:34:29 +0800 Subject: perf report: Show inline stack for browser mode If the address belongs to an inlined function, the source information back to the first non-inlined function will be printed. For example: 1. Show inlined function name perf report -g function --inline - 0.69% 0.00% inline ld-2.23.so [.] dl_main - dl_main 0.56% _dl_relocate_object _dl_relocate_object (inline) elf_dynamic_do_Rela (inline) 2. Show the file/line information perf report -g address --inline - 0.69% 0.00% inline ld-2.23.so [.] 
_dl_start _dl_start rtld.c:307 /build/glibc-GKVZIf/glibc-2.23/elf/rtld.c:413 (inline) + _dl_sysdep_start dl-sysdep.c:250 Signed-off-by: Yao Jin Tested-by: Arnaldo Carvalho de Melo Tested-by: Milian Wolff Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1490474069-15823-6-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 180 +++++++++++++++++++++++++++++++++++++++-- tools/perf/util/hist.c | 5 ++ tools/perf/util/sort.h | 1 + 3 files changed, 178 insertions(+), 8 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 2dc82bec10c0..62ecaebf2520 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -144,9 +144,60 @@ static void callchain_list__set_folding(struct callchain_list *cl, bool unfold) cl->unfolded = unfold ? cl->has_children : false; } +static struct inline_node *inline_node__create(struct map *map, u64 ip) +{ + struct dso *dso; + struct inline_node *node; + + if (map == NULL) + return NULL; + + dso = map->dso; + if (dso == NULL) + return NULL; + + if (dso->kernel != DSO_TYPE_USER) + return NULL; + + node = dso__parse_addr_inlines(dso, + map__rip_2objdump(map, ip)); + + return node; +} + +static int inline__count_rows(struct inline_node *node) +{ + struct inline_list *ilist; + int i = 0; + + if (node == NULL) + return 0; + + list_for_each_entry(ilist, &node->val, list) { + if ((ilist->filename != NULL) || (ilist->funcname != NULL)) + i++; + } + + return i; +} + +static int callchain_list__inline_rows(struct callchain_list *chain) +{ + struct inline_node *node; + int rows; + + node = inline_node__create(chain->ms.map, chain->ip); + if (node == NULL) + return 0; + + rows = inline__count_rows(node); + inline_node__delete(node); + return rows; +} + static int callchain_node__count_rows_rb_tree(struct callchain_node *node) { - int n = 0; + int n = 0, inline_rows; struct rb_node *nd; for (nd = 
rb_first(&node->rb_root); nd; nd = rb_next(nd)) { @@ -156,6 +207,13 @@ static int callchain_node__count_rows_rb_tree(struct callchain_node *node) list_for_each_entry(chain, &child->val, list) { ++n; + + if (symbol_conf.inline_name) { + inline_rows = + callchain_list__inline_rows(chain); + n += inline_rows; + } + /* We need this because we may not have children */ folded_sign = callchain_list__folded(chain); if (folded_sign == '+') @@ -207,7 +265,7 @@ static int callchain_node__count_rows(struct callchain_node *node) { struct callchain_list *chain; bool unfolded = false; - int n = 0; + int n = 0, inline_rows; if (callchain_param.mode == CHAIN_FLAT) return callchain_node__count_flat_rows(node); @@ -216,6 +274,11 @@ static int callchain_node__count_rows(struct callchain_node *node) list_for_each_entry(chain, &node->val, list) { ++n; + if (symbol_conf.inline_name) { + inline_rows = callchain_list__inline_rows(chain); + n += inline_rows; + } + unfolded = chain->unfolded; } @@ -362,6 +425,19 @@ static void hist_entry__init_have_children(struct hist_entry *he) he->init_have_children = true; } +static void hist_entry_init_inline_node(struct hist_entry *he) +{ + if (he->inline_node) + return; + + he->inline_node = inline_node__create(he->ms.map, he->ip); + + if (he->inline_node == NULL) + return; + + he->has_children = true; +} + static bool hist_browser__toggle_fold(struct hist_browser *browser) { struct hist_entry *he = browser->he_selection; @@ -393,7 +469,12 @@ static bool hist_browser__toggle_fold(struct hist_browser *browser) if (he->unfolded) { if (he->leaf) - he->nr_rows = callchain__count_rows(&he->sorted_chain); + if (he->inline_node) + he->nr_rows = inline__count_rows( + he->inline_node); + else + he->nr_rows = callchain__count_rows( + &he->sorted_chain); else he->nr_rows = hierarchy_count_rows(browser, he, false); @@ -753,6 +834,70 @@ static bool hist_browser__check_dump_full(struct hist_browser *browser __maybe_u #define LEVEL_OFFSET_STEP 3 +static int 
hist_browser__show_inline(struct hist_browser *browser, + struct inline_node *node, + unsigned short row, + int offset) +{ + struct inline_list *ilist; + char buf[1024]; + int color, width, first_row; + + first_row = row; + width = browser->b.width - (LEVEL_OFFSET_STEP + 2); + list_for_each_entry(ilist, &node->val, list) { + if ((ilist->filename != NULL) || (ilist->funcname != NULL)) { + color = HE_COLORSET_NORMAL; + if (ui_browser__is_current_entry(&browser->b, row)) + color = HE_COLORSET_SELECTED; + + if (callchain_param.key == CCKEY_ADDRESS) { + if (ilist->filename != NULL) + scnprintf(buf, sizeof(buf), + "%s:%d (inline)", + ilist->filename, + ilist->line_nr); + else + scnprintf(buf, sizeof(buf), "??"); + } else if (ilist->funcname != NULL) + scnprintf(buf, sizeof(buf), "%s (inline)", + ilist->funcname); + else if (ilist->filename != NULL) + scnprintf(buf, sizeof(buf), + "%s:%d (inline)", + ilist->filename, + ilist->line_nr); + else + scnprintf(buf, sizeof(buf), "??"); + + ui_browser__set_color(&browser->b, color); + hist_browser__gotorc(browser, row, 0); + ui_browser__write_nstring(&browser->b, " ", + LEVEL_OFFSET_STEP + offset); + ui_browser__write_nstring(&browser->b, buf, width); + row++; + } + } + + return row - first_row; +} + +static size_t show_inline_list(struct hist_browser *browser, struct map *map, + u64 ip, int row, int offset) +{ + struct inline_node *node; + int ret; + + node = inline_node__create(map, ip); + if (node == NULL) + return 0; + + ret = hist_browser__show_inline(browser, node, row, offset); + + inline_node__delete(node); + return ret; +} + static int hist_browser__show_callchain_list(struct hist_browser *browser, struct callchain_node *node, struct callchain_list *chain, @@ -764,6 +909,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, char bf[1024], *alloc_str; char buf[64], *alloc_str2; const char *str; + int inline_rows = 0, ret = 1; if (arg->row_offset != 0) { arg->row_offset--; @@ -801,10 +947,15 @@ 
static int hist_browser__show_callchain_list(struct hist_browser *browser, } print(browser, chain, str, offset, row, arg); - free(alloc_str); free(alloc_str2); - return 1; + + if (symbol_conf.inline_name) { + inline_rows = show_inline_list(browser, chain->ms.map, + chain->ip, row + 1, offset); + } + + return ret + inline_rows; } static bool check_percent_display(struct rb_node *node, u64 parent_total) @@ -1228,6 +1379,12 @@ static int hist_browser__show_entry(struct hist_browser *browser, folded_sign = hist_entry__folded(entry); } + if (symbol_conf.inline_name && + (!entry->has_children)) { + hist_entry_init_inline_node(entry); + folded_sign = hist_entry__folded(entry); + } + if (row_offset == 0) { struct hpp_arg arg = { .b = &browser->b, @@ -1259,7 +1416,8 @@ static int hist_browser__show_entry(struct hist_browser *browser, } if (first) { - if (symbol_conf.use_callchain) { + if (symbol_conf.use_callchain || + symbol_conf.inline_name) { ui_browser__printf(&browser->b, "%c ", folded_sign); width -= 2; } @@ -1301,8 +1459,14 @@ static int hist_browser__show_entry(struct hist_browser *browser, .is_current_entry = current_entry, }; - printed += hist_browser__show_callchain(browser, entry, 1, row, - hist_browser__show_callchain_entry, &arg, + if (entry->inline_node) + printed += hist_browser__show_inline(browser, + entry->inline_node, row, 0); + else + printed += hist_browser__show_callchain(browser, + entry, 1, row, + hist_browser__show_callchain_entry, + &arg, hist_browser__check_output_full); } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index e3b38f629504..3c4d4d00cb2c 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1136,6 +1136,11 @@ void hist_entry__delete(struct hist_entry *he) zfree(&he->mem_info); } + if (he->inline_node) { + inline_node__delete(he->inline_node); + he->inline_node = NULL; + } + zfree(&he->stat_acc); free_srcline(he->srcline); if (he->srcfile && he->srcfile[0]) diff --git a/tools/perf/util/sort.h 
b/tools/perf/util/sort.h index baf20a399f34..e35fb186d048 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -128,6 +128,7 @@ struct hist_entry { }; char *srcline; char *srcfile; + struct inline_node *inline_node; struct symbol *parent; struct branch_info *branch_info; struct hists *hists; -- cgit v1.2.3 From 5dfa210e407d0fedf746958bff206995bd46570d Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Sat, 18 Mar 2017 22:49:28 +0100 Subject: perf report: Enable sorting by srcline as key Often it is interesting to know how costly a given source line is in total. Previously, one had to build these sums manually based on all addresses that pointed to the same source line. This patch introduces srcline as a sort key, which will do the aggregation for us. Paired with the recent addition of showing inline frames, this makes perf report much more useful for many C++ work loads. The following shows the new feature in action. First, let's show the status quo output when we sort by address. The result contains many hist entries that generate the same output: ~~~~~~~~~~~~~~~~ $ perf report --stdio --inline -g address # Children Self Command Shared Object Symbol # ........ ........ ............ ................... ......................................... # 99.89% 35.34% cpp-inlining cpp-inlining [.] main | |--64.55%--main complex:655 | /home/milian/projects/kdab/rnd/hotspot/tests/test-clients/cpp-inlining/main.cpp:39 (inline) | /usr/include/c++/6.3.1/complex:664 (inline) | | | |--60.31%--hypot +20 | | | | | |--8.52%--__hypot_finite +273 | | | | | |--7.32%--__hypot_finite +411 ... 
--35.34%--_start +4194346 __libc_start_main +241 | |--6.65%--main random.tcc:3326 | /home/milian/projects/kdab/rnd/hotspot/tests/test-clients/cpp-inlining/main.cpp:39 (inline) | /usr/include/c++/6.3.1/bits/random.h:1809 (inline) | /usr/include/c++/6.3.1/bits/random.h:1818 (inline) | /usr/include/c++/6.3.1/bits/random.h:185 (inline) | |--2.70%--main random.tcc:3326 | /home/milian/projects/kdab/rnd/hotspot/tests/test-clients/cpp-inlining/main.cpp:39 (inline) | /usr/include/c++/6.3.1/bits/random.h:1809 (inline) | /usr/include/c++/6.3.1/bits/random.h:1818 (inline) | /usr/include/c++/6.3.1/bits/random.h:185 (inline) | |--1.69%--main random.tcc:3326 | /home/milian/projects/kdab/rnd/hotspot/tests/test-clients/cpp-inlining/main.cpp:39 (inline) | /usr/include/c++/6.3.1/bits/random.h:1809 (inline) | /usr/include/c++/6.3.1/bits/random.h:1818 (inline) | /usr/include/c++/6.3.1/bits/random.h:185 (inline) ... ~~~~~~~~~~~~~~~~ With this patch and `-g srcline` we instead get the following output: ~~~~~~~~~~~~~~~~ $ perf report --stdio --inline -g srcline # Children Self Command Shared Object Symbol # ........ ........ ............ ................... ......................................... # 99.89% 35.34% cpp-inlining cpp-inlining [.] main | |--64.55%--main complex:655 | /home/milian/projects/kdab/rnd/hotspot/tests/test-clients/cpp-inlining/main.cpp:39 (inline) | /usr/include/c++/6.3.1/complex:664 (inline) | | | |--64.02%--hypot | | | | | --59.81%--__hypot_finite | | | --0.53%--cabs | --35.34%--_start __libc_start_main | |--12.48%--main random.tcc:3326 | /home/milian/projects/kdab/rnd/hotspot/tests/test-clients/cpp-inlining/main.cpp:39 (inline) | /usr/include/c++/6.3.1/bits/random.h:1809 (inline) | /usr/include/c++/6.3.1/bits/random.h:1818 (inline) | /usr/include/c++/6.3.1/bits/random.h:185 (inline) ... 
~~~~~~~~~~~~~~~~ Signed-off-by: Milian Wolff Cc: Jiri Olsa Cc: Yao Jin Link: http://lkml.kernel.org/r/20170318214928.9047-1-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 1 + tools/perf/ui/browsers/hists.c | 3 +- tools/perf/ui/stdio/hist.c | 3 +- tools/perf/util/annotate.c | 3 +- tools/perf/util/callchain.c | 52 +++++++++++++++++++++++++++++--- tools/perf/util/callchain.h | 3 +- tools/perf/util/map.c | 3 +- tools/perf/util/sort.c | 16 ++++++---- tools/perf/util/srcline.c | 11 +++++-- tools/perf/util/util.h | 4 +-- 10 files changed, 78 insertions(+), 21 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 248bba434b53..37a175914157 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -235,6 +235,7 @@ OPTIONS sort_key can be: - function: compare on functions (default) - address: compare on individual code addresses + - srcline: compare on source filename and line number branch can be: - branch: include last branch information in callgraph when available. 
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 62ecaebf2520..da24072bb76e 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -851,7 +851,8 @@ static int hist_browser__show_inline(struct hist_browser *browser, if (ui_browser__is_current_entry(&browser->b, row)) color = HE_COLORSET_SELECTED; - if (callchain_param.key == CCKEY_ADDRESS) { + if (callchain_param.key == CCKEY_ADDRESS || + callchain_param.key == CCKEY_SRCLINE) { if (ilist->filename != NULL) scnprintf(buf, sizeof(buf), "%s:%d (inline)", diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 6128f485a3c5..d52d5f64ea89 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -52,7 +52,8 @@ static size_t inline__fprintf(struct map *map, u64 ip, int left_margin, ret += fprintf(fp, " "); } - if (callchain_param.key == CCKEY_ADDRESS) { + if (callchain_param.key == CCKEY_ADDRESS || + callchain_param.key == CCKEY_SRCLINE) { if (ilist->filename != NULL) ret += fprintf(fp, "%s:%d (inline)", ilist->filename, diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 22cd1dbe724b..3d0263e5d1db 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1674,7 +1674,8 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, goto next; offset = start + i; - src_line->path = get_srcline(map->dso, offset, NULL, false); + src_line->path = get_srcline(map->dso, offset, NULL, + false, true); insert_source_line(&tmp_root, src_line); next: diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index aba953421a03..d78776a20e80 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -80,6 +80,10 @@ static int parse_callchain_sort_key(const char *value) callchain_param.key = CCKEY_ADDRESS; return 0; } + if (!strncmp(value, "srcline", strlen(value))) { + callchain_param.key = CCKEY_SRCLINE; + return 0; + } if (!strncmp(value, "branch", 
strlen(value))) { callchain_param.branch_callstack = 1; return 0; @@ -510,14 +514,51 @@ enum match_result { MATCH_GT, }; +static enum match_result match_chain_srcline(struct callchain_cursor_node *node, + struct callchain_list *cnode) +{ + char *left = get_srcline(cnode->ms.map->dso, + map__rip_2objdump(cnode->ms.map, cnode->ip), + cnode->ms.sym, true, false); + char *right = get_srcline(node->map->dso, + map__rip_2objdump(node->map, node->ip), + node->sym, true, false); + enum match_result ret = MATCH_EQ; + int cmp; + + if (left && right) + cmp = strcmp(left, right); + else if (!left && right) + cmp = 1; + else if (left && !right) + cmp = -1; + else if (cnode->ip == node->ip) + cmp = 0; + else + cmp = (cnode->ip < node->ip) ? -1 : 1; + + if (cmp != 0) + ret = cmp < 0 ? MATCH_LT : MATCH_GT; + + free_srcline(left); + free_srcline(right); + return ret; +} + static enum match_result match_chain(struct callchain_cursor_node *node, struct callchain_list *cnode) { struct symbol *sym = node->sym; u64 left, right; - if (cnode->ms.sym && sym && - callchain_param.key == CCKEY_FUNCTION) { + if (callchain_param.key == CCKEY_SRCLINE) { + enum match_result match = match_chain_srcline(node, cnode); + + if (match != MATCH_ERROR) + return match; + } + + if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) { left = cnode->ms.sym->start; right = sym->start; } else { @@ -911,15 +952,16 @@ out: char *callchain_list__sym_name(struct callchain_list *cl, char *bf, size_t bfsize, bool show_dso) { + bool show_addr = callchain_param.key == CCKEY_ADDRESS; + bool show_srcline = show_addr || callchain_param.key == CCKEY_SRCLINE; int printed; if (cl->ms.sym) { - if (callchain_param.key == CCKEY_ADDRESS && - cl->ms.map && !cl->srcline) + if (show_srcline && cl->ms.map && !cl->srcline) cl->srcline = get_srcline(cl->ms.map->dso, map__rip_2objdump(cl->ms.map, cl->ip), - cl->ms.sym, false); + cl->ms.sym, false, show_addr); if (cl->srcline) printed = scnprintf(bf, bfsize, "%s %s", 
cl->ms.sym->name, cl->srcline); diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 4f4b60f1558a..c56c23dbbf72 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -77,7 +77,8 @@ typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *, enum chain_key { CCKEY_FUNCTION, - CCKEY_ADDRESS + CCKEY_ADDRESS, + CCKEY_SRCLINE }; enum chain_value { diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 1d9ebcf9e38e..c1870ac365a3 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -405,7 +405,8 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, if (map && map->dso) { srcline = get_srcline(map->dso, - map__rip_2objdump(map, addr), NULL, true); + map__rip_2objdump(map, addr), NULL, + true, true); if (srcline != SRCLINE_UNKNOWN) ret = fprintf(fp, "%s%s", prefix, srcline); free_srcline(srcline); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 8b0d4e39f640..73f3ec1cf2a0 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -323,7 +323,7 @@ char *hist_entry__get_srcline(struct hist_entry *he) return SRCLINE_UNKNOWN; return get_srcline(map->dso, map__rip_2objdump(map, he->ip), - he->ms.sym, true); + he->ms.sym, true, true); } static int64_t @@ -366,7 +366,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) left->branch_info->srcline_from = get_srcline(map->dso, map__rip_2objdump(map, left->branch_info->from.al_addr), - left->branch_info->from.sym, true); + left->branch_info->from.sym, + true, true); } if (!right->branch_info->srcline_from) { struct map *map = right->branch_info->from.map; @@ -376,7 +377,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) right->branch_info->srcline_from = get_srcline(map->dso, map__rip_2objdump(map, right->branch_info->from.al_addr), - right->branch_info->from.sym, true); + right->branch_info->from.sym, + true, true); } return 
strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from); } @@ -407,7 +409,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) left->branch_info->srcline_to = get_srcline(map->dso, map__rip_2objdump(map, left->branch_info->to.al_addr), - left->branch_info->from.sym, true); + left->branch_info->from.sym, + true, true); } if (!right->branch_info->srcline_to) { struct map *map = right->branch_info->to.map; @@ -417,7 +420,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) right->branch_info->srcline_to = get_srcline(map->dso, map__rip_2objdump(map, right->branch_info->to.al_addr), - right->branch_info->to.sym, true); + right->branch_info->to.sym, + true, true); } return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to); } @@ -448,7 +452,7 @@ static char *hist_entry__get_srcfile(struct hist_entry *e) return no_srcfile; sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip), - e->ms.sym, false, true); + e->ms.sym, false, true, true); if (!strcmp(sf, SRCLINE_UNKNOWN)) return no_srcfile; p = strchr(sf, ':'); diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 3ce28f702b36..778ccb5d99d1 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -429,7 +429,7 @@ out: #define A2L_FAIL_LIMIT 123 char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool unwind_inlines) + bool show_sym, bool show_addr, bool unwind_inlines) { char *file = NULL; unsigned line = 0; @@ -463,6 +463,11 @@ out: dso->has_srcline = 0; dso__free_a2l(dso); } + + if (!show_addr) + return (show_sym && sym) ? + strndup(sym->name, sym->namelen) : NULL; + if (sym) { if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? 
sym->name : "", addr - sym->start) < 0) @@ -479,9 +484,9 @@ void free_srcline(char *srcline) } char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym) + bool show_sym, bool show_addr) { - return __get_srcline(dso, addr, sym, show_sym, false); + return __get_srcline(dso, addr, sym, show_sym, show_addr, false); } struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index cc0700d6fef0..7cf5752b38fd 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -287,9 +287,9 @@ struct symbol; extern bool srcline_full_filename; char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym); + bool show_sym, bool show_addr); char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool unwind_inlines); + bool show_sym, bool show_addr, bool unwind_inlines); void free_srcline(char *srcline); int perf_event_paranoid(void); -- cgit v1.2.3 From 6ebd2547dd24daf95a21b2bc59931de8502afcc3 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Mon, 27 Mar 2017 16:10:36 +0900 Subject: perf annotate: Fix a bug following symbolic link of a build-id file Reading the link name from the build-id file itself is wrong, because the build-id file is no longer a symbolic link; its parent build-id directory is the symbolic link. Fix it. For example, if the build-id file name obtained from dso__build_id_filename() is as below, /root/.debug/.build-id/4f/75c7d197c951659d1c1b8b5fd49bcdf8f3f8b1/elf then, to correctly read the link name of the build-id, use the build-id directory path, which is a symbolic link, instead of the above build-id file name, like below.
/root/.debug/.build-id/4f/75c7d197c951659d1c1b8b5fd49bcdf8f3f8b1 Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1490598638-13947-2-git-send-email-treeze.taeung@gmail.com Fixes: 01412261d994 ("perf buildid-cache: Use path/to/bin/buildid/elf instead of path/to/bin/buildid") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 3d0263e5d1db..6dc9148b9b84 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1307,6 +1307,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil { char linkname[PATH_MAX]; char *build_id_filename; + char *build_id_path = NULL; if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS && !dso__is_kcore(dso)) @@ -1322,8 +1323,14 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil goto fallback; } + build_id_path = strdup(filename); + if (!build_id_path) + return -1; + + dirname(build_id_path); + if (dso__is_kcore(dso) || - readlink(filename, linkname, sizeof(linkname)) < 0 || + readlink(build_id_path, linkname, sizeof(linkname)) < 0 || strstr(linkname, DSO__NAME_KALLSYMS) || access(filename, R_OK)) { fallback: @@ -1335,6 +1342,7 @@ fallback: __symbol__join_symfs(filename, filename_size, dso->long_name); } + free(build_id_path); return 0; } -- cgit v1.2.3 From 2e933b1274dc89cd1629f6c7fd9bf952248d84c2 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Mon, 27 Mar 2017 16:10:37 +0900 Subject: perf annotate: Fix a bug of division by zero when calculating percent Currently perf-annotate with --print-line can print -nan(0x8000000000000) because of division by zero when calculating percent. The division by zero happens when a sum of samples is zero in symbol__get_source_line(), so fix it. 
For example: After running 'perf record' like below, $ perf record -e "{cycles,page-faults,branch-misses}" ./a.out Before: $ perf annotate --stdio -l Sorted summary for file /home/taeung/workspace/a.out ---------------------------------------------- 32.89 -nan 7.04 a.c:38 25.14 -nan 0.00 a.c:34 16.26 -nan 56.34 a.c:31 15.88 -nan 1.41 a.c:37 5.67 -nan 0.00 a.c:39 1.13 -nan 35.21 a.c:26 0.95 -nan 0.00 a.c:44 0.57 -nan 0.00 a.c:32 Percent | Source code & Disassembly of a.out for cycles (529 samples) ----------------------------------------------------------------------------------------- : ... a.c:26 0.57 -nan 4.23 : 40081a: mov %edi,-0x24(%rbp) a.c:26 0.00 -nan 9.86 : 40081d: mov %rsi,-0x30(%rbp) ... However, if a sum of samples is zero (e.g. 'page-faults'), skip calculating percent. After: $ perf annotate --stdio -l Sorted summary for file /home/taeung/workspace/a.out ---------------------------------------------- 32.89 0.00 7.04 a.c:38 25.14 0.00 0.00 a.c:34 16.26 0.00 56.34 a.c:31 15.88 0.00 1.41 a.c:37 5.67 0.00 0.00 a.c:39 1.13 0.00 35.21 a.c:26 0.95 0.00 0.00 a.c:44 0.57 0.00 0.00 a.c:32 Percent | Source code & Disassembly of old for cycles (529 samples) ----------------------------------------------------------------------------------------- : ... a.c:26 0.57 0.00 4.23 : 40081a: mov %edi,-0x24(%rbp) a.c:26 0.00 0.00 9.86 : 40081d: mov %rsi,-0x30(%rbp) ... 
Signed-off-by: Taeung Song Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1490598638-13947-3-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 6dc9148b9b84..11af5f0d56cc 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1671,11 +1671,15 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, src_line->nr_pcnt = nr_pcnt; for (k = 0; k < nr_pcnt; k++) { + double percent = 0.0; + h = annotation__histogram(notes, evidx + k); - src_line->samples[k].percent = 100.0 * h->addr[i] / h->sum; + if (h->sum) + percent = 100.0 * h->addr[i] / h->sum; - if (src_line->samples[k].percent > percent_max) - percent_max = src_line->samples[k].percent; + if (percent > percent_max) + percent_max = percent; + src_line->samples[k].percent = percent; } if (percent_max <= 0.5) -- cgit v1.2.3 From 2ccc220238680642be87a2d010ce07f1c40edafb Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Wed, 22 Mar 2017 15:06:19 +0200 Subject: perf buildid: Do not update SDT cache with null filename Valgrind was complaining: ==2633== Syscall param open(filename) points to unaddressable byte(s) ==2633== at 0x5281CC0: __open_nocancel (syscall-template.S:84) ==2633== by 0x537D38: open (fcntl2.h:53) ==2633== by 0x537D38: get_sdt_note_list (symbol-elf.c:2017) ==2633== by 0x5396FD: probe_cache__scan_sdt (probe-file.c:700) ==2633== by 0x49EA2C: build_id_cache__add_sdt_cache (build-id.c:625) ==2633== by 0x49EA2C: build_id_cache__add_s (build-id.c:697) ==2633== by 0x49EE72: build_id_cache__add_b (build-id.c:717) ==2633== by 0x49EE72: dso__cache_build_id (build-id.c:782) ==2633== by 0x49F190: __dsos__cache_build_ids (build-id.c:793) ==2633== by 0x49F190: 
machine__cache_build_ids (build-id.c:801) ==2633== by 0x49F190: perf_session__cache_build_ids (build-id.c:815) ==2633== by 0x4CD4F2: write_build_id (header.c:165) ==2633== by 0x4D26F7: do_write_feat (header.c:2296) ==2633== by 0x4D26F7: perf_header__adds_write (header.c:2335) ==2633== by 0x4D26F7: perf_session__write_header (header.c:2414) ==2633== by 0x43B324: __cmd_record (builtin-record.c:1154) ==2633== by 0x43B324: cmd_record (builtin-record.c:1839) ==2633== by 0x455A07: __cmd_record (builtin-kmem.c:1868) ==2633== by 0x455A07: cmd_kmem (builtin-kmem.c:1944) ==2633== by 0x497150: run_builtin (perf.c:359) ==2633== by 0x428CE0: handle_internal_command (perf.c:421) ==2633== by 0x428CE0: run_argv (perf.c:467) ==2633== by 0x428CE0: main (perf.c:614) ==2633== Address 0x0 is not stack'd, malloc'd or (recently) free'd Signed-off-by: Tommi Rantala Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: Tommi Rantala Link: http://lkml.kernel.org/r/20170322130624.21881-2-tommi.t.rantala@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index e528c40739cc..234859f756c4 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -690,7 +690,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, err = 0; /* Update SDT cache : error is just warned */ - if (build_id_cache__add_sdt_cache(sbuild_id, realname) < 0) + if (realname && build_id_cache__add_sdt_cache(sbuild_id, realname) < 0) pr_debug4("Failed to update/scan SDT cache for %s\n", realname); out_free: -- cgit v1.2.3 From 5a2342111c68e623e27ee7ea3d0492d8dad6bda0 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Wed, 22 Mar 2017 15:06:20 +0200 Subject: perf buildid: Do not assume that readlink() returns a null terminated string Valgrind was complaining: $ valgrind ./perf list >/dev/null ==11643== Memcheck, a memory error detector ==11643== Copyright 
(C) 2002-2015, and GNU GPL'd, by Julian Seward et al. ==11643== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info ==11643== Command: ./perf list ==11643== ==11643== Conditional jump or move depends on uninitialised value(s) ==11643== at 0x4C30620: rindex (vg_replace_strmem.c:199) ==11643== by 0x49DAA9: build_id_cache__origname (build-id.c:198) ==11643== by 0x49E1C7: build_id_cache__valid_id (build-id.c:222) ==11643== by 0x49E1C7: build_id_cache__list_all (build-id.c:507) ==11643== by 0x4B9C8F: print_sdt_events (parse-events.c:2067) ==11643== by 0x4BB0B3: print_events (parse-events.c:2313) ==11643== by 0x439501: cmd_list (builtin-list.c:53) ==11643== by 0x497150: run_builtin (perf.c:359) ==11643== by 0x428CE0: handle_internal_command (perf.c:421) ==11643== by 0x428CE0: run_argv (perf.c:467) ==11643== by 0x428CE0: main (perf.c:614) [...] Additionally, a zero length result from readlink() is not very interesting. Signed-off-by: Tommi Rantala Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170322130624.21881-3-tommi.t.rantala@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 234859f756c4..33af67530d30 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -182,13 +182,17 @@ char *build_id_cache__origname(const char *sbuild_id) char buf[PATH_MAX]; char *ret = NULL, *p; size_t offs = 5; /* == strlen("../..") */ + ssize_t len; linkname = build_id_cache__linkname(sbuild_id, NULL, 0); if (!linkname) return NULL; - if (readlink(linkname, buf, PATH_MAX) < 0) + len = readlink(linkname, buf, sizeof(buf) - 1); + if (len <= 0) goto out; + buf[len] = '\0'; + /* The link should be "../../" */ p = strrchr(buf, '/'); /* Cut off the "/" */ if (p && (p > buf + offs)) { -- cgit v1.2.3 From 0e6ba11511aef91ba8e2528ddc681d88922d7b0b Mon Sep 17 00:00:00 2001 From: 
Tommi Rantala Date: Wed, 22 Mar 2017 15:06:21 +0200 Subject: perf tests: Do not assume that readlink() returns a null terminated string Ensure that the string in buf is null terminated. Signed-off-by: Tommi Rantala Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170322130624.21881-4-tommi.t.rantala@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index f59d210e1baf..26e5b7a0b839 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -43,7 +43,7 @@ static char *get_self_path(void) { char *buf = calloc(PATH_MAX, sizeof(char)); - if (buf && readlink("/proc/self/exe", buf, PATH_MAX) < 0) { + if (buf && readlink("/proc/self/exe", buf, PATH_MAX - 1) < 0) { pr_debug("Failed to get correct path of perf\n"); free(buf); return NULL; -- cgit v1.2.3 From b7126ef78612a3d4a37aadf39125cff048cebb9b Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Wed, 22 Mar 2017 15:06:22 +0200 Subject: perf utils: use sizeof(buf) - 1 in readlink() call Ensure that we have space for the null byte in buf. 
Signed-off-by: Tommi Rantala Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170322130624.21881-5-tommi.t.rantala@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 05714d548584..cf22962ce725 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -378,7 +378,7 @@ static int write_cmdline(int fd, struct perf_header *h __maybe_unused, * actual atual path to perf binary */ sprintf(proc, "/proc/%d/exe", getpid()); - ret = readlink(proc, buf, sizeof(buf)); + ret = readlink(proc, buf, sizeof(buf) - 1); if (ret <= 0) return -1; -- cgit v1.2.3 From d4b364df5f6540e8d6a38008ce2693ba73a8508a Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Wed, 22 Mar 2017 15:06:23 +0200 Subject: perf utils: Null terminate buf in read_ftrace_printk() Ensure that the string that we read from the data file is null terminated. 
Valgrind was complaining: ==31357== Invalid read of size 1 ==31357== at 0x4EC8C1: __strtok_r_1c (string2.h:200) ==31357== by 0x4EC8C1: parse_ftrace_printk (trace-event-parse.c:161) ==31357== by 0x4F82A8: read_ftrace_printk (trace-event-read.c:204) ==31357== by 0x4F82A8: trace_report (trace-event-read.c:468) ==31357== by 0x4CD552: process_tracing_data (header.c:1576) ==31357== by 0x4D3397: perf_file_section__process (header.c:2705) ==31357== by 0x4D3397: perf_header__process_sections (header.c:2488) ==31357== by 0x4D3397: perf_session__read_header (header.c:2925) ==31357== by 0x4E71E2: perf_session__open (session.c:32) ==31357== by 0x4E71E2: perf_session__new (session.c:139) ==31357== by 0x429F5D: cmd_annotate (builtin-annotate.c:472) ==31357== by 0x497150: run_builtin (perf.c:359) ==31357== by 0x428CE0: handle_internal_command (perf.c:421) ==31357== by 0x428CE0: run_argv (perf.c:467) ==31357== by 0x428CE0: main (perf.c:614) ==31357== Address 0x8ac0efb is 0 bytes after a block of size 1,963 alloc'd ==31357== at 0x4C2DB9D: malloc (vg_replace_malloc.c:299) ==31357== by 0x4F827B: read_ftrace_printk (trace-event-read.c:195) ==31357== by 0x4F827B: trace_report (trace-event-read.c:468) ==31357== by 0x4CD552: process_tracing_data (header.c:1576) ==31357== by 0x4D3397: perf_file_section__process (header.c:2705) ==31357== by 0x4D3397: perf_header__process_sections (header.c:2488) ==31357== by 0x4D3397: perf_session__read_header (header.c:2925) ==31357== by 0x4E71E2: perf_session__open (session.c:32) ==31357== by 0x4E71E2: perf_session__new (session.c:139) ==31357== by 0x429F5D: cmd_annotate (builtin-annotate.c:472) ==31357== by 0x497150: run_builtin (perf.c:359) ==31357== by 0x428CE0: handle_internal_command (perf.c:421) ==31357== by 0x428CE0: run_argv (perf.c:467) ==31357== by 0x428CE0: main (perf.c:614) Signed-off-by: Tommi Rantala Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170322130624.21881-6-tommi.t.rantala@nokia.com Signed-off-by: 
Arnaldo Carvalho de Melo --- tools/perf/util/trace-event-read.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 27420159bf69..8a9a677f7576 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -192,7 +192,7 @@ static int read_ftrace_printk(struct pevent *pevent) if (!size) return 0; - buf = malloc(size); + buf = malloc(size + 1); if (buf == NULL) return -1; @@ -201,6 +201,8 @@ static int read_ftrace_printk(struct pevent *pevent) return -1; } + buf[size] = '\0'; + parse_ftrace_printk(pevent, buf, size); free(buf); -- cgit v1.2.3 From 55f77128e7652e537d6c226d5b56821cdb5c22de Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Wed, 22 Mar 2017 15:06:24 +0200 Subject: perf utils: Readlink /proc/self/exe to find the perf binary Simplification: it is easier to open /proc/self/exe than /proc/$pid/exe. Signed-off-by: Tommi Rantala Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170322130624.21881-7-tommi.t.rantala@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index cf22962ce725..ef09f26e67da 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -370,15 +370,11 @@ static int write_cmdline(int fd, struct perf_header *h __maybe_unused, struct perf_evlist *evlist __maybe_unused) { char buf[MAXPATHLEN]; - char proc[32]; u32 n; int i, ret; - /* - * actual atual path to perf binary - */ - sprintf(proc, "/proc/%d/exe", getpid()); - ret = readlink(proc, buf, sizeof(buf) - 1); + /* actual path to perf binary */ + ret = readlink("/proc/self/exe", buf, sizeof(buf) - 1); if (ret <= 0) return -1; -- cgit v1.2.3 From c68677014bace6a4b6ad20f0818e1470d049618f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 28 Mar 2017 11:19:59 -0300 
Subject: perf tools: Remove support for command aliases This came from 'git', but isn't documented anywhere in tools/perf/Documentation/, looks like baggage we can do without, ditch it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-e7uwkn60t4hmlnwj99ba4t2s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-help.c | 13 ----- tools/perf/perf.c | 97 +++----------------------------------- tools/perf/util/Build | 1 - tools/perf/util/alias.c | 78 ------------------------------ tools/perf/util/cache.h | 1 - tools/perf/util/help-unknown-cmd.c | 8 +--- 6 files changed, 8 insertions(+), 190 deletions(-) delete mode 100644 tools/perf/util/alias.c diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 7ae238929e95..1eec96a0fa67 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -301,12 +301,6 @@ void list_common_cmds_help(void) } } -static int is_perf_command(const char *s) -{ - return is_in_cmdlist(&main_cmds, s) || - is_in_cmdlist(&other_cmds, s); -} - static const char *cmd_to_page(const char *perf_cmd) { char *s; @@ -446,7 +440,6 @@ int cmd_help(int argc, const char **argv) "perf help [--all] [--man|--web|--info] [command]", NULL }; - const char *alias; int rc; load_command_list("perf-", &main_cmds, &other_cmds); @@ -472,12 +465,6 @@ int cmd_help(int argc, const char **argv) return 0; } - alias = alias_lookup(argv[0]); - if (alias && !is_perf_command(argv[0])) { - printf("`perf %s' is aliased to `%s'\n", argv[0], alias); - return 0; - } - switch (help_format) { case HELP_FORMAT_MAN: rc = show_man_page(argv[0]); diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 4b283d18e158..9217f2227f3d 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -267,71 +267,6 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) return handled; } -static int handle_alias(int *argcp, const char ***argv) -{ - int envchanged = 0, 
ret = 0, saved_errno = errno; - int count, option_count; - const char **new_argv; - const char *alias_command; - char *alias_string; - - alias_command = (*argv)[0]; - alias_string = alias_lookup(alias_command); - if (alias_string) { - if (alias_string[0] == '!') { - if (*argcp > 1) { - struct strbuf buf; - - if (strbuf_init(&buf, PATH_MAX) < 0 || - strbuf_addstr(&buf, alias_string) < 0 || - sq_quote_argv(&buf, (*argv) + 1, - PATH_MAX) < 0) - die("Failed to allocate memory."); - free(alias_string); - alias_string = buf.buf; - } - ret = system(alias_string + 1); - if (ret >= 0 && WIFEXITED(ret) && - WEXITSTATUS(ret) != 127) - exit(WEXITSTATUS(ret)); - die("Failed to run '%s' when expanding alias '%s'", - alias_string + 1, alias_command); - } - count = split_cmdline(alias_string, &new_argv); - if (count < 0) - die("Bad alias.%s string", alias_command); - option_count = handle_options(&new_argv, &count, &envchanged); - if (envchanged) - die("alias '%s' changes environment variables\n" - "You can use '!perf' in the alias to do this.", - alias_command); - memmove(new_argv - option_count, new_argv, - count * sizeof(char *)); - new_argv -= option_count; - - if (count < 1) - die("empty alias for %s", alias_command); - - if (!strcmp(alias_command, new_argv[0])) - die("recursive alias: %s", alias_command); - - new_argv = realloc(new_argv, sizeof(char *) * - (count + *argcp + 1)); - /* insert after command name */ - memcpy(new_argv + count, *argv + 1, sizeof(char *) * *argcp); - new_argv[count + *argcp] = NULL; - - *argv = new_argv; - *argcp += count - 1; - - ret = 1; - } - - errno = saved_errno; - - return ret; -} - #define RUN_SETUP (1<<0) #define USE_PAGER (1<<1) @@ -455,25 +390,12 @@ do_die: static int run_argv(int *argcp, const char ***argv) { - int done_alias = 0; - - while (1) { - /* See if it's an internal command */ - handle_internal_command(*argcp, *argv); - - /* .. 
then try the external ones */ - execv_dashed_external(*argv); + /* See if it's an internal command */ + handle_internal_command(*argcp, *argv); - /* It could be an alias -- this works around the insanity - * of overriding "perf log" with "perf show" by having - * alias.log = show - */ - if (done_alias || !handle_alias(argcp, argv)) - break; - done_alias = 1; - } - - return done_alias; + /* .. then try the external ones */ + execv_dashed_external(*argv); + return 0; } static void pthread__block_sigwinch(void) @@ -606,17 +528,12 @@ int main(int argc, const char **argv) while (1) { static int done_help; - int was_alias = run_argv(&argc, &argv); + + run_argv(&argc, &argv); if (errno != ENOENT) break; - if (was_alias) { - fprintf(stderr, "Expansion of alias '%s' failed; " - "'%s' is not a perf-command\n", - cmd, argv[0]); - goto out; - } if (!done_help) { cmd = argv[0] = help_unknown_cmd(cmd); done_help = 1; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 2ae92da613dd..5c0ea11a8f0a 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,4 +1,3 @@ -libperf-y += alias.o libperf-y += annotate.o libperf-y += block-range.o libperf-y += build-id.o diff --git a/tools/perf/util/alias.c b/tools/perf/util/alias.c deleted file mode 100644 index 6455471d9cd1..000000000000 --- a/tools/perf/util/alias.c +++ /dev/null @@ -1,78 +0,0 @@ -#include "cache.h" -#include "util.h" -#include "config.h" - -static const char *alias_key; -static char *alias_val; - -static int alias_lookup_cb(const char *k, const char *v, - void *cb __maybe_unused) -{ - if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) { - if (!v) - return config_error_nonbool(k); - alias_val = strdup(v); - return 0; - } - return 0; -} - -char *alias_lookup(const char *alias) -{ - alias_key = alias; - alias_val = NULL; - perf_config(alias_lookup_cb, NULL); - return alias_val; -} - -int split_cmdline(char *cmdline, const char ***argv) -{ - int src, dst, count = 0, size = 16; - char quoted = 0; - 
- *argv = malloc(sizeof(char*) * size); - - /* split alias_string */ - (*argv)[count++] = cmdline; - for (src = dst = 0; cmdline[src];) { - char c = cmdline[src]; - if (!quoted && isspace(c)) { - cmdline[dst++] = 0; - while (cmdline[++src] - && isspace(cmdline[src])) - ; /* skip */ - if (count >= size) { - size += 16; - *argv = realloc(*argv, sizeof(char*) * size); - } - (*argv)[count++] = cmdline + dst; - } else if (!quoted && (c == '\'' || c == '"')) { - quoted = c; - src++; - } else if (c == quoted) { - quoted = 0; - src++; - } else { - if (c == '\\' && quoted != '\'') { - src++; - c = cmdline[src]; - if (!c) { - zfree(argv); - return error("cmdline ends with \\"); - } - } - cmdline[dst++] = c; - src++; - } - } - - cmdline[dst] = 0; - - if (quoted) { - zfree(argv); - return error("unclosed quote"); - } - - return count; -} - diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 512c0c83fbc6..0328f297a748 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -15,7 +15,6 @@ #define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" #define PERF_PAGER_ENVIRONMENT "PERF_PAGER" -char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); #define alloc_nr(x) (((x)+16)*3/2) diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c index 2821f8d77e52..34201440ac03 100644 --- a/tools/perf/util/help-unknown-cmd.c +++ b/tools/perf/util/help-unknown-cmd.c @@ -6,16 +6,12 @@ #include "levenshtein.h" static int autocorrect; -static struct cmdnames aliases; static int perf_unknown_cmd_config(const char *var, const char *value, void *cb __maybe_unused) { if (!strcmp(var, "help.autocorrect")) autocorrect = perf_config_int(var,value); - /* Also use aliases for command lookup */ - if (!prefixcmp(var, "alias.")) - add_cmdname(&aliases, var + 6, strlen(var + 6)); return 0; } @@ -59,14 +55,12 @@ const char *help_unknown_cmd(const char *cmd) memset(&main_cmds, 0, sizeof(main_cmds)); memset(&other_cmds, 
0, sizeof(main_cmds)); - memset(&aliases, 0, sizeof(aliases)); perf_config(perf_unknown_cmd_config, NULL); load_command_list("perf-", &main_cmds, &other_cmds); - if (add_cmd_list(&main_cmds, &aliases) < 0 || - add_cmd_list(&main_cmds, &other_cmds) < 0) { + if (add_cmd_list(&main_cmds, &other_cmds) < 0) { fprintf(stderr, "ERROR: Failed to allocate command list for unknown command.\n"); goto end; } -- cgit v1.2.3 From 2d01ecc580405169ecd6e3880617bc61cf482fdd Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 28 Mar 2017 15:17:52 +0530 Subject: perf/sdt/x86: Add renaming logic for (missing) 8 bit registers I found a couple of events using the al, bl, cl and dl registers for arguments. These are not directly accepted by uprobe_events and thus need to be mapped to ax, bx, cx and dx respectively. A few examples: /usr/bin/qemu-system-s390x css_adapter_interrupt: 1@%bl css_chpid_add: 1@%cl 1@%sil 1@%dl dma_bdrv_io: 8@%rbx 8@%rbp -8@%r14 1@%al /usr/bin/postgres buffer__read__done: ... -1@-bash -1@%al buffer__read__start: ... -1@%al I did not find any SDT events using the ah, bh, ... registers, but I also don't see any reason not to use them, so there might be rare events using these registers; if so, perf should have renaming logic for them too. Signed-off-by: Ravi Bangoria Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Alexis Berlemont Cc: Hemant Kumar Cc: Michael Ellerman Cc: Naveen N.
Rao Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170328094754.3156-2-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/perf_regs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index d8a8dcf761f7..fa1fd196837d 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -40,12 +40,20 @@ struct sdt_name_reg { static const struct sdt_name_reg sdt_reg_renamings[] = { SDT_NAME_REG(eax, ax), SDT_NAME_REG(rax, ax), + SDT_NAME_REG(al, ax), + SDT_NAME_REG(ah, ax), SDT_NAME_REG(ebx, bx), SDT_NAME_REG(rbx, bx), + SDT_NAME_REG(bl, bx), + SDT_NAME_REG(bh, bx), SDT_NAME_REG(ecx, cx), SDT_NAME_REG(rcx, cx), + SDT_NAME_REG(cl, cx), + SDT_NAME_REG(ch, cx), SDT_NAME_REG(edx, dx), SDT_NAME_REG(rdx, dx), + SDT_NAME_REG(dl, dx), + SDT_NAME_REG(dh, dx), SDT_NAME_REG(esi, si), SDT_NAME_REG(rsi, si), SDT_NAME_REG(sil, si), -- cgit v1.2.3 From d451a205da29c5485ca634367154e83997571aa0 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 28 Mar 2017 15:17:53 +0530 Subject: perf/sdt/x86: Move OP parser to tools/perf/arch/x86/ An SDT marker argument is in N@OP format. N is the size of the argument and OP is the actual assembly operand. OP is an arch-dependent component, and hence its parsing logic should also be placed under tools/perf/arch/. Signed-off-by: Ravi Bangoria Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Alexis Berlemont Cc: Hemant Kumar Cc: Michael Ellerman Cc: Naveen N.
Rao Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170328094754.3156-3-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/perf_regs.c | 179 ++++++++++++++++++++++++++++------- tools/perf/util/perf_regs.c | 6 +- tools/perf/util/perf_regs.h | 11 ++- tools/perf/util/probe-file.c | 132 ++++++++------------------ 4 files changed, 194 insertions(+), 134 deletions(-) diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index fa1fd196837d..3bf3548c5e2d 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -1,8 +1,10 @@ #include +#include #include "../../perf.h" #include "../../util/util.h" #include "../../util/perf_regs.h" +#include "../../util/debug.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG(AX, PERF_REG_X86_AX), @@ -37,7 +39,7 @@ struct sdt_name_reg { #define SDT_NAME_REG(n, m) {.sdt_name = "%" #n, .uprobe_name = "%" #m} #define SDT_NAME_REG_END {.sdt_name = NULL, .uprobe_name = NULL} -static const struct sdt_name_reg sdt_reg_renamings[] = { +static const struct sdt_name_reg sdt_reg_tbl[] = { SDT_NAME_REG(eax, ax), SDT_NAME_REG(rax, ax), SDT_NAME_REG(al, ax), @@ -95,45 +97,158 @@ static const struct sdt_name_reg sdt_reg_renamings[] = { SDT_NAME_REG_END, }; -int sdt_rename_register(char **pdesc, char *old_name) +/* + * Perf only supports OP which is in +/-NUM(REG) form. + * Here plus-minus sign, NUM and parenthesis are optional, + * only REG is mandatory. + * + * SDT events also supports indirect addressing mode with a + * symbol as offset, scaled mode and constants in OP. But + * perf does not support them yet. Below are few examples. 
+ * + * OP with scaled mode: + * (%rax,%rsi,8) + * 10(%ras,%rsi,8) + * + * OP with indirect addressing mode: + * check_action(%rip) + * mp_+52(%rip) + * 44+mp_(%rip) + * + * OP with constant values: + * $0 + * $123 + * $-1 + */ +#define SDT_OP_REGEX "^([+\\-]?)([0-9]*)(\\(?)(%[a-z][a-z0-9]+)(\\)?)$" + +static regex_t sdt_op_regex; + +static int sdt_init_op_regex(void) { - const struct sdt_name_reg *rnames = sdt_reg_renamings; - char *new_desc, *old_desc = *pdesc; - size_t prefix_len, sdt_len, uprobe_len, old_desc_len, offset; - int ret = -1; - - while (ret != 0 && rnames->sdt_name != NULL) { - sdt_len = strlen(rnames->sdt_name); - ret = strncmp(old_name, rnames->sdt_name, sdt_len); - rnames += !!ret; - } + static int initialized; + int ret = 0; - if (rnames->sdt_name == NULL) + if (initialized) return 0; - sdt_len = strlen(rnames->sdt_name); - uprobe_len = strlen(rnames->uprobe_name); - old_desc_len = strlen(old_desc) + 1; + ret = regcomp(&sdt_op_regex, SDT_OP_REGEX, REG_EXTENDED); + if (ret < 0) { + pr_debug4("Regex compilation error.\n"); + return ret; + } - new_desc = zalloc(old_desc_len + uprobe_len - sdt_len); - if (new_desc == NULL) - return -1; + initialized = 1; + return 0; +} - /* Copy the chars before the register name (at least '%') */ - prefix_len = old_name - old_desc; - memcpy(new_desc, old_desc, prefix_len); +/* + * Max x86 register name length is 5(ex: %r15d). So, 6th char + * should always contain NULL. This helps to find register name + * length using strlen, insted of maintaing one more variable. + */ +#define SDT_REG_NAME_SIZE 6 - /* Copy the new register name */ - memcpy(new_desc + prefix_len, rnames->uprobe_name, uprobe_len); +/* + * The uprobe parser does not support all gas register names; + * so, we have to replace them (ex. for x86_64: %rax -> %ax). + * Note: If register does not require renaming, just copy + * paste as it is, but don't leave it empty. 
+ */ +static void sdt_rename_register(char *sdt_reg, int sdt_len, char *uprobe_reg) +{ + int i = 0; - /* Copy the chars after the register name (if need be) */ - offset = prefix_len + sdt_len; - if (offset < old_desc_len) - memcpy(new_desc + prefix_len + uprobe_len, - old_desc + offset, old_desc_len - offset); + for (i = 0; sdt_reg_tbl[i].sdt_name != NULL; i++) { + if (!strncmp(sdt_reg_tbl[i].sdt_name, sdt_reg, sdt_len)) { + strcpy(uprobe_reg, sdt_reg_tbl[i].uprobe_name); + return; + } + } - free(old_desc); - *pdesc = new_desc; + strncpy(uprobe_reg, sdt_reg, sdt_len); +} - return 0; +int arch_sdt_arg_parse_op(char *old_op, char **new_op) +{ + char new_reg[SDT_REG_NAME_SIZE] = {0}; + int new_len = 0, ret; + /* + * rm[0]: +/-NUM(REG) + * rm[1]: +/- + * rm[2]: NUM + * rm[3]: ( + * rm[4]: REG + * rm[5]: ) + */ + regmatch_t rm[6]; + /* + * Max prefix length is 2 as it may contain sign(+/-) + * and displacement 0 (Both sign and displacement 0 are + * optional so it may be empty). Use one more character + * to hold last NULL so that strlen can be used to find + * prefix length, instead of maintaining one more variable. + */ + char prefix[3] = {0}; + + ret = sdt_init_op_regex(); + if (ret < 0) + return ret; + + /* + * If unsupported OR does not match with regex OR + * register name too long, skip it. + */ + if (strchr(old_op, ',') || strchr(old_op, '$') || + regexec(&sdt_op_regex, old_op, 6, rm, 0) || + rm[4].rm_eo - rm[4].rm_so > SDT_REG_NAME_SIZE) { + pr_debug4("Skipping unsupported SDT argument: %s\n", old_op); + return SDT_ARG_SKIP; + } + + /* + * Prepare prefix. + * If SDT OP has parenthesis but does not provide + * displacement, add 0 for displacement.
+ * SDT Uprobe Prefix + * ----------------------------- + * +24(%rdi) +24(%di) + + * 24(%rdi) +24(%di) + + * %rdi %di + * (%rdi) +0(%di) +0 + * -80(%rbx) -80(%bx) - + */ + if (rm[3].rm_so != rm[3].rm_eo) { + if (rm[1].rm_so != rm[1].rm_eo) + prefix[0] = *(old_op + rm[1].rm_so); + else if (rm[2].rm_so != rm[2].rm_eo) + prefix[0] = '+'; + else + strncpy(prefix, "+0", 2); + } + + /* Rename register */ + sdt_rename_register(old_op + rm[4].rm_so, rm[4].rm_eo - rm[4].rm_so, + new_reg); + + /* Prepare final OP which should be valid for uprobe_events */ + new_len = strlen(prefix) + + (rm[2].rm_eo - rm[2].rm_so) + + (rm[3].rm_eo - rm[3].rm_so) + + strlen(new_reg) + + (rm[5].rm_eo - rm[5].rm_so) + + 1; /* NULL */ + + *new_op = zalloc(new_len); + if (!*new_op) + return -ENOMEM; + + scnprintf(*new_op, new_len, "%.*s%.*s%.*s%.*s%.*s", + strlen(prefix), prefix, + (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so, + (int)(rm[3].rm_eo - rm[3].rm_so), old_op + rm[3].rm_so, + strlen(new_reg), new_reg, + (int)(rm[5].rm_eo - rm[5].rm_so), old_op + rm[5].rm_so); + + return SDT_ARG_VALID; } diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index a37e5934aa2a..b2ae039eff85 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -6,10 +6,10 @@ const struct sample_reg __weak sample_reg_masks[] = { SMPL_REG_END }; -int __weak sdt_rename_register(char **pdesc __maybe_unused, - char *old_name __maybe_unused) +int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused, + char **new_op __maybe_unused) { - return 0; + return SDT_ARG_SKIP; } #ifdef HAVE_PERF_REGS_SUPPORT diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 7544a157e159..32b37d19dcc3 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -15,11 +15,12 @@ struct sample_reg { extern const struct sample_reg sample_reg_masks[]; -/* - * The table sdt_reg_renamings is used for adjusting gcc/gas-generated - * registers before filling the 
uprobe tracer interface. - */ -int sdt_rename_register(char **pdesc, char *old_name); +enum { + SDT_ARG_VALID = 0, + SDT_ARG_SKIP, +}; + +int arch_sdt_arg_parse_op(char *old_op, char **new_op); #ifdef HAVE_PERF_REGS_SUPPORT #include diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index d741634cbfc0..88714dec8912 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -694,10 +694,29 @@ static const char * const type_to_suffix[] = { "", ":u8", ":u16", "", ":u32", "", "", "", ":u64" }; +/* + * Isolate the string number and convert it into a decimal value; + * this will be an index to get suffix of the uprobe name (defining + * the type) + */ +static int sdt_arg_parse_size(char *n_ptr, const char **suffix) +{ + long type_idx; + + type_idx = strtol(n_ptr, NULL, 10); + if (type_idx < -8 || type_idx > 8) { + pr_debug4("Failed to get a valid sdt type\n"); + return -1; + } + + *suffix = type_to_suffix[type_idx + 8]; + return 0; +} + static int synthesize_sdt_probe_arg(struct strbuf *buf, int i, const char *arg) { - char *tmp, *desc = strdup(arg); - const char *prefix = "", *suffix = ""; + char *op, *desc = strdup(arg), *new_op = NULL; + const char *suffix = ""; int ret = -1; if (desc == NULL) { @@ -705,112 +724,37 @@ static int synthesize_sdt_probe_arg(struct strbuf *buf, int i, const char *arg) return ret; } - tmp = strchr(desc, '@'); - if (tmp) { - long type_idx; - /* - * Isolate the string number and convert it into a - * binary value; this will be an index to get suffix - * of the uprobe name (defining the type) - */ - tmp[0] = '\0'; - type_idx = strtol(desc, NULL, 10); - /* Check that the conversion went OK */ - if (type_idx == LONG_MIN || type_idx == LONG_MAX) { - pr_debug4("Failed to parse sdt type\n"); - goto error; - } - /* Check that the converted value is OK */ - if (type_idx < -8 || type_idx > 8) { - pr_debug4("Failed to get a valid sdt type\n"); - goto error; - } - suffix = type_to_suffix[type_idx + 8]; - /* Get 
rid of the sdt prefix which is now useless */ - tmp++; - memmove(desc, tmp, strlen(tmp) + 1); - } - /* - * The uprobe tracer format does not support all the - * addressing modes (notably: in x86 the scaled mode); so, we - * detect ',' characters, if there is just one, there is no - * use converting the sdt arg into a uprobe one. + * Argument is in N@OP format. N is size of the argument and OP is + * the actual assembly operand. N can be omitted; in that case + * argument is just OP(without @). */ - if (strchr(desc, ',')) { - pr_debug4("Skipping unsupported SDT argument; %s\n", desc); - goto out; - } + op = strchr(desc, '@'); + if (op) { + op[0] = '\0'; + op++; - /* - * If the argument addressing mode is indirect, we must check - * a few things... - */ - tmp = strchr(desc, '('); - if (tmp) { - int j; - - /* - * ...if the addressing mode is indirect with a - * positive offset (ex.: "1608(%ax)"), we need to add - * a '+' prefix so as to be compliant with uprobe - * format. - */ - if (desc[0] != '+' && desc[0] != '-') - prefix = "+"; - - /* - * ...or if the addressing mode is indirect with a symbol - * as offset, the argument will not be supported by - * the uprobe tracer format; so, let's skip this one. - */ - for (j = 0; j < tmp - desc; j++) { - if (desc[j] != '+' && desc[j] != '-' && - !isdigit(desc[j])) { - pr_debug4("Skipping unsupported SDT argument; " - "%s\n", desc); - goto out; - } - } + if (sdt_arg_parse_size(desc, &suffix)) + goto error; + } else { + op = desc; } - /* - * The uprobe tracer format does not support constants; if we - * find one in the current argument, let's skip the argument. - */ - if (strchr(desc, '$')) { - pr_debug4("Skipping unsupported SDT argument; %s\n", desc); - goto out; - } + ret = arch_sdt_arg_parse_op(op, &new_op); - /* - * The uprobe parser does not support all gas register names; - * so, we have to replace them (ex. 
for x86_64: %rax -> %ax); - * the loop below looks for the register names (starting with - * a '%' and tries to perform the needed renamings. - */ - tmp = strchr(desc, '%'); - while (tmp) { - size_t offset = tmp - desc; + if (ret < 0) + goto error; - ret = sdt_rename_register(&desc, desc + offset); + if (ret == SDT_ARG_VALID) { + ret = strbuf_addf(buf, " arg%d=%s%s", i + 1, new_op, suffix); if (ret < 0) goto error; - - /* - * The desc pointer might have changed; so, let's not - * try to reuse tmp for next lookup - */ - tmp = strchr(desc + offset + 1, '%'); } - if (strbuf_addf(buf, " arg%d=%s%s%s", i + 1, prefix, desc, suffix) < 0) - goto error; - -out: ret = 0; error: free(desc); + free(new_op); return ret; } -- cgit v1.2.3 From c1dfcfad5879df7f41c436d887aea509dadd516d Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 9 Mar 2017 16:06:26 +0800 Subject: perf report: Drop cycles 0 for LBR print For some platforms, for example Broadwell, it doesn't support cycles for LBR. But the perf always prints cycles:0, it's not necessary. The patch refactors the LBR info print code and drops the cycles:0. 
For example: perf report --branch-history --no-children --stdio On Broadwell: --0.91%--__random_r random_r.c:394 (iterations:2) __random_r random_r.c:360 (predicted:0.0%) __random_r random_r.c:380 (predicted:0.0%) __random_r random_r.c:357 On Skylake: --1.07%--main div.c:39 (predicted:52.4% cycles:1 iterations:17) main div.c:44 (predicted:52.4% cycles:1) main div.c:42 (cycles:2) compute_flag div.c:28 (cycles:2) compute_flag div.c:27 (cycles:1) rand rand.c:28 (cycles:1) rand rand.c:28 (cycles:1) __random random.c:298 (cycles:1) __random random.c:297 (cycles:1) __random random.c:295 (cycles:1) __random random.c:295 (cycles:1) __random random.c:295 (cycles:1) Signed-off-by: Yao Jin Reviewed-by: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1489046786-10061-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 111 +++++++++++++++++++++++++++++--------------- 1 file changed, 74 insertions(+), 37 deletions(-) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index d78776a20e80..3cea1fb5404b 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1105,63 +1105,100 @@ int callchain_branch_counts(struct callchain_root *root, cycles_count); } -static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, - u64 branch_count, u64 predicted_count, - u64 abort_count, u64 cycles_count, - u64 iter_count, u64 samples_count) +static int counts_str_build(char *bf, int bfsize, + u64 branch_count, u64 predicted_count, + u64 abort_count, u64 cycles_count, + u64 iter_count, u64 samples_count) { double predicted_percent = 0.0; const char *null_str = ""; char iter_str[32]; - char *str; - u64 cycles = 0; - - if (branch_count == 0) { - if (fp) - return fprintf(fp, " (calltrace)"); + char cycle_str[32]; + char *istr, *cstr; + u64 cycles; + if (branch_count == 0) return scnprintf(bf, bfsize, " (calltrace)"); - } + + cycles = cycles_count / branch_count; if 
(iter_count && samples_count) { - scnprintf(iter_str, sizeof(iter_str), - ", iterations:%" PRId64 "", - iter_count / samples_count); - str = iter_str; + if (cycles > 0) + scnprintf(iter_str, sizeof(iter_str), + " iterations:%" PRId64 "", + iter_count / samples_count); + else + scnprintf(iter_str, sizeof(iter_str), + "iterations:%" PRId64 "", + iter_count / samples_count); + istr = iter_str; + } else + istr = (char *)null_str; + + if (cycles > 0) { + scnprintf(cycle_str, sizeof(cycle_str), + "cycles:%" PRId64 "", cycles); + cstr = cycle_str; } else - str = (char *)null_str; + cstr = (char *)null_str; predicted_percent = predicted_count * 100.0 / branch_count; - cycles = cycles_count / branch_count; - if ((predicted_percent >= 100.0) && (abort_count == 0)) { - if (fp) - return fprintf(fp, " (cycles:%" PRId64 "%s)", - cycles, str); + if ((predicted_count == branch_count) && (abort_count == 0)) { + if ((cycles > 0) || (istr != (char *)null_str)) + return scnprintf(bf, bfsize, " (%s%s)", cstr, istr); + else + return scnprintf(bf, bfsize, "%s", (char *)null_str); + } - return scnprintf(bf, bfsize, " (cycles:%" PRId64 "%s)", - cycles, str); + if ((predicted_count < branch_count) && (abort_count == 0)) { + if ((cycles > 0) || (istr != (char *)null_str)) + return scnprintf(bf, bfsize, + " (predicted:%.1f%% %s%s)", + predicted_percent, cstr, istr); + else { + return scnprintf(bf, bfsize, + " (predicted:%.1f%%)", + predicted_percent); + } } - if ((predicted_percent < 100.0) && (abort_count == 0)) { - if (fp) - return fprintf(fp, - " (predicted:%.1f%%, cycles:%" PRId64 "%s)", - predicted_percent, cycles, str); + if ((predicted_count == branch_count) && (abort_count > 0)) { + if ((cycles > 0) || (istr != (char *)null_str)) + return scnprintf(bf, bfsize, + " (abort:%" PRId64 " %s%s)", + abort_count, cstr, istr); + else + return scnprintf(bf, bfsize, + " (abort:%" PRId64 ")", + abort_count); + } + if ((cycles > 0) || (istr != (char *)null_str)) return scnprintf(bf, bfsize, - " 
(predicted:%.1f%%, cycles:%" PRId64 "%s)", - predicted_percent, cycles, str); - } + " (predicted:%.1f%% abort:%" PRId64 " %s%s)", + predicted_percent, abort_count, cstr, istr); + + return scnprintf(bf, bfsize, + " (predicted:%.1f%% abort:%" PRId64 ")", + predicted_percent, abort_count); +} + +static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, + u64 branch_count, u64 predicted_count, + u64 abort_count, u64 cycles_count, + u64 iter_count, u64 samples_count) +{ + char str[128]; + + counts_str_build(str, sizeof(str), branch_count, + predicted_count, abort_count, cycles_count, + iter_count, samples_count); if (fp) - return fprintf(fp, - " (predicted:%.1f%%, abort:%" PRId64 ", cycles:%" PRId64 "%s)", - predicted_percent, abort_count, cycles, str); + return fprintf(fp, "%s", str); - return scnprintf(bf, bfsize, - " (predicted:%.1f%%, abort:%" PRId64 ", cycles:%" PRId64 "%s)", - predicted_percent, abort_count, cycles, str); + return scnprintf(bf, bfsize, "%s", str); } int callchain_list_counts__printf_value(struct callchain_node *node, -- cgit v1.2.3 From fd2b2975149f5f7099693027cece81b16842964a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 29 Mar 2017 16:37:51 -0300 Subject: perf trace: Handle unpaired raw_syscalls:sys_exit event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Which may happen when we start a tracing session and a thread is waiting for something like "poll" to return, in which case we better print "?" both for the syscall entry timestamp and for the duration. E.g.: Tracing existing mutt session: # perf trace -p `pidof mutt` ? ( ? ): mutt/17135 ... 
[continued]: poll()) = 1 0.027 ( 0.013 ms): mutt/17135 read(buf: 0x7ffcb3c42cef, count: 1) = 1 0.047 ( 0.008 ms): mutt/17135 poll(ufds: 0x7ffcb3c42c50, nfds: 1, timeout_msecs: 1000) = 1 0.059 ( 0.008 ms): mutt/17135 read(buf: 0x7ffcb3c42cef, count: 1) = 1 Before it would print a large number because we'd do: ttrace->entry_time - trace->base_time And entry_time would be 0, while base_time would be the timestamp for the first event 'perf trace' reads, oops. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Luis Claudio Gonçalves Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-wbcb93ofva2qdjd5ltn5eeqq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index c88f9f215e6f..7379792a6504 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -821,12 +821,21 @@ struct syscall { void **arg_parm; }; -static size_t fprintf_duration(unsigned long t, FILE *fp) +/* + * We need to have this 'calculated' boolean because in some cases we really + * don't know what is the duration of a syscall, for instance, when we start + * a session and some threads are waiting for a syscall to finish, say 'poll', + * in which case all we can do is to print "( ? ) for duration and for the + * start timestamp. + */ +static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp) { double duration = (double)t / NSEC_PER_MSEC; size_t printed = fprintf(fp, "("); - if (duration >= 1.0) + if (!calculated) + printed += fprintf(fp, " ? 
"); + else if (duration >= 1.0) printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); else if (duration >= 0.01) printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); @@ -1028,13 +1037,27 @@ static bool trace__filter_duration(struct trace *trace, double t) return t < (trace->duration_filter * NSEC_PER_MSEC); } -static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) +static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) { double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; return fprintf(fp, "%10.3f ", ts); } +/* + * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are + * using ttrace->entry_time for a thread that receives a sys_exit without + * first having received a sys_enter ("poll" issued before tracing session + * starts, lost sys_enter exit due to ring buffer overflow). + */ +static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) +{ + if (tstamp > 0) + return __trace__fprintf_tstamp(trace, tstamp, fp); + + return fprintf(fp, " ? 
"); +} + static bool done = false; static bool interrupted = false; @@ -1045,10 +1068,10 @@ static void sig_handler(int sig) } static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, - u64 duration, u64 tstamp, FILE *fp) + u64 duration, bool duration_calculated, u64 tstamp, FILE *fp) { size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); - printed += fprintf_duration(duration, fp); + printed += fprintf_duration(duration, duration_calculated, fp); if (trace->multiple_threads) { if (trace->show_comm) @@ -1450,7 +1473,7 @@ static int trace__printf_interrupted_entry(struct trace *trace, struct perf_samp duration = sample->time - ttrace->entry_time; - printed = trace__fprintf_entry_head(trace, trace->current, duration, ttrace->entry_time, trace->output); + printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output); printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); ttrace->entry_pending = false; @@ -1497,7 +1520,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (sc->is_exit) { if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) { - trace__fprintf_entry_head(trace, thread, 1, ttrace->entry_time, trace->output); + trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output); fprintf(trace->output, "%-70s)\n", ttrace->entry_str); } } else { @@ -1545,6 +1568,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, { long ret; u64 duration = 0; + bool duration_calculated = false; struct thread *thread; int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0; struct syscall *sc = trace__syscall_info(trace, evsel, id); @@ -1573,6 +1597,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, duration = sample->time - ttrace->entry_time; if (trace__filter_duration(trace, duration)) goto out; + duration_calculated = true; } else if 
(trace->duration_filter) goto out; @@ -1588,7 +1613,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (trace->summary_only) goto out; - trace__fprintf_entry_head(trace, thread, duration, ttrace->entry_time, trace->output); + trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output); if (ttrace->entry_pending) { fprintf(trace->output, "%-70s", ttrace->entry_str); @@ -1855,7 +1880,7 @@ static int trace__pgfault(struct trace *trace, thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); - trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); + trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output); fprintf(trace->output, "%sfault [", evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? -- cgit v1.2.3 From d35869ba348d3f1ff3e6d8214fe0f674bb0e404e Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 6 Feb 2017 16:41:40 +0200 Subject: perf/x86/intel/pt: Allow the disabling of branch tracing Now that Intel PT supports more types of trace content than just branch tracing, it may be useful to allow the user to disable branch tracing when it is not needed. The special case is BDW, where not setting BranchEn is not supported. This is slightly trickier than necessary, because up to this moment the driver has been setting BranchEn automatically and the userspace assumes as much. Instead of reversing the semantics of BranchEn, we introduce a 'passthrough' bit, which will forego the default and allow the user to set BranchEn to their heart's content. 
Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/20170206144140.14402-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/pt.c | 74 ++++++++++++++++++++++++++++++++++++++++++++-- arch/x86/events/intel/pt.h | 1 + 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 354e9ff2978c..ae8324d65e61 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "../perf_event.h" #include "pt.h" @@ -98,6 +99,7 @@ static struct attribute_group pt_cap_group = { .name = "caps", }; +PMU_FORMAT_ATTR(pt, "config:0" ); PMU_FORMAT_ATTR(cyc, "config:1" ); PMU_FORMAT_ATTR(pwr_evt, "config:4" ); PMU_FORMAT_ATTR(fup_on_ptw, "config:5" ); @@ -105,11 +107,13 @@ PMU_FORMAT_ATTR(mtc, "config:9" ); PMU_FORMAT_ATTR(tsc, "config:10" ); PMU_FORMAT_ATTR(noretcomp, "config:11" ); PMU_FORMAT_ATTR(ptw, "config:12" ); +PMU_FORMAT_ATTR(branch, "config:13" ); PMU_FORMAT_ATTR(mtc_period, "config:14-17" ); PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" ); PMU_FORMAT_ATTR(psb_period, "config:24-27" ); static struct attribute *pt_formats_attr[] = { + &format_attr_pt.attr, &format_attr_cyc.attr, &format_attr_pwr_evt.attr, &format_attr_fup_on_ptw.attr, @@ -117,6 +121,7 @@ static struct attribute *pt_formats_attr[] = { &format_attr_tsc.attr, &format_attr_noretcomp.attr, &format_attr_ptw.attr, + &format_attr_branch.attr, &format_attr_mtc_period.attr, &format_attr_cyc_thresh.attr, &format_attr_psb_period.attr, @@ -197,6 +202,19 @@ static int __init pt_pmu_hw_init(void) pt_pmu.tsc_art_den = eax; } + /* model-specific quirks */ + switch (boot_cpu_data.x86_model) { + case 
INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL_X: + /* not setting BRANCH_EN will #GP, erratum BDM106 */ + pt_pmu.branch_en_always_on = true; + break; + default: + break; + } + if (boot_cpu_has(X86_FEATURE_VMX)) { /* * Intel SDM, 36.5 "Tracing post-VMXON" says that @@ -263,8 +281,20 @@ fail: #define RTIT_CTL_PTW (RTIT_CTL_PTW_EN | \ RTIT_CTL_FUP_ON_PTW) -#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | \ +/* + * Bit 0 (TraceEn) in the attr.config is meaningless as the + * corresponding bit in the RTIT_CTL can only be controlled + * by the driver; therefore, repurpose it to mean: pass + * through the bit that was previously assumed to be always + * on for PT, thereby allowing the user to *not* set it if + * they so wish. See also pt_event_valid() and pt_config(). + */ +#define RTIT_CTL_PASSTHROUGH RTIT_CTL_TRACEEN + +#define PT_CONFIG_MASK (RTIT_CTL_TRACEEN | \ + RTIT_CTL_TSC_EN | \ RTIT_CTL_DISRETC | \ + RTIT_CTL_BRANCH_EN | \ RTIT_CTL_CYC_PSB | \ RTIT_CTL_MTC | \ RTIT_CTL_PWR_EVT_EN | \ @@ -332,6 +362,33 @@ static bool pt_event_valid(struct perf_event *event) return false; } + /* + * Setting bit 0 (TraceEn in RTIT_CTL MSR) in the attr.config + * clears the assumption that BranchEn must always be enabled, + * as was the case with the first implementation of PT. + * If this bit is not set, the legacy behavior is preserved + * for compatibility with the older userspace. + * + * Re-using bit 0 for this purpose is fine because it is never + * directly set by the user; previous attempts at setting it in + * the attr.config resulted in -EINVAL. + */ + if (config & RTIT_CTL_PASSTHROUGH) { + /* + * Disallow not setting BRANCH_EN where BRANCH_EN is + * always required. + */ + if (pt_pmu.branch_en_always_on && + !(config & RTIT_CTL_BRANCH_EN)) + return false; + } else { + /* + * Disallow BRANCH_EN without the PASSTHROUGH.
+ */ + if (config & RTIT_CTL_BRANCH_EN) + return false; + } + return true; } @@ -420,7 +477,20 @@ static void pt_config(struct perf_event *event) } reg = pt_config_filters(event); - reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN; + reg |= RTIT_CTL_TOPA | RTIT_CTL_TRACEEN; + + /* + * Previously, we had BRANCH_EN on by default, but now that PT has + * grown features outside of branch tracing, it is useful to allow + * the user to disable it. Setting bit 0 in the event's attr.config + * allows BRANCH_EN to pass through instead of being always on. See + * also the comment in pt_event_valid(). + */ + if (event->attr.config & BIT(0)) { + reg |= event->attr.config & RTIT_CTL_BRANCH_EN; + } else { + reg |= RTIT_CTL_BRANCH_EN; + } if (!event->attr.exclude_kernel) reg |= RTIT_CTL_OS; diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h index b528e8f373e4..0eb41d07b79a 100644 --- a/arch/x86/events/intel/pt.h +++ b/arch/x86/events/intel/pt.h @@ -110,6 +110,7 @@ struct pt_pmu { struct pmu pmu; u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES]; bool vmx; + bool branch_en_always_on; unsigned long max_nonturbo_ratio; unsigned int tsc_art_num; unsigned int tsc_art_den; -- cgit v1.2.3 From f9573e53f123ee487cca737139f3a43897a6383e Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 24 Feb 2017 02:48:13 -0600 Subject: x86/events/amd/iommu: Declare pr_fmt() format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Declare pr_fmt() format for perf/amd_iommu and remove unnecessary pr_debug() calls. Also check return value when _init_events_attrs() fails and issue an error message. 
Signed-off-by: Suravee Suthikulpanit Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Jörg Rödel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1487926102-13073-2-git-send-email-Suravee.Suthikulpanit@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/iommu.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index b28200dea715..8d8ed40613fa 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -11,6 +11,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) "perf/amd_iommu: " fmt + #include #include #include @@ -298,7 +300,6 @@ static void perf_iommu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; - pr_debug("perf: amd_iommu:perf_iommu_start\n"); if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) return; @@ -323,7 +324,6 @@ static void perf_iommu_read(struct perf_event *event) u64 prev_raw_count = 0ULL; u64 delta = 0ULL; struct hw_perf_event *hwc = &event->hw; - pr_debug("perf: amd_iommu:perf_iommu_read\n"); amd_iommu_pc_get_set_reg_val(_GET_DEVID(event), _GET_BANK(event), _GET_CNTR(event), @@ -349,8 +349,6 @@ static void perf_iommu_stop(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; u64 config; - pr_debug("perf: amd_iommu:perf_iommu_stop\n"); - if (hwc->state & PERF_HES_UPTODATE) return; @@ -372,7 +370,6 @@ static int perf_iommu_add(struct perf_event *event, int flags) struct perf_amd_iommu *perf_iommu = container_of(event->pmu, struct perf_amd_iommu, pmu); - pr_debug("perf: amd_iommu:perf_iommu_add\n"); event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; /* request an iommu bank/counter */ @@ -393,7 +390,6 @@ static void perf_iommu_del(struct 
perf_event *event, int flags) struct perf_amd_iommu *perf_iommu = container_of(event->pmu, struct perf_amd_iommu, pmu); - pr_debug("perf: amd_iommu:perf_iommu_del\n"); perf_iommu_stop(event, PERF_EF_UPDATE); /* clear the assigned iommu bank/counter */ @@ -444,27 +440,27 @@ static __init int _init_perf_amd_iommu( raw_spin_lock_init(&perf_iommu->lock); - /* Init format attributes */ perf_iommu->format_group = &amd_iommu_format_group; /* Init cpumask attributes to only core 0 */ cpumask_set_cpu(0, &iommu_cpumask); perf_iommu->cpumask_group = &amd_iommu_cpumask_group; - /* Init events attributes */ - if (_init_events_attrs(perf_iommu) != 0) - pr_err("perf: amd_iommu: Only support raw events.\n"); + ret = _init_events_attrs(perf_iommu); + if (ret) { + pr_err("Error initializing AMD IOMMU perf events.\n"); + return ret; + } - /* Init null attributes */ perf_iommu->null_group = NULL; perf_iommu->pmu.attr_groups = perf_iommu->attr_groups; ret = perf_pmu_register(&perf_iommu->pmu, name, -1); if (ret) { - pr_err("perf: amd_iommu: Failed to initialized.\n"); + pr_err("Error initializing AMD IOMMU perf counters.\n"); amd_iommu_pc_exit(); } else { - pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n", + pr_info("Detected AMD IOMMU (%d banks, %d counters/bank).\n", amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID), amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID)); } -- cgit v1.2.3 From 6aad0c6269052a6114259deaf664ce350bf64fa2 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 24 Feb 2017 02:48:14 -0600 Subject: x86/events/amd/iommu: Clean up bitwise operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clean up register initialization and make use of BIT_ULL(x) where appropriate. This should not affect logic and functionality. 
Signed-off-by: Suravee Suthikulpanit Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Jörg Rödel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1487926102-13073-3-git-send-email-Suravee.Suthikulpanit@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/iommu.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 8d8ed40613fa..e112f498a019 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -164,11 +164,11 @@ static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu) for (bank = 0, shift = 0; bank < max_banks; bank++) { for (cntr = 0; cntr < max_cntrs; cntr++) { shift = bank + (bank*3) + cntr; - if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) { + if (perf_iommu->cntr_assign_mask & BIT_ULL(shift)) { continue; } else { - perf_iommu->cntr_assign_mask |= (1ULL<<shift); - retval = ((u16)((u16)bank<<8) | (u8)(cntr)); + perf_iommu->cntr_assign_mask |= BIT_ULL(shift); + retval = ((bank & 0xFF) << 8) | (cntr & 0xFF); goto out; } } @@ -265,23 +265,23 @@ static void perf_iommu_enable_event(struct perf_event *ev) _GET_BANK(ev), _GET_CNTR(ev) , IOMMU_PC_COUNTER_SRC_REG, &reg, true); - reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32); + reg = devid | (_GET_DEVID_MASK(ev) << 32); if (reg) - reg |= (1UL << 31); + reg |= BIT(31); amd_iommu_pc_get_set_reg_val(devid, _GET_BANK(ev), _GET_CNTR(ev) , IOMMU_PC_DEVID_MATCH_REG, &reg, true); - reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32); + reg = _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32); if (reg) - reg |= (1UL << 31); + reg |= BIT(31); amd_iommu_pc_get_set_reg_val(devid, _GET_BANK(ev), _GET_CNTR(ev) , IOMMU_PC_PASID_MATCH_REG, &reg, true); - reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32); + reg = _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32); if (reg) - reg |= (1UL << 31);
+ reg |= BIT(31); amd_iommu_pc_get_set_reg_val(devid, _GET_BANK(ev), _GET_CNTR(ev) , IOMMU_PC_DOMID_MATCH_REG, &reg, true); -- cgit v1.2.3 From dc6ca5e47d44c11a111807208595ff6a8fcd2a83 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 24 Feb 2017 02:48:15 -0600 Subject: x86/events/amd/iommu: Clean up perf_iommu_read() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix coding style and use GENMASK_ULL(). Signed-off-by: Suravee Suthikulpanit Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Jörg Rödel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1487926102-13073-4-git-send-email-Suravee.Suthikulpanit@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/iommu.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index e112f498a019..d4375dadd4e9 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -320,9 +320,7 @@ static void perf_iommu_start(struct perf_event *event, int flags) static void perf_iommu_read(struct perf_event *event) { - u64 count = 0ULL; - u64 prev_raw_count = 0ULL; - u64 delta = 0ULL; + u64 count, prev, delta; struct hw_perf_event *hwc = &event->hw; amd_iommu_pc_get_set_reg_val(_GET_DEVID(event), @@ -330,18 +328,16 @@ static void perf_iommu_read(struct perf_event *event) IOMMU_PC_COUNTER_REG, &count, false); /* IOMMU pc counter register is only 48 bits */ - count &= 0xFFFFFFFFFFFFULL; + count &= GENMASK_ULL(47, 0); - prev_raw_count = local64_read(&hwc->prev_count); - if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - count) != prev_raw_count) + prev = local64_read(&hwc->prev_count); + if (local64_cmpxchg(&hwc->prev_count, prev, count) != prev) return; - /* Handling 48-bit 
counter overflowing */ - delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT); + /* Handle 48-bit counter overflow */ + delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT); delta >>= COUNTER_SHIFT; local64_add(delta, &event->count); - } static void perf_iommu_stop(struct perf_event *event, int flags) -- cgit v1.2.3 From 0a6d80c70b9150d6a9cf466d41955e374c2c9fab Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 24 Feb 2017 02:48:16 -0600 Subject: drivers/iommu/amd: Clean up iommu_pc_get_set_reg() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clean up coding style and fix a bug in the 64-bit register read logic since it overwrites the upper 32-bit when reading the lower 32-bit. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Jörg Rödel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1487926102-13073-5-git-send-email-Suravee.Suthikulpanit@amd.com Signed-off-by: Ingo Molnar --- drivers/iommu/amd_iommu_init.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 6130278c5d71..ce65a47e2d0b 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -2763,22 +2763,25 @@ static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu, if (WARN_ON((fxn > 0x28) || (fxn & 7))) return -ENODEV; - offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn); + offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn); /* Limit the offset to the hw defined mmio region aperture */ - max_offset_lim = (u32)(((0x40|iommu->max_banks) << 12) | + max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) | (iommu->max_counters << 8) | 0x28); if ((offset < 
MMIO_CNTR_REG_OFFSET) || (offset > max_offset_lim)) return -EINVAL; if (is_write) { - writel((u32)*value, iommu->mmio_base + offset); - writel((*value >> 32), iommu->mmio_base + offset + 4); + u64 val = *value & GENMASK_ULL(47, 0); + + writel((u32)val, iommu->mmio_base + offset); + writel((val >> 32), iommu->mmio_base + offset + 4); } else { *value = readl(iommu->mmio_base + offset + 4); *value <<= 32; - *value = readl(iommu->mmio_base + offset); + *value |= readl(iommu->mmio_base + offset); + *value &= GENMASK_ULL(47, 0); } return 0; -- cgit v1.2.3 From 6b9376e30f42b902260371245f009bc05eb3fdfb Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 24 Feb 2017 02:48:17 -0600 Subject: x86/events, drivers/iommu/amd: Introduce amd_iommu_get_num_iommus() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce amd_iommu_get_num_iommus(), which returns the value of amd_iommus_present. The function is used to replace direct access to the variable, which is now declared as static. This function will also be used by AMD IOMMU perf driver. 
Signed-off-by: Suravee Suthikulpanit Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Jörg Rödel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1487926102-13073-6-git-send-email-Suravee.Suthikulpanit@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/iommu.h | 2 ++ drivers/iommu/amd_iommu.c | 6 +++--- drivers/iommu/amd_iommu_init.c | 11 +++++++++-- drivers/iommu/amd_iommu_proto.h | 1 + drivers/iommu/amd_iommu_types.h | 3 --- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/amd/iommu.h b/arch/x86/events/amd/iommu.h index 845d173278e3..5c5c9329e571 100644 --- a/arch/x86/events/amd/iommu.h +++ b/arch/x86/events/amd/iommu.h @@ -28,6 +28,8 @@ #define IOMMU_BASE_DEVID 0x0000 /* amd_iommu_init.c external support functions */ +extern int amd_iommu_get_num_iommus(void); + extern bool amd_iommu_pc_supported(void); extern u8 amd_iommu_pc_get_max_banks(u16 devid); diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b17536d6e69b..63cacf5d6cf2 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1234,7 +1234,7 @@ static void __domain_flush_pages(struct protection_domain *domain, build_inv_iommu_pages(&cmd, address, size, domain->id, pde); - for (i = 0; i < amd_iommus_present; ++i) { + for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { if (!domain->dev_iommu[i]) continue; @@ -1278,7 +1278,7 @@ static void domain_flush_complete(struct protection_domain *domain) { int i; - for (i = 0; i < amd_iommus_present; ++i) { + for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { if (domain && !domain->dev_iommu[i]) continue; @@ -3363,7 +3363,7 @@ static int __flush_pasid(struct protection_domain *domain, int pasid, * IOMMU TLB needs to be flushed before Device TLB to * prevent device TLB refill from IOMMU TLB 
*/ - for (i = 0; i < amd_iommus_present; ++i) { + for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { if (domain->dev_iommu[i] == 0) continue; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index ce65a47e2d0b..d3ec9c32c446 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -167,7 +167,9 @@ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the /* Array to assign indices to IOMMUs*/ struct amd_iommu *amd_iommus[MAX_IOMMUS]; -int amd_iommus_present; + +/* Number of IOMMUs present in the system */ +static int amd_iommus_present; /* IOMMUs have a non-present cache? */ bool amd_iommu_np_cache __read_mostly; @@ -272,6 +274,11 @@ static inline unsigned long tbl_size(int entry_size) return 1UL << shift; } +int amd_iommu_get_num_iommus(void) +{ + return amd_iommus_present; +} + /* Access to l1 and l2 indexed register spaces */ static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) @@ -1336,7 +1343,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) /* Add IOMMU to internal data structures */ list_add_tail(&iommu->list, &amd_iommu_list); - iommu->index = amd_iommus_present++; + iommu->index = amd_iommus_present++; if (unlikely(iommu->index >= MAX_IOMMUS)) { WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index 7eb60c15c582..e8f0710aceb3 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -21,6 +21,7 @@ #include "amd_iommu_types.h" +extern int amd_iommu_get_num_iommus(void); extern int amd_iommu_init_dma_ops(void); extern int amd_iommu_init_passthrough(void); extern irqreturn_t amd_iommu_int_thread(int irq, void *data); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 003f3ceb2661..4de8f4160bb8 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -611,9 
+611,6 @@ extern struct list_head amd_iommu_list; */ extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; -/* Number of IOMMUs present in the system */ -extern int amd_iommus_present; - /* * Declarations for the global list of all protection domains */ -- cgit v1.2.3 From f5863a00e73c432b91e4efe1d68778b4ace6a892 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 24 Feb 2017 02:48:18 -0600 Subject: x86/events/amd/iommu.c: Modify functions to query max banks and counters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, amd_iommu_pc_get_max_[banks|counters]() use end-point device ID to locate an IOMMU and check the reported max banks/counters. The logic assumes that the IOMMU_BASE_DEVID belongs to the first IOMMU, and uses it to acquire a reference to the first IOMMU, which does not work on certain systems. Instead, modify the function to take an IOMMU index, and use it to query the corresponding AMD IOMMU instance. Currently, hardcode the IOMMU index to 0 since the current AMD IOMMU perf implementation supports only a single IOMMU. A subsequent patch will add support for multiple IOMMUs, and will use a proper IOMMU index. 
Signed-off-by: Suravee Suthikulpanit Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Jörg Rödel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1487926102-13073-7-git-send-email-Suravee.Suthikulpanit@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/iommu.c | 17 +++++++---------- arch/x86/events/amd/iommu.h | 9 ++++----- drivers/iommu/amd_iommu_init.c | 34 ++++++++++++++++++++-------------- drivers/iommu/amd_iommu_proto.h | 2 -- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index d4375dadd4e9..10f67d39cac5 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -239,14 +239,6 @@ static int perf_iommu_event_init(struct perf_event *event) return -EINVAL; } - /* integrate with iommu base devid (0000), assume one iommu */ - perf_iommu->max_banks = - amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID); - perf_iommu->max_counters = - amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID); - if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0)) - return -EINVAL; - /* update the hw_perf_event struct with the iommu config data */ hwc->config = config; hwc->extra_reg.config = config1; @@ -448,6 +440,11 @@ static __init int _init_perf_amd_iommu( return ret; } + perf_iommu->max_banks = amd_iommu_pc_get_max_banks(0); + perf_iommu->max_counters = amd_iommu_pc_get_max_counters(0); + if (!perf_iommu->max_banks || !perf_iommu->max_counters) + return -EINVAL; + perf_iommu->null_group = NULL; perf_iommu->pmu.attr_groups = perf_iommu->attr_groups; @@ -457,8 +454,8 @@ static __init int _init_perf_amd_iommu( amd_iommu_pc_exit(); } else { pr_info("Detected AMD IOMMU (%d banks, %d counters/bank).\n", - amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID), - 
amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID)); + amd_iommu_pc_get_max_banks(0), + amd_iommu_pc_get_max_counters(0)); } return ret; diff --git a/arch/x86/events/amd/iommu.h b/arch/x86/events/amd/iommu.h index 5c5c9329e571..b775107c221e 100644 --- a/arch/x86/events/amd/iommu.h +++ b/arch/x86/events/amd/iommu.h @@ -24,19 +24,18 @@ #define PC_MAX_SPEC_BNKS 64 #define PC_MAX_SPEC_CNTRS 16 -/* iommu pc reg masks*/ -#define IOMMU_BASE_DEVID 0x0000 - /* amd_iommu_init.c external support functions */ extern int amd_iommu_get_num_iommus(void); extern bool amd_iommu_pc_supported(void); -extern u8 amd_iommu_pc_get_max_banks(u16 devid); +extern u8 amd_iommu_pc_get_max_banks(unsigned int idx); -extern u8 amd_iommu_pc_get_max_counters(u16 devid); +extern u8 amd_iommu_pc_get_max_counters(unsigned int idx); extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, u64 *value, bool is_write); +extern struct amd_iommu *get_amd_iommu(int idx); + #endif /*_PERF_EVENT_AMD_IOMMU_H_*/ diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index d3ec9c32c446..8fdf109e6109 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -2718,6 +2718,18 @@ bool amd_iommu_v2_supported(void) } EXPORT_SYMBOL(amd_iommu_v2_supported); +struct amd_iommu *get_amd_iommu(unsigned int idx) +{ + unsigned int i = 0; + struct amd_iommu *iommu; + + for_each_iommu(iommu) + if (i++ == idx) + return iommu; + return NULL; +} +EXPORT_SYMBOL(get_amd_iommu); + /**************************************************************************** * * IOMMU EFR Performance Counter support functionality. 
This code allows @@ -2725,17 +2737,14 @@ EXPORT_SYMBOL(amd_iommu_v2_supported); * ****************************************************************************/ -u8 amd_iommu_pc_get_max_banks(u16 devid) +u8 amd_iommu_pc_get_max_banks(unsigned int idx) { - struct amd_iommu *iommu; - u8 ret = 0; + struct amd_iommu *iommu = get_amd_iommu(idx); - /* locate the iommu governing the devid */ - iommu = amd_iommu_rlookup_table[devid]; if (iommu) - ret = iommu->max_banks; + return iommu->max_banks; - return ret; + return 0; } EXPORT_SYMBOL(amd_iommu_pc_get_max_banks); @@ -2745,17 +2754,14 @@ bool amd_iommu_pc_supported(void) } EXPORT_SYMBOL(amd_iommu_pc_supported); -u8 amd_iommu_pc_get_max_counters(u16 devid) +u8 amd_iommu_pc_get_max_counters(unsigned int idx) { - struct amd_iommu *iommu; - u8 ret = 0; + struct amd_iommu *iommu = get_amd_iommu(idx); - /* locate the iommu governing the devid */ - iommu = amd_iommu_rlookup_table[devid]; if (iommu) - ret = iommu->max_counters; + return iommu->max_counters; - return ret; + return 0; } EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index e8f0710aceb3..cd2257e20c3f 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -59,8 +59,6 @@ extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); /* IOMMU Performance Counter functions */ extern bool amd_iommu_pc_supported(void); -extern u8 amd_iommu_pc_get_max_banks(u16 devid); -extern u8 amd_iommu_pc_get_max_counters(u16 devid); extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, u64 *value, bool is_write); -- cgit v1.2.3 From 1650dfd1a9bcde8fcfaab776887bb6f4e91830c3 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 24 Feb 2017 02:48:19 -0600 Subject: x86/events, drivers/amd/iommu: Prepare for multiple IOMMUs support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, 
amd_iommu_pc_get_set_reg_val() cannot support multiple IOMMUs. Modify it to allow callers to specify an IOMMU. This is in preparation for supporting multiple IOMMUs. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Jörg Rödel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1487926102-13073-8-git-send-email-Suravee.Suthikulpanit@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/iommu.c | 38 ++++++++++++++++-------------------- arch/x86/events/amd/iommu.h | 9 +++++++-- drivers/iommu/amd_iommu_init.c | 43 +++++++++++++++++++++++------------------ drivers/iommu/amd_iommu_proto.h | 5 ----- 4 files changed, 48 insertions(+), 47 deletions(-) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 10f67d39cac5..88fbc8001460 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -248,49 +248,45 @@ static int perf_iommu_event_init(struct perf_event *event) static void perf_iommu_enable_event(struct perf_event *ev) { + struct amd_iommu *iommu = get_amd_iommu(0); u8 csource = _GET_CSOURCE(ev); u16 devid = _GET_DEVID(ev); + u8 bank = _GET_BANK(ev); + u8 cntr = _GET_CNTR(ev); u64 reg = 0ULL; reg = csource; - amd_iommu_pc_get_set_reg_val(devid, - _GET_BANK(ev), _GET_CNTR(ev) , - IOMMU_PC_COUNTER_SRC_REG, &reg, true); + amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_COUNTER_SRC_REG, &reg); reg = devid | (_GET_DEVID_MASK(ev) << 32); if (reg) reg |= BIT(31); - amd_iommu_pc_get_set_reg_val(devid, - _GET_BANK(ev), _GET_CNTR(ev) , - IOMMU_PC_DEVID_MATCH_REG, &reg, true); + amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, &reg); reg = _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32); if (reg) reg |= BIT(31); - amd_iommu_pc_get_set_reg_val(devid, - _GET_BANK(ev), _GET_CNTR(ev) , - 
IOMMU_PC_PASID_MATCH_REG, &reg, true); + amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_PASID_MATCH_REG, &reg); reg = _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32); if (reg) reg |= BIT(31); - amd_iommu_pc_get_set_reg_val(devid, - _GET_BANK(ev), _GET_CNTR(ev) , - IOMMU_PC_DOMID_MATCH_REG, &reg, true); + amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, &reg); } static void perf_iommu_disable_event(struct perf_event *event) { + struct amd_iommu *iommu = get_amd_iommu(0); u64 reg = 0ULL; - amd_iommu_pc_get_set_reg_val(_GET_DEVID(event), - _GET_BANK(event), _GET_CNTR(event), - IOMMU_PC_COUNTER_SRC_REG, &reg, true); + amd_iommu_pc_set_reg(iommu, _GET_BANK(event), _GET_CNTR(event), + IOMMU_PC_COUNTER_SRC_REG, &reg); } static void perf_iommu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; + struct amd_iommu *iommu = get_amd_iommu(0); if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) return; @@ -300,9 +296,8 @@ static void perf_iommu_start(struct perf_event *event, int flags) if (flags & PERF_EF_RELOAD) { u64 prev_raw_count = local64_read(&hwc->prev_count); - amd_iommu_pc_get_set_reg_val(_GET_DEVID(event), - _GET_BANK(event), _GET_CNTR(event), - IOMMU_PC_COUNTER_REG, &prev_raw_count, true); + amd_iommu_pc_set_reg(iommu, _GET_BANK(event), _GET_CNTR(event), + IOMMU_PC_COUNTER_REG, &prev_raw_count); } perf_iommu_enable_event(event); @@ -314,10 +309,11 @@ static void perf_iommu_read(struct perf_event *event) { u64 count, prev, delta; struct hw_perf_event *hwc = &event->hw; + struct amd_iommu *iommu = get_amd_iommu(0); - amd_iommu_pc_get_set_reg_val(_GET_DEVID(event), - _GET_BANK(event), _GET_CNTR(event), - IOMMU_PC_COUNTER_REG, &count, false); + if (amd_iommu_pc_get_reg(iommu, _GET_BANK(event), _GET_CNTR(event), + IOMMU_PC_COUNTER_REG, &count)) + return; /* IOMMU pc counter register is only 48 bits */ count &= GENMASK_ULL(47, 0); diff --git a/arch/x86/events/amd/iommu.h b/arch/x86/events/amd/iommu.h index b775107c221e..62e0702c4374 100644 
--- a/arch/x86/events/amd/iommu.h +++ b/arch/x86/events/amd/iommu.h @@ -24,6 +24,8 @@ #define PC_MAX_SPEC_BNKS 64 #define PC_MAX_SPEC_CNTRS 16 +struct amd_iommu; + /* amd_iommu_init.c external support functions */ extern int amd_iommu_get_num_iommus(void); @@ -33,8 +35,11 @@ extern u8 amd_iommu_pc_get_max_banks(unsigned int idx); extern u8 amd_iommu_pc_get_max_counters(unsigned int idx); -extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, - u8 fxn, u64 *value, bool is_write); +extern int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, + u8 fxn, u64 *value); + +extern int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, + u8 fxn, u64 *value); extern struct amd_iommu *get_amd_iommu(int idx); diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 8fdf109e6109..5a11328f4d98 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -256,10 +256,6 @@ static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(void); -static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu, - u8 bank, u8 cntr, u8 fxn, - u64 *value, bool is_write); - static inline void update_last_devid(u16 devid) { if (devid > amd_iommu_last_bdf) @@ -1484,6 +1480,8 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } +static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, + u8 fxn, u64 *value, bool is_write); static void init_iommu_perf_ctr(struct amd_iommu *iommu) { @@ -1495,8 +1493,8 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) amd_iommu_pc_present = true; /* Check if the performance counters can be written to */ - if ((0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true)) || - (0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val2, false)) || + if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) || + (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, 
false)) || (val != val2)) { pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n"); amd_iommu_pc_present = false; @@ -2765,15 +2763,18 @@ u8 amd_iommu_pc_get_max_counters(unsigned int idx) } EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); -static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu, - u8 bank, u8 cntr, u8 fxn, - u64 *value, bool is_write) +static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, + u8 fxn, u64 *value, bool is_write) { u32 offset; u32 max_offset_lim; + /* Make sure the IOMMU PC resource is available */ + if (!amd_iommu_pc_present) + return -ENODEV; + /* Check for valid iommu and pc register indexing */ - if (WARN_ON((fxn > 0x28) || (fxn & 7))) + if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7))) return -ENODEV; offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn); @@ -2799,17 +2800,21 @@ static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu, return 0; } -EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val); -int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, - u64 *value, bool is_write) +int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) { - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + return -EINVAL; - /* Make sure the IOMMU PC resource is available */ - if (!amd_iommu_pc_present || iommu == NULL) - return -ENODEV; + return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false); +} +EXPORT_SYMBOL(amd_iommu_pc_get_reg); + +int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) +{ + if (!iommu) + return -EINVAL; - return iommu_pc_get_set_reg_val(iommu, bank, cntr, fxn, - value, is_write); + return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true); } +EXPORT_SYMBOL(amd_iommu_pc_set_reg); diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index cd2257e20c3f..466260f8a1df 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ 
-57,11 +57,6 @@ extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid); extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); -/* IOMMU Performance Counter functions */ -extern bool amd_iommu_pc_supported(void); -extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, - u64 *value, bool is_write); - #ifdef CONFIG_IRQ_REMAP extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu); #else -- cgit v1.2.3 From 51686546304fd7f778bb31bf7e2ae9bad6b1d21c Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 24 Feb 2017 02:48:20 -0600 Subject: x86/events/amd/iommu: Fix sysfs perf attribute groups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce static amd_iommu_attr_groups to simplify the sysfs attributes initialization code. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Jörg Rödel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1487926102-13073-9-git-send-email-Suravee.Suthikulpanit@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/iommu.c | 81 ++++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 49 deletions(-) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 88fbc8001460..7ac8138023cc 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -43,14 +43,8 @@ struct perf_amd_iommu { u8 max_counters; u64 cntr_assign_mask; raw_spinlock_t lock; - const struct attribute_group *attr_groups[4]; }; -#define format_group attr_groups[0] -#define cpumask_group attr_groups[1] -#define events_group attr_groups[2] -#define null_group attr_groups[3] - 
/*--------------------------------------------- * sysfs format attributes *---------------------------------------------*/ @@ -81,6 +75,10 @@ static struct attribute_group amd_iommu_format_group = { /*--------------------------------------------- * sysfs events attributes *---------------------------------------------*/ +static struct attribute_group amd_iommu_events_group = { + .name = "events", +}; + struct amd_iommu_event_desc { struct kobj_attribute attr; const char *event; @@ -384,76 +382,60 @@ static void perf_iommu_del(struct perf_event *event, int flags) perf_event_update_userpage(event); } -static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu) +static __init int _init_events_attrs(void) { - struct attribute **attrs; - struct attribute_group *attr_group; int i = 0, j; + struct attribute **attrs; while (amd_iommu_v2_event_descs[i].attr.attr.name) i++; - attr_group = kzalloc(sizeof(struct attribute *) - * (i + 1) + sizeof(*attr_group), GFP_KERNEL); - if (!attr_group) + attrs = kzalloc(sizeof(struct attribute **) * (i + 1), GFP_KERNEL); + if (!attrs) return -ENOMEM; - attrs = (struct attribute **)(attr_group + 1); for (j = 0; j < i; j++) attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr; - attr_group->name = "events"; - attr_group->attrs = attrs; - perf_iommu->events_group = attr_group; - + amd_iommu_events_group.attrs = attrs; return 0; } static __init void amd_iommu_pc_exit(void) { - if (__perf_iommu.events_group != NULL) { - kfree(__perf_iommu.events_group); - __perf_iommu.events_group = NULL; - } + kfree(amd_iommu_events_group.attrs); } -static __init int _init_perf_amd_iommu( - struct perf_amd_iommu *perf_iommu, char *name) +const struct attribute_group *amd_iommu_attr_groups[] = { + &amd_iommu_format_group, + &amd_iommu_cpumask_group, + &amd_iommu_events_group, + NULL, +}; + +static __init int +_init_perf_amd_iommu(struct perf_amd_iommu *perf_iommu, char *name) { int ret; raw_spin_lock_init(&perf_iommu->lock); - perf_iommu->format_group 
= &amd_iommu_format_group; - /* Init cpumask attributes to only core 0 */ cpumask_set_cpu(0, &iommu_cpumask); - perf_iommu->cpumask_group = &amd_iommu_cpumask_group; - - ret = _init_events_attrs(perf_iommu); - if (ret) { - pr_err("Error initializing AMD IOMMU perf events.\n"); - return ret; - } perf_iommu->max_banks = amd_iommu_pc_get_max_banks(0); perf_iommu->max_counters = amd_iommu_pc_get_max_counters(0); if (!perf_iommu->max_banks || !perf_iommu->max_counters) return -EINVAL; - perf_iommu->null_group = NULL; - perf_iommu->pmu.attr_groups = perf_iommu->attr_groups; - + perf_iommu->pmu.attr_groups = amd_iommu_attr_groups; ret = perf_pmu_register(&perf_iommu->pmu, name, -1); - if (ret) { + if (ret) pr_err("Error initializing AMD IOMMU perf counters.\n"); - amd_iommu_pc_exit(); - } else { + else pr_info("Detected AMD IOMMU (%d banks, %d counters/bank).\n", amd_iommu_pc_get_max_banks(0), amd_iommu_pc_get_max_counters(0)); - } - return ret; } @@ -467,24 +449,25 @@ static struct perf_amd_iommu __perf_iommu = { .stop = perf_iommu_stop, .read = perf_iommu_read, }, - .max_banks = 0x00, - .max_counters = 0x00, - .cntr_assign_mask = 0ULL, - .format_group = NULL, - .cpumask_group = NULL, - .events_group = NULL, - .null_group = NULL, }; static __init int amd_iommu_pc_init(void) { + int ret; + /* Make sure the IOMMU PC resource is available */ if (!amd_iommu_pc_supported()) return -ENODEV; - _init_perf_amd_iommu(&__perf_iommu, "amd_iommu"); + ret = _init_events_attrs(); + if (ret) + return ret; - return 0; + ret = _init_perf_amd_iommu(&__perf_iommu, "amd_iommu"); + if (ret) + amd_iommu_pc_exit(); + + return ret; } device_initcall(amd_iommu_pc_init); -- cgit v1.2.3 From cf25f904ef75aa7c25097eb4981bbc634bf5ff9e Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 24 Feb 2017 02:48:21 -0600 Subject: x86/events/amd/iommu: Add IOMMU-specific hw_perf_event struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current AMD IOMMU 
perf PMU inappropriately uses the hardware struct inside the union in struct hw_perf_event, extra_reg in particular. Instead, introduce an AMD IOMMU-specific struct with required parameters to be programmed into the IOMMU performance counter control register. Update the pasid field from 16 to 20 bits while at it. Signed-off-by: Suravee Suthikulpanit [ Fixup macros, shorten get_next_avail_iommu_bnk_cntr() local vars, massage commit message. ] Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Jörg Rödel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1487926102-13073-10-git-send-email-Suravee.Suthikulpanit@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/iommu.c | 113 ++++++++++++++++++++------------------------ include/linux/perf_event.h | 7 +++ 2 files changed, 57 insertions(+), 63 deletions(-) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 7ac8138023cc..f0d94c8b382a 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -23,17 +23,16 @@ #define COUNTER_SHIFT 16 -#define _GET_BANK(ev) ((u8)(ev->hw.extra_reg.reg >> 8)) -#define _GET_CNTR(ev) ((u8)(ev->hw.extra_reg.reg)) - -/* iommu pmu config masks */ -#define _GET_CSOURCE(ev) ((ev->hw.config & 0xFFULL)) -#define _GET_DEVID(ev) ((ev->hw.config >> 8) & 0xFFFFULL) -#define _GET_PASID(ev) ((ev->hw.config >> 24) & 0xFFFFULL) -#define _GET_DOMID(ev) ((ev->hw.config >> 40) & 0xFFFFULL) -#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config) & 0xFFFFULL) -#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL) -#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL) +/* iommu pmu conf masks */ +#define GET_CSOURCE(x) ((x)->conf & 0xFFULL) +#define GET_DEVID(x) (((x)->conf >> 8) & 0xFFFFULL) +#define GET_DOMID(x) (((x)->conf 
>> 24) & 0xFFFFULL) +#define GET_PASID(x) (((x)->conf >> 40) & 0xFFFFFULL) + +/* iommu pmu conf1 masks */ +#define GET_DEVID_MASK(x) ((x)->conf1 & 0xFFFFULL) +#define GET_DOMID_MASK(x) (((x)->conf1 >> 16) & 0xFFFFULL) +#define GET_PASID_MASK(x) (((x)->conf1 >> 32) & 0xFFFFFULL) static struct perf_amd_iommu __perf_iommu; @@ -50,11 +49,11 @@ struct perf_amd_iommu { *---------------------------------------------*/ PMU_FORMAT_ATTR(csource, "config:0-7"); PMU_FORMAT_ATTR(devid, "config:8-23"); -PMU_FORMAT_ATTR(pasid, "config:24-39"); -PMU_FORMAT_ATTR(domid, "config:40-55"); +PMU_FORMAT_ATTR(domid, "config:24-39"); +PMU_FORMAT_ATTR(pasid, "config:40-59"); PMU_FORMAT_ATTR(devid_mask, "config1:0-15"); -PMU_FORMAT_ATTR(pasid_mask, "config1:16-31"); -PMU_FORMAT_ATTR(domid_mask, "config1:32-47"); +PMU_FORMAT_ATTR(domid_mask, "config1:16-31"); +PMU_FORMAT_ATTR(pasid_mask, "config1:32-51"); static struct attribute *iommu_format_attrs[] = { &format_attr_csource.attr, @@ -150,30 +149,34 @@ static struct attribute_group amd_iommu_cpumask_group = { /*---------------------------------------------*/ -static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu) +static int get_next_avail_iommu_bnk_cntr(struct perf_event *event) { + struct perf_amd_iommu *piommu = container_of(event->pmu, struct perf_amd_iommu, pmu); + int max_cntrs = piommu->max_counters; + int max_banks = piommu->max_banks; + u32 shift, bank, cntr; unsigned long flags; - int shift, bank, cntr, retval; - int max_banks = perf_iommu->max_banks; - int max_cntrs = perf_iommu->max_counters; + int retval; - raw_spin_lock_irqsave(&perf_iommu->lock, flags); + raw_spin_lock_irqsave(&piommu->lock, flags); for (bank = 0, shift = 0; bank < max_banks; bank++) { for (cntr = 0; cntr < max_cntrs; cntr++) { shift = bank + (bank*3) + cntr; - if (perf_iommu->cntr_assign_mask & BIT_ULL(shift)) { + if (piommu->cntr_assign_mask & BIT_ULL(shift)) { continue; } else { - perf_iommu->cntr_assign_mask |= BIT_ULL(shift); - retval 
= ((bank & 0xFF) << 8) | (cntr & 0xFF); + piommu->cntr_assign_mask |= BIT_ULL(shift); + event->hw.iommu_bank = bank; + event->hw.iommu_cntr = cntr; + retval = 0; goto out; } } } retval = -ENOSPC; out: - raw_spin_unlock_irqrestore(&perf_iommu->lock, flags); + raw_spin_unlock_irqrestore(&piommu->lock, flags); return retval; } @@ -202,8 +205,6 @@ static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu, static int perf_iommu_event_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - struct perf_amd_iommu *perf_iommu; - u64 config, config1; /* test the event attr type check for PMU enumeration */ if (event->attr.type != event->pmu->type) @@ -225,21 +226,9 @@ static int perf_iommu_event_init(struct perf_event *event) if (event->cpu < 0) return -EINVAL; - perf_iommu = &__perf_iommu; - - if (event->pmu != &perf_iommu->pmu) - return -ENOENT; - - if (perf_iommu) { - config = event->attr.config; - config1 = event->attr.config1; - } else { - return -EINVAL; - } - /* update the hw_perf_event struct with the iommu config data */ - hwc->config = config; - hwc->extra_reg.config = config1; + hwc->conf = event->attr.config; + hwc->conf1 = event->attr.config1; return 0; } @@ -247,26 +236,28 @@ static int perf_iommu_event_init(struct perf_event *event) static void perf_iommu_enable_event(struct perf_event *ev) { struct amd_iommu *iommu = get_amd_iommu(0); - u8 csource = _GET_CSOURCE(ev); - u16 devid = _GET_DEVID(ev); - u8 bank = _GET_BANK(ev); - u8 cntr = _GET_CNTR(ev); + struct hw_perf_event *hwc = &ev->hw; + u8 bank = hwc->iommu_bank; + u8 cntr = hwc->iommu_cntr; u64 reg = 0ULL; - reg = csource; + reg = GET_CSOURCE(hwc); amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_COUNTER_SRC_REG, &reg); - reg = devid | (_GET_DEVID_MASK(ev) << 32); + reg = GET_DEVID_MASK(hwc); + reg = GET_DEVID(hwc) | (reg << 32); if (reg) reg |= BIT(31); amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, &reg); - reg = _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32); 
+ reg = GET_PASID_MASK(hwc); + reg = GET_PASID(hwc) | (reg << 32); if (reg) reg |= BIT(31); amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_PASID_MATCH_REG, &reg); - reg = _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32); + reg = GET_DOMID_MASK(hwc); + reg = GET_DOMID(hwc) | (reg << 32); if (reg) reg |= BIT(31); amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, &reg); @@ -275,16 +266,16 @@ static void perf_iommu_enable_event(struct perf_event *ev) static void perf_iommu_disable_event(struct perf_event *event) { struct amd_iommu *iommu = get_amd_iommu(0); + struct hw_perf_event *hwc = &event->hw; u64 reg = 0ULL; - amd_iommu_pc_set_reg(iommu, _GET_BANK(event), _GET_CNTR(event), + amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, IOMMU_PC_COUNTER_SRC_REG, &reg); } static void perf_iommu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; - struct amd_iommu *iommu = get_amd_iommu(0); if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) return; @@ -293,8 +284,10 @@ static void perf_iommu_start(struct perf_event *event, int flags) hwc->state = 0; if (flags & PERF_EF_RELOAD) { - u64 prev_raw_count = local64_read(&hwc->prev_count); - amd_iommu_pc_set_reg(iommu, _GET_BANK(event), _GET_CNTR(event), + u64 prev_raw_count = local64_read(&hwc->prev_count); + struct amd_iommu *iommu = get_amd_iommu(0); + + amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, IOMMU_PC_COUNTER_REG, &prev_raw_count); } @@ -309,7 +302,7 @@ static void perf_iommu_read(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; struct amd_iommu *iommu = get_amd_iommu(0); - if (amd_iommu_pc_get_reg(iommu, _GET_BANK(event), _GET_CNTR(event), + if (amd_iommu_pc_get_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, IOMMU_PC_COUNTER_REG, &count)) return; @@ -329,7 +322,6 @@ static void perf_iommu_read(struct perf_event *event) static void perf_iommu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; - u64 config; if 
(hwc->state & PERF_HES_UPTODATE) return; @@ -341,7 +333,6 @@ static void perf_iommu_stop(struct perf_event *event, int flags) if (hwc->state & PERF_HES_UPTODATE) return; - config = hwc->config; perf_iommu_read(event); hwc->state |= PERF_HES_UPTODATE; } @@ -349,16 +340,12 @@ static void perf_iommu_stop(struct perf_event *event, int flags) static int perf_iommu_add(struct perf_event *event, int flags) { int retval; - struct perf_amd_iommu *perf_iommu = - container_of(event->pmu, struct perf_amd_iommu, pmu); event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; /* request an iommu bank/counter */ - retval = get_next_avail_iommu_bnk_cntr(perf_iommu); - if (retval != -ENOSPC) - event->hw.extra_reg.reg = (u16)retval; - else + retval = get_next_avail_iommu_bnk_cntr(event); + if (retval) return retval; if (flags & PERF_EF_START) @@ -369,6 +356,7 @@ static int perf_iommu_add(struct perf_event *event, int flags) static void perf_iommu_del(struct perf_event *event, int flags) { + struct hw_perf_event *hwc = &event->hw; struct perf_amd_iommu *perf_iommu = container_of(event->pmu, struct perf_amd_iommu, pmu); @@ -376,8 +364,7 @@ static void perf_iommu_del(struct perf_event *event, int flags) /* clear the assigned iommu bank/counter */ clear_avail_iommu_bnk_cntr(perf_iommu, - _GET_BANK(event), - _GET_CNTR(event)); + hwc->iommu_bank, hwc->iommu_cntr); perf_event_update_userpage(event); } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b6e75c9d4791..24a635887f28 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -165,6 +165,13 @@ struct hw_perf_event { struct list_head bp_list; }; #endif + struct { /* amd_iommu */ + u8 iommu_bank; + u8 iommu_cntr; + u16 padding; + u64 conf; + u64 conf1; + }; }; /* * If the event is a per task event, this will point to the task in -- cgit v1.2.3 From 25df39f2cfd06a4b49ad592c5b7cba0cbf24e27f Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 22 Mar 2017 02:02:42 -0500 Subject: 
x86/events/amd/iommu: Enable support for multiple IOMMUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for multiple IOMMUs to perf by exposing an AMD IOMMU PMU for each IOMMU found in the system via: /bus/event_source/devices/amd_iommu_x where x is the IOMMU index. This allows users to specify different events to be programmed into the performance counters of each IOMMU. Signed-off-by: Suravee Suthikulpanit [ Improve readability, shorten names. ] Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Jörg Rödel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1490166162-10002-11-git-send-email-Suravee.Suthikulpanit@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/iommu.c | 112 ++++++++++++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 41 deletions(-) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index f0d94c8b382a..3641e24fdac5 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -34,16 +34,21 @@ #define GET_DOMID_MASK(x) (((x)->conf1 >> 16) & 0xFFFFULL) #define GET_PASID_MASK(x) (((x)->conf1 >> 32) & 0xFFFFFULL) -static struct perf_amd_iommu __perf_iommu; +#define IOMMU_NAME_SIZE 16 struct perf_amd_iommu { + struct list_head list; struct pmu pmu; + struct amd_iommu *iommu; + char name[IOMMU_NAME_SIZE]; u8 max_banks; u8 max_counters; u64 cntr_assign_mask; raw_spinlock_t lock; }; +static LIST_HEAD(perf_amd_iommu_list); + /*--------------------------------------------- * sysfs format attributes *---------------------------------------------*/ @@ -233,9 +238,14 @@ static int perf_iommu_event_init(struct perf_event *event) return 0; } +static inline struct amd_iommu *perf_event_2_iommu(struct perf_event *ev) +{ + return 
(container_of(ev->pmu, struct perf_amd_iommu, pmu))->iommu; +} + static void perf_iommu_enable_event(struct perf_event *ev) { - struct amd_iommu *iommu = get_amd_iommu(0); + struct amd_iommu *iommu = perf_event_2_iommu(ev); struct hw_perf_event *hwc = &ev->hw; u8 bank = hwc->iommu_bank; u8 cntr = hwc->iommu_cntr; @@ -265,7 +275,7 @@ static void perf_iommu_enable_event(struct perf_event *ev) static void perf_iommu_disable_event(struct perf_event *event) { - struct amd_iommu *iommu = get_amd_iommu(0); + struct amd_iommu *iommu = perf_event_2_iommu(event); struct hw_perf_event *hwc = &event->hw; u64 reg = 0ULL; @@ -285,7 +295,7 @@ static void perf_iommu_start(struct perf_event *event, int flags) if (flags & PERF_EF_RELOAD) { u64 prev_raw_count = local64_read(&hwc->prev_count); - struct amd_iommu *iommu = get_amd_iommu(0); + struct amd_iommu *iommu = perf_event_2_iommu(event); amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, IOMMU_PC_COUNTER_REG, &prev_raw_count); @@ -300,7 +310,7 @@ static void perf_iommu_read(struct perf_event *event) { u64 count, prev, delta; struct hw_perf_event *hwc = &event->hw; - struct amd_iommu *iommu = get_amd_iommu(0); + struct amd_iommu *iommu = perf_event_2_iommu(event); if (amd_iommu_pc_get_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, IOMMU_PC_COUNTER_REG, &count)) @@ -388,11 +398,6 @@ static __init int _init_events_attrs(void) return 0; } -static __init void amd_iommu_pc_exit(void) -{ - kfree(amd_iommu_events_group.attrs); -} - const struct attribute_group *amd_iommu_attr_groups[] = { &amd_iommu_format_group, &amd_iommu_cpumask_group, @@ -400,46 +405,57 @@ const struct attribute_group *amd_iommu_attr_groups[] = { NULL, }; -static __init int -_init_perf_amd_iommu(struct perf_amd_iommu *perf_iommu, char *name) +static struct pmu iommu_pmu = { + .event_init = perf_iommu_event_init, + .add = perf_iommu_add, + .del = perf_iommu_del, + .start = perf_iommu_start, + .stop = perf_iommu_stop, + .read = perf_iommu_read, + .task_ctx_nr 
= perf_invalid_context, + .attr_groups = amd_iommu_attr_groups, +}; + +static __init int init_one_iommu(unsigned int idx) { + struct perf_amd_iommu *perf_iommu; int ret; + perf_iommu = kzalloc(sizeof(struct perf_amd_iommu), GFP_KERNEL); + if (!perf_iommu) + return -ENOMEM; + raw_spin_lock_init(&perf_iommu->lock); - /* Init cpumask attributes to only core 0 */ - cpumask_set_cpu(0, &iommu_cpumask); + perf_iommu->pmu = iommu_pmu; + perf_iommu->iommu = get_amd_iommu(idx); + perf_iommu->max_banks = amd_iommu_pc_get_max_banks(idx); + perf_iommu->max_counters = amd_iommu_pc_get_max_counters(idx); - perf_iommu->max_banks = amd_iommu_pc_get_max_banks(0); - perf_iommu->max_counters = amd_iommu_pc_get_max_counters(0); - if (!perf_iommu->max_banks || !perf_iommu->max_counters) + if (!perf_iommu->iommu || + !perf_iommu->max_banks || + !perf_iommu->max_counters) { + kfree(perf_iommu); return -EINVAL; + } - perf_iommu->pmu.attr_groups = amd_iommu_attr_groups; - ret = perf_pmu_register(&perf_iommu->pmu, name, -1); - if (ret) - pr_err("Error initializing AMD IOMMU perf counters.\n"); - else - pr_info("Detected AMD IOMMU (%d banks, %d counters/bank).\n", - amd_iommu_pc_get_max_banks(0), - amd_iommu_pc_get_max_counters(0)); + snprintf(perf_iommu->name, IOMMU_NAME_SIZE, "amd_iommu_%u", idx); + + ret = perf_pmu_register(&perf_iommu->pmu, perf_iommu->name, -1); + if (!ret) { + pr_info("Detected AMD IOMMU #%d (%d banks, %d counters/bank).\n", + idx, perf_iommu->max_banks, perf_iommu->max_counters); + list_add_tail(&perf_iommu->list, &perf_amd_iommu_list); + } else { + pr_warn("Error initializing IOMMU %d.\n", idx); + kfree(perf_iommu); + } return ret; } -static struct perf_amd_iommu __perf_iommu = { - .pmu = { - .task_ctx_nr = perf_invalid_context, - .event_init = perf_iommu_event_init, - .add = perf_iommu_add, - .del = perf_iommu_del, - .start = perf_iommu_start, - .stop = perf_iommu_stop, - .read = perf_iommu_read, - }, -}; - static __init int amd_iommu_pc_init(void) { + unsigned int 
i, cnt = 0; int ret; /* Make sure the IOMMU PC resource is available */ @@ -450,11 +466,25 @@ static __init int amd_iommu_pc_init(void) if (ret) return ret; - ret = _init_perf_amd_iommu(&__perf_iommu, "amd_iommu"); - if (ret) - amd_iommu_pc_exit(); + /* + * An IOMMU PMU is specific to an IOMMU, and can function independently. + * So we go through all IOMMUs and ignore the one that fails init + * unless all IOMMU are failing. + */ + for (i = 0; i < amd_iommu_get_num_iommus(); i++) { + ret = init_one_iommu(i); + if (!ret) + cnt++; + } - return ret; + if (!cnt) { + kfree(amd_iommu_events_group.attrs); + return -ENODEV; + } + + /* Init cpumask attributes to only core 0 */ + cpumask_set_cpu(0, &iommu_cpumask); + return 0; } device_initcall(amd_iommu_pc_init); -- cgit v1.2.3 From a596a877fde0b34e622dbf123f361dacd086cd6e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 30 Mar 2017 10:54:40 +0100 Subject: perf utils: Fix spelling mistake: "Invalud" -> "Invalid" Trivial fix to spelling mistake in pr_debug message. 
Signed-off-by: Colin King Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Krister Johansen Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Link: http://lkml.kernel.org/r/20170330095440.19444-1-colin.king@canonical.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 3c4d4d00cb2c..61bf304206fd 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -2459,7 +2459,7 @@ int parse_filter_percentage(const struct option *opt __maybe_unused, else if (!strcmp(arg, "absolute")) symbol_conf.filter_relative = false; else { - pr_debug("Invalud percentage: %s\n", arg); + pr_debug("Invalid percentage: %s\n", arg); return -1; } -- cgit v1.2.3 From 9c4e2e2589c99ed01db6245847b4bd44bc053330 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 29 Mar 2017 17:07:53 -0700 Subject: perf vendor events intel: Add missing UNC_M_DCLOCKTICKS for Broadwell DE uncore An earlier update removed the UNC_M_CLOCKTICKS event for Broadwell DE. But Metric events were still referring to it. This adds it back under a different name from the event list, and also fixes up the Metric events to use the new name. 
Cc: jolsa@kernel.org Link: http://lkml.kernel.org/n/tip-zxxzg4g5nr93o7np00vgqqwm@git.kernel.org Signed-off-by: Andi Kleen --- .../perf/pmu-events/arch/x86/broadwellde/uncore-memory.json | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json index fa09e12018ce..f4b0745cdbbf 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json @@ -19,12 +19,19 @@ "UMask": "0xC", "Unit": "iMC" }, + { + "BriefDescription": "Memory controller clock ticks", + "Counter": "0,1,2,3", + "EventName": "UNC_M_DCLOCKTICKS", + "PerPkg": "1", + "Unit": "iMC" + }, { "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", "Counter": "0,1,2,3", "EventCode": "0x85", "EventName": "UNC_M_POWER_CHANNEL_PPD", - "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_DCLOCKTICKS) * 100.", "MetricName": "power_channel_ppd %", "PerPkg": "1", "Unit": "iMC" @@ -34,7 +41,7 @@ "Counter": "0,1,2,3", "EventCode": "0x86", "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", - "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.", + "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_DCLOCKTICKS) * 100.", "MetricName": "power_critical_throttle_cycles %", "PerPkg": "1", "Unit": "iMC" @@ -44,7 +51,7 @@ "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "UNC_M_POWER_SELF_REFRESH", - "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_DCLOCKTICKS) * 100.", "MetricName": "power_self_refresh %", "PerPkg": "1", "Unit": "iMC" -- cgit v1.2.3 From 80432c7311dbcf0c814d4923480b055a725b0be2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 29 Mar 2017 17:12:44 -0700 Subject: perf vendor events 
intel: Add uncore events for Sandy Bridge client Add V15 of Sandy Bridge uncore events Cc: jolsa@kernel.org Link: http://lkml.kernel.org/n/tip-2qkwutpwljdue8jmwk3xqdbl@git.kernel.org Signed-off-by: Andi Kleen --- .../pmu-events/arch/x86/sandybridge/uncore.json | 314 +++++++++++++++++++++ 1 file changed, 314 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/sandybridge/uncore.json diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/uncore.json b/tools/perf/pmu-events/arch/x86/sandybridge/uncore.json new file mode 100644 index 000000000000..42c70eed05a2 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/sandybridge/uncore.json @@ -0,0 +1,314 @@ +[ + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x01", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS", + "BriefDescription": "A snoop misses in some processor core.", + "PublicDescription": "A snoop misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x02", + "EventName": "UNC_CBO_XSNP_RESPONSE.INVAL", + "BriefDescription": "A snoop invalidates a non-modified line in some processor core.", + "PublicDescription": "A snoop invalidates a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x04", + "EventName": "UNC_CBO_XSNP_RESPONSE.HIT", + "BriefDescription": "A snoop hits a non-modified line in some processor core.", + "PublicDescription": "A snoop hits a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x08", + "EventName": "UNC_CBO_XSNP_RESPONSE.HITM", + "BriefDescription": "A snoop hits a modified line in some processor core.", + "PublicDescription": "A snoop hits a modified line in some processor core.", + "Counter": 
"0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x10", + "EventName": "UNC_CBO_XSNP_RESPONSE.INVAL_M", + "BriefDescription": "A snoop invalidates a modified line in some processor core.", + "PublicDescription": "A snoop invalidates a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x20", + "EventName": "UNC_CBO_XSNP_RESPONSE.EXTERNAL_FILTER", + "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to external snoop request.", + "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to external snoop request.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x40", + "EventName": "UNC_CBO_XSNP_RESPONSE.XCORE_FILTER", + "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to processor core memory request.", + "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to processor core memory request.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x80", + "EventName": "UNC_CBO_XSNP_RESPONSE.EVICTION_FILTER", + "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to LLC eviction.", + "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to LLC eviction.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x01", + "EventName": "UNC_CBO_CACHE_LOOKUP.M", + "BriefDescription": "LLC lookup request that access cache and found line in M-state.", + "PublicDescription": "LLC lookup request that access cache and found line in M-state.", + "Counter": "0,1", + 
"CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x02", + "EventName": "UNC_CBO_CACHE_LOOKUP.E", + "BriefDescription": "LLC lookup request that access cache and found line in E-state.", + "PublicDescription": "LLC lookup request that access cache and found line in E-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x04", + "EventName": "UNC_CBO_CACHE_LOOKUP.S", + "BriefDescription": "LLC lookup request that access cache and found line in S-state.", + "PublicDescription": "LLC lookup request that access cache and found line in S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x08", + "EventName": "UNC_CBO_CACHE_LOOKUP.I", + "BriefDescription": "LLC lookup request that access cache and found line in I-state.", + "PublicDescription": "LLC lookup request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x10", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_FILTER", + "BriefDescription": "Filter on processor core initiated cacheable read requests.", + "PublicDescription": "Filter on processor core initiated cacheable read requests.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x20", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_FILTER", + "BriefDescription": "Filter on processor core initiated cacheable write requests.", + "PublicDescription": "Filter on processor core initiated cacheable write requests.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x40", + "EventName": 
"UNC_CBO_CACHE_LOOKUP.EXTSNP_FILTER", + "BriefDescription": "Filter on external snoop requests.", + "PublicDescription": "Filter on external snoop requests.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x80", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_REQUEST_FILTER", + "BriefDescription": "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests.", + "PublicDescription": "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", + "BriefDescription": "Counts cycles weighted by the number of requests waiting for data returning from the memory controller. Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "PublicDescription": "Counts cycles weighted by the number of requests waiting for data returning from the memory controller. 
Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "Counter": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x81", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "PublicDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x81", + "UMask": "0x20", + "EventName": "UNC_ARB_TRK_REQUESTS.WRITES", + "BriefDescription": "Counts the number of allocated write entries, include full, partial, and LLC evictions.", + "PublicDescription": "Counts the number of allocated write entries, include full, partial, and LLC evictions.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x81", + "UMask": "0x80", + "EventName": "UNC_ARB_TRK_REQUESTS.EVICTIONS", + "BriefDescription": "Counts the number of LLC evictions allocated.", + "PublicDescription": "Counts the number of LLC evictions allocated.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x83", + "UMask": "0x01", + "EventName": "UNC_ARB_COH_TRK_OCCUPANCY.ALL", + "BriefDescription": "Cycles weighted by number of requests pending in Coherency Tracker.", + "PublicDescription": "Cycles weighted by number of requests pending in Coherency Tracker.", + "Counter": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x84", + "UMask": "0x01", + "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", + "BriefDescription": "Number of requests allocated in Coherency Tracker.", + "PublicDescription": 
"Number of requests allocated in Coherency Tracker.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST", + "BriefDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "Counter": "0,1", + "CounterMask": "1", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_OVER_HALF_FULL", + "BriefDescription": "Cycles with at least half of the requests outstanding are waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "PublicDescription": "Cycles with at least half of the requests outstanding are waiting for data return from memory controller. 
Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "Counter": "0,1", + "CounterMask": "10", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x0", + "UMask": "0x01", + "EventName": "UNC_CLOCK.SOCKET", + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "Counter": "Fixed", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x06", + "EventName": "UNC_CBO_CACHE_LOOKUP.ES", + "BriefDescription": "LLC lookup request that access cache and found line in E-state or S-state.", + "PublicDescription": "LLC lookup request that access cache and found line in E-state or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + } +] \ No newline at end of file -- cgit v1.2.3 From bccdcb2a77ba0bef17baf152179e30ca35459a0c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 29 Mar 2017 17:14:02 -0700 Subject: perf vendor events intel: Add uncore events for Ivy Bridge client Add V18 of Ivy Bridge uncore events Cc: jolsa@kernel.org Link: http://lkml.kernel.org/n/tip-299k76asec5rwp0i86qygnnt@git.kernel.org Signed-off-by: Andi Kleen --- .../perf/pmu-events/arch/x86/ivybridge/uncore.json | 314 +++++++++++++++++++++ 1 file changed, 314 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/ivybridge/uncore.json diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/uncore.json b/tools/perf/pmu-events/arch/x86/ivybridge/uncore.json new file mode 100644 index 000000000000..42c70eed05a2 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/ivybridge/uncore.json @@ -0,0 +1,314 @@ +[ + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x01", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS", + "BriefDescription": "A snoop misses in some processor core.", + "PublicDescription": "A snoop misses in some 
processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x02", + "EventName": "UNC_CBO_XSNP_RESPONSE.INVAL", + "BriefDescription": "A snoop invalidates a non-modified line in some processor core.", + "PublicDescription": "A snoop invalidates a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x04", + "EventName": "UNC_CBO_XSNP_RESPONSE.HIT", + "BriefDescription": "A snoop hits a non-modified line in some processor core.", + "PublicDescription": "A snoop hits a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x08", + "EventName": "UNC_CBO_XSNP_RESPONSE.HITM", + "BriefDescription": "A snoop hits a modified line in some processor core.", + "PublicDescription": "A snoop hits a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x10", + "EventName": "UNC_CBO_XSNP_RESPONSE.INVAL_M", + "BriefDescription": "A snoop invalidates a modified line in some processor core.", + "PublicDescription": "A snoop invalidates a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x20", + "EventName": "UNC_CBO_XSNP_RESPONSE.EXTERNAL_FILTER", + "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to external snoop request.", + "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to external snoop request.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": 
"0x22", + "UMask": "0x40", + "EventName": "UNC_CBO_XSNP_RESPONSE.XCORE_FILTER", + "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to processor core memory request.", + "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to processor core memory request.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x80", + "EventName": "UNC_CBO_XSNP_RESPONSE.EVICTION_FILTER", + "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to LLC eviction.", + "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to LLC eviction.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x01", + "EventName": "UNC_CBO_CACHE_LOOKUP.M", + "BriefDescription": "LLC lookup request that access cache and found line in M-state.", + "PublicDescription": "LLC lookup request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x02", + "EventName": "UNC_CBO_CACHE_LOOKUP.E", + "BriefDescription": "LLC lookup request that access cache and found line in E-state.", + "PublicDescription": "LLC lookup request that access cache and found line in E-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x04", + "EventName": "UNC_CBO_CACHE_LOOKUP.S", + "BriefDescription": "LLC lookup request that access cache and found line in S-state.", + "PublicDescription": "LLC lookup request that access cache and found line in S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x08", + "EventName": "UNC_CBO_CACHE_LOOKUP.I", + 
"BriefDescription": "LLC lookup request that access cache and found line in I-state.", + "PublicDescription": "LLC lookup request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x10", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_FILTER", + "BriefDescription": "Filter on processor core initiated cacheable read requests.", + "PublicDescription": "Filter on processor core initiated cacheable read requests.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x20", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_FILTER", + "BriefDescription": "Filter on processor core initiated cacheable write requests.", + "PublicDescription": "Filter on processor core initiated cacheable write requests.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x40", + "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_FILTER", + "BriefDescription": "Filter on external snoop requests.", + "PublicDescription": "Filter on external snoop requests.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x80", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_REQUEST_FILTER", + "BriefDescription": "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests.", + "PublicDescription": "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", + "BriefDescription": "Counts cycles weighted by the number of requests waiting for data returning from the memory controller. 
Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "PublicDescription": "Counts cycles weighted by the number of requests waiting for data returning from the memory controller. Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "Counter": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x81", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "BriefDescription": "Counts the number of coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "PublicDescription": "Counts the number of coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x81", + "UMask": "0x20", + "EventName": "UNC_ARB_TRK_REQUESTS.WRITES", + "BriefDescription": "Counts the number of allocated write entries, including full, partial, and LLC evictions.", + "PublicDescription": "Counts the number of allocated write entries, including full, partial, and LLC evictions.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x81", + "UMask": "0x80", + "EventName": "UNC_ARB_TRK_REQUESTS.EVICTIONS", + "BriefDescription": "Counts the number of LLC evictions allocated.", + "PublicDescription": "Counts the number of LLC evictions allocated.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x83", + "UMask": "0x01", + "EventName": "UNC_ARB_COH_TRK_OCCUPANCY.ALL", + "BriefDescription": "Cycles weighted by number of requests pending in Coherency Tracker.", + "PublicDescription": "Cycles weighted by number of requests pending in Coherency Tracker.", + "Counter": "0", + "CounterMask": "0", + "Invert": "0", 
+ "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x84", + "UMask": "0x01", + "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", + "BriefDescription": "Number of requests allocated in Coherency Tracker.", + "PublicDescription": "Number of requests allocated in Coherency Tracker.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST", + "BriefDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "Counter": "0,1", + "CounterMask": "1", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_OVER_HALF_FULL", + "BriefDescription": "Cycles with at least half of the requests outstanding are waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "PublicDescription": "Cycles with at least half of the requests outstanding are waiting for data return from memory controller. 
Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "Counter": "0,1", + "CounterMask": "10", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x0", + "UMask": "0x01", + "EventName": "UNC_CLOCK.SOCKET", + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "Counter": "Fixed", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x06", + "EventName": "UNC_CBO_CACHE_LOOKUP.ES", + "BriefDescription": "LLC lookup request that access cache and found line in E-state or S-state.", + "PublicDescription": "LLC lookup request that access cache and found line in E-state or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + } +] \ No newline at end of file -- cgit v1.2.3 From 0585c6265e66f952bcb6280cf078e5e120bd367a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 29 Mar 2017 17:17:02 -0700 Subject: perf vendor events intel: Add uncore events for Haswell client Add V25 of Haswell uncore events Cc: jolsa@kernel.org Link: http://lkml.kernel.org/n/tip-133r1do7vvssoyszxgx174hj@git.kernel.org Signed-off-by: Andi Kleen --- tools/perf/pmu-events/arch/x86/haswell/uncore.json | 374 +++++++++++++++++++++ 1 file changed, 374 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/haswell/uncore.json diff --git a/tools/perf/pmu-events/arch/x86/haswell/uncore.json b/tools/perf/pmu-events/arch/x86/haswell/uncore.json new file mode 100644 index 000000000000..3ef5c21fef56 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/haswell/uncore.json @@ -0,0 +1,374 @@ +[ + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x21", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EXTERNAL", + "BriefDescription": "An external snoop misses in some processor core.", + "PublicDescription": "An external snoop 
misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x41", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x81", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION", + "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", + "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x24", + "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_EXTERNAL", + "BriefDescription": "An external snoop hits a non-modified line in some processor core.", + "PublicDescription": "An external snoop hits a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x44", + "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x84", + "EventName": 
"UNC_CBO_XSNP_RESPONSE.HIT_EVICTION", + "BriefDescription": "A cross-core snoop resulted from L3 Eviction which hits a non-modified line in some processor core.", + "PublicDescription": "A cross-core snoop resulted from L3 Eviction which hits a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x28", + "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_EXTERNAL", + "BriefDescription": "An external snoop hits a modified line in some processor core.", + "PublicDescription": "An external snoop hits a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x48", + "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x88", + "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_EVICTION", + "BriefDescription": "A cross-core snoop resulted from L3 Eviction which hits a modified line in some processor core.", + "PublicDescription": "A cross-core snoop resulted from L3 Eviction which hits a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x11", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_M", + "BriefDescription": "L3 Lookup read request that access cache and found line in M-state.", + "PublicDescription": "L3 Lookup read request that access cache and found line in M-state.", 
+ "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x21", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_M", + "BriefDescription": "L3 Lookup write request that access cache and found line in M-state.", + "PublicDescription": "L3 Lookup write request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x41", + "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_M", + "BriefDescription": "L3 Lookup external snoop request that access cache and found line in M-state.", + "PublicDescription": "L3 Lookup external snoop request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x81", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_M", + "BriefDescription": "L3 Lookup any request that access cache and found line in M-state.", + "PublicDescription": "L3 Lookup any request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x18", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_I", + "BriefDescription": "L3 Lookup read request that access cache and found line in I-state.", + "PublicDescription": "L3 Lookup read request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x28", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_I", + "BriefDescription": "L3 Lookup write request that access cache and found line in I-state.", + "PublicDescription": "L3 Lookup write request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": 
"0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x48", + "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_I", + "BriefDescription": "L3 Lookup external snoop request that access cache and found line in I-state.", + "PublicDescription": "L3 Lookup external snoop request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x88", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_I", + "BriefDescription": "L3 Lookup any request that access cache and found line in I-state.", + "PublicDescription": "L3 Lookup any request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x1f", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_MESI", + "BriefDescription": "L3 Lookup read request that access cache and found line in any MESI-state.", + "PublicDescription": "L3 Lookup read request that access cache and found line in any MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x2f", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_MESI", + "BriefDescription": "L3 Lookup write request that access cache and found line in MESI-state.", + "PublicDescription": "L3 Lookup write request that access cache and found line in MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x4f", + "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_MESI", + "BriefDescription": "L3 Lookup external snoop request that access cache and found line in MESI-state.", + "PublicDescription": "L3 Lookup external snoop request that access cache and found line in MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + 
"Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x8f", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_MESI", + "BriefDescription": "L3 Lookup any request that access cache and found line in MESI-state.", + "PublicDescription": "L3 Lookup any request that access cache and found line in MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x86", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_ES", + "BriefDescription": "L3 Lookup any request that access cache and found line in E or S-state.", + "PublicDescription": "L3 Lookup any request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x46", + "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_ES", + "BriefDescription": "L3 Lookup external snoop request that access cache and found line in E or S-state.", + "PublicDescription": "L3 Lookup external snoop request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x16", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_ES", + "BriefDescription": "L3 Lookup read request that access cache and found line in E or S-state.", + "PublicDescription": "L3 Lookup read request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x26", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_ES", + "BriefDescription": "L3 Lookup write request that access cache and found line in E or S-state.", + "PublicDescription": "L3 Lookup write request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + 
"EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", + "BriefDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from its allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.", + "PublicDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from its allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.", + "Counter": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x81", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "BriefDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.", + "PublicDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x81", + "UMask": "0x20", + "EventName": "UNC_ARB_TRK_REQUESTS.WRITES", + "BriefDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.", + "PublicDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x83", + "UMask": "0x01", + "EventName": "UNC_ARB_COH_TRK_OCCUPANCY.All", + "BriefDescription": "Each cycle count number of valid entries in Coherency Tracker queue from allocation till deallocation. Aperture requests (snoops) appear as NC decoded internally and become coherent (snoop L3, access memory).", + "PublicDescription": "Each cycle count number of valid entries in Coherency Tracker queue from allocation till deallocation. 
Aperture requests (snoops) appear as NC decoded internally and become coherent (snoop L3, access memory).", + "Counter": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x84", + "UMask": "0x01", + "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", + "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.", + "PublicDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "NCU", + "EventCode": "0x0", + "UMask": "0x01", + "EventName": "UNC_CLOCK.SOCKET", + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "Counter": "FIXED", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + } +] \ No newline at end of file -- cgit v1.2.3 From 092a95d41655bdd31d7d28f1788818724505feb2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 29 Mar 2017 17:17:42 -0700 Subject: perf vendor events intel: Add uncore events for Broadwell client Add V18 of Broadwell uncore events Cc: jolsa@kernel.org Link: http://lkml.kernel.org/n/tip-xlbguqdzho7l3qn7di40a7av@git.kernel.org Signed-off-by: Andi Kleen --- .../perf/pmu-events/arch/x86/broadwell/uncore.json | 278 +++++++++++++++++++++ 1 file changed, 278 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/broadwell/uncore.json diff --git a/tools/perf/pmu-events/arch/x86/broadwell/uncore.json b/tools/perf/pmu-events/arch/x86/broadwell/uncore.json new file mode 100644 index 000000000000..28e1e159a3cb --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/broadwell/uncore.json @@ -0,0 +1,278 @@ +[ + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x41", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core 
memory request which misses in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x81", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION", + "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", + "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x44", + "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x48", + "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x11", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_M", + "BriefDescription": "L3 Lookup read request that access cache and found line in M-state", + "PublicDescription": "L3 Lookup read request that access cache and found 
line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x21", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_M", + "BriefDescription": "L3 Lookup write request that access cache and found line in M-state", + "PublicDescription": "L3 Lookup write request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x81", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_M", + "BriefDescription": "L3 Lookup any request that access cache and found line in M-state", + "PublicDescription": "L3 Lookup any request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x18", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_I", + "BriefDescription": "L3 Lookup read request that access cache and found line in I-state", + "PublicDescription": "L3 Lookup read request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x88", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_I", + "BriefDescription": "L3 Lookup any request that access cache and found line in I-state", + "PublicDescription": "L3 Lookup any request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x1f", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_MESI", + "BriefDescription": "L3 Lookup read request that access cache and found line in any MESI-state", + "PublicDescription": "L3 Lookup read request that access cache and found line in any MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + 
"EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x2f", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_MESI", + "BriefDescription": "L3 Lookup write request that access cache and found line in MESI-state", + "PublicDescription": "L3 Lookup write request that access cache and found line in MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x8f", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_MESI", + "BriefDescription": "L3 Lookup any request that access cache and found line in MESI-state", + "PublicDescription": "L3 Lookup any request that access cache and found line in MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x86", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_ES", + "BriefDescription": "L3 Lookup any request that access cache and found line in E or S-state", + "PublicDescription": "L3 Lookup any request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x16", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_ES", + "BriefDescription": "L3 Lookup read request that access cache and found line in E or S-state", + "PublicDescription": "L3 Lookup read request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x26", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_ES", + "BriefDescription": "L3 Lookup write request that access cache and found line in E or S-state", + "PublicDescription": "L3 Lookup write request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + 
"EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", + "BriefDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from its allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.", + "PublicDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from its allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.", + "Counter": "0,", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x80", + "UMask": "0x02", + "EventName": "UNC_ARB_TRK_OCCUPANCY.DRD_DIRECT", + "BriefDescription": "Each cycle count number of 'valid' coherent Data Read entries that are in DirectData mode. Such entry is defined as valid when it is allocated till data sent to Core (first chunk, IDI0). Applicable for IA Cores' requests in normal case.", + "PublicDescription": "Each cycle count number of 'valid' coherent Data Read entries that are in DirectData mode. Such entry is defined as valid when it is allocated till data sent to Core (first chunk, IDI0). Applicable for IA Cores' requests in normal case.", + "Counter": "0,", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x81", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "BriefDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.", + "PublicDescription": "Total number of Core outgoing entries allocated. 
Accounts for Coherent and non-coherent traffic.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x81", + "UMask": "0x02", + "EventName": "UNC_ARB_TRK_REQUESTS.DRD_DIRECT", + "BriefDescription": "Number of Core coherent Data Read entries allocated in DirectData mode", + "PublicDescription": "Number of Core coherent Data Read entries allocated in DirectData mode.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x81", + "UMask": "0x20", + "EventName": "UNC_ARB_TRK_REQUESTS.WRITES", + "BriefDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.", + "PublicDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x84", + "UMask": "0x01", + "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", + "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.", + "PublicDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST", + "BriefDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. 
Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "Counter": "0,", + "CounterMask": "1", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "NCU", + "EventCode": "0x0", + "UMask": "0x01", + "EventName": "UNC_CLOCK.SOCKET", + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles", + "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "Counter": "FIXED", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + } +] \ No newline at end of file -- cgit v1.2.3 From 92c6de0f10a80e4936fac04148bd3783a7c2b9f8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 29 Mar 2017 17:18:15 -0700 Subject: perf vendor events intel: Add uncore events for Skylake client Add V25 of Skylake uncore events Cc: jolsa@kernel.org Link: http://lkml.kernel.org/n/tip-00qmcrmq183x2qrj59g92fma@git.kernel.org Signed-off-by: Andi Kleen --- tools/perf/pmu-events/arch/x86/skylake/uncore.json | 254 +++++++++++++++++++++ 1 file changed, 254 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/skylake/uncore.json diff --git a/tools/perf/pmu-events/arch/x86/skylake/uncore.json b/tools/perf/pmu-events/arch/x86/skylake/uncore.json new file mode 100644 index 000000000000..dbc193252fb3 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/skylake/uncore.json @@ -0,0 +1,254 @@ +[ + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x41", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x81", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION", + "BriefDescription": "A 
cross-core snoop resulted from L3 Eviction which misses in some processor core.", + "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x44", + "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x48", + "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x21", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_M", + "BriefDescription": "L3 Lookup write request that access cache and found line in M-state", + "PublicDescription": "L3 Lookup write request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x81", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_M", + "BriefDescription": "L3 Lookup any request that access cache and found line in M-state", + "PublicDescription": "L3 Lookup any request that access cache and found line in M-state.", + "Counter": "0,1", + 
"CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x18", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_I", + "BriefDescription": "L3 Lookup read request that access cache and found line in I-state", + "PublicDescription": "L3 Lookup read request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x88", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_I", + "BriefDescription": "L3 Lookup any request that access cache and found line in I-state", + "PublicDescription": "L3 Lookup any request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x1f", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_MESI", + "BriefDescription": "L3 Lookup read request that access cache and found line in any MESI-state", + "PublicDescription": "L3 Lookup read request that access cache and found line in any MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x2f", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_MESI", + "BriefDescription": "L3 Lookup write request that access cache and found line in MESI-state", + "PublicDescription": "L3 Lookup write request that access cache and found line in MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x8f", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_MESI", + "BriefDescription": "L3 Lookup any request that access cache and found line in MESI-state", + "PublicDescription": "L3 Lookup any request that access cache and found line in MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + 
"Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x86", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_ES", + "BriefDescription": "L3 Lookup any request that access cache and found line in E or S-state", + "PublicDescription": "L3 Lookup any request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x16", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_ES", + "BriefDescription": "L3 Lookup read request that access cache and found line in E or S-state", + "PublicDescription": "L3 Lookup read request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x26", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_ES", + "BriefDescription": "L3 Lookup write request that access cache and found line in E or S-state", + "PublicDescription": "L3 Lookup write request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", + "BriefDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from its allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.", + "PublicDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from its allocation till first of IDI0 or DRS0 messages is sent out. 
Accounts for Coherent and non-coherent traffic.", + "Counter": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x81", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "BriefDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.", + "PublicDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x81", + "UMask": "0x02", + "EventName": "UNC_ARB_TRK_REQUESTS.DRD_DIRECT", + "BriefDescription": "Number of Core coherent Data Read entries allocated in DirectData mode", + "PublicDescription": "Number of Core coherent Data Read entries allocated in DirectData mode.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x81", + "UMask": "0x20", + "EventName": "UNC_ARB_TRK_REQUESTS.WRITES", + "BriefDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.", + "PublicDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x84", + "UMask": "0x01", + "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", + "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.", + "PublicDescription": "Number of entries allocated. Account for Any type: e.g. 
Snoop, Core aperture, etc.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST", + "BriefDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "Counter": "0", + "CounterMask": "1", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "NCU", + "EventCode": "0x0", + "UMask": "0x01", + "EventName": "UNC_CLOCK.SOCKET", + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles", + "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "Counter": "FIXED", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + } +] \ No newline at end of file -- cgit v1.2.3 From af34cb4fad1ba08db199ef1b0a529549e041dd25 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 29 Mar 2017 17:20:28 -0700 Subject: perf vendor events intel: Add uncore_arb JSON support The JSON lists call the box iMPH-U, while perf calls it arb. Add conversion support to json to convert the unit properly.
Cc: jolsa@kernel.org Link: http://lkml.kernel.org/n/tip-stq5ly95z2qioggp9bfaqe0h@git.kernel.org Signed-off-by: Andi Kleen --- tools/perf/pmu-events/jevents.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 81f2ef3b15cf..3a151c35852d 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -195,6 +195,7 @@ static struct map { { "CBO", "uncore_cbox" }, { "QPI LL", "uncore_qpi" }, { "SBO", "uncore_sbox" }, + { "iMPH-U", "uncore_arb" }, {} }; -- cgit v1.2.3 From 3401e8d1e1300742ed41910b9338b9da52689a16 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 29 Mar 2017 17:22:18 -0700 Subject: perf vendor events intel: Add missing space in json descriptions Add a missing space in the JSON description after the uncore unit Before: perf list ... unc_arb_coh_trk_requests.all [Unit: uncore_arbNumber of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc] ... After: unc_arb_coh_trk_requests.all [Unit: uncore_arb Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc] Cc: jolsa@kernel.org Link: http://lkml.kernel.org/n/tip-p989c7x9kaiy2bnkmgpo6cvt@git.kernel.org Signed-off-by: Andi Kleen --- tools/perf/pmu-events/jevents.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 3a151c35852d..baa073f38334 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -469,6 +469,7 @@ int json_events(const char *fn, } addfield(map, &desc, ". 
", "Unit: ", NULL); addfield(map, &desc, "", pmu, NULL); + addfield(map, &desc, "", " ", NULL); } else if (json_streq(map, field, "Filter")) { addfield(map, &filter, "", "", val); } else if (json_streq(map, field, "ScaleUnit")) { -- cgit v1.2.3 From 67ef28794d7e30f33936d655f2951e8dcae7cd5a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 30 Mar 2017 11:16:59 -0300 Subject: tools include uapi: Grab copies of stat.h and fcntl.h We will need it to build tools/perf/trace/beauty/statx.h. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-nin41ve2fa63lrfbdr6x57yr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/types.h | 1 + tools/include/uapi/linux/fcntl.h | 72 ++++++++++++++++ tools/include/uapi/linux/stat.h | 176 +++++++++++++++++++++++++++++++++++++++ tools/perf/MANIFEST | 2 + tools/perf/check-headers.sh | 2 + 5 files changed, 253 insertions(+) create mode 100644 tools/include/uapi/linux/fcntl.h create mode 100644 tools/include/uapi/linux/stat.h diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h index c24b3e3ae296..77a28a26a670 100644 --- a/tools/include/linux/types.h +++ b/tools/include/linux/types.h @@ -7,6 +7,7 @@ #define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */ #include +#include struct page; struct kmem_cache; diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h new file mode 100644 index 000000000000..813afd6eee71 --- /dev/null +++ b/tools/include/uapi/linux/fcntl.h @@ -0,0 +1,72 @@ +#ifndef _UAPI_LINUX_FCNTL_H +#define _UAPI_LINUX_FCNTL_H + +#include + +#define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0) +#define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1) + +/* + * Cancel a blocking posix lock; internal use only until we expose an + * asynchronous lock api to userspace: + */ +#define F_CANCELLK (F_LINUX_SPECIFIC_BASE + 5) + +/* Create a file descriptor with FD_CLOEXEC set. 
*/ +#define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6) + +/* + * Request nofications on a directory. + * See below for events that may be notified. + */ +#define F_NOTIFY (F_LINUX_SPECIFIC_BASE+2) + +/* + * Set and get of pipe page size array + */ +#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) +#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8) + +/* + * Set/Get seals + */ +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +/* + * Types of seals + */ +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +/* (1U << 31) is reserved for signed error codes */ + +/* + * Types of directory notifications that may be requested. + */ +#define DN_ACCESS 0x00000001 /* File accessed */ +#define DN_MODIFY 0x00000002 /* File modified */ +#define DN_CREATE 0x00000004 /* File created */ +#define DN_DELETE 0x00000008 /* File removed */ +#define DN_RENAME 0x00000010 /* File renamed */ +#define DN_ATTRIB 0x00000020 /* File changed attibutes */ +#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ + +#define AT_FDCWD -100 /* Special value used to indicate + openat should use the current + working directory. */ +#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ +#define AT_REMOVEDIR 0x200 /* Remove directory instead of + unlinking file. */ +#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. 
*/ +#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ +#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ + +#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ +#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ +#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ +#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ + + +#endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h new file mode 100644 index 000000000000..51a6b86e3700 --- /dev/null +++ b/tools/include/uapi/linux/stat.h @@ -0,0 +1,176 @@ +#ifndef _UAPI_LINUX_STAT_H +#define _UAPI_LINUX_STAT_H + +#include + +#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) + +#define S_IFMT 00170000 +#define S_IFSOCK 0140000 +#define S_IFLNK 0120000 +#define S_IFREG 0100000 +#define S_IFBLK 0060000 +#define S_IFDIR 0040000 +#define S_IFCHR 0020000 +#define S_IFIFO 0010000 +#define S_ISUID 0004000 +#define S_ISGID 0002000 +#define S_ISVTX 0001000 + +#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR) +#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK) +#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO) +#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK) + +#define S_IRWXU 00700 +#define S_IRUSR 00400 +#define S_IWUSR 00200 +#define S_IXUSR 00100 + +#define S_IRWXG 00070 +#define S_IRGRP 00040 +#define S_IWGRP 00020 +#define S_IXGRP 00010 + +#define S_IRWXO 00007 +#define S_IROTH 00004 +#define S_IWOTH 00002 +#define S_IXOTH 00001 + +#endif + +/* + * Timestamp structure for the timestamps in struct statx. + * + * tv_sec holds the number of seconds before (negative) or after (positive) + * 00:00:00 1st January 1970 UTC. 
+ * + * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is + * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time. + * + * Note that if both tv_sec and tv_nsec are non-zero, then the two values must + * either be both positive or both negative. + * + * __reserved is held in case we need a yet finer resolution. + */ +struct statx_timestamp { + __s64 tv_sec; + __s32 tv_nsec; + __s32 __reserved; +}; + +/* + * Structures for the extended file attribute retrieval system call + * (statx()). + * + * The caller passes a mask of what they're specifically interested in as a + * parameter to statx(). What statx() actually got will be indicated in + * st_mask upon return. + * + * For each bit in the mask argument: + * + * - if the datum is not supported: + * + * - the bit will be cleared, and + * + * - the datum will be set to an appropriate fabricated value if one is + * available (eg. CIFS can take a default uid and gid), otherwise + * + * - the field will be cleared; + * + * - otherwise, if explicitly requested: + * + * - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is + * set or if the datum is considered out of date, and + * + * - the field will be filled in and the bit will be set; + * + * - otherwise, if not requested, but available in approximate form without any + * effort, it will be filled in anyway, and the bit will be set upon return + * (it might not be up to date, however, and no attempt will be made to + * synchronise the internal state first); + * + * - otherwise the field and the bit will be cleared before returning. + * + * Items in STATX_BASIC_STATS may be marked unavailable on return, but they + * will have values installed for compatibility purposes so that stat() and + * co. can be emulated in userspace. 
+ */ +struct statx { + /* 0x00 */ + __u32 stx_mask; /* What results were written [uncond] */ + __u32 stx_blksize; /* Preferred general I/O size [uncond] */ + __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* 0x10 */ + __u32 stx_nlink; /* Number of hard links */ + __u32 stx_uid; /* User ID of owner */ + __u32 stx_gid; /* Group ID of owner */ + __u16 stx_mode; /* File mode */ + __u16 __spare0[1]; + /* 0x20 */ + __u64 stx_ino; /* Inode number */ + __u64 stx_size; /* File size */ + __u64 stx_blocks; /* Number of 512-byte blocks allocated */ + __u64 __spare1[1]; + /* 0x40 */ + struct statx_timestamp stx_atime; /* Last access time */ + struct statx_timestamp stx_btime; /* File creation time */ + struct statx_timestamp stx_ctime; /* Last attribute change time */ + struct statx_timestamp stx_mtime; /* Last data modification time */ + /* 0x80 */ + __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_minor; + __u32 stx_dev_major; /* ID of device containing file [uncond] */ + __u32 stx_dev_minor; + /* 0x90 */ + __u64 __spare2[14]; /* Spare space for future expansion */ + /* 0x100 */ +}; + +/* + * Flags to be stx_mask + * + * Query request/result mask for statx() and struct statx::stx_mask. + * + * These bits should be set in the mask argument of statx() to request + * particular items when calling statx(). 
+ */ +#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */ +#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */ +#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */ +#define STATX_UID 0x00000008U /* Want/got stx_uid */ +#define STATX_GID 0x00000010U /* Want/got stx_gid */ +#define STATX_ATIME 0x00000020U /* Want/got stx_atime */ +#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */ +#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */ +#define STATX_INO 0x00000100U /* Want/got stx_ino */ +#define STATX_SIZE 0x00000200U /* Want/got stx_size */ +#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ +#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ +#define STATX_BTIME 0x00000800U /* Want/got stx_btime */ +#define STATX_ALL 0x00000fffU /* All currently supported flags */ + +/* + * Attributes to be found in stx_attributes + * + * These give information about the features or the state of a file that might + * be of use to ordinary userspace programs such as GUIs or ls rather than + * specialised tools. + * + * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS + * semantically. Where possible, the numerical value is picked to correspond + * also. 
+ */ +#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */ +#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */ +#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */ +#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ +#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ + +#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ + + +#endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 28648c09dcd6..89018c7311a4 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -73,9 +73,11 @@ tools/include/uapi/asm-generic/mman-common.h tools/include/uapi/asm-generic/mman.h tools/include/uapi/linux/bpf.h tools/include/uapi/linux/bpf_common.h +tools/include/uapi/linux/fcntl.h tools/include/uapi/linux/hw_breakpoint.h tools/include/uapi/linux/mman.h tools/include/uapi/linux/perf_event.h +tools/include/uapi/linux/stat.h tools/include/linux/poison.h tools/include/linux/rbtree.h tools/include/linux/rbtree_augmented.h diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index c747bfd7f14d..83fe2202382e 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -1,7 +1,9 @@ #!/bin/sh HEADERS=' +include/uapi/linux/fcntl.h include/uapi/linux/perf_event.h +include/uapi/linux/stat.h include/linux/hash.h include/uapi/linux/hw_breakpoint.h arch/x86/include/asm/disabled-features.h -- cgit v1.2.3 From 3e00cbe8891a655520ca2cfe9b6d509d0a845f07 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Mar 2017 16:46:37 +0200 Subject: perf tools: Do not fail in case of empty HOME env variable Currently we fail in the following case: $ unset HOME $ ./perf record ls $ echo $? 255 It's because the config code init fails due to a missing HOME variable value. Fix this by skipping the user config init if there's no HOME variable value. 
Reported-by: Jan Stancek Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170330144637.7468-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/config.c | 54 +++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 0c7d5a4975cd..7b01d59076d3 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -627,6 +627,8 @@ static int perf_config_set__init(struct perf_config_set *set) { int ret = -1; const char *home = NULL; + char *user_config; + struct stat st; /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */ if (config_exclusive_filename) @@ -637,35 +639,41 @@ static int perf_config_set__init(struct perf_config_set *set) } home = getenv("HOME"); - if (perf_config_global() && home) { - char *user_config = strdup(mkpath("%s/.perfconfig", home)); - struct stat st; - if (user_config == NULL) { - warning("Not enough memory to process %s/.perfconfig, " - "ignoring it.", home); - goto out; - } + /* + * Skip reading user config if: + * - there is no place to read it from (HOME) + * - we are asked not to (PERF_CONFIG_NOGLOBAL=1) + */ + if (!home || !*home || !perf_config_global()) + return 0; - if (stat(user_config, &st) < 0) { - if (errno == ENOENT) - ret = 0; - goto out_free; - } + user_config = strdup(mkpath("%s/.perfconfig", home)); + if (user_config == NULL) { + warning("Not enough memory to process %s/.perfconfig, " + "ignoring it.", home); + goto out; + } + + if (stat(user_config, &st) < 0) { + if (errno == ENOENT) + ret = 0; + goto out_free; + } - ret = 0; + ret = 0; - if (st.st_uid && (st.st_uid != geteuid())) { - warning("File %s not owned by current user or root, " - "ignoring it.", user_config); - goto out_free; - } + if (st.st_uid && (st.st_uid != geteuid())) { + warning("File %s not owned by current user or root, " + "ignoring 
it.", user_config); + goto out_free; + } + + if (st.st_size) + ret = perf_config_from_file(collect_config, user_config, set); - if (st.st_size) - ret = perf_config_from_file(collect_config, user_config, set); out_free: - free(user_config); - } + free(user_config); out: return ret; } -- cgit v1.2.3 From fd5cead23f54697310bd565aa2a23ae5128080a0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Mar 2017 16:19:30 -0300 Subject: perf trace: Beautify statx syscall 'flag' and 'mask' arguments To test it, build samples/statx/test-statx.c, which I did as: $ make headers_install $ cc -I ~/git/linux/usr/include samples/statx/test-statx.c -o /tmp/statx And then use perf trace on it: # perf trace -e statx /tmp/statx /etc/passwd statx(/etc/passwd) = 0 results=7ff Size: 3496 Blocks: 8 IO Block: 4096 regular file Device: fd:00 Inode: 280156 Links: 1 Access: (0644/-rw-r--r--) Uid: 0 Gid: 0 Access: 2017-03-29 16:01:01.650073438-0300 Modify: 2017-03-10 16:25:14.156479354-0300 Change: 2017-03-10 16:25:14.171479328-0300 0.000 ( 0.007 ms): statx/30648 statx(dfd: CWD, filename: 0x7ef503f4, flags: SYMLINK_NOFOLLOW, mask: TYPE|MODE|NLINK|UID|GID|ATIME|MTIME|CTIME|INO|SIZE|BLOCKS|BTIME, buffer: 0x7fff7ef4eb10) = 0 # Using the test-statx.c options to change the mask: # perf trace -e statx /tmp/statx -O /etc/passwd > /dev/null 0.000 ( 0.008 ms): statx/30745 statx(dfd: CWD, filename: 0x3a0753f4, flags: SYMLINK_NOFOLLOW, mask: BTIME, buffer: 0x7ffd3a0735c0) = 0 # # perf trace -e statx /tmp/statx -A /etc/passwd > /dev/null 0.000 ( 0.010 ms): statx/30757 statx(dfd: CWD, filename: 0xa94e63f4, flags: SYMLINK_NOFOLLOW|NO_AUTOMOUNT, mask: TYPE|MODE|NLINK|UID|GID|ATIME|MTIME|CTIME|INO|SIZE|BLOCKS|BTIME, buffer: 0x7ffea94e49d0) = 0 # # trace --no-inherit -e statx /tmp/statx -F /etc/passwd > /dev/null 0.000 ( 0.011 ms): statx(dfd: CWD, filename: 0x3b02d3f3, flags: SYMLINK_NOFOLLOW|STATX_FORCE_SYNC, mask: TYPE|MODE|NLINK|UID|GID|ATIME|MTIME|CTIME|INO|SIZE|BLOCKS|BTIME, buffer:
0x7ffd3b02c850) = 0 # # trace --no-inherit -e statx /tmp/statx -F -L /etc/passwd > /dev/null 0.000 ( 0.008 ms): statx(dfd: CWD, filename: 0x15cff3f3, flags: STATX_FORCE_SYNC, mask: TYPE|MODE|NLINK|UID|GID|ATIME|MTIME|CTIME|INO|SIZE|BLOCKS|BTIME, buffer: 0x7fff15cfdda0) = 0 # # trace --no-inherit -e statx /tmp/statx -D -O /etc/passwd > /dev/null 0.000 ( 0.009 ms): statx(dfd: CWD, filename: 0xfa37f3f3, flags: SYMLINK_NOFOLLOW|STATX_DONT_SYNC, mask: BTIME, buffer: 0x7ffffa37da20) = 0 # Adding a probe to get the filename collected as well: # perf probe 'vfs_getname=getname_flags:72 pathname=result->name:string' Added new event: probe:vfs_getname (on getname_flags:72 with pathname=result->name:string) You can now use it in all perf tools, such as: perf record -e probe:vfs_getname -aR sleep 1 # trace --no-inherit -e statx /tmp/statx -D -O /etc/passwd > /dev/null 0.169 ( 0.007 ms): statx(dfd: CWD, filename: /etc/passwd, flags: SYMLINK_NOFOLLOW|STATX_DONT_SYNC, mask: BTIME, buffer: 0x7ffda9bf50f0) = 0 # Same technique could be used to collect and beautify the result put in the 'buffer' argument. 
Finally do a system wide 'perf trace' session looking for any use of statx, then run the test proggie with various flags: # trace -e statx 16612.967 ( 0.028 ms): statx/4562 statx(dfd: CWD, filename: /tmp/statx, flags: SYMLINK_NOFOLLOW, mask: TYPE|MODE|NLINK|UID|GID|ATIME|MTIME|CTIME|INO|SIZE|BLOCKS|BTIME, buffer: 0x7ffef195d660) = 0 33064.447 ( 0.011 ms): statx/4569 statx(dfd: CWD, filename: /tmp/statx, flags: SYMLINK_NOFOLLOW|STATX_FORCE_SYNC, mask: TYPE|MODE|NLINK|UID|GID|ATIME|MTIME|CTIME|INO|SIZE|BLOCKS|BTIME, buffer: 0x7ffc5484c790) = 0 36050.891 ( 0.023 ms): statx/4576 statx(dfd: CWD, filename: /tmp/statx, flags: SYMLINK_NOFOLLOW, mask: BTIME, buffer: 0x7ffeb18b66e0) = 0 38039.889 ( 0.023 ms): statx/4584 statx(dfd: CWD, filename: /tmp/statx, flags: SYMLINK_NOFOLLOW, mask: TYPE|MODE|NLINK|UID|GID|ATIME|MTIME|CTIME|INO|SIZE|BLOCKS|BTIME, buffer: 0x7fff1db0ea90) = 0 ^C# This one also starts moving the beautifiers from files directly included in builtin-trace.c to separate objects + a beauty.h header with prototypes, so that we can add test cases in tools/perf/tests/ to fire syscalls with various arguments and then get them intercepted as syscalls:sys_enter_foo or raw_syscalls:sys_enter + sys_exit to then format and check that the formatted output is the one we expect. 
Cc: Adrian Hunter Cc: Al Viro Cc: David Ahern Cc: David Howells Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-xvzw8eynffvez5czyzidhrno@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Build | 1 + tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 1 + tools/perf/builtin-trace.c | 14 ++--- tools/perf/trace/beauty/Build | 1 + tools/perf/trace/beauty/beauty.h | 24 ++++++++ tools/perf/trace/beauty/statx.c | 72 +++++++++++++++++++++++ 6 files changed, 104 insertions(+), 9 deletions(-) create mode 100644 tools/perf/trace/beauty/Build create mode 100644 tools/perf/trace/beauty/beauty.h create mode 100644 tools/perf/trace/beauty/statx.c diff --git a/tools/perf/Build b/tools/perf/Build index 9b79f8d7db50..bd8eeb60533c 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -50,5 +50,6 @@ libperf-y += util/ libperf-y += arch/ libperf-y += ui/ libperf-y += scripts/ +libperf-y += trace/beauty/ gtk-y += ui/gtk/ diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index e93ef0b38db8..5aef183e2f85 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -338,6 +338,7 @@ 329 common pkey_mprotect sys_pkey_mprotect 330 common pkey_alloc sys_pkey_alloc 331 common pkey_free sys_pkey_free +332 common statx sys_statx # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7379792a6504..fce278d5fada 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -31,6 +31,7 @@ #include "util/intlist.h" #include "util/thread_map.h" #include "util/stat.h" +#include "trace/beauty/beauty.h" #include "trace-event.h" #include "util/parse-events.h" #include "util/bpf-loader.h" @@ -267,15 +268,6 @@ out_delete: ({ struct syscall_tp *fields = evsel->priv; \ fields->name.pointer(&fields->name, sample); }) -struct 
syscall_arg { - unsigned long val; - struct thread *thread; - struct trace *trace; - void *parm; - u8 idx; - u8 mask; -}; - struct strarray { int offset; int nr_entries; @@ -771,6 +763,10 @@ static struct syscall_fmt { .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, { .name = "stat", .errmsg = true, .alias = "newstat", }, { .name = "statfs", .errmsg = true, }, + { .name = "statx", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ + [2] = SCA_STATX_FLAGS, /* flags */ + [3] = SCA_STATX_MASK, /* mask */ }, }, { .name = "swapoff", .errmsg = true, .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, }, { .name = "swapon", .errmsg = true, diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build new file mode 100644 index 000000000000..be95ac6ce845 --- /dev/null +++ b/tools/perf/trace/beauty/Build @@ -0,0 +1 @@ +libperf-y += statx.o diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h new file mode 100644 index 000000000000..cf50be3f17a4 --- /dev/null +++ b/tools/perf/trace/beauty/beauty.h @@ -0,0 +1,24 @@ +#ifndef _PERF_TRACE_BEAUTY_H +#define _PERF_TRACE_BEAUTY_H + +#include + +struct trace; +struct thread; + +struct syscall_arg { + unsigned long val; + struct thread *thread; + struct trace *trace; + void *parm; + u8 idx; + u8 mask; +}; + +size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags + +size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_STATX_MASK syscall_arg__scnprintf_statx_mask + +#endif /* _PERF_TRACE_BEAUTY_H */ diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c new file mode 100644 index 000000000000..5643b692af4c --- /dev/null +++ b/tools/perf/trace/beauty/statx.c @@ -0,0 +1,72 @@ +/* + * trace/beauty/statx.c + * + * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Released
under the GPL v2. (and only v2, not any later version) + */ + +#include "trace/beauty/beauty.h" +#include +#include +#include +#include + +size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + + if (flags == 0) + return scnprintf(bf, size, "SYNC_AS_STAT"); +#define P_FLAG(n) \ + if (flags & AT_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~AT_##n; \ + } + + P_FLAG(SYMLINK_NOFOLLOW); + P_FLAG(REMOVEDIR); + P_FLAG(SYMLINK_FOLLOW); + P_FLAG(NO_AUTOMOUNT); + P_FLAG(EMPTY_PATH); + P_FLAG(STATX_FORCE_SYNC); + P_FLAG(STATX_DONT_SYNC); + +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + +#define P_FLAG(n) \ + if (flags & STATX_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~STATX_##n; \ + } + + P_FLAG(TYPE); + P_FLAG(MODE); + P_FLAG(NLINK); + P_FLAG(UID); + P_FLAG(GID); + P_FLAG(ATIME); + P_FLAG(MTIME); + P_FLAG(CTIME); + P_FLAG(INO); + P_FLAG(SIZE); + P_FLAG(BLOCKS); + P_FLAG(BTIME); + +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} -- cgit v1.2.3 From f5a70801b7832bfcb865e95c39bdef8eac21226f Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 28 Mar 2017 15:17:54 +0530 Subject: perf sdt powerpc: Add argument support SDT marker argument is in N@OP format. Here OP is arch dependent component. Add powerpc logic to parse OP and convert it to uprobe compatible format. Signed-off-by: Ravi Bangoria Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Alexis Berlemont Cc: Hemant Kumar Cc: Michael Ellerman Cc: Naveen N. 
Rao Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170328094754.3156-4-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/perf_regs.c | 111 +++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c index a3c3e1ce6807..4268f7762e25 100644 --- a/tools/perf/arch/powerpc/util/perf_regs.c +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -1,5 +1,10 @@ +#include +#include + #include "../../perf.h" +#include "../../util/util.h" #include "../../util/perf_regs.h" +#include "../../util/debug.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG(r0, PERF_REG_POWERPC_R0), @@ -47,3 +52,109 @@ const struct sample_reg sample_reg_masks[] = { SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR), SMPL_REG_END }; + +/* REG or %rREG */ +#define SDT_OP_REGEX1 "^(%r)?([1-2]?[0-9]|3[0-1])$" + +/* -NUM(REG) or NUM(REG) or -NUM(%rREG) or NUM(%rREG) */ +#define SDT_OP_REGEX2 "^(\\-)?([0-9]+)\\((%r)?([1-2]?[0-9]|3[0-1])\\)$" + +static regex_t sdt_op_regex1, sdt_op_regex2; + +static int sdt_init_op_regex(void) +{ + static int initialized; + int ret = 0; + + if (initialized) + return 0; + + ret = regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED); + if (ret) + goto error; + + ret = regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED); + if (ret) + goto free_regex1; + + initialized = 1; + return 0; + +free_regex1: + regfree(&sdt_op_regex1); +error: + pr_debug4("Regex compilation error.\n"); + return ret; +} + +/* + * Parse OP and convert it into uprobe format, which is, +/-NUM(%gprREG). + * Possible variants of OP are: + * Format Example + * ------------------------- + * NUM(REG) 48(18) + * -NUM(REG) -48(18) + * NUM(%rREG) 48(%r18) + * -NUM(%rREG) -48(%r18) + * REG 18 + * %rREG %r18 + * iNUM i0 + * i-NUM i-1 + * + * SDT marker arguments on Powerpc uses %rREG form with -mregnames flag + * and REG form with -mno-regnames. 
Here REG is general purpose register, + * which is in 0 to 31 range. + */ +int arch_sdt_arg_parse_op(char *old_op, char **new_op) +{ + int ret, new_len; + regmatch_t rm[5]; + char prefix; + + /* Constant argument. Uprobe does not support it */ + if (old_op[0] == 'i') { + pr_debug4("Skipping unsupported SDT argument: %s\n", old_op); + return SDT_ARG_SKIP; + } + + ret = sdt_init_op_regex(); + if (ret < 0) + return ret; + + if (!regexec(&sdt_op_regex1, old_op, 3, rm, 0)) { + /* REG or %rREG --> %gprREG */ + + new_len = 5; /* % g p r NULL */ + new_len += (int)(rm[2].rm_eo - rm[2].rm_so); + + *new_op = zalloc(new_len); + if (!*new_op) + return -ENOMEM; + + scnprintf(*new_op, new_len, "%%gpr%.*s", + (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so); + } else if (!regexec(&sdt_op_regex2, old_op, 5, rm, 0)) { + /* + * -NUM(REG) or NUM(REG) or -NUM(%rREG) or NUM(%rREG) --> + * +/-NUM(%gprREG) + */ + prefix = (rm[1].rm_so == -1) ? '+' : '-'; + + new_len = 8; /* +/- ( % g p r ) NULL */ + new_len += (int)(rm[2].rm_eo - rm[2].rm_so); + new_len += (int)(rm[4].rm_eo - rm[4].rm_so); + + *new_op = zalloc(new_len); + if (!*new_op) + return -ENOMEM; + + scnprintf(*new_op, new_len, "%c%.*s(%%gpr%.*s)", prefix, + (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so, + (int)(rm[4].rm_eo - rm[4].rm_so), old_op + rm[4].rm_so); + } else { + pr_debug4("Skipping unsupported SDT argument: %s\n", old_op); + return SDT_ARG_SKIP; + } + + return SDT_ARG_VALID; +} -- cgit v1.2.3 From 427748068a973627b406bf7312342b6fe4742d07 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 4 Apr 2017 11:36:22 -0300 Subject: perf tools: Remove die() call We can just use the exit() right after the branch calling die(). 
Link: http://lkml.kernel.org/n/tip-90athn06d7atf2jkpfvq1iic@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 9217f2227f3d..9dc346f2b255 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -378,7 +378,8 @@ static void execv_dashed_external(const char **argv) if (status != -ERR_RUN_COMMAND_EXEC) { if (IS_RUN_COMMAND_ERR(status)) { do_die: - die("unable to run '%s'", argv[0]); + pr_err("FATAL: unable to run '%s'", argv[0]); + status = -128; } exit(-status); } -- cgit v1.2.3 From f05082b5479d91ee5c04f311acaa394ce0c5e6d2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 4 Apr 2017 12:05:37 -0300 Subject: perf tools: Handle allocation failures gracefully The callers of perf_read_values__enlarge_counters() already propagate errors, so just print some debug diagnostics and handle allocation failures gracefully, not trying to do silly things like 'a = realloc(a)'. 
Link: http://lkml.kernel.org/n/tip-nsmmh7uzpg35rzcl9nq7yztp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/values.c | 54 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 5074be4ed467..2a8efa7b7c48 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -108,24 +108,45 @@ static int perf_read_values__findnew_thread(struct perf_read_values *values, return i; } -static void perf_read_values__enlarge_counters(struct perf_read_values *values) +static int perf_read_values__enlarge_counters(struct perf_read_values *values) { - int i; + char **countername; + int i, counters_max = values->counters_max * 2; + u64 *counterrawid = realloc(values->counterrawid, counters_max * sizeof(*values->counterrawid)); + + if (!counterrawid) { + pr_debug("failed to enlarge read_values rawid array"); + goto out_enomem; + } - values->counters_max *= 2; - values->counterrawid = realloc(values->counterrawid, - values->counters_max * sizeof(*values->counterrawid)); - values->countername = realloc(values->countername, - values->counters_max * sizeof(*values->countername)); - if (!values->counterrawid || !values->countername) - die("failed to enlarge read_values counters arrays"); + countername = realloc(values->countername, counters_max * sizeof(*values->countername)); + if (!countername) { + pr_debug("failed to enlarge read_values rawid array"); + goto out_free_rawid; + } for (i = 0; i < values->threads; i++) { - values->value[i] = realloc(values->value[i], - values->counters_max * sizeof(**values->value)); - if (!values->value[i]) - die("failed to enlarge read_values counters arrays"); + u64 *value = realloc(values->value[i], counters_max * sizeof(**values->value)); + + if (value) { + pr_debug("failed to enlarge read_values ->values array"); + goto out_free_name; + } + + values->value[i] = value; } + + values->counters_max = 
counters_max; + values->counterrawid = counterrawid; + values->countername = countername; + + return 0; +out_free_name: + free(countername); +out_free_rawid: + free(counterrawid); +out_enomem: + return -ENOMEM; } static int perf_read_values__findnew_counter(struct perf_read_values *values, @@ -137,8 +158,11 @@ static int perf_read_values__findnew_counter(struct perf_read_values *values, if (values->counterrawid[i] == rawid) return i; - if (values->counters == values->counters_max) - perf_read_values__enlarge_counters(values); + if (values->counters == values->counters_max) { + i = perf_read_values__enlarge_counters(values); + if (i) + return i; + } i = values->counters++; values->counterrawid[i] = rawid; -- cgit v1.2.3 From 9c0899f15766158df8b3bd152f5da0b6c1bd2806 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 4 Apr 2017 12:11:07 -0300 Subject: perf tools: Don't die on a print function Trying to remove die() calls from library functions, postponing exiting to the tool main code. 
Link: http://lkml.kernel.org/n/tip-ackxq5nqe39gunln3tkczs42@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/values.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 2a8efa7b7c48..5de2e15e2eda 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -1,4 +1,7 @@ +#include +#include #include +#include #include "util.h" #include "values.h" @@ -196,8 +199,10 @@ static void perf_read_values__display_pretty(FILE *fp, int *counterwidth; counterwidth = malloc(values->counters * sizeof(*counterwidth)); - if (!counterwidth) - die("failed to allocate counterwidth array"); + if (!counterwidth) { + fprintf(fp, "INTERNAL ERROR: Failed to allocate counterwidth array\n"); + return; + } tidwidth = 3; pidwidth = 3; for (j = 0; j < values->counters; j++) -- cgit v1.2.3 From 99094a5e941fe88d95cbd594e6a41bee24003ecb Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Tue, 28 Mar 2017 21:12:05 +0900 Subject: perf annotate: Fix missing number of samples for source_line_samples The 'show-total-period' option works fine without the '-l' option. But when running 'perf annotate --stdio -l --show-total-period', only zero '0' is shown for the number of samples. Before: $ perf annotate --stdio -l --show-total-period ... 0 : 400816: push %rbp 0 : 400817: mov %rsp,%rbp 0 : 40081a: mov %edi,-0x24(%rbp) 0 : 40081d: mov %rsi,-0x30(%rbp) 0 : 400821: mov -0x24(%rbp),%eax 0 : 400824: mov -0x30(%rbp),%rdx 0 : 400828: mov (%rdx),%esi 0 : 40082a: mov $0x0,%edx ... The reason is that the number of samples in source_line_samples was never set, so set it properly. After: $ perf annotate --stdio -l --show-total-period ... 3 : 400816: push %rbp 4 : 400817: mov %rsp,%rbp 0 : 40081a: mov %edi,-0x24(%rbp) 0 : 40081d: mov %rsi,-0x30(%rbp) 1 : 400821: mov -0x24(%rbp),%eax 2 : 400824: mov -0x30(%rbp),%rdx 0 : 400828: mov (%rdx),%esi 1 : 40082a: mov $0x0,%edx ... 
Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Martin Liska Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Fixes: 0c4a5bcea460 ("perf annotate: Display total number of samples with --show-total-period") Link: http://lkml.kernel.org/r/1490703125-13643-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 6 ++++-- tools/perf/util/annotate.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 11af5f0d56cc..a37032bd137d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1665,7 +1665,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, start = map__rip_2objdump(map, sym->start); for (i = 0; i < len; i++) { - u64 offset; + u64 offset, nr_samples; double percent_max = 0.0; src_line->nr_pcnt = nr_pcnt; @@ -1674,12 +1674,14 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, double percent = 0.0; h = annotation__histogram(notes, evidx + k); + nr_samples = h->addr[i]; if (h->sum) - percent = 100.0 * h->addr[i] / h->sum; + percent = 100.0 * nr_samples / h->sum; if (percent > percent_max) percent_max = percent; src_line->samples[k].percent = percent; + src_line->samples[k].nr = nr_samples; } if (percent_max <= 0.5) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 09776b5af991..948aa8e6fd39 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -98,7 +98,7 @@ struct cyc_hist { struct source_line_samples { double percent; double percent_sum; - double nr; + u64 nr; }; struct source_line { -- cgit v1.2.3 From c2628f90c9964881a62dd8e9f7372ca05cb6fe32 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 10 Apr 2017 14:20:45 +0200 Subject: perf/amd/uncore: Do feature check first, before assignments ... and save some unnecessary work. Remove now unused label while at it. 
Signed-off-by: Borislav Petkov Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20170410122047.3026-2-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/events/amd/uncore.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 4d1f7f2d9aff..abd4b9064fba 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -509,7 +509,10 @@ static int __init amd_uncore_init(void) int ret = -ENODEV; if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) - goto fail_nodev; + return -ENODEV; + + if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) + return -ENODEV; switch(boot_cpu_data.x86) { case 23: @@ -552,9 +555,6 @@ static int __init amd_uncore_init(void) amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df; amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3; - if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) - goto fail_nodev; - if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) { amd_uncore_nb = alloc_percpu(struct amd_uncore *); if (!amd_uncore_nb) { @@ -615,7 +615,6 @@ fail_nb: if (amd_uncore_nb) free_percpu(amd_uncore_nb); -fail_nodev: return ret; } device_initcall(amd_uncore_init); -- cgit v1.2.3 From 68e8038048f44e7782079e79338506246393a876 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 10 Apr 2017 14:20:46 +0200 Subject: perf/amd/uncore: Clean up per-family setup Fam16h is the same as the default one, remove it. Turn the switch-case into a simple if-else. No functionality change. 
Signed-off-by: Borislav Petkov Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20170410122047.3026-3-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/events/amd/uncore.c | 59 ++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 38 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index abd4b9064fba..975f24f6e238 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -514,45 +514,28 @@ static int __init amd_uncore_init(void) if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) return -ENODEV; - switch(boot_cpu_data.x86) { - case 23: - /* Family 17h: */ - num_counters_nb = NUM_COUNTERS_NB; - num_counters_llc = NUM_COUNTERS_L3; - /* - * For Family17h, the NorthBridge counters are - * re-purposed as Data Fabric counters. Also, support is - * added for L3 counters. The pmus are exported based on - * family as either L2 or L3 and NB or DF. 
- */ - amd_nb_pmu.name = "amd_df"; - amd_llc_pmu.name = "amd_l3"; - format_attr_event_df.show = &event_show_df; - format_attr_event_l3.show = &event_show_l3; - break; - case 22: - /* Family 16h - may change: */ - num_counters_nb = NUM_COUNTERS_NB; - num_counters_llc = NUM_COUNTERS_L2; - amd_nb_pmu.name = "amd_nb"; - amd_llc_pmu.name = "amd_l2"; - format_attr_event_df = format_attr_event; - format_attr_event_l3 = format_attr_event; - break; - default: - /* - * All prior families have the same number of - * NorthBridge and Last Level Cache counters - */ - num_counters_nb = NUM_COUNTERS_NB; - num_counters_llc = NUM_COUNTERS_L2; - amd_nb_pmu.name = "amd_nb"; - amd_llc_pmu.name = "amd_l2"; - format_attr_event_df = format_attr_event; - format_attr_event_l3 = format_attr_event; - break; + if (boot_cpu_data.x86 == 0x17) { + /* + * For F17h, the Northbridge counters are repurposed as Data + * Fabric counters. Also, L3 counters are supported too. The PMUs + * are exported based on family as either L2 or L3 and NB or DF. + */ + num_counters_nb = NUM_COUNTERS_NB; + num_counters_llc = NUM_COUNTERS_L3; + amd_nb_pmu.name = "amd_df"; + amd_llc_pmu.name = "amd_l3"; + format_attr_event_df.show = &event_show_df; + format_attr_event_l3.show = &event_show_l3; + } else { + num_counters_nb = NUM_COUNTERS_NB; + num_counters_llc = NUM_COUNTERS_L2; + amd_nb_pmu.name = "amd_nb"; + amd_llc_pmu.name = "amd_l2"; + format_attr_event_df = format_attr_event; + format_attr_event_l3 = format_attr_event; } - amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df; + + amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df; amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3; if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) { -- cgit v1.2.3 From 9df9078ef2086652647248ee6e82ca8f661cb3f5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 10 Apr 2017 14:20:47 +0200 Subject: perf/amd/uncore: Fix pr_fmt() prefix Make it "perf/amd/uncore: ", i.e., something more specific than "perf: ". 
Signed-off-by: Borislav Petkov Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20170410122047.3026-4-bp@alien8.de [ Changed it to perf/amd/uncore/ ] Signed-off-by: Ingo Molnar --- arch/x86/events/amd/uncore.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 975f24f6e238..ad44af0dd667 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -30,6 +30,9 @@ #define COUNTER_SHIFT 16 +#undef pr_fmt +#define pr_fmt(fmt) "amd_uncore: " fmt + static int num_counters_llc; static int num_counters_nb; @@ -548,7 +551,7 @@ static int __init amd_uncore_init(void) if (ret) goto fail_nb; - pr_info("perf: AMD NB counters detected\n"); + pr_info("AMD NB counters detected\n"); ret = 0; } @@ -562,7 +565,7 @@ static int __init amd_uncore_init(void) if (ret) goto fail_llc; - pr_info("perf: AMD LLC counters detected\n"); + pr_info("AMD LLC counters detected\n"); ret = 0; } -- cgit v1.2.3 From dadafc315ded31a36cb4899bc85ef4050f0332a7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Apr 2017 11:38:10 -0300 Subject: perf callchains: Switch from strtok() to strtok_r() when parsing options Trying to keep everything reentrant. 
Cc: Namhyung Kim Link: http://lkml.kernel.org/n/tip-rdce0p2k9e1b4qnrb8ki9mtf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 3cea1fb5404b..2e5eff5abef0 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -116,7 +116,7 @@ static int __parse_callchain_report_opt(const char *arg, bool allow_record_opt) { char *tok; - char *endptr; + char *endptr, *saveptr = NULL; bool minpcnt_set = false; bool record_opt_set = false; bool try_stack_size = false; @@ -127,7 +127,7 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt) if (!arg) return 0; - while ((tok = strtok((char *)arg, ",")) != NULL) { + while ((tok = strtok_r((char *)arg, ",", &saveptr)) != NULL) { if (!strncmp(tok, "none", strlen(tok))) { callchain_param.mode = CHAIN_NONE; callchain_param.enabled = false; -- cgit v1.2.3 From 49346e858f34eda103d7c0e85c06edbaebfc83a9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Apr 2017 11:43:41 -0300 Subject: perf script: Use strtok_r() when parsing output field list Just avoiding non-reentrant functions. 
Cc: David Ahern Link: http://lkml.kernel.org/n/tip-eqytykipd74epzl9aexvppcg@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 46acc8ece41f..2dab70fba2ba 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1708,7 +1708,7 @@ static int parse_scriptname(const struct option *opt __maybe_unused, static int parse_output_fields(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused) { - char *tok; + char *tok, *strtok_saveptr = NULL; int i, imax = ARRAY_SIZE(all_output_options); int j; int rc = 0; @@ -1769,7 +1769,7 @@ static int parse_output_fields(const struct option *opt __maybe_unused, } } - for (tok = strtok(tok, ","); tok; tok = strtok(NULL, ",")) { + for (tok = strtok_r(tok, ",", &strtok_saveptr); tok; tok = strtok_r(NULL, ",", &strtok_saveptr)) { for (i = 0; i < imax; ++i) { if (strcmp(tok, all_output_options[i].str) == 0) break; -- cgit v1.2.3 From 32ccb130f5325abc81b32b1a538390f46e4860f6 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 7 Apr 2017 20:08:52 +0800 Subject: perf evsel: Return exact sub event which failed with EPERM for wildcards The kernel has a special check for a specific irq_vectors trace event. TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? -EPERM : 0); The perf-record fails for this irq_vectors event when it is present, like when using a wildcard: root@skl:/tmp# perf record -a -e irq_vectors:* sleep 2 Error: You may not have permission to collect system-wide stats. Consider tweaking /proc/sys/kernel/perf_event_paranoid, which controls use of the performance events system by unprivileged users (without CAP_SYS_ADMIN). 
The current value is 2: -1: Allow use of (almost) all events by all users >= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK >= 1: Disallow CPU event access by users without CAP_SYS_ADMIN >= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN To make this setting permanent, edit /etc/sysctl.conf too, e.g.: kernel.perf_event_paranoid = -1 This patch prints out the exact sub event that failed with EPERM for wildcards to help in understanding what went wrong when this event is present: After the patch: root@skl:/tmp# perf record -a -e irq_vectors:* sleep 2 Error: No permission to enable irq_vectors:irq_work_exit event. You may not have permission to collect system-wide stats. ...... Committer notes: So we have a lot of irq_vectors events: [root@jouet ~]# perf list irq_vectors:* List of pre-defined events (to be used in -e): irq_vectors:call_function_entry [Tracepoint event] irq_vectors:call_function_exit [Tracepoint event] irq_vectors:call_function_single_entry [Tracepoint event] irq_vectors:call_function_single_exit [Tracepoint event] irq_vectors:deferred_error_apic_entry [Tracepoint event] irq_vectors:deferred_error_apic_exit [Tracepoint event] irq_vectors:error_apic_entry [Tracepoint event] irq_vectors:error_apic_exit [Tracepoint event] irq_vectors:irq_work_entry [Tracepoint event] irq_vectors:irq_work_exit [Tracepoint event] irq_vectors:local_timer_entry [Tracepoint event] irq_vectors:local_timer_exit [Tracepoint event] irq_vectors:reschedule_entry [Tracepoint event] irq_vectors:reschedule_exit [Tracepoint event] irq_vectors:spurious_apic_entry [Tracepoint event] irq_vectors:spurious_apic_exit [Tracepoint event] irq_vectors:thermal_apic_entry [Tracepoint event] irq_vectors:thermal_apic_exit [Tracepoint event] irq_vectors:threshold_apic_entry [Tracepoint event] irq_vectors:threshold_apic_exit [Tracepoint event] irq_vectors:x86_platform_ipi_entry [Tracepoint event] irq_vectors:x86_platform_ipi_exit [Tracepoint event] # And some may be 
sampled: [root@jouet ~]# perf record -e irq_vectors:local* sleep 20s [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.020 MB perf.data (2 samples) ] [root@jouet ~]# perf report -D | egrep 'stats:|events:' Aggregated stats: TOTAL events: 155 MMAP events: 144 COMM events: 2 EXIT events: 1 SAMPLE events: 2 MMAP2 events: 4 FINISHED_ROUND events: 1 TIME_CONV events: 1 irq_vectors:local_timer_entry stats: TOTAL events: 1 SAMPLE events: 1 irq_vectors:local_timer_exit stats: TOTAL events: 1 SAMPLE events: 1 [root@jouet ~]# But, as shown in the tracepoint definition at the start of this message, some, like "irq_vectors:irq_work_exit", may not be sampled, just counted, i.e. if we try to sample, as when using 'perf record', we get an error: [root@jouet ~]# perf record -e irq_vectors:irq_work_exit Error: You may not have permission to collect system-wide stats. Consider tweaking /proc/sys/kernel/perf_event_paranoid, The error message is misleading, this patch will help in pointing out what is the event causing such an error, but the error message needs improvement, i.e. 
we need to figure out a way to check if a tracepoint is counting only, like this one, when all we can do is to count it with 'perf stat', at most printing the delta using interval printing, as in: [root@jouet ~]# perf stat -I 5000 -e irq_vectors:irq_work_* # time counts unit events 5.000168871 0 irq_vectors:irq_work_entry 5.000168871 0 irq_vectors:irq_work_exit 10.000676730 0 irq_vectors:irq_work_entry 10.000676730 0 irq_vectors:irq_work_exit 15.001122415 0 irq_vectors:irq_work_entry 15.001122415 0 irq_vectors:irq_work_exit 20.001298051 0 irq_vectors:irq_work_entry 20.001298051 0 irq_vectors:irq_work_exit 25.001485020 1 irq_vectors:irq_work_entry 25.001485020 1 irq_vectors:irq_work_exit 30.001658706 0 irq_vectors:irq_work_entry 30.001658706 0 irq_vectors:irq_work_exit ^C 32.045711878 0 irq_vectors:irq_work_entry 32.045711878 0 irq_vectors:irq_work_exit [root@jouet ~]# But at least, when we use a wildcard, this patch helps a bit. Signed-off-by: Yao Jin Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1491566932-503-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 9dc7e2d6e48a..8f5d86bd3501 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2457,11 +2457,17 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, int err, char *msg, size_t size) { char sbuf[STRERR_BUFSIZE]; + int printed = 0; switch (err) { case EPERM: case EACCES: - return scnprintf(msg, size, + if (err == EPERM) + printed = scnprintf(msg, size, + "No permission to enable %s event.\n\n", + perf_evsel__name(evsel)); + + return scnprintf(msg + printed, size - printed, "You may not have permission to collect %sstats.\n\n" "Consider tweaking 
/proc/sys/kernel/perf_event_paranoid,\n" "which controls use of the performance events system by\n" -- cgit v1.2.3 From b07c40df1f4e6f937271921cb116d570bb9c4a31 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 7 Apr 2017 23:24:18 +0900 Subject: perf stat: Refactor the code to strip csv output with ltrim() To strip csv output, use ltrim() instead of just while loop and isspace() at print_metric_{only}_csv(). Signed-off-by: Taeung Song Cc: Andi Kleen Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1491575061-704-3-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2158ea14da57..868e086a6b59 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -875,10 +875,7 @@ static void print_metric_csv(void *ctx, return; } snprintf(buf, sizeof(buf), fmt, val); - vals = buf; - while (isspace(*vals)) - vals++; - ends = vals; + ends = vals = ltrim(buf); while (isdigit(*ends) || *ends == '.') ends++; *ends = 0; @@ -950,10 +947,7 @@ static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, return; unit = fixunit(tbuf, os->evsel, unit); snprintf(buf, sizeof buf, fmt, val); - vals = buf; - while (isspace(*vals)) - vals++; - ends = vals; + ends = vals = ltrim(buf); while (isdigit(*ends) || *ends == '.') ends++; *ends = 0; -- cgit v1.2.3 From e21600fd41106f7a0ca124cec2404b2b3562768d Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 7 Apr 2017 23:24:19 +0900 Subject: perf ui browser: Refactor the code to parse color configs with ltrim() When parsing {fore, back} ground color configs, use ltrim() instead of just while loop and isspace(). 
Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1491575061-704-4-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 3eb3edb307a4..9e47ccbe07f1 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -579,7 +579,7 @@ static int ui_browser__color_config(const char *var, const char *value, break; *bg = '\0'; - while (isspace(*++bg)); + bg = ltrim(++bg); ui_browser__colorsets[i].bg = bg; ui_browser__colorsets[i].fg = fg; return 0; -- cgit v1.2.3 From aa4beb10a94358bf2474d1fc9c4ccde34660cc9d Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 7 Apr 2017 23:24:20 +0900 Subject: perf pmu: Refactor wordwrap() with ltrim() Signed-off-by: Taeung Song Cc: Andi Kleen Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1491575061-704-5-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 362051ea7f3d..11c752561c55 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1148,8 +1148,7 @@ static void wordwrap(char *s, int start, int max, int corr) break; s += wlen; column += n; - while (isspace(*s)) - s++; + s = ltrim(s); } } -- cgit v1.2.3 From bdd97ca63faa374c98314d53c0bcaedb473c5a33 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 7 Apr 2017 23:24:21 +0900 Subject: perf tools: Refactor the code to strip command name with {l,r}trim() After reading the command name from /proc/<pid>/status, use ltrim() and rtrim() to strip it, instead of an open-coded while loop, isspace(), etc. 
Signed-off-by: Taeung Song Acked-by: David Ahern Cc: Don Zickus Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1491575061-704-6-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 76b9c6bc8369..8255a26ac255 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -106,7 +106,7 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, int fd; size_t size = 0; ssize_t n; - char *nl, *name, *tgids, *ppids; + char *name, *tgids, *ppids; *tgid = -1; *ppid = -1; @@ -134,14 +134,7 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, if (name) { name += 5; /* strlen("Name:") */ - - while (*name && isspace(*name)) - ++name; - - nl = strchr(name, '\n'); - if (nl) - *nl = '\0'; - + name = rtrim(ltrim(name)); size = strlen(name); if (size >= len) size = len - 1; -- cgit v1.2.3 From ecbe5e10d4ad12dd3da5d9fccd153c529c8c8ce1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 7 Apr 2017 12:19:38 -0300 Subject: perf string: Simplify ltrim() implementation We don't need to use strlen(), a var, or check for the end explicitly, isspace('\0') is false: [acme@jouet c]$ cat ltrim.c #include <ctype.h> #include <stdio.h> static char *ltrim(char *s) { while (isspace(*s)) ++s; return s; } int main(void) { printf("ltrim(\"\")='%s'\n", ltrim("")); return 0; } [acme@jouet c]$ ./ltrim ltrim("")='' [acme@jouet c]$ Cc: Jiri Olsa Cc: Namhyung Kim Cc: Taeung Song Link: http://lkml.kernel.org/n/tip-w3nk0x3pai2vojk2ab6kdvaw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/string.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index bddca519dd58..e8feb142c9c9 100644 --- a/tools/perf/util/string.c +++ 
b/tools/perf/util/string.c @@ -322,12 +322,8 @@ char *strxfrchar(char *s, char from, char to) */ char *ltrim(char *s) { - int len = strlen(s); - - while (len && isspace(*s)) { - len--; + while (isspace(*s)) s++; - } return s; } -- cgit v1.2.3 From e77852b32d6d4430c68c38aaf73efe5650fa25af Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 6 Apr 2017 09:51:51 +0200 Subject: perf annotate s390: Fix perf annotate error -95 (4.10 regression) since 4.10 perf annotate exits on s390 with an "unknown error -95". Turns out that commit 786c1b51844d ("perf annotate: Start supporting cross arch annotation") added a hard requirement for architecture support when objdump is used but only provided x86 and arm support. Meanwhile power was added so lets add s390 as well. While at it make sure to implement the branch and jump types. Signed-off-by: Christian Borntraeger Cc: Andreas Krebbel Cc: Hendrik Brueckner Cc: Martin Schwidefsky Cc: Peter Zijlstra Cc: linux-s390 Cc: stable@kernel.org # v4.10+ Fixes: 786c1b51844 "perf annotate: Start supporting cross arch annotation" Link: http://lkml.kernel.org/r/1491465112-45819-2-git-send-email-borntraeger@de.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index a37032bd137d..bfb2f1d393d5 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -130,6 +130,12 @@ static struct arch architectures[] = { .name = "powerpc", .init = powerpc__annotate_init, }, + { + .name = "s390", + .objdump = { + .comment_char = '#', + }, + }, }; static void ins__delete(struct ins_operands *ops) -- cgit v1.2.3 From d9f8dfa9baf9b6ae1f2f84f887176558ecde5268 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 6 Apr 2017 09:51:52 +0200 Subject: perf annotate s390: Implement jump types for perf annotate Implement simple detection for all kind of jumps and branches. 
Signed-off-by: Christian Borntraeger Cc: Andreas Krebbel Cc: Hendrik Brueckner Cc: Martin Schwidefsky Cc: Peter Zijlstra Cc: linux-s390 Cc: stable@kernel.org # v4.10+ Link: http://lkml.kernel.org/r/1491465112-45819-3-git-send-email-borntraeger@de.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/annotate/instructions.c | 30 ++++++++++++++++++++++++++++ tools/perf/util/annotate.c | 2 ++ 2 files changed, 32 insertions(+) create mode 100644 tools/perf/arch/s390/annotate/instructions.c diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c new file mode 100644 index 000000000000..745b4b1b8b21 --- /dev/null +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -0,0 +1,30 @@ +static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name) +{ + struct ins_ops *ops = NULL; + + /* catch all kind of jumps */ + if (strchr(name, 'j') || + !strncmp(name, "bct", 3) || + !strncmp(name, "br", 2)) + ops = &jump_ops; + /* override call/returns */ + if (!strcmp(name, "bras") || + !strcmp(name, "brasl") || + !strcmp(name, "basr")) + ops = &call_ops; + if (!strcmp(name, "br")) + ops = &ret_ops; + + arch__associate_ins_ops(arch, name, ops); + return ops; +} + +static int s390__annotate_init(struct arch *arch) +{ + if (!arch->initialized) { + arch->initialized = true; + arch->associate_instruction_ops = s390__associate_ins_ops; + } + + return 0; +} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index bfb2f1d393d5..44ed6652b02f 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -108,6 +108,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/arm64/annotate/instructions.c" #include "arch/x86/annotate/instructions.c" #include "arch/powerpc/annotate/instructions.c" +#include "arch/s390/annotate/instructions.c" static struct arch architectures[] = { { @@ -132,6 +133,7 @@ static struct arch architectures[] = 
{ }, { .name = "s390", + .init = s390__annotate_init, .objdump = { .comment_char = '#', }, -- cgit v1.2.3 From bb8d521f77f3e68a713456b7fb1e99f52ff3342c Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 10 Apr 2017 13:14:26 -0700 Subject: perf inject: Don't proceed if perf_session__process_event() fails All paths following perf_session__process_events() in __cmd_inject() are useless if __cmd_inject() is to fail, some depend on a correct session->evlist. First commit to add code that depends on session->evlist without checking error was commit e558a5bd8b ("perf inject: Work with files"). It has grown since then. Change __cmd_inject() to fail immediately after perf_session__process_events() fails. Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Andrew Vagin Cc: He Kuang Cc: Masami Hiramatsu Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Fixes: e558a5bd8b74 ("perf inject: Work with files") Link: http://lkml.kernel.org/r/20170410201432.24807-2-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 42dff0b1375a..65e1c026a2f0 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -694,6 +694,8 @@ static int __cmd_inject(struct perf_inject *inject) lseek(fd, output_data_offset, SEEK_SET); ret = perf_session__process_events(session); + if (ret) + return ret; if (!file_out->is_pipe) { if (inject->build_ids) -- cgit v1.2.3 From 1e0d4f0200e4dbdfc38d818f329d8a0955f7c6f5 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 10 Apr 2017 13:14:27 -0700 Subject: perf inject: Copy events when reordering events in pipe mode __perf_session__process_pipe_events reuses the same memory buffer to process all events in the pipe. When reordering is needed (e.g.
-b option), events are not immediately flushed, but kept around until reordering is possible, causing memory corruption. The problem is usually observed by a "Unknown sample error" output. It can easily be reproduced by: perf record -o - noploop | perf inject -b > output Committer testing: Before: $ perf record -o - stress -t 2 -c 2 | perf inject -b > /dev/null stress: info: [8297] dispatching hogs: 2 cpu, 0 io, 0 vm, 0 hdd stress: info: [8297] successful run completed in 2s [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] Warning: Found 1 unknown events! Is this an older tool processing a perf.data file generated by a more recent tool? If that is not the case, consider reporting to linux-kernel@vger.kernel.org. $ After: $ perf record -o - stress -t 2 -c 2 | perf inject -b > /dev/null stress: info: [9027] dispatching hogs: 2 cpu, 0 io, 0 vm, 0 hdd stress: info: [9027] successful run completed in 2s [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] no symbols found in /usr/bin/stress, maybe install a debug package? no symbols found in /usr/bin/stress, maybe install a debug package? 
$ Signed-off-by: David Carrillo-Cisneros Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170410201432.24807-3-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/ordered-events.c | 3 ++- tools/perf/util/session.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index fe84df1875aa..e70e935b1841 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -79,7 +79,7 @@ static union perf_event *dup_event(struct ordered_events *oe, static void free_dup_event(struct ordered_events *oe, union perf_event *event) { - if (oe->copy_on_queue) { + if (event && oe->copy_on_queue) { oe->cur_alloc_size -= event->header.size; free(event); } @@ -150,6 +150,7 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve list_move(&event->list, &oe->cache); oe->nr_events--; free_dup_event(oe, event->event); + event->event = NULL; } int ordered_events__queue(struct ordered_events *oe, union perf_event *event, diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 24259bc2c598..a25302bc55a8 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1656,6 +1656,7 @@ static int __perf_session__process_pipe_events(struct perf_session *session) buf = malloc(cur_size); if (!buf) return -errno; + ordered_events__set_copy_on_queue(oe, true); more: event = buf; err = readn(fd, event, sizeof(struct perf_event_header)); -- cgit v1.2.3 From 6d13491e2d4944180c9b4fb6ddca4e34b1537836 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 10 Apr 2017 13:14:28 -0700 Subject: perf tools: Describe pipe mode in perf.data-file-fomat.txt Add a minimal description of pipe's data format. 
Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170410201432.24807-4-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf.data-file-format.txt | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index b664b18d3991..fa2a9132f0a9 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -11,8 +11,8 @@ All fields are in native-endian of the machine that generated the perf.data. When perf is writing to a pipe it uses a special version of the file format that does not rely on seeking to adjust data offsets. This -format is not described here. The pipe version can be converted to -normal perf.data with perf inject. +format is described in "Pipe-mode data" section. The pipe data version can be +augmented with additional events using perf inject. The file starts with a perf_header: @@ -411,6 +411,21 @@ An array bound by the perf_file_section size. ids points to a array of uint64_t defining the ids for event attr attr. +Pipe-mode data + +Pipe-mode avoid seeks in the file by removing the perf_file_section and flags +from the struct perf_header. The trimmed header is: + +struct perf_pipe_file_header { + u64 magic; + u64 size; +}; + +The information about attrs, data, and event_types is instead in the +synthesized events PERF_RECORD_ATTR, PERF_RECORD_HEADER_TRACING_DATA and +PERF_RECORD_HEADER_EVENT_TYPE that are generated by perf record in pipe-mode. 
+ + References: include/uapi/linux/perf_event.h -- cgit v1.2.3 From 6ab11f3a35aa07be2ff167b9de37e6c1eb58396b Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 10 Apr 2017 13:14:29 -0700 Subject: perf annotate: Process attr and build_id records perf annotate did not get some love for pipe-mode, and did not have .attr and .build_id setup (while record and inject did). Fix that. It can easily be reproduced by: perf record -o - noploop | perf annotate that in my system shows: 0xd8 [0x28]: failed to process type: 9 Committer Testing: Before: $ perf record -o - stress -t 2 -c 2 | perf annotate --stdio stress: info: [11060] dispatching hogs: 2 cpu, 0 io, 0 vm, 0 hdd 0x4470 [0x28]: failed to process type: 9 $ stress: info: [11060] successful run completed in 2s $ After: $ perf record -o - stress -t 2 -c 2 | perf annotate --stdio stress: info: [11871] dispatching hogs: 2 cpu, 0 io, 0 vm, 0 hdd stress: info: [11871] successful run completed in 2s [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] no symbols found in /usr/bin/stress, maybe install a debug package?
Percent | Source code & Disassembly of libc-2.24.so for cycles:uhH (6117 samples) --------------------------------------------------------------------------------------- : : Disassembly of section .text: : : 000000000003b050 : : __random_r(): 10.56 : 3b050: test %rdi,%rdi 0.00 : 3b053: je 3b0d0 0.34 : 3b055: test %rsi,%rsi 0.00 : 3b058: je 3b0d0 0.46 : 3b05a: mov 0x18(%rdi),%eax 12.44 : 3b05d: mov 0x10(%rdi),%r8 0.18 : 3b061: test %eax,%eax 0.00 : 3b063: je 3b0b0 Signed-off-by: David Carrillo-Cisneros Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170410201432.24807-5-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 56a7c8d210b9..b2b2722f6bb7 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -394,6 +394,8 @@ int cmd_annotate(int argc, const char **argv) .exit = perf_event__process_exit, .fork = perf_event__process_fork, .namespaces = perf_event__process_namespaces, + .attr = perf_event__process_attr, + .build_id = perf_event__process_build_id, .ordered_events = true, .ordering_requires_timestamps = true, }, -- cgit v1.2.3 From 0973ad97c187e06aece61f685b9c3b2d93290a73 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 10 Apr 2017 13:14:30 -0700 Subject: perf session: Don't rely on evlist in pipe mode Session sets a number parameters that rely on evlist. These parameters are not used in pipe-mode and should not be set, since evlist is unavailable. Fix that. 
Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170410201432.24807-6-davidcc@google.com [ Check if file != NULL in perf_session__new(), like when used by builtin-top.c ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a25302bc55a8..7b740a73e595 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -140,8 +140,14 @@ struct perf_session *perf_session__new(struct perf_data_file *file, if (perf_session__open(session) < 0) goto out_close; - perf_session__set_id_hdr_size(session); - perf_session__set_comm_exec(session); + /* + * set session attributes that are present in perf.data + * but not in pipe-mode. + */ + if (!file->is_pipe) { + perf_session__set_id_hdr_size(session); + perf_session__set_comm_exec(session); + } } } else { session->machines.host.env = &perf_env; @@ -156,7 +162,11 @@ struct perf_session *perf_session__new(struct perf_data_file *file, pr_warning("Cannot read kernel map\n"); } - if (tool && tool->ordering_requires_timestamps && + /* + * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is + * processed, so perf_evlist__sample_id_all is not meaningful here. 
+ */ + if ((!file || !file->is_pipe) && tool && tool->ordering_requires_timestamps && tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) { dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); tool->ordered_events = false; -- cgit v1.2.3 From c9d1c93421e3b3c7051b193c9cf648a3bc55cb3e Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 10 Apr 2017 13:14:32 -0700 Subject: perf tools: Do not print missing features in pipe-mode Pipe-mode has no perf.data header, hence no upfront knowledge of presend and missing features, hence, do not print missing features in pipe-mode. Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170410201432.24807-8-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index ef09f26e67da..2ccc7f06db79 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2270,6 +2270,9 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) perf_header__process_sections(header, fd, &hd, perf_file_section__fprintf_info); + if (session->file->is_pipe) + return 0; + fprintf(fp, "# missing features: "); for_each_clear_bit(bit, header->adds_features, HEADER_LAST_FEATURE) { if (bit) -- cgit v1.2.3 From 4597cf0664d2fad785509dedfed22f8fe8951ebb Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Sat, 8 Apr 2017 09:52:24 +0900 Subject: perf annotate: Refactor the code to parse disassemble lines with {l,r}trim() When parsing disassemble lines, use ltrim() and rtrim() to strip them, not using just while loop and isspace(). 
Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1491612748-1605-2-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 42 +++++++----------------------------------- 1 file changed, 7 insertions(+), 35 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 44ed6652b02f..204790db10f1 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -387,9 +387,7 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *m if (comment == NULL) return 0; - while (comment[0] != '\0' && isspace(comment[0])) - ++comment; - + comment = ltrim(comment); comment__symbol(ops->source.raw, comment, &ops->source.addr, &ops->source.name); comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); @@ -434,9 +432,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops if (comment == NULL) return 0; - while (comment[0] != '\0' && isspace(comment[0])) - ++comment; - + comment = ltrim(comment); comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); return 0; @@ -785,10 +781,7 @@ static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, str static int disasm_line__parse(char *line, const char **namep, char **rawp) { - char *name = line, tmp; - - while (isspace(name[0])) - ++name; + char tmp, *name = ltrim(line); if (name[0] == '\0') return -1; @@ -806,12 +799,7 @@ static int disasm_line__parse(char *line, const char **namep, char **rawp) goto out_free_name; (*rawp)[0] = tmp; - - if ((*rawp)[0] != '\0') { - (*rawp)++; - while (isspace((*rawp)[0])) - ++(*rawp); - } + *rawp = ltrim(*rawp); return 0; @@ -1156,7 +1144,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, { struct annotation *notes = symbol__annotation(sym); struct disasm_line *dl; - char *line = NULL, *parsed_line, *tmp, *tmp2, *c; + char 
*line = NULL, *parsed_line, *tmp, *tmp2; size_t line_len; s64 line_ip, offset = -1; regmatch_t match[2]; @@ -1167,15 +1155,8 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, if (!line) return -1; - while (line_len != 0 && isspace(line[line_len - 1])) - line[--line_len] = '\0'; - - c = strchr(line, '\n'); - if (c) - *c = 0; - line_ip = -1; - parsed_line = line; + parsed_line = rtrim(line); /* /filename:linenr ? Save line number and ignore. */ if (regexec(&file_lineno, line, 2, match, 0) == 0) { @@ -1183,16 +1164,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, return 0; } - /* - * Strip leading spaces: - */ - tmp = line; - while (*tmp) { - if (*tmp != ' ') - break; - tmp++; - } - + tmp = ltrim(parsed_line); if (*tmp) { /* * Parse hexa addresses followed by ':' -- cgit v1.2.3 From 986a5bc028a84d487c354a529730b48682d1fb41 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Sat, 8 Apr 2017 09:52:25 +0900 Subject: perf annotate: Use stripped line instead of raw disassemble line When parsing disassemble lines for source line number, use a stripped line instead of raw line. Signed-off-by: Taeung Song Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1491612748-1605-3-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 204790db10f1..30498a2d4a6f 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1159,8 +1159,8 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, parsed_line = rtrim(line); /* /filename:linenr ? Save line number and ignore. 
*/ - if (regexec(&file_lineno, line, 2, match, 0) == 0) { - *line_nr = atoi(line + match[1].rm_so); + if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { + *line_nr = atoi(parsed_line + match[1].rm_so); return 0; } -- cgit v1.2.3 From bd0b90676c30fe640e7ead919b3e38846ac88ab7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 29 Mar 2017 13:56:56 +0900 Subject: kprobes/x86: Fix kprobe-booster not to boost far call instructions Fix the kprobe-booster not to boost far call instruction, because a call may store the address in the single-step execution buffer to the stack, which should be modified after single stepping. Currently, this instruction will be filtered as not boostable in resume_execution(), so this is not a critical issue. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Andrey Ryabinin Cc: Anil S Keshavamurthy Cc: Borislav Petkov Cc: Brian Gerst Cc: David S . Miller Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ye Xiaolong Link: http://lkml.kernel.org/r/149076340615.22469.14066273186134229909.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 993fa4fe4f68..9eae5a6c5870 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -200,6 +200,8 @@ retry: return (opcode != 0x62 && opcode != 0x67); case 0x70: return 0; /* can't boost conditional jump */ + case 0x90: + return opcode != 0x9a; /* can't boost call far */ case 0xc0: /* can't boost software-interruptions */ return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf; -- cgit v1.2.3 From 129d17e8e8daf50f8aff4941fb4a9cda027ab9cf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 29 Mar 2017 13:58:06 +0900 Subject: kprobes/x86: Fix the description of __copy_instruction() Fix the description comment of __copy_instruction() 
function since it has already been changed to return the length of the copied instruction. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Andrey Ryabinin Cc: Anil S Keshavamurthy Cc: Borislav Petkov Cc: Brian Gerst Cc: David S . Miller Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ye Xiaolong Link: http://lkml.kernel.org/r/149076347582.22469.3775133607244923462.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 9eae5a6c5870..81d4dc786dae 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -350,11 +350,10 @@ static int is_IF_modifier(kprobe_opcode_t *insn) } /* - * Copy an instruction and adjust the displacement if the instruction - * uses the %rip-relative addressing mode. - * If it does, Return the address of the 32-bit displacement word. - * If not, return null. - * Only applicable to 64-bit x86. + * Copy an instruction with recovering modified instruction by kprobes + * and adjust the displacement if the instruction uses the %rip-relative + * addressing mode. + * This returns the length of copied instruction, or 0 if it has an error. */ int __copy_instruction(u8 *dest, u8 *src) { @@ -376,6 +375,7 @@ int __copy_instruction(u8 *dest, u8 *src) memcpy(dest, insn.kaddr, length); #ifdef CONFIG_X86_64 + /* Only x86_64 has RIP relative instructions */ if (insn_rip_relative(&insn)) { s64 newdisp; u8 *disp; -- cgit v1.2.3 From 17880e4d5777df4770081ecf0750471cda57f86b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 29 Mar 2017 13:59:15 +0900 Subject: kprobes/x86: Use instruction decoder for booster Use x86 instruction decoder for checking whether the probed instruction is able to boost or not, instead of hand-written code. 
Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Andrey Ryabinin Cc: Anil S Keshavamurthy Cc: Borislav Petkov Cc: Brian Gerst Cc: David S . Miller Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ye Xiaolong Link: http://lkml.kernel.org/r/149076354563.22469.13379472209338986858.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 81d4dc786dae..6327f95832a0 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -169,35 +169,33 @@ NOKPROBE_SYMBOL(skip_prefixes); */ int can_boost(kprobe_opcode_t *opcodes, void *addr) { + struct insn insn; kprobe_opcode_t opcode; - kprobe_opcode_t *orig_opcodes = opcodes; if (search_exception_tables((unsigned long)addr)) return 0; /* Page fault may occur on this address. 
*/ -retry: - if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) - return 0; - opcode = *(opcodes++); + kernel_insn_init(&insn, (void *)opcodes, MAX_INSN_SIZE); + insn_get_opcode(&insn); /* 2nd-byte opcode */ - if (opcode == 0x0f) { - if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) - return 0; - return test_bit(*opcodes, + if (insn.opcode.nbytes == 2) + return test_bit(insn.opcode.bytes[1], (unsigned long *)twobyte_is_boostable); - } + + if (insn.opcode.nbytes != 1) + return 0; + + /* Can't boost Address-size override prefix */ + if (unlikely(inat_is_address_size_prefix(insn.attr))) + return 0; + + opcode = insn.opcode.bytes[0]; switch (opcode & 0xf0) { -#ifdef CONFIG_X86_64 - case 0x40: - goto retry; /* REX prefix is boostable */ -#endif case 0x60: - if (0x63 < opcode && opcode < 0x67) - goto retry; /* prefixes */ - /* can't boost Address-size override and bound */ - return (opcode != 0x62 && opcode != 0x67); + /* can't boost "bound" */ + return (opcode != 0x62); case 0x70: return 0; /* can't boost conditional jump */ case 0x90: @@ -212,14 +210,9 @@ retry: /* can boost in/out and absolute jmps */ return ((opcode & 0x04) || opcode == 0xea); case 0xf0: - if ((opcode & 0x0c) == 0 && opcode != 0xf1) - goto retry; /* lock/rep(ne) prefix */ /* clear and set flags are boostable */ return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); default: - /* segment override prefixes are boostable */ - if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e) - goto retry; /* prefixes */ /* CS override prefix and call are not boostable */ return (opcode != 0x2e && opcode != 0x9a); } -- cgit v1.2.3 From 804dec5bda9b4fcdab5f67fe61db4a0498af5221 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 29 Mar 2017 14:00:25 +0900 Subject: kprobes/x86: Do not modify singlestep buffer while resuming Do not modify singlestep execution buffer (kprobe.ainsn.insn) while resuming from single-stepping, instead, modifies the buffer to add a jump back instruction at preparing buffer. 
Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Andrey Ryabinin Cc: Anil S Keshavamurthy Cc: Borislav Petkov Cc: Brian Gerst Cc: David S . Miller Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ye Xiaolong Link: http://lkml.kernel.org/r/149076361560.22469.1610155860343077495.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6327f95832a0..a654054eae7e 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -399,23 +399,36 @@ int __copy_instruction(u8 *dest, u8 *src) return length; } +/* Prepare reljump right after instruction to boost */ +static void prepare_boost(struct kprobe *p, int length) +{ + if (can_boost(p->ainsn.insn, p->addr) && + MAX_INSN_SIZE - length >= RELATIVEJUMP_SIZE) { + /* + * These instructions can be executed directly if it + * jumps back to correct address. + */ + synthesize_reljump(p->ainsn.insn + length, p->addr + length); + p->ainsn.boostable = 1; + } else { + p->ainsn.boostable = -1; + } +} + static int arch_copy_kprobe(struct kprobe *p) { - int ret; + int len; /* Copy an instruction with recovering if other optprobe modifies it.*/ - ret = __copy_instruction(p->ainsn.insn, p->addr); - if (!ret) + len = __copy_instruction(p->ainsn.insn, p->addr); + if (!len) return -EINVAL; /* * __copy_instruction can modify the displacement of the instruction, * but it doesn't affect boostable check. 
*/ - if (can_boost(p->ainsn.insn, p->addr)) - p->ainsn.boostable = 0; - else - p->ainsn.boostable = -1; + prepare_boost(p, len); /* Check whether the instruction modifies Interrupt Flag or not */ p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn); @@ -878,21 +891,6 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs, break; } - if (p->ainsn.boostable == 0) { - if ((regs->ip > copy_ip) && - (regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) { - /* - * These instructions can be executed directly if it - * jumps back to correct address. - */ - synthesize_reljump((void *)regs->ip, - (void *)orig_ip + (regs->ip - copy_ip)); - p->ainsn.boostable = 1; - } else { - p->ainsn.boostable = -1; - } - } - regs->ip += orig_ip - copy_ip; no_change: -- cgit v1.2.3 From 490154bc68d15de9e38fbb850fe470e32cc66407 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 29 Mar 2017 14:01:35 +0900 Subject: kprobes/x86: Make boostable flag boolean Make arch_specific_insn.boostable to boolean, since it has only 2 states, boostable or not. So it is better to use boolean from the viewpoint of code readability. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Andrey Ryabinin Cc: Anil S Keshavamurthy Cc: Borislav Petkov Cc: Brian Gerst Cc: David S . Miller Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ye Xiaolong Link: http://lkml.kernel.org/r/149076368566.22469.6322906866458231844.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kprobes.h | 7 +++---- arch/x86/kernel/kprobes/core.c | 12 ++++++------ arch/x86/kernel/kprobes/ftrace.c | 2 +- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 200581691c6e..34b984c60790 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -72,14 +72,13 @@ struct arch_specific_insn { /* copy of the original instruction */ kprobe_opcode_t *insn; /* - * boostable = -1: This instruction type is not boostable. - * boostable = 0: This instruction type is boostable. - * boostable = 1: This instruction has been boosted: we have + * boostable = false: This instruction type is not boostable. + * boostable = true: This instruction has been boosted: we have * added a relative jump after the instruction copy in insn, * so no single-step and fixup are needed (unless there's * a post_handler or break_handler). */ - int boostable; + bool boostable; bool if_modifier; }; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index a654054eae7e..3f084a0ca722 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -409,9 +409,9 @@ static void prepare_boost(struct kprobe *p, int length) * jumps back to correct address. 
*/ synthesize_reljump(p->ainsn.insn + length, p->addr + length); - p->ainsn.boostable = 1; + p->ainsn.boostable = true; } else { - p->ainsn.boostable = -1; + p->ainsn.boostable = false; } } @@ -467,7 +467,7 @@ void arch_disarm_kprobe(struct kprobe *p) void arch_remove_kprobe(struct kprobe *p) { if (p->ainsn.insn) { - free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); + free_insn_slot(p->ainsn.insn, p->ainsn.boostable); p->ainsn.insn = NULL; } } @@ -539,7 +539,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, return; #if !defined(CONFIG_PREEMPT) - if (p->ainsn.boostable == 1 && !p->post_handler) { + if (p->ainsn.boostable && !p->post_handler) { /* Boost up -- we can execute copied instructions directly */ if (!reenter) reset_current_kprobe(); @@ -859,7 +859,7 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs, case 0xcf: case 0xea: /* jmp absolute -- ip is correct */ /* ip is already adjusted, no more changes required */ - p->ainsn.boostable = 1; + p->ainsn.boostable = true; goto no_change; case 0xe8: /* call relative - Fix return addr */ *tos = orig_ip + (*tos - copy_ip); @@ -884,7 +884,7 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs, * jmp near and far, absolute indirect * ip is correct. 
And this is boostable */ - p->ainsn.boostable = 1; + p->ainsn.boostable = true; goto no_change; } default: diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 5f8f0b3cc674..041f7b6dfa0f 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -94,6 +94,6 @@ NOKPROBE_SYMBOL(kprobe_ftrace_handler); int arch_prepare_kprobe_ftrace(struct kprobe *p) { p->ainsn.insn = NULL; - p->ainsn.boostable = -1; + p->ainsn.boostable = false; return 0; } -- cgit v1.2.3 From d0381c81c2f782fa2131178d11e0cfb23d50d631 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 29 Mar 2017 14:02:46 +0900 Subject: kprobes/x86: Set kprobes pages read-only Set the pages which is used for kprobes' singlestep buffer and optprobe's trampoline instruction buffer to readonly. This can prevent unexpected (or unintended) instruction modification. This also passes rodata_test as below. Without this patch, rodata_test shows a warning: WARNING: CPU: 0 PID: 1 at arch/x86/mm/dump_pagetables.c:235 note_page+0x7a9/0xa20 x86/mm: Found insecure W+X mapping at address ffffffffa0000000/0xffffffffa0000000 With this fix, no W+X pages are found: x86/mm: Checked W+X mappings: passed, no W+X pages found. rodata_test: all tests were successful Reported-by: Andrey Ryabinin Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Borislav Petkov Cc: Brian Gerst Cc: David S . Miller Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ye Xiaolong Link: http://lkml.kernel.org/r/149076375592.22469.14174394514338612247.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 4 ++++ arch/x86/kernel/kprobes/opt.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3f084a0ca722..0dc24e6cdd1e 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -419,6 +419,8 @@ static int arch_copy_kprobe(struct kprobe *p) { int len; + set_memory_rw((unsigned long)p->ainsn.insn & PAGE_MASK, 1); + /* Copy an instruction with recovering if other optprobe modifies it.*/ len = __copy_instruction(p->ainsn.insn, p->addr); if (!len) @@ -430,6 +432,8 @@ static int arch_copy_kprobe(struct kprobe *p) */ prepare_boost(p, len); + set_memory_ro((unsigned long)p->ainsn.insn & PAGE_MASK, 1); + /* Check whether the instruction modifies Interrupt Flag or not */ p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 3e7c6e5a08ff..b121037739e4 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -350,6 +350,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, } buf = (u8 *)op->optinsn.insn; + set_memory_rw((unsigned long)buf & PAGE_MASK, 1); /* Copy instructions into the out-of-line buffer */ ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); @@ -372,6 +373,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, (u8 *)op->kp.addr + op->optinsn.size); + set_memory_ro((unsigned long)buf & PAGE_MASK, 1); + flush_icache_range((unsigned long) buf, (unsigned long) buf + TMPL_END_IDX + op->optinsn.size + RELATIVEJUMP_SIZE); -- cgit v1.2.3 From ea1e34fc366b84e4449b37d86f2222935e29412d Mon Sep 17 00:00:00 2001 From: Masami 
Hiramatsu Date: Wed, 29 Mar 2017 14:03:56 +0900 Subject: kprobes/x86: Use probe_kernel_read() instead of memcpy() Use probe_kernel_read() for avoiding unexpected faults while copying kernel text in __recover_probed_insn(), __recover_optprobed_insn() and __copy_instruction(). Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Andrey Ryabinin Cc: Anil S Keshavamurthy Cc: Borislav Petkov Cc: Brian Gerst Cc: David S . Miller Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ye Xiaolong Link: http://lkml.kernel.org/r/149076382624.22469.10091613887942958518.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 12 +++++++++--- arch/x86/kernel/kprobes/opt.c | 5 ++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 0dc24e6cdd1e..722f54440e7e 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -259,7 +259,10 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) * Fortunately, we know that the original code is the ideal 5-byte * long NOP. */ - memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + if (probe_kernel_read(buf, (void *)addr, + MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) + return 0UL; + if (faddr) memcpy(buf, ideal_nops[NOP_ATOMIC5], 5); else @@ -271,7 +274,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) * Recover the probed instruction at addr for further analysis. * Caller must lock kprobes by kprobe_mutex, or disable preemption * for preventing to release referencing kprobes. - * Returns zero if the instruction can not get recovered. + * Returns zero if the instruction can not get recovered (or access failed). 
*/ unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) { @@ -365,7 +368,10 @@ int __copy_instruction(u8 *dest, u8 *src) /* Another subsystem puts a breakpoint, failed to recover */ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) return 0; - memcpy(dest, insn.kaddr, length); + + /* This can access kernel text if given address is not recovered */ + if (kernel_probe_read(dest, insn.kaddr, length)) + return 0; #ifdef CONFIG_X86_64 /* Only x86_64 has RIP relative instructions */ diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index b121037739e4..5b5233441d30 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -65,7 +65,10 @@ found: * overwritten by jump destination address. In this case, original * bytes must be recovered from op->optinsn.copied_insn buffer. */ - memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + if (probe_kernel_read(buf, (void *)addr, + MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) + return 0UL; + if (addr == (unsigned long)kp->addr) { buf[0] = kp->opcode; memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); -- cgit v1.2.3 From a8d11cd0714f51877587f5ec891013ca46e163ac Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 29 Mar 2017 14:05:06 +0900 Subject: kprobes/x86: Consolidate insn decoder users for copying code Consolidate x86 instruction decoder users on the path of copying original code for kprobes. Kprobes decodes the same instruction a maximum of 3 times when preparing the instruction buffer: - The first time for getting the length of the instruction, - the 2nd for adjusting displacement, - and the 3rd for checking whether the instruction is boostable or not. For each time, the actual decoding target address is slightly different (1st is original address or recovered instruction buffer, 2nd and 3rd are pointing to the copied buffer), but all have the same instruction. 
Thus, this patch also changes the target address to the copied buffer at first and reuses the decoded "insn" for displacement adjusting and checking boostability. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Andrey Ryabinin Cc: Anil S Keshavamurthy Cc: Borislav Petkov Cc: Brian Gerst Cc: David S . Miller Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ye Xiaolong Link: http://lkml.kernel.org/r/149076389643.22469.13151892839998777373.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/common.h | 4 +-- arch/x86/kernel/kprobes/core.c | 66 +++++++++++++++++++--------------------- arch/x86/kernel/kprobes/opt.c | 5 +-- 3 files changed, 36 insertions(+), 39 deletions(-) diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index d688826e5736..db2182d63ed0 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -67,7 +67,7 @@ #endif /* Ensure if the instruction can be boostable */ -extern int can_boost(kprobe_opcode_t *instruction, void *addr); +extern int can_boost(struct insn *insn, void *orig_addr); /* Recover instruction if given address is probed */ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr); @@ -75,7 +75,7 @@ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, * Copy an instruction and adjust the displacement if the instruction * uses the %rip-relative addressing mode. 
*/ -extern int __copy_instruction(u8 *dest, u8 *src); +extern int __copy_instruction(u8 *dest, u8 *src, struct insn *insn); /* Generate a relative-jump/call instruction */ extern void synthesize_reljump(void *from, void *to); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 722f54440e7e..19e1f2a6d7b0 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -164,33 +164,29 @@ static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn) NOKPROBE_SYMBOL(skip_prefixes); /* - * Returns non-zero if opcode is boostable. + * Returns non-zero if INSN is boostable. * RIP relative instructions are adjusted at copying time in 64 bits mode */ -int can_boost(kprobe_opcode_t *opcodes, void *addr) +int can_boost(struct insn *insn, void *addr) { - struct insn insn; kprobe_opcode_t opcode; if (search_exception_tables((unsigned long)addr)) return 0; /* Page fault may occur on this address. */ - kernel_insn_init(&insn, (void *)opcodes, MAX_INSN_SIZE); - insn_get_opcode(&insn); - /* 2nd-byte opcode */ - if (insn.opcode.nbytes == 2) - return test_bit(insn.opcode.bytes[1], + if (insn->opcode.nbytes == 2) + return test_bit(insn->opcode.bytes[1], (unsigned long *)twobyte_is_boostable); - if (insn.opcode.nbytes != 1) + if (insn->opcode.nbytes != 1) return 0; /* Can't boost Address-size override prefix */ - if (unlikely(inat_is_address_size_prefix(insn.attr))) + if (unlikely(inat_is_address_size_prefix(insn->attr))) return 0; - opcode = insn.opcode.bytes[0]; + opcode = insn->opcode.bytes[0]; switch (opcode & 0xf0) { case 0x60: @@ -351,35 +347,31 @@ static int is_IF_modifier(kprobe_opcode_t *insn) * addressing mode. * This returns the length of copied instruction, or 0 if it has an error. 
*/ -int __copy_instruction(u8 *dest, u8 *src) +int __copy_instruction(u8 *dest, u8 *src, struct insn *insn) { - struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; - int length; unsigned long recovered_insn = recover_probed_instruction(buf, (unsigned long)src); - if (!recovered_insn) + if (!recovered_insn || !insn) return 0; - kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); - insn_get_length(&insn); - length = insn.length; - /* Another subsystem puts a breakpoint, failed to recover */ - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) + /* This can access kernel text if given address is not recovered */ + if (probe_kernel_read(dest, (void *)recovered_insn, MAX_INSN_SIZE)) return 0; - /* This can access kernel text if given address is not recovered */ - if (kernel_probe_read(dest, insn.kaddr, length)) + kernel_insn_init(insn, dest, MAX_INSN_SIZE); + insn_get_length(insn); + + /* Another subsystem puts a breakpoint, failed to recover */ + if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION) return 0; #ifdef CONFIG_X86_64 /* Only x86_64 has RIP relative instructions */ - if (insn_rip_relative(&insn)) { + if (insn_rip_relative(insn)) { s64 newdisp; u8 *disp; - kernel_insn_init(&insn, dest, length); - insn_get_displacement(&insn); /* * The copied instruction uses the %rip-relative addressing * mode. Adjust the displacement for the difference between @@ -392,29 +384,32 @@ int __copy_instruction(u8 *dest, u8 *src) * extension of the original signed 32-bit displacement would * have given. 
*/ - newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest; + newdisp = (u8 *) src + (s64) insn->displacement.value + - (u8 *) dest; if ((s64) (s32) newdisp != newdisp) { pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp); - pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", src, dest, insn.displacement.value); + pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", + src, dest, insn->displacement.value); return 0; } - disp = (u8 *) dest + insn_offset_displacement(&insn); + disp = (u8 *) dest + insn_offset_displacement(insn); *(s32 *) disp = (s32) newdisp; } #endif - return length; + return insn->length; } /* Prepare reljump right after instruction to boost */ -static void prepare_boost(struct kprobe *p, int length) +static void prepare_boost(struct kprobe *p, struct insn *insn) { - if (can_boost(p->ainsn.insn, p->addr) && - MAX_INSN_SIZE - length >= RELATIVEJUMP_SIZE) { + if (can_boost(insn, p->addr) && + MAX_INSN_SIZE - insn->length >= RELATIVEJUMP_SIZE) { /* * These instructions can be executed directly if it * jumps back to correct address. */ - synthesize_reljump(p->ainsn.insn + length, p->addr + length); + synthesize_reljump(p->ainsn.insn + insn->length, + p->addr + insn->length); p->ainsn.boostable = true; } else { p->ainsn.boostable = false; @@ -423,12 +418,13 @@ static void prepare_boost(struct kprobe *p, int length) static int arch_copy_kprobe(struct kprobe *p) { + struct insn insn; int len; set_memory_rw((unsigned long)p->ainsn.insn & PAGE_MASK, 1); /* Copy an instruction with recovering if other optprobe modifies it.*/ - len = __copy_instruction(p->ainsn.insn, p->addr); + len = __copy_instruction(p->ainsn.insn, p->addr, &insn); if (!len) return -EINVAL; @@ -436,7 +432,7 @@ static int arch_copy_kprobe(struct kprobe *p) * __copy_instruction can modify the displacement of the instruction, * but it doesn't affect boostable check. 
*/ - prepare_boost(p, len); + prepare_boost(p, &insn); set_memory_ro((unsigned long)p->ainsn.insn & PAGE_MASK, 1); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 5b5233441d30..9aadff3d0902 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -177,11 +177,12 @@ NOKPROBE_SYMBOL(optimized_callback); static int copy_optimized_instructions(u8 *dest, u8 *src) { + struct insn insn; int len = 0, ret; while (len < RELATIVEJUMP_SIZE) { - ret = __copy_instruction(dest + len, src + len); - if (!ret || !can_boost(dest + len, src + len)) + ret = __copy_instruction(dest + len, src + len, &insn); + if (!ret || !can_boost(&insn, src + len)) return -EINVAL; len += ret; } -- cgit v1.2.3 From 7be6b3166ebf2c10c28ef5777d1b31a937ed8f7a Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Tue, 11 Apr 2017 23:49:13 -0700 Subject: perf tools: Pass PYTHON config to feature detection ( This is a rebased version of https://lkml.org/lkml/2017/2/7/662 ) Python's CC and link Makefile variables were not passed to feature detection, causing feature detection to use system's Python rather than PYTHON_CONFIG's one. This created a mismatch between the detected Python support and the one actually used by perf when PYTHON_CONFIG is specified. Fix it by moving Python's variable initialization to before feature detection and pass FLAGS_PYTHON_EMBED to Python's feature detection's build target. 
Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170412064919.92449-2-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 2 +- tools/perf/Makefile.config | 31 ++++++++++++------------------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 09c9626ea666..523e587fe05f 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -175,7 +175,7 @@ $(OUTPUT)test-libperl.bin: $(BUILD) $(FLAGS_PERL_EMBED) $(OUTPUT)test-libpython.bin: - $(BUILD) + $(BUILD) $(FLAGS_PYTHON_EMBED) $(OUTPUT)test-libpython-version.bin: $(BUILD) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 2b656de99495..cfd6015229a2 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -170,13 +170,20 @@ PYTHON2_CONFIG := \ override PYTHON_CONFIG := \ $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG)) -PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) +grep-libs = $(filter -l%,$(1)) +strip-libs = $(filter-out -l%,$(1)) -PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) -PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) +PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) -ifeq ($(CC), clang) - PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) +ifdef PYTHON_CONFIG + PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) + PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) + PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil + PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) + ifeq ($(CC), clang) + PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) + 
endif + FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) endif FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) @@ -554,8 +561,6 @@ ifndef NO_GTK2 endif endif -grep-libs = $(filter -l%,$(1)) -strip-libs = $(filter-out -l%,$(1)) ifdef NO_LIBPERL CFLAGS += -DNO_LIBPERL @@ -603,21 +608,9 @@ else $(call disable-python,No 'python-config' tool was found: disables Python support - please install python-devel/python-dev) else - PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) - - PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) - PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) - PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil - PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - ifeq ($(CC), clang) - PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) - endif - FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) - ifneq ($(feature-libpython), 1) $(call disable-python,No 'Python.h' (for Python 2.x support) was found: disables Python support - please install python-devel/python-dev) else - ifneq ($(feature-libpython-version), 1) $(warning Python 3 is not yet supported; please set) $(warning PYTHON and/or PYTHON_CONFIG appropriately.) -- cgit v1.2.3 From db49a71798a38f3ddf3f3462703328dca39b1ac7 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 12 Apr 2017 11:23:01 -0700 Subject: perf stat: Fix bug in handling events in error state (This is a patch has been sitting in the Intel CQM/CMT driver series for a while, despite not depend on it. Sending it now independently since the series is being discarded.) When an event is in error state, read() returns 0 instead of sizeof() buffer. In certain modes, such as interval printing, ignoring the 0 return value may cause bogus count deltas to be computed and thus invalid results printed. 
This patch fixes this problem by modifying read_counters() to mark the event as not scaled (scaled = -1) to force the printout routine to show <not counted>. Signed-off-by: Stephane Eranian Reviewed-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Mathieu Poirier Cc: Paul Turner Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20170412182301.44406-1-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 12 +++++++++--- tools/perf/util/evsel.c | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 868e086a6b59..610225b6326e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -312,8 +312,12 @@ static int read_counter(struct perf_evsel *counter) struct perf_counts_values *count; count = perf_counts(counter->counts, cpu, thread); - if (perf_evsel__read(counter, cpu, thread, count)) + if (perf_evsel__read(counter, cpu, thread, count)) { + counter->counts->scaled = -1; + perf_counts(counter->counts, cpu, thread)->ena = 0; + perf_counts(counter->counts, cpu, thread)->run = 0; return -1; + } if (STAT_RECORD) { if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { @@ -338,12 +342,14 @@ static int read_counter(struct perf_evsel *counter) static void read_counters(void) { struct perf_evsel *counter; + int ret; evlist__for_each_entry(evsel_list, counter) { - if (read_counter(counter)) + ret = read_counter(counter); + if (ret) pr_debug("failed to read counter %s\n", counter->name); - if (perf_stat_process_counter(&stat_config, counter)) + if (ret == 0 && perf_stat_process_counter(&stat_config, counter)) pr_warning("failed to process counter %s\n", counter->name); } } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 8f5d86bd3501..3779b9f3f134 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1239,7 +1239,7 @@ int perf_evsel__read(struct
perf_evsel *evsel, int cpu, int thread, if (FD(evsel, cpu, thread) < 0) return -EINVAL; - if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) < 0) + if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) <= 0) return -errno; return 0; @@ -1257,7 +1257,7 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0) return -ENOMEM; - if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) + if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0) return -errno; perf_evsel__compute_deltas(evsel, cpu, thread, &count); -- cgit v1.2.3 From 739cf305512cb852e852099f9e12dd66bf4df076 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 13 Apr 2017 11:32:12 +0530 Subject: perf trace: Add usage of --no-syscalls in man page perf trace supports --no-syscalls option but it's not listed in the man page. (Though, I see an example using --no-syscalls in EXAMPLES section.) Committer note: The --no-syscalls option tells 'perf trace' not to automagically ask for raw_syscalls:sys_{enter,exit} to then format it in a strace like way. 
This become more used as 'perf trace' got support for arbitrary events, such as tracepoints, so more and more we use: # perf trace --no-syscalls -e nmi:* 0.000 nmi:nmi_handler:perf_event_nmi_handler() delta_ns: 36649 handled: 1) 0.019 nmi:nmi_handler:nmi_cpu_backtrace_handler() delta_ns: 2907 handled: 0) 0.676 nmi:nmi_handler:perf_event_nmi_handler() delta_ns: 9401 handled: 1) 0.680 nmi:nmi_handler:nmi_cpu_backtrace_handler() delta_ns: 288 handled: 0) 0.701 nmi:nmi_handler:perf_event_nmi_handler() delta_ns: 4977 handled: 1) 0.703 nmi:nmi_handler:nmi_cpu_backtrace_handler() delta_ns: 67 handled: 0) 0.736 nmi:nmi_handler:perf_event_nmi_handler() delta_ns: 8549 handled: 1) ^C# Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Alexis Berlemont Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1492063332-5745-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index afd728672b6f..c1e3288a2dfb 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -123,7 +123,8 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. major or all pagefaults. Default value is maj. --syscalls:: - Trace system calls. This options is enabled by default. + Trace system calls. This options is enabled by default, disable with + --no-syscalls. --call-graph [mode,type,min[,limit],order[,key][,branch]]:: Setup and enable call-graph (stack chain/backtrace) recording. -- cgit v1.2.3 From e5e992a7c184c2121adf37bdf292a516af81dbbb Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Wed, 12 Apr 2017 10:07:45 -0700 Subject: perf tools: Disable JVMTI if no ELF support available The build of JVMTI depends on LIBELF (-lelf). 
Make Makefile.config check this dependency and notify user when not present. v2: Comma nitpicking. Signed-off-by: David Carrillo-Cisneros Tested-by: Kim Phillips Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170412170745.26620-1-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index cfd6015229a2..8354d04b392f 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -274,6 +274,7 @@ ifdef NO_LIBELF NO_LIBUNWIND := 1 NO_LIBDW_DWARF_UNWIND := 1 NO_LIBBPF := 1 + NO_JVMTI := 1 else ifeq ($(feature-libelf), 0) ifeq ($(feature-glibc), 1) @@ -283,7 +284,7 @@ else LIBC_SUPPORT := 1 endif ifeq ($(LIBC_SUPPORT),1) - msg := $(warning No libelf found, disables 'probe' tool and BPF support in 'perf record', please install libelf-dev, libelf-devel or elfutils-libelf-devel); + msg := $(warning No libelf found. Disables 'probe' tool, jvmti and BPF support in 'perf record'. Please install libelf-dev, libelf-devel or elfutils-libelf-devel); NO_LIBELF := 1 NO_DWARF := 1 @@ -291,6 +292,7 @@ else NO_LIBUNWIND := 1 NO_LIBDW_DWARF_UNWIND := 1 NO_LIBBPF := 1 + NO_JVMTI := 1 else ifneq ($(filter s% -static%,$(LDFLAGS),),) msg := $(error No static glibc found, please install glibc-static); -- cgit v1.2.3 From 9961aa665b70e47d6c80141c4a2482266010f246 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Tue, 11 Apr 2017 23:49:14 -0700 Subject: tools build: Fix feature detection redefinition of build flags This change is a follow up of https://lkml.org/lkml/2017/2/2/16 The patch above avoided redefining CC, CXX and PKG_CONFIG in feature detection. The patch was not merged due to an unsolved concern with the -MD flag.
Later, commit c8c188679ccf ("tools build: Use the same CC for feature detection and actual build") did the change for CC and CXX but not PKG_CONFIG. This patch makes PKG_CONFIG consistent with CC and CXX and moves the -MD to CFLAGS, as suggested by Jiri in the thread above. Signed-off-by: David Carrillo-Cisneros Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170412064919.92449-3-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 523e587fe05f..e35e4e5ad192 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -53,17 +53,17 @@ FILES= \ FILES := $(addprefix $(OUTPUT),$(FILES)) -CC ?= $(CROSS_COMPILE)gcc -MD -CXX ?= $(CROSS_COMPILE)g++ -MD -PKG_CONFIG := $(CROSS_COMPILE)pkg-config +CC ?= $(CROSS_COMPILE)gcc +CXX ?= $(CROSS_COMPILE)g++ +PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config LLVM_CONFIG ?= llvm-config all: $(FILES) -__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS) +__BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS) BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1 -__BUILDXX = $(CXX) $(CXXFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS) +__BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS) BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1 ############################### -- cgit v1.2.3 From 570eda03213a216a88566c0da7bfe175832cfaa4 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Tue, 11 Apr 2017 23:49:16 -0700 Subject: perf util: Hint missing file when tool tips fail to load Besides memory allocation failure, tips.txt may fail to load because the file is not found 
(a more likely cause). Communicate that to the user in the tips failure warning. Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170412064919.92449-5-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index d8b45cea54d0..6097d87429e2 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -696,7 +696,8 @@ const char *perf_tip(const char *dirpath) tips = strlist__new("tips.txt", &conf); if (tips == NULL) - return errno == ENOENT ? NULL : "Tip: get more memory! ;-p"; + return errno == ENOENT ? NULL : + "Tip: check path of tips.txt or get more memory! ;-p"; if (strlist__nr_entries(tips) == 0) goto out; -- cgit v1.2.3 From 16eb81365b70266c17d1141ef9b32c3110b22d17 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Tue, 11 Apr 2017 23:49:19 -0700 Subject: Revert "perf tools: Fix include of linux/mman.h" In https://lkml.org/lkml/2017/2/2/16 I reported a build error that I believed was caused by wrong uapi includes. The symptom was fixed by Arnaldo in: commit 2f7db5557994 ("perf tools: Fix include of linux/mman.h") but I was wrong in attributing the problem to the uapi include. The root cause was that I was using ARCH=x86_64, hence using the wrong uapi include path. This explains why no one else ran into this build problem.
Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170412064919.92449-8-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 8255a26ac255..4d7e65fa9d86 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,5 +1,5 @@ #include <linux/types.h> -#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ +#include <linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ #include <api/fs/fs.h> #include "event.h" #include "debug.h" -- cgit v1.2.3 From fd583ad1563bec5f00140e1f2444adbcd331caad Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 4 Apr 2017 15:14:06 -0400 Subject: perf/x86: Fix spurious NMI with PEBS Load Latency event Spurious NMIs will be observed with the following command: while :; do perf record -bae "cpu/umask=0x01,event=0xcd,ldlat=0x80/pp" -e "cpu/umask=0x03,event=0x0/" -e "cpu/umask=0x02,event=0x0/" -e cycles,branches,cache-misses -e cache-references -- sleep 10 done The bug was introduced by commit: 8077eca079a2 ("perf/x86/pebs: Add workaround for broken OVFL status on HSW+") That commit clears the status bits for the counters used for PEBS events, by masking the whole 64 bits pebs_enabled. However, only the low 32 bits of both status and pebs_enabled are reserved for PEBS-able counters. For status, bits 32-34 are the fixed counter overflow bits. For pebs_enabled, bits 32-34 are for PEBS Load Latency. In the test case, the PEBS Load Latency event and fixed counter event could overflow at the same time. The fixed counter overflow bit will be cleared by mistake. Once it is cleared, the fixed counter overflow will never be processed, which finally triggers a spurious NMI.
Correct the PEBS enabled mask by ignoring the non-PEBS bits. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 8077eca079a2 ("perf/x86/pebs: Add workaround for broken OVFL status on HSW+") Link: http://lkml.kernel.org/r/1491333246-3965-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 2 +- arch/x86/events/intel/ds.c | 2 +- arch/x86/events/perf_event.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 4244bed77824..a6d91d4e37a1 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2151,7 +2151,7 @@ again: * counters from the GLOBAL_STATUS mask and we always process PEBS * events via drain_pebs(). */ - status &= ~cpuc->pebs_enabled; + status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK); /* * PEBS overflow sets bit 62 in the global status register diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 9dfeeeca0ea8..c6d23ffe422d 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1222,7 +1222,7 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit) /* clear non-PEBS bit and re-check */ pebs_status = p->status & cpuc->pebs_enabled; - pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1; + pebs_status &= PEBS_COUNTER_MASK; if (pebs_status == (1 << bit)) return at; } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index bcbb1d2ae10b..be3d36254040 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -79,6 +79,7 @@ struct amd_nb { /* The maximal number of PEBS events: */ #define MAX_PEBS_EVENTS 8 +#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1) /* * Flags PEBS can handle without an PMI. 
-- cgit v1.2.3 From 4c38c8f5d2c8c48dcf502cd039f30cb1f82fe63c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 12:49:58 -0300 Subject: perf unwind arm64: Add missing errno.h header Since it uses EINVAL unconditionally, it needs to also unconditionally include errno.h. Detected when recent changes made errno.h not be included by chance when tools/perf/arch/arm64/util/unwind-libunwind.c gets included by tools/perf/util/libunwind/arm64.c. Putting this changeset just before that change so that we don't lose bisectability on arm64. Cc: Adrian Hunter Cc: David Ahern Cc: Jean Pihet Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 8ab596afb97b ("perf tools ARM64: Wire up perf_regs and unwind support") Link: http://lkml.kernel.org/n/tip-60zjev2o1locp5ivod38epa2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/unwind-libunwind.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c index c116b713f7f7..b415dfdbccca 100644 --- a/tools/perf/arch/arm64/util/unwind-libunwind.c +++ b/tools/perf/arch/arm64/util/unwind-libunwind.c @@ -1,6 +1,6 @@ +#include #ifndef REMOTE_UNWIND_LIBUNWIND -#include #include #include "perf_regs.h" #include "../../util/unwind.h" -- cgit v1.2.3 From 7909675daf55e8222b40e5e162bbc9d633bd5bac Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Apr 2017 11:08:20 -0300 Subject: perf tools: Remove FLEX_ARRAY definition We rely on symbol->name[0] since the beginning of tools/perf/, never having received any complaint about it, also all the containers build perf just fine, so remove this git codebase remnant. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-jsjpgojut8e22o2gtz83augk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-help.c | 4 ++-- tools/perf/util/util.h | 22 ---------------------- 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 1eec96a0fa67..9730fd409f3b 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -15,13 +15,13 @@ static struct man_viewer_list { struct man_viewer_list *next; - char name[FLEX_ARRAY]; + char name[0]; } *man_viewer_list; static struct man_viewer_info_list { struct man_viewer_info_list *next; const char *info; - char name[FLEX_ARRAY]; + char name[0]; } *man_viewer_info_list; enum help_format { diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 7cf5752b38fd..df13658377e4 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -1,28 +1,6 @@ #ifndef GIT_COMPAT_UTIL_H #define GIT_COMPAT_UTIL_H -#ifndef FLEX_ARRAY -/* - * See if our compiler is known to support flexible array members. - */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) -# define FLEX_ARRAY /* empty */ -#elif defined(__GNUC__) -# if (__GNUC__ >= 3) -# define FLEX_ARRAY /* empty */ -# else -# define FLEX_ARRAY 0 /* older GNU extension */ -# endif -#endif - -/* - * Otherwise, default to safer but a bit wasteful traditional style - */ -#ifndef FLEX_ARRAY -# define FLEX_ARRAY 1 -#endif -#endif - #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) #ifdef __GNUC__ -- cgit v1.2.3 From 379d61b1c7d42512cded04d372f15a7e725db9e1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Apr 2017 11:23:41 -0300 Subject: tools include: Introduce linux/bug.h, from the kernel sources With just what we will need in the upcoming changesets, the BUILD_BUG_ON_ZERO() definition. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-lw8zg7x6ttwcvqhp90mwe3vo@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/bug.h | 10 ++++++++++ tools/perf/MANIFEST | 1 + 2 files changed, 11 insertions(+) create mode 100644 tools/include/linux/bug.h diff --git a/tools/include/linux/bug.h b/tools/include/linux/bug.h new file mode 100644 index 000000000000..8e4a4f49135d --- /dev/null +++ b/tools/include/linux/bug.h @@ -0,0 +1,10 @@ +#ifndef _TOOLS_PERF_LINUX_BUG_H +#define _TOOLS_PERF_LINUX_BUG_H + +/* Force a compilation error if condition is true, but also produce a + result (of value 0 and type size_t), so the expression can be used + e.g. in a structure initializer (or where-ever else comma expressions + aren't permitted). */ +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) + +#endif /* _TOOLS_PERF_LINUX_BUG_H */ diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 89018c7311a4..a29da46d180f 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -64,6 +64,7 @@ tools/include/linux/bitops.h tools/include/linux/compiler.h tools/include/linux/compiler-gcc.h tools/include/linux/coresight-pmu.h +tools/include/linux/bug.h tools/include/linux/filter.h tools/include/linux/hash.h tools/include/linux/kernel.h -- cgit v1.2.3 From f6441aff8946f7fd6ab730d7eb9eba18a9ebeba4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Apr 2017 11:25:00 -0300 Subject: tools include: Adopt __same_type() and __must_be_array() from the kernel Will be used to adopt the more stringent version of ARRAY_SIZE(), the one in the kernel sources. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-d85dpvay1hoqscpezlntyd8x@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler-gcc.h | 3 +++ tools/include/linux/compiler.h | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 616935f1ff56..825d44f89a29 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -16,3 +16,6 @@ #if GCC_VERSION >= 40300 # define __compiletime_error(message) __attribute__((error(message))) #endif /* GCC_VERSION >= 40300 */ + +/* &a[0] degrades to a pointer: a different type from an array */ +#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index c9e65e8faacd..23299d7e7160 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -17,6 +17,11 @@ # define __always_inline inline __attribute__((always_inline)) #endif +/* Are two types/vars the same type (ignoring qualifiers)? */ +#ifndef __same_type +# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) +#endif + #ifdef __ANDROID__ /* * FIXME: Big hammer to get rid of tons of: -- cgit v1.2.3 From 8607c1ee734d12f62c6a46abef13a510e25a1839 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Apr 2017 11:29:26 -0300 Subject: tools include: Move ARRAY_SIZE() to linux/kernel.h To match the kernel, then look for places redefining it to make it use this version, which checks that its parameter is an array at build time. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-txlcf1im83bcbj6kh0wxmyy8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/kernel.h | 3 +++ tools/perf/util/util.h | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index adb4d0147755..73ccc48126bb 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -4,6 +4,7 @@ #include #include #include +#include #ifndef UINT_MAX #define UINT_MAX (~0U) @@ -76,6 +77,8 @@ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); int scnprintf(char * buf, size_t size, const char * fmt, ...); +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) + /* * This looks more complex than it should be. But we need to * get the type for the ~ right in round_down (it needs to be diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index df13658377e4..3eccd6f21d17 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -1,8 +1,6 @@ #ifndef GIT_COMPAT_UTIL_H #define GIT_COMPAT_UTIL_H -#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) - #ifdef __GNUC__ #define TYPEOF(x) (__typeof__(x)) #else -- cgit v1.2.3 From 877a7a11050ee4d465364c57f8fbf78f6b1a2559 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Apr 2017 11:39:06 -0300 Subject: perf tools: Add include where ARRAY_SIZE() is used To pave the way for further cleanups where linux/kernel.h may stop being included in some header. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-qqxan6tfsl6qx3l0v3nwgjvk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 1 + tools/perf/builtin-help.c | 1 + tools/perf/builtin-kmem.c | 1 + tools/perf/builtin-kvm.c | 1 + tools/perf/builtin-lock.c | 1 + tools/perf/builtin-sched.c | 1 + tools/perf/builtin-script.c | 1 + tools/perf/builtin-timechart.c | 1 + tools/perf/builtin-trace.c | 1 + tools/perf/perf.c | 1 + tools/perf/tests/bpf.c | 1 + tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/clang.c | 1 + tools/perf/tests/code-reading.c | 1 + tools/perf/tests/dso-data.c | 1 + tools/perf/tests/evsel-roundtrip-name.c | 1 + tools/perf/tests/hists_common.c | 1 + tools/perf/tests/hists_cumulate.c | 1 + tools/perf/tests/hists_filter.c | 1 + tools/perf/tests/hists_link.c | 1 + tools/perf/tests/hists_output.c | 1 + tools/perf/tests/is_printable_array.c | 1 + tools/perf/tests/mmap-basic.c | 1 + tools/perf/tests/parse-events.c | 2 +- tools/perf/tests/parse-no-sample-id-all.c | 1 + tools/perf/tests/pmu.c | 1 + tools/perf/tests/sample-parsing.c | 1 + tools/perf/ui/browsers/annotate.c | 1 + tools/perf/ui/tui/setup.c | 1 + tools/perf/util/annotate.c | 1 + tools/perf/util/bpf-loader.c | 1 + tools/perf/util/data-convert-bt.c | 1 + tools/perf/util/dso.c | 1 + tools/perf/util/dwarf-regs.c | 1 + tools/perf/util/event.c | 1 + tools/perf/util/evlist.h | 1 + tools/perf/util/mem-events.c | 1 + tools/perf/util/perf-hooks.c | 1 + tools/perf/util/session.h | 1 + tools/perf/util/sort.c | 1 + tools/perf/util/symbol.c | 1 + tools/perf/util/thread.c | 1 + 42 files changed, 42 insertions(+), 1 deletion(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 1fe43bd5a012..27ebd50ee619 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/tools/perf/builtin-help.c 
b/tools/perf/builtin-help.c index 9730fd409f3b..1f18385907f5 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -12,6 +12,7 @@ #include #include #include "util/debug.h" +#include static struct man_viewer_list { struct man_viewer_list *next; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 515587825af4..8f0b94563936 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -20,6 +20,7 @@ #include "util/debug.h" +#include #include #include #include diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 38b409173693..a7d7f4c6052e 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -24,6 +24,7 @@ #include #endif +#include #include #include #include diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index b686fb6759da..f74dd869f88b 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -26,6 +26,7 @@ #include #include +#include static struct perf_session *session; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 79833e226789..5cd60882c8d9 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -22,6 +22,7 @@ #include "util/debug.h" +#include #include #include #include diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 2dab70fba2ba..36b076653d16 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -24,6 +24,7 @@ #include "util/thread-stack.h" #include "util/time-utils.h" #include +#include #include #include #include "asm/bug.h" diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index fafdb44b8bcb..822c8d39ca1d 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -23,6 +23,7 @@ #include "util/cache.h" #include "util/evlist.h" #include "util/evsel.h" +#include #include #include #include "util/symbol.h" diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 
fce278d5fada..2792ed1fae43 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 9dc346f2b255..07ee1352f4ed 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -22,6 +22,7 @@ #include #include #include +#include const char perf_usage_string[] = "perf [--version] [--help] [OPTIONS] COMMAND [ARGS]"; diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 1a04fe77487d..67fe5eeff021 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include "tests.h" diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index e6d7876c94c2..f029737ad255 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -13,6 +13,7 @@ #include "color.h" #include #include "symbol.h" +#include static bool dont_fork; diff --git a/tools/perf/tests/clang.c b/tools/perf/tests/clang.c index f853e242a86c..c5bb2203f5a9 100644 --- a/tools/perf/tests/clang.c +++ b/tools/perf/tests/clang.c @@ -2,6 +2,7 @@ #include "debug.h" #include "util.h" #include "c++/clang-c.h" +#include static struct { int (*func)(void); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index d1f693041324..99dbd5ae294a 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index 13725e09ba22..46ea2e061b86 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index 60926a1f6fd7..9221d2732cc4 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ 
b/tools/perf/tests/evsel-roundtrip-name.c @@ -3,6 +3,7 @@ #include "parse-events.h" #include "tests.h" #include "debug.h" +#include static int perf_evsel__roundtrip_cache_name_test(void) { diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index 6b21746d6eec..f6c580e3ed84 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -7,6 +7,7 @@ #include "util/machine.h" #include "util/thread.h" #include "tests/hists_common.h" +#include static struct { u32 pid; diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 9fd54b79a788..70918b986568 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -9,6 +9,7 @@ #include "util/parse-events.h" #include "tests/tests.h" #include "tests/hists_common.h" +#include struct sample { u32 pid; diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 62efb14f3a5a..f171b2da4899 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -9,6 +9,7 @@ #include "util/parse-events.h" #include "tests/tests.h" #include "tests/hists_common.h" +#include struct sample { u32 pid; diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index eddc7407ff8a..1bd26d23c2fc 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -9,6 +9,7 @@ #include "thread.h" #include "parse-events.h" #include "hists_common.h" +#include struct sample { u32 pid; diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index 63c5efaba1b5..cdf0dde5fe97 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -9,6 +9,7 @@ #include "util/parse-events.h" #include "tests/tests.h" #include "tests/hists_common.h" +#include struct sample { u32 cpu; diff --git a/tools/perf/tests/is_printable_array.c b/tools/perf/tests/is_printable_array.c index 42e13393e502..a008e5c2d980 100644 --- 
a/tools/perf/tests/is_printable_array.c +++ b/tools/perf/tests/is_printable_array.c @@ -1,4 +1,5 @@ #include +#include #include "tests.h" #include "debug.h" #include "util.h" diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 634bce9caebd..a7cb80805993 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -7,6 +7,7 @@ #include "cpumap.h" #include "tests.h" #include +#include /* * This test will generate random numbers of calls to some getpid syscalls, diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 1dc838014422..05621748aead 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1,4 +1,3 @@ - #include "parse-events.h" #include "evsel.h" #include "evlist.h" @@ -6,6 +5,7 @@ #include "tests.h" #include "debug.h" #include "util.h" +#include #include #include diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c index 65dcf48a92fb..c6207db09f12 100644 --- a/tools/perf/tests/parse-no-sample-id-all.c +++ b/tools/perf/tests/parse-no-sample-id-all.c @@ -1,3 +1,4 @@ +#include #include #include diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 1e2ba2602930..de6498dc4cbb 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -2,6 +2,7 @@ #include "pmu.h" #include "util.h" #include "tests.h" +#include /* Simulated format definitions. 
*/ static struct test_format { diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 5f23710b9fee..73b5e47ef20b 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -1,4 +1,5 @@ #include +#include #include #include "util.h" diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ba36aac340bc..9adce112d255 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -10,6 +10,7 @@ #include "../../util/evsel.h" #include "../../util/config.h" #include +#include struct disasm_line_samples { double percent; diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 4ea2ba861fc2..d9350a1da48b 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -1,6 +1,7 @@ #include #include #include +#include #ifdef HAVE_BACKTRACE_SUPPORT #include #endif diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 30498a2d4a6f..45df4a38811a 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -22,6 +22,7 @@ #include #include #include +#include #include const char *disassembler_style; diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index bc6bc7062eb4..27af9d62d899 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "perf.h" #include "debug.h" diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 89ece2445713..b7917519f6cc 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 42db00d78573..385c82e12473 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1,4 +1,5 @@ #include +#include #include #include #include "symbol.h" diff --git 
a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index 62bc4a86a970..c708395b3cb6 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -8,6 +8,7 @@ #include #include #include +#include #ifndef EM_AARCH64 #define EM_AARCH64 183 /* ARM 64 bit */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 4d7e65fa9d86..a0f59f69f46a 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,3 +1,4 @@ +#include #include #include /* To get things like MAP_HUGETLB even on older libc headers */ #include diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 39942995f537..3fed4fb2e866 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -1,6 +1,7 @@ #ifndef __PERF_EVLIST_H #define __PERF_EVLIST_H 1 +#include #include #include #include diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 1d4ab53c60ca..c56d52f90b54 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "mem-events.h" #include "debug.h" #include "symbol.h" diff --git a/tools/perf/util/perf-hooks.c b/tools/perf/util/perf-hooks.c index cb368306b12b..d55092964da2 100644 --- a/tools/perf/util/perf-hooks.c +++ b/tools/perf/util/perf-hooks.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "util/util.h" #include "util/debug.h" #include "util/perf-hooks.h" diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 4bd758553450..1ffae42f76a1 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -9,6 +9,7 @@ #include "thread.h" #include "data.h" #include "ordered-events.h" +#include #include #include diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 73f3ec1cf2a0..13b9af1d1b45 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -7,6 +7,7 @@ #include "evlist.h" #include #include "mem-events.h" +#include regex_t parent_regex; const char 
default_parent_pattern[] = "^sys_|^do_page_fault"; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 9b4d8ba22fed..619e3eb44c50 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index dcdb87a5d0a1..e8ce6abc5321 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "session.h" #include "thread.h" #include "thread-stack.h" -- cgit v1.2.3 From 0061459744cb985ef31a484bcd9b2fc3cfd01c1b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Apr 2017 11:58:55 -0300 Subject: objtool: Drop ARRAY_SIZE() definition, tools/include/linux/kernel.h has it now And with the goodies present in the kernel.h counterpart, i.e. checking that the parameter is an array at build time. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-roiwxwgwgld4kygn65if60wa@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/objtool/builtin-check.c | 3 +-- tools/objtool/objtool.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 066086dd59a8..282a60368b14 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -36,8 +36,7 @@ #include "warn.h" #include - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#include #define STATE_FP_SAVED 0x1 #define STATE_FP_SETUP 0x2 diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 46c326db4f46..ecc5b1b5d15d 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -31,11 +31,10 @@ #include #include #include +#include #include "builtin.h" -#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) - struct cmd_struct { const char *name; int (*fn)(int, const char **); -- cgit 
v1.2.3 From 68289cbd83eaa20faef7cc818121bc8e769065de Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Apr 2017 12:01:36 -0300 Subject: tools include: Drop ARRAY_SIZE() definition from linux/hashtable.h As tools/include/linux/kernel.h has it now, with the goodies present in the kernel.h counterpart, i.e. checking that the parameter is an array at build time. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-v0b41ivu6z6dyugbq9ffa9ez@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/hashtable.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/include/linux/hashtable.h b/tools/include/linux/hashtable.h index c65cc0aa2659..251eabf2a05e 100644 --- a/tools/include/linux/hashtable.h +++ b/tools/include/linux/hashtable.h @@ -13,10 +13,6 @@ #include #include -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - #define DEFINE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } -- cgit v1.2.3 From b640985fe40c7446fa5db467e747fbac5c081c86 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Apr 2017 12:13:25 -0300 Subject: perf tools: Remove unused macros from util.h TYPEOF(), for instance, was only used by MSB() that wasn't used at all, besides typeof() is used in many places, should be the preferred way. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-golox8oa2w1oq28snki14z6s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 3eccd6f21d17..2abd4f783ffd 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -1,18 +1,6 @@ #ifndef GIT_COMPAT_UTIL_H #define GIT_COMPAT_UTIL_H -#ifdef __GNUC__ -#define TYPEOF(x) (__typeof__(x)) -#else -#define TYPEOF(x) -#endif - -#define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (sizeof(x) * 8 - (bits)))) -#define HAS_MULTI_BITS(i) ((i) & ((i) - 1)) /* checks if an integer has more than 1 bit set */ - -/* Approximation of the length of the decimal representation of this type. */ -#define decimal_length(x) ((int)(sizeof(x) * 2.56 + 0.5) + 1) - #define _ALL_SOURCE 1 #define _BSD_SOURCE 1 /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */ -- cgit v1.2.3 From fd20e8111cc0e51ce12fb8ee17c863088fe95065 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Apr 2017 15:23:08 -0300 Subject: perf tools: Including missing inttypes.h header Needed to use the PRI[xu](32,64) formatting macros. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-wkbho8kaw24q67dd11q0j39f@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 1 + tools/perf/bench/numa.c | 1 + tools/perf/builtin-c2c.c | 1 + tools/perf/builtin-diff.c | 1 + tools/perf/builtin-kallsyms.c | 1 + tools/perf/builtin-kmem.c | 1 + tools/perf/builtin-kvm.c | 1 + tools/perf/builtin-lock.c | 1 + tools/perf/builtin-mem.c | 1 + tools/perf/builtin-record.c | 1 + tools/perf/builtin-report.c | 1 + tools/perf/builtin-sched.c | 1 + tools/perf/builtin-script.c | 1 + tools/perf/builtin-stat.c | 1 + tools/perf/builtin-timechart.c | 1 + tools/perf/builtin-trace.c | 1 + tools/perf/tests/attr.c | 1 + tools/perf/tests/code-reading.c | 1 + tools/perf/tests/dwarf-unwind.c | 1 + tools/perf/tests/event-times.c | 1 + tools/perf/tests/hists_common.c | 1 + tools/perf/tests/mmap-basic.c | 1 + tools/perf/tests/mmap-thread-lookup.c | 1 + tools/perf/tests/openat-syscall-all-cpus.c | 1 + tools/perf/tests/openat-syscall.c | 1 + tools/perf/tests/perf-record.c | 1 + tools/perf/tests/sample-parsing.c | 1 + tools/perf/tests/sw-clock.c | 1 + tools/perf/tests/unit_number__scnprintf.c | 1 + tools/perf/tests/vmlinux-kallsyms.c | 1 + tools/perf/ui/browsers/annotate.c | 1 + tools/perf/ui/browsers/hists.c | 1 + tools/perf/ui/gtk/annotate.c | 2 +- tools/perf/ui/hist.c | 1 + tools/perf/util/annotate.c | 1 + tools/perf/util/auxtrace.c | 1 + tools/perf/util/callchain.c | 1 + tools/perf/util/data-convert-bt.c | 1 + tools/perf/util/debug.c | 1 + tools/perf/util/dwarf-aux.c | 1 + tools/perf/util/event.c | 1 + tools/perf/util/evlist.c | 1 + tools/perf/util/evsel.c | 1 + tools/perf/util/evsel_fprintf.c | 1 + tools/perf/util/header.c | 1 + tools/perf/util/intel-bts.c | 1 + tools/perf/util/intel-pt.c | 1 + tools/perf/util/machine.c | 1 + tools/perf/util/ordered-events.c | 1 + tools/perf/util/probe-event.c | 1 + tools/perf/util/probe-finder.c 
| 1 + tools/perf/util/scripting-engines/trace-event-perl.c | 1 + tools/perf/util/scripting-engines/trace-event-python.c | 1 + tools/perf/util/session.c | 1 + tools/perf/util/sort.c | 1 + tools/perf/util/srcline.c | 1 + tools/perf/util/stat.c | 1 + tools/perf/util/unwind-libunwind-local.c | 1 + tools/perf/util/util.c | 1 + 59 files changed, 59 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index 5c76cc83186a..a8e37f3148c2 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 27ebd50ee619..27de0c8c5c19 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -4,6 +4,7 @@ * numa: Simulate NUMA-sensitive workload and measure their NUMA performance */ +#include /* For the CLR_() macros */ #include diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 70c2c773a2b8..cc8156a969ac 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -9,6 +9,7 @@ * Dick Fowles * Joe Mario */ +#include #include #include #include diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index cd2605d86984..09be77b13788 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -19,6 +19,7 @@ #include "util/data.h" #include "util/config.h" +#include #include #include diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c index 8ff38c4eb2c0..bcfb363112d3 100644 --- a/tools/perf/builtin-kallsyms.c +++ b/tools/perf/builtin-kallsyms.c @@ -7,6 +7,7 @@ * * Released under the GPL v2. 
(and only v2, not any later version) */ +#include #include "builtin.h" #include #include diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 8f0b94563936..fa6bf1c39b65 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index a7d7f4c6052e..e82a6979327f 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index f74dd869f88b..6f93a6f0e268 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1,3 +1,4 @@ +#include #include "builtin.h" #include "perf.h" diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 643f4faac0d0..1ebc67390898 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -1,3 +1,4 @@ +#include #include "builtin.h" #include "perf.h" diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3191ab063852..65429d1b29c8 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -40,6 +40,7 @@ #include "util/perf-hooks.h" #include "asm/bug.h" +#include #include #include #include diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index c18158b83eb1..66a2f44518de 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -40,6 +40,7 @@ #include "util/auxtrace.h" #include +#include #include #include diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 5cd60882c8d9..d4677fb7f7f5 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 36b076653d16..c4e36b4743f4 100644 --- a/tools/perf/builtin-script.c +++ 
b/tools/perf/builtin-script.c @@ -30,6 +30,7 @@ #include "asm/bug.h" #include "util/mem-events.h" #include "util/dump-insn.h" +#include static char const *script_name; static char const *generate_script_lang; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 610225b6326e..b7199f029073 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 822c8d39ca1d..a24b229a785f 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -12,6 +12,7 @@ * of the License. */ +#include #include #include "builtin.h" diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 2792ed1fae43..aba5fac41529 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -39,6 +39,7 @@ #include "syscalltbl.h" #include "rb_resort.h" +#include #include /* FIXME: Still needed for audit_errno_to_name */ #include #include diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 88dc51f4c27b..bb2bc487f703 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -18,6 +18,7 @@ * permissions. All the event text files are stored there. 
*/ +#include #include #include #include diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 99dbd5ae294a..6effcfb7e0c1 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 1046491de4b2..dfe5c89e2049 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -1,5 +1,6 @@ #include #include +#include #include #include "tests.h" #include "debug.h" diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index 19ef77bd6eb4..8d376e155697 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -1,4 +1,5 @@ #include +#include #include #include "tests.h" #include "evlist.h" diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index f6c580e3ed84..00b8dc50f3db 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -1,3 +1,4 @@ +#include #include "perf.h" #include "util/debug.h" #include "util/symbol.h" diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index a7cb80805993..aba40eb4c56f 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -1,3 +1,4 @@ +#include /* For the CLR_() macros */ #include diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 0c5ce44f723f..a5ffb754f8c6 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index c8d9592eb142..eb7b1a6d507e 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -1,3 +1,4 @@ +#include /* For the CPU_* macros */ #include diff --git 
a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index d7414128d7fe..5964938d4b85 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -1,3 +1,4 @@ +#include #include #include #include "thread_map.h" diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 87893f3ba5f1..a8b6fdaf8df1 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -1,3 +1,4 @@ +#include /* For the CLR_() macros */ #include diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 73b5e47ef20b..bac5c3885b3b 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -1,4 +1,5 @@ #include +#include #include #include diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 4c9fd046d57b..29f11c4b3e60 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/tests/unit_number__scnprintf.c b/tools/perf/tests/unit_number__scnprintf.c index 623c2aa53c4a..f84cb70ee5e5 100644 --- a/tools/perf/tests/unit_number__scnprintf.c +++ b/tools/perf/tests/unit_number__scnprintf.c @@ -1,3 +1,4 @@ +#include #include #include #include "tests.h" diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 862b043e5924..8456175fc234 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -1,5 +1,6 @@ #include #include +#include #include #include "map.h" #include "symbol.h" diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 9adce112d255..2ed64124276f 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -9,6 +9,7 @@ #include "../../util/symbol.h" #include "../../util/evsel.h" #include "../../util/config.h" +#include #include #include diff --git a/tools/perf/ui/browsers/hists.c 
b/tools/perf/ui/browsers/hists.c index da24072bb76e..a2ea1fa15ae6 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 8c9308ac30b7..71359b898b67 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -3,7 +3,7 @@ #include "util/annotate.h" #include "util/evsel.h" #include "ui/helpline.h" - +#include enum { ANN_COL__PERCENT, diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 5d632dca672a..59addd52d9cd 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -1,3 +1,4 @@ +#include #include #include diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 45df4a38811a..4d4faf99d52d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -7,6 +7,7 @@ * Released under the GPL v2. (and only v2, not any later version) */ +#include #include "util.h" #include "ui/ui.h" #include "sort.h" diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 78bd632f144d..a81a402a7459 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -13,6 +13,7 @@ * */ +#include #include #include #include diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 2e5eff5abef0..0096d45a06b3 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -9,6 +9,7 @@ * */ +#include #include #include #include diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index b7917519f6cc..ef80221e0d9c 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -7,6 +7,7 @@ * Released under the GPL v2. 
(and only v2, not any later version) */ +#include #include #include #include diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 03eb81f30d0d..4d5df25f155a 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -2,6 +2,7 @@ #include "../perf.h" +#include #include #include #include diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 41e068e94349..5fb186d142f6 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -17,6 +17,7 @@ * */ +#include #include #include "util.h" #include "debug.h" diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index a0f59f69f46a..b761b0eb60af 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,3 +1,4 @@ +#include #include #include #include /* To get things like MAP_HUGETLB even on older libc headers */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 50420cd35446..cf27039df100 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -8,6 +8,7 @@ */ #include "util.h" #include +#include #include #include "cpumap.h" #include "thread_map.h" diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3779b9f3f134..99a13a63ff1f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 4ef5184819a0..8000f62d5d53 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 2ccc7f06db79..0371f3483ede 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1,3 +1,4 @@ +#include #include "util.h" #include #include diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 6c2eb5da4afc..471ed8b26a1c 100644 --- a/tools/perf/util/intel-bts.c +++ 
b/tools/perf/util/intel-bts.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index da20cd5612e9..bdd4a28c6cee 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -13,6 +13,7 @@ * */ +#include #include #include #include diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index dfc600446586..15b2a17cf76e 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1,3 +1,4 @@ +#include #include "callchain.h" #include "debug.h" #include "event.h" diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index e70e935b1841..df05be69cc9e 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index e4b889444447..e61b4b34a929 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -19,6 +19,7 @@ * */ +#include #include #include #include diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 57cd268d4275..9ddd7dad2e6e 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -19,6 +19,7 @@ * */ +#include #include #include #include diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index dff043a29589..2b12bdb3ce33 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -19,6 +19,7 @@ * */ +#include #include #include #include diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 783326cfbaa6..dd61213e7a3c 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -21,6 +21,7 @@ #include 
+#include #include #include #include diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 7b740a73e595..0695e08d2252 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 13b9af1d1b45..63ad5374f364 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1,3 +1,4 @@ +#include #include #include "sort.h" #include "hist.h" diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 778ccb5d99d1..ef192802edc9 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 0d51334a9b46..bbf30b2d1614 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -1,3 +1,4 @@ +#include #include #include "stat.h" #include "evlist.h" diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index bfb9b7987692..788f4d3c76f5 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 6097d87429e2..524bd3b9d98d 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -7,6 +7,7 @@ #ifdef HAVE_BACKTRACE_SUPPORT #include #endif +#include #include #include #include -- cgit v1.2.3 From c3dca1a1c0ec51f384515f7e6f3b4e289a1dea2f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Apr 2017 15:25:00 -0300 Subject: perf tools: Remove PRI[xu] macros from perf.h We get them from inttypes.h. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-qla4e4mwbf1oewafp1ee2etd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.h | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 2abd4f783ffd..593a24192924 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -50,25 +49,6 @@ extern const char *spaces; extern const char *dots; extern char buildid_dir[]; -/* On most systems would have given us this, but - * not on some systems (e.g. GNU/Hurd). - */ -#ifndef PATH_MAX -#define PATH_MAX 4096 -#endif - -#ifndef PRIuMAX -#define PRIuMAX "llu" -#endif - -#ifndef PRIu32 -#define PRIu32 "u" -#endif - -#ifndef PRIx32 -#define PRIx32 "x" -#endif - #ifndef PATH_SEP #define PATH_SEP ':' #endif -- cgit v1.2.3 From aa8cc2f6b5faae3511ea229846d8e54bd8df40d1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Apr 2017 15:36:40 -0300 Subject: perf tools: Replace STR() calls with __stringify() Both do the same thing, the latter is the one we get from linux/stringify.h, i.e. we now use the same function name/practice as the kernel sources. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-w2sxa5o4bfx7fjrd5mu4zmke@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/dwarf-regs.c | 4 ++-- tools/perf/arch/arm64/util/dwarf-regs.c | 3 ++- tools/perf/arch/powerpc/util/dwarf-regs.c | 5 +++-- tools/perf/util/cgroup.c | 5 +++-- tools/perf/util/jitdump.c | 3 ++- tools/perf/util/util.h | 3 --- 6 files changed, 12 insertions(+), 11 deletions(-) diff --git a/tools/perf/arch/arm/util/dwarf-regs.c b/tools/perf/arch/arm/util/dwarf-regs.c index 33ec5b339da8..8bb176a37990 100644 --- a/tools/perf/arch/arm/util/dwarf-regs.c +++ b/tools/perf/arch/arm/util/dwarf-regs.c @@ -9,6 +9,7 @@ */ #include +#include #include struct pt_regs_dwarfnum { @@ -16,10 +17,9 @@ struct pt_regs_dwarfnum { unsigned int dwarfnum; }; -#define STR(s) #s #define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} #define GPR_DWARFNUM_NAME(num) \ - {.name = STR(%r##num), .dwarfnum = num} + {.name = __stringify(%r##num), .dwarfnum = num} #define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} /* diff --git a/tools/perf/arch/arm64/util/dwarf-regs.c b/tools/perf/arch/arm64/util/dwarf-regs.c index 068b6189157b..aea610c292e6 100644 --- a/tools/perf/arch/arm64/util/dwarf-regs.c +++ b/tools/perf/arch/arm64/util/dwarf-regs.c @@ -11,6 +11,7 @@ #include #include #include /* for struct user_pt_regs */ +#include #include "util.h" struct pt_regs_dwarfnum { @@ -20,7 +21,7 @@ struct pt_regs_dwarfnum { #define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} #define GPR_DWARFNUM_NAME(num) \ - {.name = STR(%x##num), .dwarfnum = num} + {.name = __stringify(%x##num), .dwarfnum = num} #define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} #define DWARFNUM2OFFSET(index) \ (index * sizeof((struct user_pt_regs *)0)->regs[0]) diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c index 41bdf9530d82..98ac87052a74 100644 --- 
a/tools/perf/arch/powerpc/util/dwarf-regs.c +++ b/tools/perf/arch/powerpc/util/dwarf-regs.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "util.h" struct pt_regs_dwarfnum { @@ -24,10 +25,10 @@ struct pt_regs_dwarfnum { }; #define REG_DWARFNUM_NAME(r, num) \ - {.name = STR(%)STR(r), .dwarfnum = num, \ + {.name = __stringify(%)__stringify(r), .dwarfnum = num, \ .ptregs_offset = offsetof(struct pt_regs, r)} #define GPR_DWARFNUM_NAME(num) \ - {.name = STR(%gpr##num), .dwarfnum = num, \ + {.name = __stringify(%gpr##num), .dwarfnum = num, \ .ptregs_offset = offsetof(struct pt_regs, gpr[num])} #define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0} diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 86399eda3684..03347748f3fa 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -4,6 +4,7 @@ #include "evsel.h" #include "cgroup.h" #include "evlist.h" +#include int nr_cgroups; @@ -27,8 +28,8 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen) path_v1[0] = '\0'; path_v2[0] = '\0'; - while (fscanf(fp, "%*s %"STR(PATH_MAX)"s %"STR(PATH_MAX)"s %" - STR(PATH_MAX)"s %*d %*d\n", + while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %" + __stringify(PATH_MAX)"s %*d %*d\n", mountpoint, type, tokens) == 3) { if (!path_v1[0] && !strcmp(type, "cgroup")) { diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index c9a941ef0f6d..da6262dbe9e3 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "util.h" #include "event.h" @@ -181,7 +182,7 @@ jit_open(struct jit_buf_desc *jd, const char *name) jd->use_arch_timestamp); if (header.version > JITHEADER_VERSION) { - pr_err("wrong jitdump version %u, expected " STR(JITHEADER_VERSION), + pr_err("wrong jitdump version %u, expected " __stringify(JITHEADER_VERSION), header.version); goto error; } diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h 
index 593a24192924..aa35509464b5 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -180,9 +180,6 @@ struct perf_event_attr; void event_attr_init(struct perf_event_attr *attr); -#define _STR(x) #x -#define STR(x) _STR(x) - size_t hex_width(u64 v); int hex2u64(const char *ptr, u64 *val); -- cgit v1.2.3 From 28a9bb9621d9f8b4328048297cd306e09fbbf175 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Apr 2017 15:39:06 -0300 Subject: perf tools: Ditch unused PATH_SEP, STRIP_EXTENSION Should make sense for windows, where git is supported. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-lzxlhmqrizk72d0zcsreggy8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 10 ---------- tools/perf/util/util.h | 16 ---------------- 2 files changed, 26 deletions(-) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 07ee1352f4ed..94e9418aecb1 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -328,16 +328,6 @@ static void handle_internal_command(int argc, const char **argv) { const char *cmd = argv[0]; unsigned int i; - static const char ext[] = STRIP_EXTENSION; - - if (sizeof(ext) > 1) { - i = strlen(argv[0]) - strlen(ext); - if (i > 0 && !strcmp(argv[0] + i, ext)) { - char *argv0 = strdup(argv[0]); - argv[0] = cmd = argv0; - argv0[i] = '\0'; - } - } /* Turn "perf cmd --help" into "perf help cmd" */ if (argc > 1 && !strcmp(argv[1], "--help")) { diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index aa35509464b5..f26666d77677 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -49,22 +49,6 @@ extern const char *spaces; extern const char *dots; extern char buildid_dir[]; -#ifndef PATH_SEP -#define PATH_SEP ':' -#endif - -#ifndef STRIP_EXTENSION -#define STRIP_EXTENSION "" -#endif - -#ifndef has_dos_drive_prefix -#define has_dos_drive_prefix(path) 0 -#endif - -#ifndef is_dir_sep -#define is_dir_sep(c) ((c) == '/') -#endif 
- #ifdef __GNUC__ #define NORETURN __attribute__((__noreturn__)) #else -- cgit v1.2.3 From 3d689ed6099a1a11c38bb78aff7498e78e287e0b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Apr 2017 16:10:49 -0300 Subject: perf tools: Move sane ctype stuff from util.h to sane_ctype.h More stuff that came from git, out of the hodge-podge that is util.h Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-e3lana4gctz3ub4hn4y29hkw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/symbol/kallsyms.c | 1 + tools/perf/arch/common.c | 2 ++ tools/perf/builtin-kmem.c | 2 ++ tools/perf/builtin-sched.c | 2 ++ tools/perf/builtin-script.c | 2 ++ tools/perf/builtin-stat.c | 2 ++ tools/perf/builtin-top.c | 2 ++ tools/perf/builtin-trace.c | 2 ++ tools/perf/tests/code-reading.c | 3 ++- tools/perf/ui/browser.c | 1 + tools/perf/ui/browsers/hists.c | 2 ++ tools/perf/ui/browsers/map.c | 2 ++ tools/perf/ui/stdio/hist.c | 2 +- tools/perf/util/annotate.c | 2 ++ tools/perf/util/auxtrace.c | 5 ++-- tools/perf/util/build-id.c | 1 + tools/perf/util/config.c | 2 ++ tools/perf/util/cpumap.c | 2 ++ tools/perf/util/ctype.c | 2 +- tools/perf/util/data-convert-bt.c | 1 + tools/perf/util/debug.c | 2 ++ tools/perf/util/demangle-java.c | 2 ++ tools/perf/util/event.c | 1 + tools/perf/util/evsel.c | 2 ++ tools/perf/util/header.c | 2 ++ tools/perf/util/jitdump.c | 2 ++ tools/perf/util/machine.c | 4 ++- tools/perf/util/probe-event.c | 2 ++ tools/perf/util/probe-finder.h | 2 +- tools/perf/util/sane_ctype.h | 51 +++++++++++++++++++++++++++++++++++++ tools/perf/util/strfilter.c | 2 ++ tools/perf/util/string.c | 2 ++ tools/perf/util/symbol-elf.c | 3 ++- tools/perf/util/symbol.c | 1 + tools/perf/util/trace-event-parse.c | 3 ++- tools/perf/util/util.c | 2 ++ tools/perf/util/util.h | 46 --------------------------------- 37 files changed, 114 insertions(+), 55 deletions(-) create mode 100644 tools/perf/util/sane_ctype.h diff 
--git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c index 5e431077fcd6..d270ac00613d 100644 --- a/tools/lib/symbol/kallsyms.c +++ b/tools/lib/symbol/kallsyms.c @@ -1,3 +1,4 @@ +#include #include "symbol/kallsyms.h" #include #include diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index 886dd2aaff0d..837067f48a4c 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -4,6 +4,8 @@ #include "../util/util.h" #include "../util/debug.h" +#include "sane_ctype.h" + const char *const arm_triplets[] = { "arm-eabi-", "arm-linux-androideabi-", diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index fa6bf1c39b65..15754a492cad 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -27,6 +27,8 @@ #include #include +#include "sane_ctype.h" + static int kmem_slab; static int kmem_page; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index d4677fb7f7f5..aefab93d7d2f 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -34,6 +34,8 @@ #include #include +#include "sane_ctype.h" + #define PR_SET_NAME 15 /* Set process name */ #define MAX_CPUS 4096 #define COMM_LEN 20 diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c4e36b4743f4..5afd9a62360a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -32,6 +32,8 @@ #include "util/dump-insn.h" #include +#include "sane_ctype.h" + static char const *script_name; static char const *generate_script_lang; static bool debug_mode; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b7199f029073..5abef25f9983 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -74,6 +74,8 @@ #include #include +#include "sane_ctype.h" + #define DEFAULT_SEPARATOR " " #define CNTR_NOT_SUPPORTED "" #define CNTR_NOT_COUNTED "" diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a0c97c70ec81..ec3247db4826 100644 --- 
a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -72,6 +72,8 @@ #include #include +#include "sane_ctype.h" + static volatile int done; #define HEADER_LINE_NR 5 diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index aba5fac41529..0b00d8ac5226 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -51,6 +51,8 @@ #include #include +#include "sane_ctype.h" + #ifndef O_CLOEXEC # define O_CLOEXEC 02000000 #endif diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 6effcfb7e0c1..fe6aac82d54e 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include "parse-events.h" @@ -18,6 +17,8 @@ #include "tests.h" +#include "sane_ctype.h" + #define BUFSZ 1024 #define READLEN 128 diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 9e47ccbe07f1..73bdd07321c4 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -13,6 +13,7 @@ #include "helpline.h" #include "keysyms.h" #include "../color.h" +#include "sane_ctype.h" static int ui_browser__percent_color(struct ui_browser *browser, double percent, bool current) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index a2ea1fa15ae6..56f5c038689e 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -20,6 +20,8 @@ #include "map.h" #include "annotate.h" +#include "sane_ctype.h" + extern void hist_browser__init_hpp(void); static int perf_evsel_browser_title(struct hist_browser *browser, diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index 9ce142de536d..ffa5addf631d 100644 --- a/tools/perf/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c @@ -11,6 +11,8 @@ #include "../keysyms.h" #include "map.h" +#include "sane_ctype.h" + struct map_browser { struct ui_browser b; struct map *map; diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c 
index d52d5f64ea89..297a79c69b71 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -4,7 +4,7 @@ #include "../../util/hist.h" #include "../../util/sort.h" #include "../../util/evsel.h" - +#include "../../util/sane_ctype.h" static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) { diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 4d4faf99d52d..d69fdafba274 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -26,6 +26,8 @@ #include #include +#include "sane_ctype.h" + const char *disassembler_style; const char *objdump_path; static regex_t file_lineno; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a81a402a7459..0daf63b9ee3e 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -47,7 +46,6 @@ #include "cpumap.h" #include "thread_map.h" #include "asm/bug.h" -#include "symbol/kallsyms.h" #include "auxtrace.h" #include @@ -60,6 +58,9 @@ #include "intel-pt.h" #include "intel-bts.h" +#include "sane_ctype.h" +#include "symbol/kallsyms.h" + int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, struct auxtrace_mmap_params *mp, void *userpg, int fd) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 33af67530d30..3c0755563969 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -19,6 +19,7 @@ #include "vdso.h" #include "probe-file.h" +#include "sane_ctype.h" static bool no_buildid_cache; diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 7b01d59076d3..88783aa3dfc9 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -15,6 +15,8 @@ #include "util/llvm-utils.h" /* perf_llvm_config */ #include "config.h" +#include "sane_ctype.h" + #define MAXNAME (256) #define DEBUG_CACHE_DIR ".debug" diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 061018b42393..9d9ecb2430cc 
100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -8,6 +8,8 @@ #include #include "asm/bug.h" +#include "sane_ctype.h" + static int max_cpu_num; static int max_present_cpu_num; static int max_node_num; diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c index d4a5a21c2a7e..4b261c2ec0f1 100644 --- a/tools/perf/util/ctype.c +++ b/tools/perf/util/ctype.c @@ -3,7 +3,7 @@ * * No surprises, and works with signed and unsigned chars. */ -#include "util.h" +#include "sane_ctype.h" enum { S = GIT_SPACE, diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index ef80221e0d9c..c3277b32e917 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -29,6 +29,7 @@ #include "evsel.h" #include "machine.h" #include "config.h" +#include "sane_ctype.h" #define pr_N(n, fmt, ...) \ eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 4d5df25f155a..41aa7c63e037 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -16,6 +16,8 @@ #include "util.h" #include "target.h" +#include "sane_ctype.h" + int verbose; bool dump_trace = false, quiet = false; int debug_ordered_events; diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c index 3e6062ab2cdd..cb66d334f532 100644 --- a/tools/perf/util/demangle-java.c +++ b/tools/perf/util/demangle-java.c @@ -7,6 +7,8 @@ #include "demangle-java.h" +#include "sane_ctype.h" + enum { MODE_PREFIX = 0, MODE_CLASS = 1, diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index b761b0eb60af..f8a706ba72f2 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -12,6 +12,7 @@ #include "strlist.h" #include "thread.h" #include "thread_map.h" +#include "sane_ctype.h" #include "symbol/kallsyms.h" #include "asm/bug.h" #include "stat.h" diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 99a13a63ff1f..757f73c4fa95 100644 --- 
a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -31,6 +31,8 @@ #include "stat.h" #include "util/parse-branch-options.h" +#include "sane_ctype.h" + static struct { bool sample_id_all; bool exclude_guest; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 0371f3483ede..faf046fc398f 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -27,6 +27,8 @@ #include #include "asm/bug.h" +#include "sane_ctype.h" + /* * magic2 = "PERFILE2" * must be a numerical value to let the endianness diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index da6262dbe9e3..d6d25327bc92 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -26,6 +26,8 @@ #include "genelf.h" #include "../builtin.h" +#include "sane_ctype.h" + struct jit_buf_desc { struct perf_data_file *output; struct perf_session *session; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 15b2a17cf76e..f13f46a99b36 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -11,11 +11,13 @@ #include "thread.h" #include "vdso.h" #include -#include #include "unwind.h" #include "linux/hash.h" #include "asm/bug.h" +#include "sane_ctype.h" +#include + static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock); static void dsos__init(struct dsos *dsos) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index e61b4b34a929..7caba7003734 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -48,6 +48,8 @@ #include "probe-file.h" #include "session.h" +#include "sane_ctype.h" + #define PERFPROBE_GROUP "probe" bool probe_event_dry_run; /* Dry run flag */ diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 2956c5198652..27f061551012 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -2,9 +2,9 @@ #define _PROBE_FINDER_H #include -#include "util.h" #include "intlist.h" 
#include "probe-event.h" +#include "sane_ctype.h" #define MAX_PROBE_BUFFER 1024 #define MAX_PROBES 128 diff --git a/tools/perf/util/sane_ctype.h b/tools/perf/util/sane_ctype.h new file mode 100644 index 000000000000..4308c22c22ad --- /dev/null +++ b/tools/perf/util/sane_ctype.h @@ -0,0 +1,51 @@ +#ifndef _PERF_SANE_CTYPE_H +#define _PERF_SANE_CTYPE_H + +extern const char *graph_line; +extern const char *graph_dotted_line; +extern const char *spaces; +extern const char *dots; + +/* Sane ctype - no locale, and works with signed chars */ +#undef isascii +#undef isspace +#undef isdigit +#undef isxdigit +#undef isalpha +#undef isprint +#undef isalnum +#undef islower +#undef isupper +#undef tolower +#undef toupper + +extern unsigned char sane_ctype[256]; +#define GIT_SPACE 0x01 +#define GIT_DIGIT 0x02 +#define GIT_ALPHA 0x04 +#define GIT_GLOB_SPECIAL 0x08 +#define GIT_REGEX_SPECIAL 0x10 +#define GIT_PRINT_EXTRA 0x20 +#define GIT_PRINT 0x3E +#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) +#define isascii(x) (((x) & ~0x7f) == 0) +#define isspace(x) sane_istest(x,GIT_SPACE) +#define isdigit(x) sane_istest(x,GIT_DIGIT) +#define isxdigit(x) \ + (sane_istest(toupper(x), GIT_ALPHA | GIT_DIGIT) && toupper(x) < 'G') +#define isalpha(x) sane_istest(x,GIT_ALPHA) +#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) +#define isprint(x) sane_istest(x,GIT_PRINT) +#define islower(x) (sane_istest(x,GIT_ALPHA) && (x & 0x20)) +#define isupper(x) (sane_istest(x,GIT_ALPHA) && !(x & 0x20)) +#define tolower(x) sane_case((unsigned char)(x), 0x20) +#define toupper(x) sane_case((unsigned char)(x), 0) + +static inline int sane_case(int x, int high) +{ + if (sane_istest(x, GIT_ALPHA)) + x = (x & ~0x20) | high; + return x; +} + +#endif /* _PERF_SANE_CTYPE_H */ diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c index efb53772e0ec..c663c9153245 100644 --- a/tools/perf/util/strfilter.c +++ b/tools/perf/util/strfilter.c @@ -2,6 +2,8 @@ #include 
"string.h" #include "strfilter.h" +#include "sane_ctype.h" + /* Operators */ static const char *OP_and = "&"; /* Logical AND */ static const char *OP_or = "|"; /* Logical OR */ diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index e8feb142c9c9..e716a6e5bb19 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -1,6 +1,8 @@ #include "util.h" #include "linux/string.h" +#include "sane_ctype.h" + #define K 1024LL /* * perf_atoll() diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index d1a40bb642ff..e7ee47f7377a 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -10,8 +10,9 @@ #include "demangle-rust.h" #include "machine.h" #include "vdso.h" -#include #include "debug.h" +#include "sane_ctype.h" +#include #ifndef EM_AARCH64 #define EM_AARCH64 183 /* ARM 64 bit */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 619e3eb44c50..68f14d770083 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -19,6 +19,7 @@ #include "strlist.h" #include "intlist.h" #include "header.h" +#include "sane_ctype.h" #include #include diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index de0078e21408..746bbee645d9 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -21,13 +21,14 @@ #include #include #include -#include #include #include "../perf.h" #include "util.h" #include "trace-event.h" +#include "sane_ctype.h" + static int get_common_field(struct scripting_context *context, int *offset, int *size, const char *type) { diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 524bd3b9d98d..717541e72999 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -21,6 +21,8 @@ #include "callchain.h" #include "strlist.h" +#include "sane_ctype.h" + #define CALLCHAIN_PARAM_DEFAULT \ .mode = CHAIN_GRAPH_ABS, \ .min_percent = 0.5, \ diff --git a/tools/perf/util/util.h 
b/tools/perf/util/util.h index f26666d77677..f7e1ead50f47 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -43,10 +43,6 @@ #include #include "strlist.h" -extern const char *graph_line; -extern const char *graph_dotted_line; -extern const char *spaces; -extern const char *dots; extern char buildid_dir[]; #ifdef __GNUC__ @@ -94,48 +90,6 @@ static inline void *zalloc(size_t size) #define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) -/* Sane ctype - no locale, and works with signed chars */ -#undef isascii -#undef isspace -#undef isdigit -#undef isxdigit -#undef isalpha -#undef isprint -#undef isalnum -#undef islower -#undef isupper -#undef tolower -#undef toupper - -extern unsigned char sane_ctype[256]; -#define GIT_SPACE 0x01 -#define GIT_DIGIT 0x02 -#define GIT_ALPHA 0x04 -#define GIT_GLOB_SPECIAL 0x08 -#define GIT_REGEX_SPECIAL 0x10 -#define GIT_PRINT_EXTRA 0x20 -#define GIT_PRINT 0x3E -#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) -#define isascii(x) (((x) & ~0x7f) == 0) -#define isspace(x) sane_istest(x,GIT_SPACE) -#define isdigit(x) sane_istest(x,GIT_DIGIT) -#define isxdigit(x) \ - (sane_istest(toupper(x), GIT_ALPHA | GIT_DIGIT) && toupper(x) < 'G') -#define isalpha(x) sane_istest(x,GIT_ALPHA) -#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) -#define isprint(x) sane_istest(x,GIT_PRINT) -#define islower(x) (sane_istest(x,GIT_ALPHA) && (x & 0x20)) -#define isupper(x) (sane_istest(x,GIT_ALPHA) && !(x & 0x20)) -#define tolower(x) sane_case((unsigned char)(x), 0x20) -#define toupper(x) sane_case((unsigned char)(x), 0) - -static inline int sane_case(int x, int high) -{ - if (sane_istest(x, GIT_ALPHA)) - x = (x & ~0x20) | high; - return x; -} - int mkdir_p(char *path, mode_t mode); int rm_rf(const char *path); struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *)); -- cgit v1.2.3 From a12a4e023a55f058178afea1ada3ce7bf4db94c3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo 
Date: Mon, 17 Apr 2017 16:22:30 -0300 Subject: tools include: Include missing headers for fls() and types in linux/log2.h Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-7wj865zidu5ylf87i6i7v6z7@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/log2.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/include/linux/log2.h b/tools/include/linux/log2.h index d5677d39c1e4..0325cefc2220 100644 --- a/tools/include/linux/log2.h +++ b/tools/include/linux/log2.h @@ -12,6 +12,9 @@ #ifndef _TOOLS_LINUX_LOG2_H #define _TOOLS_LINUX_LOG2_H +#include +#include + /* * non-constant log of base 2 calculators * - the arch may override these in asm/bitops.h if they can be implemented -- cgit v1.2.3 From fea013928cdcf81fbe0bfbf9e2eed1c7da2d62c2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Apr 2017 16:23:22 -0300 Subject: perf tools: Move print_binary definitions to separate files Continuing the split of util.[ch] into more manageable bits. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-5eu367rwcwnvvn7fz09l7xpb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 1 + tools/perf/builtin-trace.c | 1 + tools/perf/tests/is_printable_array.c | 2 +- tools/perf/util/Build | 1 + tools/perf/util/debug.c | 1 + tools/perf/util/print_binary.c | 55 ++++++++++++++++++++++ tools/perf/util/print_binary.h | 28 +++++++++++ tools/perf/util/python-ext-sources | 1 + tools/perf/util/python.c | 1 + .../util/scripting-engines/trace-event-python.c | 1 + tools/perf/util/util.c | 54 --------------------- tools/perf/util/util.h | 23 --------- 12 files changed, 91 insertions(+), 78 deletions(-) create mode 100644 tools/perf/util/print_binary.c create mode 100644 tools/perf/util/print_binary.h diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 5afd9a62360a..5f4e36a4c444 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -23,6 +23,7 @@ #include "util/stat.h" #include "util/thread-stack.h" #include "util/time-utils.h" +#include "print_binary.h" #include #include #include diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0b00d8ac5226..9a8b9e6f47f7 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -36,6 +36,7 @@ #include "util/parse-events.h" #include "util/bpf-loader.h" #include "callchain.h" +#include "print_binary.h" #include "syscalltbl.h" #include "rb_resort.h" diff --git a/tools/perf/tests/is_printable_array.c b/tools/perf/tests/is_printable_array.c index a008e5c2d980..a5192f6a20d7 100644 --- a/tools/perf/tests/is_printable_array.c +++ b/tools/perf/tests/is_printable_array.c @@ -2,7 +2,7 @@ #include #include "tests.h" #include "debug.h" -#include "util.h" +#include "print_binary.h" int test__is_printable_array(int subtest __maybe_unused) { diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 
5c0ea11a8f0a..f0b9e5d0e2fc 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -16,6 +16,7 @@ libperf-y += llvm-utils.o libperf-y += parse-events.o libperf-y += perf_regs.o libperf-y += path.o +libperf-y += print_binary.o libperf-y += rbtree.o libperf-y += libstring.o libperf-y += bitmap.o diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 41aa7c63e037..6e1d7e159649 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -13,6 +13,7 @@ #include "color.h" #include "event.h" #include "debug.h" +#include "print_binary.h" #include "util.h" #include "target.h" diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c new file mode 100644 index 000000000000..e908177b9976 --- /dev/null +++ b/tools/perf/util/print_binary.c @@ -0,0 +1,55 @@ +#include "print_binary.h" +#include +#include "sane_ctype.h" + +void print_binary(unsigned char *data, size_t len, + size_t bytes_per_line, print_binary_t printer, + void *extra) +{ + size_t i, j, mask; + + if (!printer) + return; + + bytes_per_line = roundup_pow_of_two(bytes_per_line); + mask = bytes_per_line - 1; + + printer(BINARY_PRINT_DATA_BEGIN, 0, extra); + for (i = 0; i < len; i++) { + if ((i & mask) == 0) { + printer(BINARY_PRINT_LINE_BEGIN, -1, extra); + printer(BINARY_PRINT_ADDR, i, extra); + } + + printer(BINARY_PRINT_NUM_DATA, data[i], extra); + + if (((i & mask) == mask) || i == len - 1) { + for (j = 0; j < mask-(i & mask); j++) + printer(BINARY_PRINT_NUM_PAD, -1, extra); + + printer(BINARY_PRINT_SEP, i, extra); + for (j = i & ~mask; j <= i; j++) + printer(BINARY_PRINT_CHAR_DATA, data[j], extra); + for (j = 0; j < mask-(i & mask); j++) + printer(BINARY_PRINT_CHAR_PAD, i, extra); + printer(BINARY_PRINT_LINE_END, -1, extra); + } + } + printer(BINARY_PRINT_DATA_END, -1, extra); +} + +int is_printable_array(char *p, unsigned int len) +{ + unsigned int i; + + if (!p || !len || p[len - 1] != 0) + return 0; + + len--; + + for (i = 0; i < len; i++) { + if 
(!isprint(p[i]) && !isspace(p[i])) + return 0; + } + return 1; +} diff --git a/tools/perf/util/print_binary.h b/tools/perf/util/print_binary.h new file mode 100644 index 000000000000..da0427263d2d --- /dev/null +++ b/tools/perf/util/print_binary.h @@ -0,0 +1,28 @@ +#ifndef PERF_PRINT_BINARY_H +#define PERF_PRINT_BINARY_H + +#include + +enum binary_printer_ops { + BINARY_PRINT_DATA_BEGIN, + BINARY_PRINT_LINE_BEGIN, + BINARY_PRINT_ADDR, + BINARY_PRINT_NUM_DATA, + BINARY_PRINT_NUM_PAD, + BINARY_PRINT_SEP, + BINARY_PRINT_CHAR_DATA, + BINARY_PRINT_CHAR_PAD, + BINARY_PRINT_LINE_END, + BINARY_PRINT_DATA_END, +}; + +typedef void (*print_binary_t)(enum binary_printer_ops op, + unsigned int val, void *extra); + +void print_binary(unsigned char *data, size_t len, + size_t bytes_per_line, print_binary_t printer, + void *extra); + +int is_printable_array(char *p, unsigned int len); + +#endif /* PERF_PRINT_BINARY_H */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 0546a4304347..7d3927447fba 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -21,6 +21,7 @@ util/cgroup.c util/parse-branch-options.c util/rblist.c util/counts.c +util/print_binary.c util/strlist.c util/trace-event.c ../lib/rbtree.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index a5fbc012e3df..0533711af44d 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -7,6 +7,7 @@ #include "evsel.h" #include "event.h" #include "cpumap.h" +#include "print_binary.h" #include "thread_map.h" /* diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index dd61213e7a3c..9d92af7d0718 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -46,6 +46,7 @@ #include "../call-path.h" #include "thread_map.h" #include "cpumap.h" +#include "print_binary.h" #include "stat.h" 
PyMODINIT_FUNC initperf_trace_context(void); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 717541e72999..4fb8ee552a31 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -21,8 +21,6 @@ #include "callchain.h" #include "strlist.h" -#include "sane_ctype.h" - #define CALLCHAIN_PARAM_DEFAULT \ .mode = CHAIN_GRAPH_ABS, \ .min_percent = 0.5, \ @@ -742,58 +740,6 @@ int fetch_current_timestamp(char *buf, size_t sz) return 0; } -void print_binary(unsigned char *data, size_t len, - size_t bytes_per_line, print_binary_t printer, - void *extra) -{ - size_t i, j, mask; - - if (!printer) - return; - - bytes_per_line = roundup_pow_of_two(bytes_per_line); - mask = bytes_per_line - 1; - - printer(BINARY_PRINT_DATA_BEGIN, 0, extra); - for (i = 0; i < len; i++) { - if ((i & mask) == 0) { - printer(BINARY_PRINT_LINE_BEGIN, -1, extra); - printer(BINARY_PRINT_ADDR, i, extra); - } - - printer(BINARY_PRINT_NUM_DATA, data[i], extra); - - if (((i & mask) == mask) || i == len - 1) { - for (j = 0; j < mask-(i & mask); j++) - printer(BINARY_PRINT_NUM_PAD, -1, extra); - - printer(BINARY_PRINT_SEP, i, extra); - for (j = i & ~mask; j <= i; j++) - printer(BINARY_PRINT_CHAR_DATA, data[j], extra); - for (j = 0; j < mask-(i & mask); j++) - printer(BINARY_PRINT_CHAR_PAD, i, extra); - printer(BINARY_PRINT_LINE_END, -1, extra); - } - } - printer(BINARY_PRINT_DATA_END, -1, extra); -} - -int is_printable_array(char *p, unsigned int len) -{ - unsigned int i; - - if (!p || !len || p[len - 1] != 0) - return 0; - - len--; - - for (i = 0; i < len; i++) { - if (!isprint(p[i]) && !isspace(p[i])) - return 0; - } - return 1; -} - int unit_number__scnprintf(char *buf, size_t size, u64 n) { char unit[4] = "BKMG"; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index f7e1ead50f47..4d9069ab569a 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -213,33 +213,10 @@ const char *perf_tip(const char *dirpath); bool is_regular_file(const char *file); int 
fetch_current_timestamp(char *buf, size_t sz); -enum binary_printer_ops { - BINARY_PRINT_DATA_BEGIN, - BINARY_PRINT_LINE_BEGIN, - BINARY_PRINT_ADDR, - BINARY_PRINT_NUM_DATA, - BINARY_PRINT_NUM_PAD, - BINARY_PRINT_SEP, - BINARY_PRINT_CHAR_DATA, - BINARY_PRINT_CHAR_PAD, - BINARY_PRINT_LINE_END, - BINARY_PRINT_DATA_END, -}; - -typedef void (*print_binary_t)(enum binary_printer_ops, - unsigned int val, - void *extra); - -void print_binary(unsigned char *data, size_t len, - size_t bytes_per_line, print_binary_t printer, - void *extra); - #ifndef HAVE_SCHED_GETCPU_SUPPORT int sched_getcpu(void); #endif -int is_printable_array(char *p, unsigned int len); - int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); int unit_number__scnprintf(char *buf, size_t size, u64 n); -- cgit v1.2.3 From 632a5cabea21eb079b788d2bb4a9318bd6fff5e1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Apr 2017 16:30:49 -0300 Subject: perf tools: Move srcline definitions to separate header Out of util.h into a new file, srcline.h Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ludnlm4djqcdjziekzr4s3u9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 1 + tools/perf/ui/stdio/hist.c | 1 + tools/perf/util/hist.c | 1 + tools/perf/util/map.c | 1 + tools/perf/util/sort.h | 1 + tools/perf/util/srcline.c | 1 + tools/perf/util/srcline.h | 34 ++++++++++++++++++++++++++++++++++ tools/perf/util/util.h | 27 --------------------------- 8 files changed, 40 insertions(+), 27 deletions(-) create mode 100644 tools/perf/util/srcline.h diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 56f5c038689e..ac7f6a3e4a86 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -19,6 +19,7 @@ #include "../ui.h" #include "map.h" #include "annotate.h" +#include "srcline.h" #include "sane_ctype.h" diff --git 
a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 297a79c69b71..66aa4eb369f1 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -4,6 +4,7 @@ #include "../../util/hist.h" #include "../../util/sort.h" #include "../../util/evsel.h" +#include "../../util/srcline.h" #include "../../util/sane_ctype.h" static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 61bf304206fd..af3bd5d31d99 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -8,6 +8,7 @@ #include "evlist.h" #include "evsel.h" #include "annotate.h" +#include "srcline.h" #include "ui/progress.h" #include diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index c1870ac365a3..9059d20c3b8a 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -16,6 +16,7 @@ #include "debug.h" #include "machine.h" #include +#include "srcline.h" #include "unwind.h" static void __maps__insert(struct maps *maps, struct map *map); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index e35fb186d048..535903297cee 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -21,6 +21,7 @@ #include #include "parse-events.h" #include "hist.h" +#include "srcline.h" #include "thread.h" extern regex_t parent_regex; diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index ef192802edc9..df051a52393c 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -9,6 +9,7 @@ #include "util/util.h" #include "util/debug.h" #include "util/callchain.h" +#include "srcline.h" #include "symbol.h" diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h new file mode 100644 index 000000000000..7b52ba88676e --- /dev/null +++ b/tools/perf/util/srcline.h @@ -0,0 +1,34 @@ +#ifndef PERF_SRCLINE_H +#define PERF_SRCLINE_H + +#include +#include + +struct dso; +struct symbol; + +extern bool srcline_full_filename; +char *get_srcline(struct dso *dso, u64 
addr, struct symbol *sym, + bool show_sym, bool show_addr); +char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, + bool show_sym, bool show_addr, bool unwind_inlines); +void free_srcline(char *srcline); + +#define SRCLINE_UNKNOWN ((char *) "??:0") + +struct inline_list { + char *filename; + char *funcname; + unsigned int line_nr; + struct list_head list; +}; + +struct inline_node { + u64 addr; + struct list_head val; +}; + +struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr); +void inline_node__delete(struct inline_node *node); + +#endif /* PERF_SRCLINE_H */ diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 4d9069ab569a..46cfdccc32bc 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -144,8 +144,6 @@ struct parse_tag { unsigned long parse_tag_value(const char *str, struct parse_tag *tags); -#define SRCLINE_UNKNOWN ((char *) "??:0") - static inline int path__join(char *bf, size_t size, const char *path1, const char *path2) { @@ -161,16 +159,6 @@ static inline int path__join3(char *bf, size_t size, path2, path2[0] ? 
"/" : "", path3); } -struct dso; -struct symbol; - -extern bool srcline_full_filename; -char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr); -char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr, bool unwind_inlines); -void free_srcline(char *srcline); - int perf_event_paranoid(void); void mem_bswap_64(void *src, int byte_size); @@ -221,19 +209,4 @@ int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); int unit_number__scnprintf(char *buf, size_t size, u64 n); -struct inline_list { - char *filename; - char *funcname; - unsigned int line_nr; - struct list_head list; -}; - -struct inline_node { - u64 addr; - struct list_head val; -}; - -struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr); -void inline_node__delete(struct inline_node *node); - #endif /* GIT_COMPAT_UTIL_H */ -- cgit v1.2.3 From a067558e2fa72445e8f6e6b2dd372a82afae6e49 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Apr 2017 16:51:59 -0300 Subject: perf tools: Move extra string util functions to util/string2.h Moving them from util.h, where they don't belong. Since libc already have string.h, name it slightly differently, as string2.h. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-eh3vz5sqxsrdd8lodoro4jrw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-functions.c | 1 + tools/perf/builtin-script.c | 1 + tools/perf/builtin-stat.c | 1 + tools/perf/builtin-trace.c | 1 + tools/perf/ui/browser.c | 1 + tools/perf/ui/browsers/hists.c | 1 + tools/perf/ui/gtk/hists.c | 1 + tools/perf/ui/stdio/hist.c | 1 + tools/perf/util/annotate.c | 1 + tools/perf/util/dso.c | 1 + tools/perf/util/dwarf-aux.c | 1 + tools/perf/util/event.c | 2 +- tools/perf/util/header.c | 1 + tools/perf/util/parse-events.c | 2 +- tools/perf/util/pmu.c | 1 + tools/perf/util/probe-event.c | 1 + tools/perf/util/probe-file.c | 1 + tools/perf/util/probe-finder.c | 1 + tools/perf/util/strfilter.c | 2 +- tools/perf/util/string.c | 16 +++++++++------ tools/perf/util/string2.h | 42 ++++++++++++++++++++++++++++++++++++++++ tools/perf/util/thread_map.c | 1 + tools/perf/util/util.h | 32 ------------------------------ 23 files changed, 72 insertions(+), 41 deletions(-) create mode 100644 tools/perf/util/string2.h diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index d1dea33dcfcf..fbd732b54047 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -12,6 +12,7 @@ #include #include "../util/header.h" #include "../util/cloexec.h" +#include "../util/string2.h" #include "bench.h" #include "mem-memcpy-arch.h" #include "mem-memset-arch.h" diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 5f4e36a4c444..a710f6867954 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -21,6 +21,7 @@ #include "util/cpumap.h" #include "util/thread_map.h" #include "util/stat.h" +#include "util/string2.h" #include "util/thread-stack.h" #include "util/time-utils.h" #include "print_binary.h" diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 
5abef25f9983..1da86e6708c0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -64,6 +64,7 @@ #include "util/session.h" #include "util/tool.h" #include "util/group.h" +#include "util/string2.h" #include "asm/bug.h" #include diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 9a8b9e6f47f7..4160d946a34d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -37,6 +37,7 @@ #include "util/bpf-loader.h" #include "callchain.h" #include "print_binary.h" +#include "string2.h" #include "syscalltbl.h" #include "rb_resort.h" diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 73bdd07321c4..a4d3762cd825 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -1,4 +1,5 @@ #include "../util.h" +#include "../string2.h" #include "../config.h" #include "../../perf.h" #include "libslang.h" diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index ac7f6a3e4a86..bc397bbbc3a7 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -20,6 +20,7 @@ #include "map.h" #include "annotate.h" #include "srcline.h" +#include "string2.h" #include "sane_ctype.h" diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index a4f02de7c1b5..c42de4dcc055 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -4,6 +4,7 @@ #include "../sort.h" #include "../hist.h" #include "../helpline.h" +#include "../string2.h" #include "gtk.h" #define MAX_COLUMNS 32 diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 66aa4eb369f1..5565105c9688 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -5,6 +5,7 @@ #include "../../util/sort.h" #include "../../util/evsel.h" #include "../../util/srcline.h" +#include "../../util/string2.h" #include "../../util/sane_ctype.h" static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) diff --git a/tools/perf/util/annotate.c 
b/tools/perf/util/annotate.c index d69fdafba274..297bf5c66609 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -19,6 +19,7 @@ #include "annotate.h" #include "evsel.h" #include "block-range.h" +#include "string2.h" #include "arch/common.h" #include #include diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 385c82e12473..6ad57730a0ed 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -8,6 +8,7 @@ #include "auxtrace.h" #include "util.h" #include "debug.h" +#include "string2.h" #include "vdso.h" static const char * const debuglink_paths[] = { diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 5fb186d142f6..780ee4629bd0 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -22,6 +22,7 @@ #include "util.h" #include "debug.h" #include "dwarf-aux.h" +#include "string2.h" /** * cu_find_realpath - Find the realpath of the target file diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f8a706ba72f2..53b81b2283c1 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -8,7 +8,7 @@ #include "hist.h" #include "machine.h" #include "sort.h" -#include "string.h" +#include "string2.h" #include "strlist.h" #include "thread.h" #include "thread_map.h" diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index faf046fc398f..51865b8cb9f3 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1,5 +1,6 @@ #include #include "util.h" +#include "string2.h" #include #include #include diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 6b498aea9fde..535c7cbb2bfb 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -7,7 +7,7 @@ #include #include "parse-events.h" #include -#include "string.h" +#include "string2.h" #include "symbol.h" #include "cache.h" #include "header.h" diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 11c752561c55..7503fa170249 100644 
--- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -15,6 +15,7 @@ #include "header.h" #include "pmu-events/pmu-events.h" #include "cache.h" +#include "string2.h" struct perf_pmu_format { char *name; diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 7caba7003734..8f1374071cfc 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -47,6 +47,7 @@ #include "probe-finder.h" #include "probe-file.h" #include "session.h" +#include "string2.h" #include "sane_ctype.h" diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 88714dec8912..7942ea5c7e81 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -28,6 +28,7 @@ #include "probe-file.h" #include "session.h" #include "perf_regs.h" +#include "string2.h" /* 4096 - 2 ('\n' + '\0') */ #define MAX_CMDLEN 4094 diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 9ddd7dad2e6e..3f3cd11d3b7c 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -41,6 +41,7 @@ #include "symbol.h" #include "probe-finder.h" #include "probe-file.h" +#include "string2.h" /* Kprobe tracer basic type is up to u64 */ #define MAX_BASIC_TYPE_BITS 64 diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c index c663c9153245..3b068e0e2d59 100644 --- a/tools/perf/util/strfilter.c +++ b/tools/perf/util/strfilter.c @@ -1,5 +1,5 @@ #include "util.h" -#include "string.h" +#include "string2.h" #include "strfilter.h" #include "sane_ctype.h" diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index e716a6e5bb19..cca53b693a48 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -1,5 +1,7 @@ -#include "util.h" -#include "linux/string.h" +#include "string2.h" +#include +#include +#include #include "sane_ctype.h" @@ -101,8 +103,10 @@ static int count_argc(const char *str) void argv_free(char **argv) { char **p; - for (p = argv; *p; p++) - zfree(p); + 
for (p = argv; *p; p++) { + free(*p); + *p = NULL; + } free(argv); } @@ -122,7 +126,7 @@ void argv_free(char **argv) char **argv_split(const char *str, int *argcp) { int argc = count_argc(str); - char **argv = zalloc(sizeof(*argv) * (argc+1)); + char **argv = calloc(argc + 1, sizeof(*argv)); char **argvp; if (argv == NULL) @@ -379,7 +383,7 @@ char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints goto out_err_overflow; if (i > 0) - printed += snprintf(e + printed, size - printed, " %s ", or_and); + printed += scnprintf(e + printed, size - printed, " %s ", or_and); printed += scnprintf(e + printed, size - printed, "%s %s %d", var, eq_neq, ints[i]); } diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h new file mode 100644 index 000000000000..2f619681bd6a --- /dev/null +++ b/tools/perf/util/string2.h @@ -0,0 +1,42 @@ +#ifndef PERF_STRING_H +#define PERF_STRING_H + +#include +#include +#include + +s64 perf_atoll(const char *str); +char **argv_split(const char *str, int *argcp); +void argv_free(char **argv); +bool strglobmatch(const char *str, const char *pat); +bool strglobmatch_nocase(const char *str, const char *pat); +bool strlazymatch(const char *str, const char *pat); +static inline bool strisglob(const char *str) +{ + return strpbrk(str, "*?[") != NULL; +} +int strtailcmp(const char *s1, const char *s2); +char *strxfrchar(char *s, char from, char to); + +char *ltrim(char *s); +char *rtrim(char *s); + +static inline char *trim(char *s) +{ + return ltrim(rtrim(s)); +} + +char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints); + +static inline char *asprintf_expr_in_ints(const char *var, size_t nints, int *ints) +{ + return asprintf_expr_inout_ints(var, true, nints, ints); +} + +static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int *ints) +{ + return asprintf_expr_inout_ints(var, false, nints, ints); +} + + +#endif /* PERF_STRING_H */ diff --git 
a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 9026408ea55b..ef86bf517e01 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -6,6 +6,7 @@ #include #include #include +#include "string2.h" #include "strlist.h" #include #include diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 46cfdccc32bc..850ca54d17a5 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -98,18 +98,6 @@ int copyfile(const char *from, const char *to); int copyfile_mode(const char *from, const char *to, mode_t mode); int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size); -s64 perf_atoll(const char *str); -char **argv_split(const char *str, int *argcp); -void argv_free(char **argv); -bool strglobmatch(const char *str, const char *pat); -bool strglobmatch_nocase(const char *str, const char *pat); -bool strlazymatch(const char *str, const char *pat); -static inline bool strisglob(const char *str) -{ - return strpbrk(str, "*?[") != NULL; -} -int strtailcmp(const char *s1, const char *s2); -char *strxfrchar(char *s, char from, char to); unsigned long convert_unit(unsigned long value, char *unit); ssize_t readn(int fd, void *buf, size_t n); ssize_t writen(int fd, void *buf, size_t n); @@ -121,14 +109,6 @@ void event_attr_init(struct perf_event_attr *attr); size_t hex_width(u64 v); int hex2u64(const char *ptr, u64 *val); -char *ltrim(char *s); -char *rtrim(char *s); - -static inline char *trim(char *s) -{ - return ltrim(rtrim(s)); -} - void dump_stack(void); void sighandler_dump_stack(int sig); @@ -175,18 +155,6 @@ int gzip_decompress_to_file(const char *input, int output_fd); int lzma_decompress_to_file(const char *input, int output_fd); #endif -char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints); - -static inline char *asprintf_expr_in_ints(const char *var, size_t nints, int *ints) -{ - return asprintf_expr_inout_ints(var, true, nints, ints); -} - -static inline char 
*asprintf_expr_not_in_ints(const char *var, size_t nints, int *ints) -{ - return asprintf_expr_inout_ints(var, false, nints, ints); -} - int get_stack_size(const char *str, unsigned long *_size); int fetch_kernel_version(unsigned int *puint, -- cgit v1.2.3 From a43783aeec5fac8ef372ff8c0a5bbb3056fc0604 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Apr 2017 10:46:11 -0300 Subject: perf tools: Include errno.h where needed Removing it from util.h, part of an effort to disentangle the includes hell, that makes changes to util.h or something included by it to cause a complete rebuild of the tools. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ztrjy52q1rqcchuy3rubfgt2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/dwarf-regs.c | 1 + tools/perf/arch/powerpc/util/kvm-stat.c | 1 + tools/perf/arch/powerpc/util/perf_regs.c | 1 + tools/perf/arch/s390/util/kvm-stat.c | 1 + tools/perf/arch/x86/tests/intel-cqm.c | 1 + tools/perf/arch/x86/tests/perf-time-to-tsc.c | 1 + tools/perf/arch/x86/util/auxtrace.c | 1 + tools/perf/arch/x86/util/intel-bts.c | 1 + tools/perf/arch/x86/util/intel-pt.c | 1 + tools/perf/arch/x86/util/kvm-stat.c | 1 + tools/perf/arch/x86/util/perf_regs.c | 1 + tools/perf/builtin-annotate.c | 1 + tools/perf/builtin-buildid-cache.c | 1 + tools/perf/builtin-buildid-list.c | 1 + tools/perf/builtin-c2c.c | 1 + tools/perf/builtin-diff.c | 1 + tools/perf/builtin-ftrace.c | 1 + tools/perf/builtin-help.c | 1 + tools/perf/builtin-inject.c | 1 + tools/perf/builtin-kmem.c | 1 + tools/perf/builtin-kvm.c | 1 + tools/perf/builtin-lock.c | 1 + tools/perf/builtin-record.c | 1 + tools/perf/builtin-report.c | 1 + tools/perf/builtin-sched.c | 1 + tools/perf/builtin-script.c | 1 + tools/perf/builtin-stat.c | 1 + tools/perf/builtin-timechart.c | 1 + tools/perf/builtin-trace.c | 1 + tools/perf/perf.c | 1 + tools/perf/tests/attr.c | 1 + 
tools/perf/tests/backward-ring-buffer.c | 1 + tools/perf/tests/bpf.c | 1 + tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/code-reading.c | 1 + tools/perf/tests/event-times.c | 1 + tools/perf/tests/evsel-roundtrip-name.c | 1 + tools/perf/tests/hists_link.c | 1 + tools/perf/tests/mmap-basic.c | 1 + tools/perf/tests/openat-syscall-all-cpus.c | 1 + tools/perf/tests/openat-syscall-tp-fields.c | 1 + tools/perf/tests/openat-syscall.c | 1 + tools/perf/tests/parse-events.c | 1 + tools/perf/tests/perf-record.c | 1 + tools/perf/tests/pmu.c | 1 + tools/perf/tests/sdt.c | 1 + tools/perf/tests/sw-clock.c | 1 + tools/perf/tests/switch-tracking.c | 1 + tools/perf/tests/task-exit.c | 1 + tools/perf/ui/browsers/hists.c | 1 + tools/perf/util/annotate.c | 1 + tools/perf/util/auxtrace.h | 1 + tools/perf/util/bpf-loader.c | 1 + tools/perf/util/bpf-loader.h | 2 ++ tools/perf/util/bpf-prologue.c | 1 + tools/perf/util/bpf-prologue.h | 2 ++ tools/perf/util/build-id.c | 1 + tools/perf/util/c++/clang-c.h | 1 + tools/perf/util/cloexec.c | 1 + tools/perf/util/comm.c | 1 + tools/perf/util/config.c | 1 + tools/perf/util/counts.c | 1 + tools/perf/util/data-convert-bt.c | 1 + tools/perf/util/data.c | 1 + tools/perf/util/drv_configs.c | 1 + tools/perf/util/dso.c | 1 + tools/perf/util/dwarf-aux.c | 1 + tools/perf/util/env.c | 1 + tools/perf/util/event.c | 1 + tools/perf/util/evlist.c | 1 + tools/perf/util/evsel.c | 1 + tools/perf/util/header.c | 1 + tools/perf/util/hist.c | 1 + tools/perf/util/intel-bts.c | 1 + tools/perf/util/jitdump.c | 1 + tools/perf/util/lzma.c | 1 + tools/perf/util/machine.c | 1 + tools/perf/util/ordered-events.c | 1 + tools/perf/util/parse-events.c | 1 + tools/perf/util/pmu.c | 1 + tools/perf/util/probe-file.c | 1 + tools/perf/util/quote.c | 1 + tools/perf/util/record.c | 1 + tools/perf/util/session.c | 1 + tools/perf/util/sort.c | 1 + tools/perf/util/stat.c | 1 + tools/perf/util/strbuf.c | 1 + tools/perf/util/strfilter.c | 1 + tools/perf/util/symbol-minimal.c | 1 + 
tools/perf/util/thread-stack.c | 1 + tools/perf/util/thread.c | 1 + tools/perf/util/thread_map.c | 1 + tools/perf/util/unwind-libunwind-local.c | 1 + tools/perf/util/util.h | 1 - tools/perf/util/vdso.c | 2 +- 95 files changed, 96 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/arm64/util/dwarf-regs.c b/tools/perf/arch/arm64/util/dwarf-regs.c index aea610c292e6..f268720ff021 100644 --- a/tools/perf/arch/arm64/util/dwarf-regs.c +++ b/tools/perf/arch/arm64/util/dwarf-regs.c @@ -8,6 +8,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include /* for struct user_pt_regs */ diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index 74eee30398f8..249723f0e6a9 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -1,3 +1,4 @@ +#include #include "util/kvm-stat.h" #include "util/parse-events.h" #include "util/debug.h" diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c index 4268f7762e25..f860dc411f69 100644 --- a/tools/perf/arch/powerpc/util/perf_regs.c +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -1,3 +1,4 @@ +#include #include #include diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c index ed57df2e6d68..d233e2eb9592 100644 --- a/tools/perf/arch/s390/util/kvm-stat.c +++ b/tools/perf/arch/s390/util/kvm-stat.c @@ -9,6 +9,7 @@ * as published by the Free Software Foundation. 
*/ +#include #include "../../util/kvm-stat.h" #include diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c index 7f064eb37158..03c62eb0106b 100644 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -7,6 +7,7 @@ #include "arch-tests.h" #include +#include #include static pid_t spawn(void) diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index a8e37f3148c2..e3ae9cff2b67 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c index cc1d865e31f1..6aa3f2a38321 100644 --- a/tools/perf/arch/x86/util/auxtrace.c +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -13,6 +13,7 @@ * */ +#include #include #include "../../util/header.h" diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 5132775a044f..af2bce7a2cd6 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -13,6 +13,7 @@ * */ +#include #include #include #include diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 90fa2286edcf..f630de0206a1 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -13,6 +13,7 @@ * */ +#include #include #include #include diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c index b63d4be655a2..bf817beca0a8 100644 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ b/tools/perf/arch/x86/util/kvm-stat.c @@ -1,3 +1,4 @@ +#include #include "../../util/kvm-stat.h" #include #include diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index 3bf3548c5e2d..f95edebfb716 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c 
@@ -1,3 +1,4 @@ +#include #include #include diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index b2b2722f6bb7..7a5dc7e5c577 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -33,6 +33,7 @@ #include "util/block-range.h" #include +#include #include struct perf_annotate { diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 94b55eee0d9b..034c3d4a7b27 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "builtin.h" #include "perf.h" diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 26f4e608207f..fdaca16e0c74 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -16,6 +16,7 @@ #include "util/session.h" #include "util/symbol.h" #include "util/data.h" +#include static int sysfs__fprintf_build_id(FILE *fp) { diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index cc8156a969ac..a90c1260f49e 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -9,6 +9,7 @@ * Dick Fowles * Joe Mario */ +#include #include #include #include diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 09be77b13788..eec5df80f5a3 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -19,6 +19,7 @@ #include "util/data.h" #include "util/config.h" +#include #include #include #include diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index f80fb60b00b0..67d14037c03e 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -9,6 +9,7 @@ #include "builtin.h" #include "perf.h" +#include #include #include #include diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 1f18385907f5..7bde2f59dac2 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -13,6 +13,7 @@ #include #include 
"util/debug.h" #include +#include static struct man_viewer_list { struct man_viewer_list *next; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 65e1c026a2f0..b102ee702aa1 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -22,6 +22,7 @@ #include #include +#include struct perf_inject { struct perf_tool tool; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 15754a492cad..9409c9464667 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index e82a6979327f..40660428fa72 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 6f93a6f0e268..ff98652484a7 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1,3 +1,4 @@ +#include #include #include "builtin.h" #include "perf.h" diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 65429d1b29c8..70340ff2008d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -40,6 +40,7 @@ #include "util/perf-hooks.h" #include "asm/bug.h" +#include #include #include #include diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 66a2f44518de..2941d8df4d40 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -40,6 +40,7 @@ #include "util/auxtrace.h" #include +#include #include #include #include diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index aefab93d7d2f..39996c53995a 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 
a710f6867954..853651a0f720 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -32,6 +32,7 @@ #include "asm/bug.h" #include "util/mem-events.h" #include "util/dump-insn.h" +#include #include #include "sane_ctype.h" diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1da86e6708c0..be2cd537c537 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -69,6 +69,7 @@ #include #include +#include #include #include #include diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index a24b229a785f..e2576c8f6d4e 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -12,6 +12,7 @@ * of the License. */ +#include #include #include diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 4160d946a34d..e065c4a12f58 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -41,6 +41,7 @@ #include "syscalltbl.h" #include "rb_resort.h" +#include #include #include /* FIXME: Still needed for audit_errno_to_name */ #include diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 94e9418aecb1..9ccccb0fbd8f 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -19,6 +19,7 @@ #include "util/debug.h" #include #include +#include #include #include #include diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index bb2bc487f703..ba87cd529bfc 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -18,6 +18,7 @@ * permissions. All the event text files are stored there. 
*/ +#include #include #include #include diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 42e892b1e979..50f6d7afee58 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -8,6 +8,7 @@ #include #include "tests.h" #include "debug.h" +#include #define NR_ITERS 111 diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 67fe5eeff021..b78fbd611a7c 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index f029737ad255..552fd9aca08d 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -3,6 +3,7 @@ * * Builtin regression testing command: ever growing number of sanity tests */ +#include #include #include #include "builtin.h" diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index fe6aac82d54e..3a8bf1565493 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index 8d376e155697..4683514751d6 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -1,4 +1,5 @@ #include +#include #include #include #include "tests.h" diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index 9221d2732cc4..d2bea6f780f8 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ b/tools/perf/tests/evsel-roundtrip-name.c @@ -3,6 +3,7 @@ #include "parse-events.h" #include "tests.h" #include "debug.h" +#include #include static int perf_evsel__roundtrip_cache_name_test(void) diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 1bd26d23c2fc..a26cbb79e988 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -9,6 
+9,7 @@ #include "thread.h" #include "parse-events.h" #include "hists_common.h" +#include #include struct sample { diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index aba40eb4c56f..15c770856aac 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -1,3 +1,4 @@ +#include #include /* For the CLR_() macros */ #include diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index eb7b1a6d507e..b7dece0de42c 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -1,3 +1,4 @@ +#include #include /* For the CPU_* macros */ #include diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index f52239fed361..9788fac91095 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -5,6 +5,7 @@ #include "thread_map.h" #include "tests.h" #include "debug.h" +#include #ifndef O_DIRECTORY #define O_DIRECTORY 00200000 diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index 5964938d4b85..5f0c700b4693 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 05621748aead..66ecc3f5f618 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -5,6 +5,7 @@ #include "tests.h" #include "debug.h" #include "util.h" +#include #include #include #include diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index a8b6fdaf8df1..d37cd9588cc0 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -1,3 +1,4 @@ +#include #include /* For the CLR_() macros */ #include diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index de6498dc4cbb..a6d7aef30030 100644 --- 
a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -2,6 +2,7 @@ #include "pmu.h" #include "util.h" #include "tests.h" +#include #include /* Simulated format definitions. */ diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index 26e5b7a0b839..f73b3c5e125d 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 29f11c4b3e60..828494db4a19 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 7ddbe267d0ac..65474fd80da7 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 01a5ba2788c6..32873ec91a4e 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -4,6 +4,7 @@ #include "cpumap.h" #include "tests.h" +#include #include static int exited; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index bc397bbbc3a7..0916575c6694 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 297bf5c66609..683f8340460c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -7,6 +7,7 @@ * Released under the GPL v2. 
(and only v2, not any later version) */ +#include #include #include "util.h" #include "ui/ui.h" diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 26fb1ee5746a..9f0de72d58e2 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -17,6 +17,7 @@ #define __PERF_AUXTRACE_H #include +#include #include #include #include diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 27af9d62d899..bf21c1ca9771 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "perf.h" #include "debug.h" #include "bpf-loader.h" diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index f2b737b225f2..48863867878b 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -85,6 +85,8 @@ int bpf__strerror_setup_stdout(struct perf_evlist *evlist, int err, char *buf, size_t size); #else +#include + static inline struct bpf_object * bpf__prepare_load(const char *filename __maybe_unused, bool source __maybe_unused) diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c index 6cdbee119ceb..1356220a9f1b 100644 --- a/tools/perf/util/bpf-prologue.c +++ b/tools/perf/util/bpf-prologue.c @@ -12,6 +12,7 @@ #include "bpf-loader.h" #include "bpf-prologue.h" #include "probe-finder.h" +#include #include #include diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h index d94cbea12899..ba564838375f 100644 --- a/tools/perf/util/bpf-prologue.h +++ b/tools/perf/util/bpf-prologue.h @@ -18,6 +18,8 @@ int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, struct bpf_insn *new_prog, size_t *new_cnt, size_t cnt_space); #else +#include + static inline int bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused, int nargs __maybe_unused, diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 3c0755563969..8d8ef1d31b1e 100644 --- 
a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -7,6 +7,7 @@ * Copyright (C) 2009, 2010 Arnaldo Carvalho de Melo */ #include "util.h" +#include #include #include "build-id.h" #include "event.h" diff --git a/tools/perf/util/c++/clang-c.h b/tools/perf/util/c++/clang-c.h index 0eadd792ab1f..ccafcf72b37a 100644 --- a/tools/perf/util/c++/clang-c.h +++ b/tools/perf/util/c++/clang-c.h @@ -20,6 +20,7 @@ extern int perf_clang__compile_bpf(const char *filename, size_t *p_obj_buf_sz); #else +#include static inline void perf_clang__init(void) { } static inline void perf_clang__cleanup(void) { } diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c index f0dcd0ee0afa..4b4f00df58a8 100644 --- a/tools/perf/util/cloexec.c +++ b/tools/perf/util/cloexec.c @@ -1,3 +1,4 @@ +#include #include #include "util.h" #include "../perf.h" diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 32837b6f7879..530a62a7b51e 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -1,5 +1,6 @@ #include "comm.h" #include "util.h" +#include #include #include #include diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 88783aa3dfc9..f5604039cbe4 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -8,6 +8,7 @@ * Copyright (C) Johannes Schindelin, 2005 * */ +#include #include "util.h" #include "cache.h" #include diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c index e3fde313deb2..83fedd0d22a1 100644 --- a/tools/perf/util/counts.c +++ b/tools/perf/util/counts.c @@ -1,3 +1,4 @@ +#include #include #include "evsel.h" #include "counts.h" diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index c3277b32e917..89d50318833d 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -7,6 +7,7 @@ * Released under the GPL v2. 
(and only v2, not any later version) */ +#include #include #include #include diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 60bfc9ca1e22..e84bbc8ec058 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include diff --git a/tools/perf/util/drv_configs.c b/tools/perf/util/drv_configs.c index 1647f285c629..eec754243f4d 100644 --- a/tools/perf/util/drv_configs.c +++ b/tools/perf/util/drv_configs.c @@ -17,6 +17,7 @@ #include "evlist.h" #include "evsel.h" #include "pmu.h" +#include static int perf_evsel__apply_drv_configs(struct perf_evsel *evsel, diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 6ad57730a0ed..cd061dc1de70 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "symbol.h" #include "dso.h" #include "machine.h" diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 780ee4629bd0..f5acda13dcfa 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -17,6 +17,7 @@ * */ +#include #include #include #include "util.h" diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 075fc77286bf..9e21538c42ae 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -1,6 +1,7 @@ #include "cpumap.h" #include "env.h" #include "util.h" +#include struct perf_env perf_env; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 53b81b2283c1..1fc1217a0c2c 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index cf27039df100..f74ea2e55fde 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -8,6 +8,7 @@ */ #include "util.h" #include +#include #include #include #include "cpumap.h" diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 757f73c4fa95..44a7aef3911b 
100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 51865b8cb9f3..28a3acb7b313 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1,3 +1,4 @@ +#include #include #include "util.h" #include "string2.h" diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index af3bd5d31d99..65d42758aadd 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -10,6 +10,7 @@ #include "annotate.h" #include "srcline.h" #include "ui/progress.h" +#include #include static bool hists__filter_entry_by_dso(struct hists *hists, diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 471ed8b26a1c..b2834ac7b1f5 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -14,6 +14,7 @@ */ #include +#include #include #include #include diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index d6d25327bc92..388078d84eed 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c index 9ddea5cecd94..5b73b268c169 100644 --- a/tools/perf/util/lzma.c +++ b/tools/perf/util/lzma.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index f13f46a99b36..cdbfe3e32e5a 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1,3 +1,4 @@ +#include #include #include "callchain.h" #include "debug.h" diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index df05be69cc9e..4de398cfb577 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 
535c7cbb2bfb..d4877c8438e5 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1,5 +1,6 @@ #include #include +#include #include "util.h" #include "../perf.h" #include "evlist.h" diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 7503fa170249..bca1844594d0 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 7942ea5c7e81..1ccaefdc05c4 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -14,6 +14,7 @@ * GNU General Public License for more details. * */ +#include #include #include "util.h" #include "event.h" diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c index 293534c1a474..1ba8920151d8 100644 --- a/tools/perf/util/quote.c +++ b/tools/perf/util/quote.c @@ -1,3 +1,4 @@ +#include #include #include "strbuf.h" #include "quote.h" diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 98bf584853ea..d91bdf5a1aa4 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -2,6 +2,7 @@ #include "evsel.h" #include "cpumap.h" #include "parse-events.h" +#include #include #include "util.h" #include "cloexec.h" diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0695e08d2252..19d993f2a305 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 63ad5374f364..9aa058e167e8 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1,3 +1,4 @@ +#include #include #include #include "sort.h" diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index bbf30b2d1614..c58174443dc1 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -1,3 +1,4 @@ +#include #include #include #include "stat.h" diff --git 
a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 817593908d47..e91b5e86f027 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -1,6 +1,7 @@ #include "debug.h" #include "util.h" #include +#include int prefixcmp(const char *str, const char *prefix) { diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c index 3b068e0e2d59..4dc0af669a30 100644 --- a/tools/perf/util/strfilter.c +++ b/tools/perf/util/strfilter.c @@ -2,6 +2,7 @@ #include "string2.h" #include "strfilter.h" +#include #include "sane_ctype.h" /* Operators */ diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 870ef0f0659c..40bf5d4c0bfd 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -1,6 +1,7 @@ #include "symbol.h" #include "util.h" +#include #include #include #include diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index d3301529f6a7..dd17d6a38d3a 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -15,6 +15,7 @@ #include #include +#include #include "thread.h" #include "event.h" #include "machine.h" diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index e8ce6abc5321..378c418ca0c1 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -1,4 +1,5 @@ #include "../perf.h" +#include #include #include #include diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index ef86bf517e01..63ead7b06324 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 788f4d3c76f5..f8455bed6e65 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -16,6 +16,7 @@ */ #include +#include #include #include #include diff --git a/tools/perf/util/util.h 
b/tools/perf/util/util.h index 850ca54d17a5..6fd9963be1c8 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 7bdcad484225..d3c39eec89a8 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -1,4 +1,4 @@ - +#include #include #include #include -- cgit v1.2.3 From 8ec20b176c4be72d067fa18e33a4f156d1da9bc8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Apr 2017 10:57:25 -0300 Subject: perf str{filter,list}: Disentangle headers There are places where we just need a forward declaration, and others where we need to include strlist.h and/or strfilter.h, reducing the impact of changes in headers on the build time, do it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-zab42gbiki88y9k0csorxekb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 1 - tools/perf/builtin-timechart.c | 1 - tools/perf/util/bpf-loader.c | 1 + tools/perf/util/build-id.c | 1 + tools/perf/util/build-id.h | 4 +++- tools/perf/util/evsel_fprintf.c | 1 + tools/perf/util/jitdump.c | 1 - tools/perf/util/map.c | 1 - tools/perf/util/parse-events.c | 1 + tools/perf/util/probe-event.c | 1 + tools/perf/util/probe-event.h | 7 +++++-- tools/perf/util/probe-file.c | 1 + tools/perf/util/probe-file.h | 6 ++++-- tools/perf/util/probe-finder.c | 1 + tools/perf/util/sort.c | 1 + tools/perf/util/sort.h | 1 - tools/perf/util/util.h | 3 ++- 17 files changed, 22 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2941d8df4d40..f50738e0006e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -16,7 +16,6 @@ #include #include "util/symbol.h" #include "util/callchain.h" -#include "util/strlist.h" #include "util/values.h" #include "perf.h" diff --git
a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index e2576c8f6d4e..38e2c437b7b3 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -30,7 +30,6 @@ #include #include "util/symbol.h" #include "util/callchain.h" -#include "util/strlist.h" #include "perf.h" #include "util/header.h" diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index bf21c1ca9771..4bd2d1d882af 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -19,6 +19,7 @@ #include "probe-event.h" #include "probe-finder.h" // for MAX_PROBES #include "parse-events.h" +#include "strfilter.h" #include "llvm-utils.h" #include "c++/clang-c.h" diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 8d8ef1d31b1e..f9ccd053cdf6 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -19,6 +19,7 @@ #include "header.h" #include "vdso.h" #include "probe-file.h" +#include "strlist.h" #include "sane_ctype.h" diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index d27990610f9f..a96081121179 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -5,7 +5,6 @@ #define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1) #include "tool.h" -#include "strlist.h" #include extern struct perf_tool build_id__mark_dso_hit_ops; @@ -34,6 +33,9 @@ char *build_id_cache__origname(const char *sbuild_id); char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size); char *build_id_cache__cachedir(const char *sbuild_id, const char *name, bool is_kallsyms, bool is_vdso); + +struct strlist; + struct strlist *build_id_cache__list_all(bool validonly); char *build_id_cache__complement(const char *incomplete_sbuild_id); int build_id_cache__list_build_ids(const char *pathname, diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 8000f62d5d53..e415aee6a245 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ 
-5,6 +5,7 @@ #include "evsel.h" #include "callchain.h" #include "map.h" +#include "strlist.h" #include "symbol.h" static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 388078d84eed..9084930e1757 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -17,7 +17,6 @@ #include "debug.h" #include "evlist.h" #include "symbol.h" -#include "strlist.h" #include #include "tsc.h" diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 9059d20c3b8a..ebfa5d92358a 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -9,7 +9,6 @@ #include /* To get things like MAP_HUGETLB even on older libc headers */ #include "map.h" #include "thread.h" -#include "strlist.h" #include "vdso.h" #include "build-id.h" #include "util.h" diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d4877c8438e5..580f0e17ad38 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -9,6 +9,7 @@ #include "parse-events.h" #include #include "string2.h" +#include "strlist.h" #include "symbol.h" #include "cache.h" #include "header.h" diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 8f1374071cfc..84e7e698411e 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -36,6 +36,7 @@ #include "util.h" #include "event.h" #include "strlist.h" +#include "strfilter.h" #include "debug.h" #include "cache.h" #include "color.h" diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 5d4e94061402..373842656fb6 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -3,8 +3,6 @@ #include #include "intlist.h" -#include "strlist.h" -#include "strfilter.h" /* Probe related configurations */ struct probe_conf { @@ -107,6 +105,8 @@ struct line_range { struct intlist *line_list; /* Visible lines */ }; +struct strlist; + /* List of variables */ 
struct variable_list { struct probe_trace_point point; /* Actual probepoint */ @@ -153,6 +153,9 @@ int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs); int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs); int show_probe_trace_events(struct perf_probe_event *pevs, int npevs); void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs); + +struct strfilter; + int del_perf_probe_events(struct strfilter *filter); int show_perf_probe_event(const char *group, const char *event, diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 1ccaefdc05c4..685653f2bc32 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -19,6 +19,7 @@ #include "util.h" #include "event.h" #include "strlist.h" +#include "strfilter.h" #include "debug.h" #include "cache.h" #include "color.h" diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index dbf95a00864a..5ecc9d3925db 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -1,10 +1,11 @@ #ifndef __PROBE_FILE_H #define __PROBE_FILE_H -#include "strlist.h" -#include "strfilter.h" #include "probe-event.h" +struct strlist; +struct strfilter; + /* Cache of probe definitions */ struct probe_cache_entry { struct list_head node; @@ -41,6 +42,7 @@ int probe_file__open_both(int *kfd, int *ufd, int flag); struct strlist *probe_file__get_namelist(int fd); struct strlist *probe_file__get_rawlist(int fd); int probe_file__add_event(int fd, struct probe_trace_event *tev); + int probe_file__del_events(int fd, struct strfilter *filter); int probe_file__get_events(int fd, struct strfilter *filter, struct strlist *plist); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 3f3cd11d3b7c..a5731de0e5eb 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -38,6 +38,7 @@ #include "debug.h" #include "intlist.h" #include "util.h" +#include "strlist.h" #include 
"symbol.h" #include "probe-finder.h" #include "probe-file.h" diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 9aa058e167e8..4df228ab4db1 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -7,6 +7,7 @@ #include "symbol.h" #include "evsel.h" #include "evlist.h" +#include "strlist.h" #include #include "mem-events.h" #include diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 535903297cee..421232a27e7b 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -11,7 +11,6 @@ #include "symbol.h" #include "string.h" #include "callchain.h" -#include "strlist.h" #include "values.h" #include "../perf.h" diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 6fd9963be1c8..397676c25232 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -40,7 +40,6 @@ #include #include #include -#include "strlist.h" extern char buildid_dir[]; @@ -89,6 +88,8 @@ static inline void *zalloc(size_t size) #define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) +struct strlist; + int mkdir_p(char *path, mode_t mode); int rm_rf(const char *path); struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *)); -- cgit v1.2.3 From b0742e90f5ab904aa835350c28bcec48e9109379 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Apr 2017 11:08:10 -0300 Subject: perf tools: Don't include terminal handling headers in util.h Continuing the disentanglement, mostly the TUI needs CTRL(c), that is in sys/ttydefaults.h and term.c needs the termios headers. And term.h needs to be added to a few places too. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-il19zna7qj9ytavdbwlipc7t@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 1 + tools/perf/builtin-top.c | 1 + tools/perf/ui/browsers/annotate.c | 1 + tools/perf/ui/browsers/header.c | 2 ++ tools/perf/ui/browsers/hists.c | 1 + tools/perf/util/parse-events.c | 2 +- tools/perf/util/term.c | 6 +++++- tools/perf/util/util.h | 4 ---- 8 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 40660428fa72..4002277475cf 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -3,6 +3,7 @@ #include "util/evsel.h" #include "util/evlist.h" +#include "util/term.h" #include "util/util.h" #include "util/cache.h" #include "util/symbol.h" diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ec3247db4826..47984a838b73 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -40,6 +40,7 @@ #include "util/cpumap.h" #include "util/xyarray.h" #include "util/sort.h" +#include "util/term.h" #include "util/intlist.h" #include "util/parse-branch-options.h" #include "arch/common.h" diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 2ed64124276f..d990ad08a3c6 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -12,6 +12,7 @@ #include #include #include +#include struct disasm_line_samples { double percent; diff --git a/tools/perf/ui/browsers/header.c b/tools/perf/ui/browsers/header.c index edbeaaf31ace..e2c9390ff4c5 100644 --- a/tools/perf/ui/browsers/header.c +++ b/tools/perf/ui/browsers/header.c @@ -8,6 +8,8 @@ #include "util/header.h" #include "util/session.h" +#include + static void ui_browser__argv_write(struct ui_browser *browser, void *entry, int row) { diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 
0916575c6694..a271b48ad0d3 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "../../util/evsel.h" #include "../../util/evlist.h" diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 580f0e17ad38..f3dd1aa59a25 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1,7 +1,7 @@ #include #include #include -#include "util.h" +#include "term.h" #include "../perf.h" #include "evlist.h" #include "evsel.h" diff --git a/tools/perf/util/term.c b/tools/perf/util/term.c index 90b47d8aa19c..8f254a74d97d 100644 --- a/tools/perf/util/term.c +++ b/tools/perf/util/term.c @@ -1,4 +1,8 @@ -#include "util.h" +#include "term.h" +#include +#include +#include +#include void get_term_dimensions(struct winsize *ws) { diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 397676c25232..fa8c2e59a0bc 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -35,11 +34,8 @@ #include #include #include -#include #include -#include #include -#include extern char buildid_dir[]; -- cgit v1.2.3 From 9a3993d408bc61b839de1a2c6c783477a04860bb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Apr 2017 11:33:48 -0300 Subject: perf tools: Move path related functions to util/path.h Disentangling util.h header mess a bit more. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-aj6je8ly377i4upedmjzdsq6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 1 + tools/perf/tests/mmap-thread-lookup.c | 1 + tools/perf/tests/openat-syscall-all-cpus.c | 4 ++++ tools/perf/tests/openat-syscall.c | 3 +++ tools/perf/ui/setup.c | 1 + tools/perf/util/build-id.c | 1 + tools/perf/util/counts.c | 1 + tools/perf/util/dso.c | 1 + tools/perf/util/path.c | 28 +++++++++++++++++++++++++++- tools/perf/util/path.h | 9 +++++++++ tools/perf/util/symbol.c | 1 + tools/perf/util/symbol.h | 2 +- tools/perf/util/unwind-libdw.c | 1 + tools/perf/util/util.c | 10 ---------- tools/perf/util/util.h | 16 ---------------- 15 files changed, 52 insertions(+), 28 deletions(-) create mode 100644 tools/perf/util/path.h diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e065c4a12f58..d1c8cdc6788b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -24,6 +24,7 @@ #include "util/evlist.h" #include #include "util/machine.h" +#include "util/path.h" #include "util/session.h" #include "util/thread.h" #include diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index a5ffb754f8c6..6ea4d8a5d26b 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -12,6 +12,7 @@ #include "thread_map.h" #include "symbol.h" #include "thread.h" +#include "util.h" #define THREADS 4 diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index b7dece0de42c..1a74dd9fd067 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -3,8 +3,12 @@ /* For the CPU_* macros */ #include +#include +#include +#include #include #include +#include #include "evsel.h" #include "tests.h" #include "thread_map.h" diff --git a/tools/perf/tests/openat-syscall.c 
b/tools/perf/tests/openat-syscall.c index 5f0c700b4693..e44506e21ee7 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -2,6 +2,9 @@ #include #include #include +#include +#include +#include #include "thread_map.h" #include "evsel.h" #include "debug.h" diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c index 50d13e58210f..5ea0b40c4fc2 100644 --- a/tools/perf/ui/setup.c +++ b/tools/perf/ui/setup.c @@ -4,6 +4,7 @@ #include "../util/cache.h" #include "../util/debug.h" #include "../util/hist.h" +#include "../util/util.h" pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; void *perf_gtk_handle; diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index f9ccd053cdf6..b5c4892c2e18 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -18,6 +18,7 @@ #include "tool.h" #include "header.h" #include "vdso.h" +#include "path.h" #include "probe-file.h" #include "strlist.h" diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c index 83fedd0d22a1..c4af82ab7808 100644 --- a/tools/perf/util/counts.c +++ b/tools/perf/util/counts.c @@ -2,6 +2,7 @@ #include #include "evsel.h" #include "counts.h" +#include "util.h" struct perf_counts *perf_counts__new(int ncpus, int nthreads) { diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index cd061dc1de70..cbfe17f5168a 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -3,6 +3,7 @@ #include #include #include +#include "path.h" #include "symbol.h" #include "dso.h" #include "machine.h" diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 7c7630be5a89..50ec3bc87a60 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -11,8 +11,13 @@ * which is what it's designed for. */ #include "cache.h" -#include "util.h" +#include "path.h" +#include #include +#include +#include +#include +#include static char bad_path[] = "/bad-path/"; /* @@ -50,3 +55,24 @@ char *mkpath(const char *fmt, ...) 
return bad_path; return cleanup_path(pathname); } + +int path__join(char *bf, size_t size, const char *path1, const char *path2) +{ + return scnprintf(bf, size, "%s%s%s", path1, path1[0] ? "/" : "", path2); +} + +int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3) +{ + return scnprintf(bf, size, "%s%s%s%s%s", path1, path1[0] ? "/" : "", + path2, path2[0] ? "/" : "", path3); +} + +bool is_regular_file(const char *file) +{ + struct stat st; + + if (stat(file, &st)) + return false; + + return S_ISREG(st.st_mode); +} diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h new file mode 100644 index 000000000000..9a276a58e3c2 --- /dev/null +++ b/tools/perf/util/path.h @@ -0,0 +1,9 @@ +#ifndef _PERF_PATH_H +#define _PERF_PATH_H + +int path__join(char *bf, size_t size, const char *path1, const char *path2); +int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3); + +bool is_regular_file(const char *file); + +#endif /* _PERF_PATH_H */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 68f14d770083..2cb7665e9973 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -19,6 +19,7 @@ #include "strlist.h" #include "intlist.h" #include "header.h" +#include "path.h" #include "sane_ctype.h" #include diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 5245d2fb1a0a..7acd70fce68e 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -13,7 +13,7 @@ #include #include "build-id.h" #include "event.h" -#include "util.h" +#include "path.h" #ifdef HAVE_LIBELF_SUPPORT #include diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 783a53fb7a4e..f90e11a555b2 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -12,6 +12,7 @@ #include "event.h" #include "perf_regs.h" #include "callchain.h" +#include "util.h" static char *debuginfo_path; diff --git a/tools/perf/util/util.c 
b/tools/perf/util/util.c index 4fb8ee552a31..64877c6d09b2 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -713,16 +713,6 @@ out: return tip; } -bool is_regular_file(const char *file) -{ - struct stat st; - - if (stat(file, &st)) - return false; - - return S_ISREG(st.st_mode); -} - int fetch_current_timestamp(char *buf, size_t sz) { struct timeval tv; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index fa8c2e59a0bc..617965644ff4 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -120,21 +120,6 @@ struct parse_tag { unsigned long parse_tag_value(const char *str, struct parse_tag *tags); -static inline int path__join(char *bf, size_t size, - const char *path1, const char *path2) -{ - return scnprintf(bf, size, "%s%s%s", path1, path1[0] ? "/" : "", path2); -} - -static inline int path__join3(char *bf, size_t size, - const char *path1, const char *path2, - const char *path3) -{ - return scnprintf(bf, size, "%s%s%s%s%s", - path1, path1[0] ? "/" : "", - path2, path2[0] ? "/" : "", path3); -} - int perf_event_paranoid(void); void mem_bswap_64(void *src, int byte_size); @@ -162,7 +147,6 @@ int fetch_kernel_version(unsigned int *puint, #define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) const char *perf_tip(const char *dirpath); -bool is_regular_file(const char *file); int fetch_current_timestamp(char *buf, size_t sz); #ifndef HAVE_SCHED_GETCPU_SUPPORT -- cgit v1.2.3 From 6dcca6df4b73d409628c7b4464c63d4eb9d4d13a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Apr 2017 11:42:23 -0300 Subject: perf tools: No need to include bitops.h in util.h When we switched to the kernel's roundup_pow_of_two we forgot to remove this include from util.h, do it now. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 91529834d1de ("perf evlist: Use roundup_pow_of_two") Link: http://lkml.kernel.org/n/tip-kfye5rxivib6155cltx0bw4h@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 617965644ff4..423ed7f8bcc0 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -35,7 +35,6 @@ #include #include #include -#include extern char buildid_dir[]; -- cgit v1.2.3 From 20a9ed280dde6292c529dfdaaf9ce743ef71bb1f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Apr 2017 11:44:58 -0300 Subject: perf tools: Use api/fs/tracing_path.h where needed Instead of getting it out of luck from util.h, where it isn't needed at all. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-0bqugg5lc5ksla1v4m0dnmc1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 1 + tools/perf/tests/parse-events.c | 1 + tools/perf/util/util.h | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 67d14037c03e..0f34ab7a9ec1 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -16,6 +16,7 @@ #include "debug.h" #include +#include #include "evlist.h" #include "target.h" #include "cpumap.h" diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 66ecc3f5f618..f5b77f5c3192 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -9,6 +9,7 @@ #include #include #include +#include #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 423ed7f8bcc0..d4910e490f20 100644 --- a/tools/perf/util/util.h +++ 
b/tools/perf/util/util.h @@ -34,7 +34,6 @@ #include #include #include -#include extern char buildid_dir[]; -- cgit v1.2.3 From 767fe71b2d576762c77873021abd95c0a18538e3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Apr 2017 12:20:19 -0300 Subject: perf tools: Remove misplaced __maybe_unused in some functions Those args _are_ being used. Link: http://lkml.kernel.org/n/tip-yi9s00ki1i1tcc704v042957@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index b5c4892c2e18..9815a3b6667a 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -451,14 +451,14 @@ void disable_buildid_cache(void) } static bool lsdir_bid_head_filter(const char *name __maybe_unused, - struct dirent *d __maybe_unused) + struct dirent *d) { return (strlen(d->d_name) == 2) && isxdigit(d->d_name[0]) && isxdigit(d->d_name[1]); } static bool lsdir_bid_tail_filter(const char *name __maybe_unused, - struct dirent *d __maybe_unused) + struct dirent *d) { int i = 0; while (isxdigit(d->d_name[i]) && i < SBUILD_ID_SIZE - 3) -- cgit v1.2.3 From 76b31a29ddaf2fa5f0a70458c214bed02a4a70e9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Apr 2017 12:26:44 -0300 Subject: perf tools: Remove include dirent.h from util.h The files using the dirent.h routines should instead include it, reducing the includes hell that leads to longer build times. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-42g2f4z6nfg7mdb2ae97n7tj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 1 + tools/perf/tests/dso-data.c | 1 + tools/perf/tests/parse-events.c | 1 + tools/perf/ui/browsers/hists.c | 1 + tools/perf/util/build-id.c | 1 + tools/perf/util/cpumap.c | 1 + tools/perf/util/event.c | 1 + tools/perf/util/machine.c | 1 + tools/perf/util/parse-events.c | 1 + tools/perf/util/util.c | 1 + tools/perf/util/util.h | 2 +- 11 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 853651a0f720..fe1dcd4f2c6d 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -32,6 +32,7 @@ #include "asm/bug.h" #include "util/mem-events.h" #include "util/dump-insn.h" +#include #include #include diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index 46ea2e061b86..8f08df5861cb 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index f5b77f5c3192..981d2bf9914f 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -5,6 +5,7 @@ #include "tests.h" #include "debug.h" #include "util.h" +#include #include #include #include diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index a271b48ad0d3..f0b5b2b0e521 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 9815a3b6667a..923ea290bb6e 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -7,6 +7,7 @@ * Copyright (C) 2009, 2010 Arnaldo Carvalho de Melo */ #include "util.h" +#include #include #include #include 
"build-id.h" diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 9d9ecb2430cc..37b3bb79ee08 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -3,6 +3,7 @@ #include "../perf.h" #include "cpumap.h" #include +#include #include #include #include diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 1fc1217a0c2c..cf457ef534da 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index cdbfe3e32e5a..46411742d03c 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1,3 +1,4 @@ +#include #include #include #include "callchain.h" diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f3dd1aa59a25..7d84338b19ee 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1,5 +1,6 @@ #include #include +#include #include #include "term.h" #include "../perf.h" diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 64877c6d09b2..b9716bc6e8fd 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -7,6 +7,7 @@ #ifdef HAVE_BACKTRACE_SUPPORT #include #endif +#include #include #include #include diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index d4910e490f20..c0574e2763b7 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -82,6 +81,7 @@ static inline void *zalloc(size_t size) #define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) +struct dirent; struct strlist; int mkdir_p(char *path, mode_t mode); -- cgit v1.2.3 From 1eae20c1d40acf7676aa799b48f747d9b28bf352 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Apr 2017 12:33:30 -0300 Subject: perf tools: Remove regex.h and fnmatch.h from util.h The users of regex and fnmatch functions should include those headers instead. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ixzm5kuamsq1ixbkuv6kmwzj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 1 + tools/perf/util/machine.c | 1 + tools/perf/util/sort.c | 1 + tools/perf/util/sort.h | 2 +- tools/perf/util/util.h | 2 -- 5 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f50738e0006e..5bbd4b2ef6d2 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 46411742d03c..988e84ce6f88 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1,6 +1,7 @@ #include #include #include +#include #include "callchain.h" #include "debug.h" #include "event.h" diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 4df228ab4db1..fe4fd7b5f8e0 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1,5 +1,6 @@ #include #include +#include #include #include "sort.h" #include "hist.h" diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 421232a27e7b..8bcec05ee578 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -2,7 +2,7 @@ #define __PERF_SORT_H #include "../builtin.h" -#include "util.h" +#include #include "color.h" #include diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index c0574e2763b7..d79f3c23dd02 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -23,9 +23,7 @@ #include #include #include -#include #include -#include #include #include #include -- cgit v1.2.3 From 1b5ad16c7aa7177512ce141e345ff36b9f1a6136 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Apr 2017 15:39:45 -0300 Subject: perf tools: Ditch unused strchrnul() reimplementation Remnants from the git codebase. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-kwaez3uxo1w9f8v5r7etl0w6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index d79f3c23dd02..2a1166f8bb37 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -56,22 +56,6 @@ void set_warning_routine(void (*routine)(const char *err, va_list params)); int prefixcmp(const char *str, const char *prefix); void set_buildid_dir(const char *dir); -#ifdef __GLIBC_PREREQ -#if __GLIBC_PREREQ(2, 1) -#define HAVE_STRCHRNUL -#endif -#endif - -#ifndef HAVE_STRCHRNUL -#define strchrnul gitstrchrnul -static inline char *gitstrchrnul(const char *s, int c) -{ - while (*s && *s != c) - s++; - return (char *)s; -} -#endif - static inline void *zalloc(size_t size) { return calloc(1, size); -- cgit v1.2.3 From 3dfed9102694227aa763b9984c50d72de796d39b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 13:28:30 -0300 Subject: perf unwind: Provide only forward declarations for pointer types No need to drag the headers, helps in untangling them and reducing build time. 
Link: http://lkml.kernel.org/n/tip-l8soqph92duyw5jdha0fij8b@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/unwind.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index 61fb1e90ff51..bfbdcc6198c9 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -1,10 +1,13 @@ #ifndef __UNWIND_H #define __UNWIND_H +#include #include -#include "event.h" -#include "symbol.h" -#include "thread.h" + +struct map; +struct perf_sample; +struct symbol; +struct thread; struct unwind_entry { struct map *map; -- cgit v1.2.3 From 9607ad3a63871b074a57ce1facd04a230c38725c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 15:49:18 -0300 Subject: perf tools: Add signal.h to places using its definitions And remove it from util.h, disentangling it a bit more. Link: http://lkml.kernel.org/n/tip-2zg9s5nx90yde64j3g4z2uhk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/intel-cqm.c | 1 + tools/perf/builtin-inject.c | 1 + tools/perf/builtin-kvm.c | 1 + tools/perf/builtin-record.c | 1 + tools/perf/builtin-report.c | 1 + tools/perf/builtin-script.c | 1 + tools/perf/builtin-stat.c | 1 + tools/perf/builtin-top.c | 1 + tools/perf/builtin-trace.c | 1 + tools/perf/perf.c | 1 + tools/perf/trace/beauty/signum.c | 1 + tools/perf/ui/gtk/annotate.c | 1 + tools/perf/ui/gtk/hists.c | 1 + tools/perf/util/evlist.c | 1 + tools/perf/util/evlist.h | 1 + tools/perf/util/util.c | 1 + tools/perf/util/util.h | 1 - 17 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c index 03c62eb0106b..befde6708c33 100644 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -6,6 +6,7 @@ #include "evsel.h" #include "arch-tests.h" +#include #include #include #include diff --git a/tools/perf/builtin-inject.c 
b/tools/perf/builtin-inject.c index b102ee702aa1..8bd791cca008 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -23,6 +23,7 @@ #include #include +#include struct perf_inject { struct perf_tool tool; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 4002277475cf..2b1732cfc0be 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 70340ff2008d..e1b937f23894 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5bbd4b2ef6d2..b8f2dd322496 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index fe1dcd4f2c6d..76a88bdeebe4 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "sane_ctype.h" diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index be2cd537c537..e3837febb4ff 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 47984a838b73..7ab42b8311a1 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d1c8cdc6788b..ef3613f2fe6a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -45,6 +45,7 @@ #include #include #include /* FIXME: Still needed for audit_errno_to_name 
*/ +#include #include #include #include diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 9ccccb0fbd8f..356588982d08 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/trace/beauty/signum.c b/tools/perf/trace/beauty/signum.c index d3b0b1fab077..fde8f2fc6558 100644 --- a/tools/perf/trace/beauty/signum.c +++ b/tools/perf/trace/beauty/signum.c @@ -1,3 +1,4 @@ +#include static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) { diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 71359b898b67..e99ba86158d2 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -4,6 +4,7 @@ #include "util/evsel.h" #include "ui/helpline.h" #include +#include enum { ANN_COL__PERCENT, diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index c42de4dcc055..e24f83957705 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -6,6 +6,7 @@ #include "../helpline.h" #include "../string2.h" #include "gtk.h" +#include #define MAX_COLUMNS 32 diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f74ea2e55fde..8d36cf345375 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -18,6 +18,7 @@ #include "evsel.h" #include "debug.h" #include "asm/bug.h" +#include #include #include "parse-events.h" diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 3fed4fb2e866..94cea4398a13 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -11,6 +11,7 @@ #include "evsel.h" #include "util.h" #include "auxtrace.h" +#include #include struct pollfd; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index b9716bc6e8fd..bc42c459f586 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -9,6 +9,7 @@ #endif #include #include +#include #include #include #include diff --git a/tools/perf/util/util.h 
b/tools/perf/util/util.h index 2a1166f8bb37..6bf141647403 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 58db1d6e7d5d24afa2d32e916fd6f6b6d240ba93 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 16:05:56 -0300 Subject: perf tools: Move units conversion/formatting routines to separate object Out of util.h, to disentangle it a bit more. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-vpksyj3w5fk9t8s6mxmkajyr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 1 + tools/perf/builtin-report.c | 1 + tools/perf/tests/unit_number__scnprintf.c | 2 +- tools/perf/ui/browsers/hists.c | 1 + tools/perf/util/Build | 1 + tools/perf/util/evlist.c | 1 + tools/perf/util/python-ext-sources | 1 + tools/perf/util/units.c | 39 +++++++++++++++++++++++++++++++ tools/perf/util/units.h | 10 ++++++++ tools/perf/util/util.c | 35 --------------------------- tools/perf/util/util.h | 3 --- 11 files changed, 56 insertions(+), 39 deletions(-) create mode 100644 tools/perf/util/units.c create mode 100644 tools/perf/util/units.h diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e1b937f23894..99156b4363a5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -38,6 +38,7 @@ #include "util/bpf-loader.h" #include "util/trigger.h" #include "util/perf-hooks.h" +#include "util/units.h" #include "asm/bug.h" #include diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b8f2dd322496..3f89e0eaf0d4 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -37,6 +37,7 @@ #include "arch/common.h" #include "util/time-utils.h" #include "util/auxtrace.h" +#include "util/units.h" #include #include diff --git a/tools/perf/tests/unit_number__scnprintf.c 
b/tools/perf/tests/unit_number__scnprintf.c index f84cb70ee5e5..44589de084b8 100644 --- a/tools/perf/tests/unit_number__scnprintf.c +++ b/tools/perf/tests/unit_number__scnprintf.c @@ -2,7 +2,7 @@ #include #include #include "tests.h" -#include "util.h" +#include "units.h" #include "debug.h" int test__unit_number__scnprint(int subtest __maybe_unused) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f0b5b2b0e521..1b12a69740b3 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -24,6 +24,7 @@ #include "annotate.h" #include "srcline.h" #include "string2.h" +#include "units.h" #include "sane_ctype.h" diff --git a/tools/perf/util/Build b/tools/perf/util/Build index f0b9e5d0e2fc..069583bdc670 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -89,6 +89,7 @@ libperf-y += help-unknown-cmd.o libperf-y += mem-events.o libperf-y += vsprintf.o libperf-y += drv_configs.o +libperf-y += units.o libperf-y += time-utils.o libperf-y += expr-bison.o diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 8d36cf345375..5eb638fd003f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -17,6 +17,7 @@ #include "evlist.h" #include "evsel.h" #include "debug.h" +#include "units.h" #include "asm/bug.h" #include #include diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 7d3927447fba..9f3b0d9754a8 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -27,3 +27,4 @@ util/trace-event.c ../lib/rbtree.c util/string.c util/symbol_fprintf.c +util/units.c diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c new file mode 100644 index 000000000000..f6a2a3d117d5 --- /dev/null +++ b/tools/perf/util/units.c @@ -0,0 +1,39 @@ +#include "units.h" +#include +#include +#include + +unsigned long convert_unit(unsigned long value, char *unit) +{ + *unit = ' '; + + if (value > 1000) { + value /= 1000; + *unit = 
'K'; + } + + if (value > 1000) { + value /= 1000; + *unit = 'M'; + } + + if (value > 1000) { + value /= 1000; + *unit = 'G'; + } + + return value; +} + +int unit_number__scnprintf(char *buf, size_t size, u64 n) +{ + char unit[4] = "BKMG"; + int i = 0; + + while (((n / 1024) > 1) && (i < 3)) { + n /= 1024; + i++; + } + + return scnprintf(buf, size, "%" PRIu64 "%c", n, unit[i]); +} diff --git a/tools/perf/util/units.h b/tools/perf/util/units.h new file mode 100644 index 000000000000..3ed7774afaa9 --- /dev/null +++ b/tools/perf/util/units.h @@ -0,0 +1,10 @@ +#ifndef PERF_UNIT_H +#define PERF_UNIT_H + +#include +#include + +unsigned long convert_unit(unsigned long value, char *unit); +int unit_number__scnprintf(char *buf, size_t size, u64 n); + +#endif /* PERF_UNIT_H */ diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index bc42c459f586..7741d5f6022b 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -272,28 +272,6 @@ int copyfile(const char *from, const char *to) return copyfile_mode(from, to, 0755); } -unsigned long convert_unit(unsigned long value, char *unit) -{ - *unit = ' '; - - if (value > 1000) { - value /= 1000; - *unit = 'K'; - } - - if (value > 1000) { - value /= 1000; - *unit = 'M'; - } - - if (value > 1000) { - value /= 1000; - *unit = 'G'; - } - - return value; -} - static ssize_t ion(bool is_read, int fd, void *buf, size_t n) { void *buf_start = buf; @@ -731,16 +709,3 @@ int fetch_current_timestamp(char *buf, size_t sz) return 0; } - -int unit_number__scnprintf(char *buf, size_t size, u64 n) -{ - char unit[4] = "BKMG"; - int i = 0; - - while (((n / 1024) > 1) && (i < 3)) { - n /= 1024; - i++; - } - - return scnprintf(buf, size, "%" PRIu64 "%c", n, unit[i]); -} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 6bf141647403..add9e77369a2 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -73,7 +73,6 @@ int copyfile(const char *from, const char *to); int copyfile_mode(const char *from, const 
char *to, mode_t mode); int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size); -unsigned long convert_unit(unsigned long value, char *unit); ssize_t readn(int fd, void *buf, size_t n); ssize_t writen(int fd, void *buf, size_t n); @@ -134,6 +133,4 @@ int sched_getcpu(void); int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); -int unit_number__scnprintf(char *buf, size_t size, u64 n); - #endif /* GIT_COMPAT_UTIL_H */ -- cgit v1.2.3 From c5e4027e056c3027f682f0d69fe9fd75083b65f8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 16:12:39 -0300 Subject: perf tools: Move timestamp routines from util.h to time-utils.h We already have a header for time utilities, so use it. Link: http://lkml.kernel.org/n/tip-sijzpbvutlg0c3oxn49hy9ca@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-buildid-cache.c | 1 + tools/perf/builtin-kvm.c | 1 + tools/perf/builtin-record.c | 1 + tools/perf/util/time-utils.c | 25 +++++++++++++++++++++++++ tools/perf/util/time-utils.h | 7 +++++++ tools/perf/util/util.c | 25 ------------------------- tools/perf/util/util.h | 6 ------ 7 files changed, 35 insertions(+), 31 deletions(-) diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 034c3d4a7b27..64b44e81c771 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -22,6 +22,7 @@ #include "util/build-id.h" #include "util/session.h" #include "util/symbol.h" +#include "util/time-utils.h" static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) { diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 2b1732cfc0be..d86ac0ac2c99 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -24,6 +24,7 @@ #ifdef HAVE_TIMERFD_SUPPORT #include #endif +#include #include #include diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 99156b4363a5..32a9a68d38a2 100644 --- 
a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -38,6 +38,7 @@ #include "util/bpf-loader.h" #include "util/trigger.h" #include "util/perf-hooks.h" +#include "util/time-utils.h" #include "util/units.h" #include "asm/bug.h" diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index d1b21c72206d..5b5d0214debd 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -117,3 +117,28 @@ bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) return false; } + +int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz) +{ + u64 sec = timestamp / NSEC_PER_SEC; + u64 usec = (timestamp % NSEC_PER_SEC) / NSEC_PER_USEC; + + return scnprintf(buf, sz, "%"PRIu64".%06"PRIu64, sec, usec); +} + +int fetch_current_timestamp(char *buf, size_t sz) +{ + struct timeval tv; + struct tm tm; + char dt[32]; + + if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm)) + return -1; + + if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm)) + return -1; + + scnprintf(buf, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000); + + return 0; +} diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h index c1f197c4af6c..8656be08513b 100644 --- a/tools/perf/util/time-utils.h +++ b/tools/perf/util/time-utils.h @@ -1,6 +1,9 @@ #ifndef _TIME_UTILS_H_ #define _TIME_UTILS_H_ +#include +#include + struct perf_time_interval { u64 start, end; }; @@ -11,4 +14,8 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr); bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp); +int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); + +int fetch_current_timestamp(char *buf, size_t sz); + #endif diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 7741d5f6022b..e86dba2f791a 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -381,14 +381,6 @@ void sighandler_dump_stack(int sig) raise(sig); } -int timestamp__scnprintf_usec(u64 
timestamp, char *buf, size_t sz) -{ - u64 sec = timestamp / NSEC_PER_SEC; - u64 usec = (timestamp % NSEC_PER_SEC) / NSEC_PER_USEC; - - return scnprintf(buf, sz, "%"PRIu64".%06"PRIu64, sec, usec); -} - unsigned long parse_tag_value(const char *str, struct parse_tag *tags) { struct parse_tag *i = tags; @@ -692,20 +684,3 @@ out: return tip; } - -int fetch_current_timestamp(char *buf, size_t sz) -{ - struct timeval tv; - struct tm tm; - char dt[32]; - - if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm)) - return -1; - - if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm)) - return -1; - - scnprintf(buf, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000); - - return 0; -} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index add9e77369a2..dc8eb942f92b 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -20,10 +20,7 @@ #include #include #include -#include -#include #include -#include #include #include #include @@ -125,12 +122,9 @@ int fetch_kernel_version(unsigned int *puint, #define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) const char *perf_tip(const char *dirpath); -int fetch_current_timestamp(char *buf, size_t sz); #ifndef HAVE_SCHED_GETCPU_SUPPORT int sched_getcpu(void); #endif -int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); - #endif /* GIT_COMPAT_UTIL_H */ -- cgit v1.2.3 From bb8c16db43e48f2012c3ae8c7d682f834c5986d9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 16:15:13 -0300 Subject: perf kvm: Make function only used by 'perf kvm' static No need to have this polluting util.h, it was polluted enough already. 
Link: http://lkml.kernel.org/n/tip-wfdidqlwbvi5y0s61kv6z2gn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 14 ++++++++++++++ tools/perf/util/util.c | 14 -------------- tools/perf/util/util.h | 1 - 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index d86ac0ac2c99..129af3e9c728 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -36,6 +36,20 @@ #include #include +static const char *get_filename_for_perf_kvm(void) +{ + const char *filename; + + if (perf_host && !perf_guest) + filename = strdup("perf.data.host"); + else if (!perf_host && perf_guest) + filename = strdup("perf.data.guest"); + else + filename = strdup("perf.data.kvm"); + + return filename; +} + #ifdef HAVE_KVM_STAT_SUPPORT #include "util/kvm-stat.h" diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index e86dba2f791a..eb49330c77d4 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -496,20 +496,6 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) return ret; } -const char *get_filename_for_perf_kvm(void) -{ - const char *filename; - - if (perf_host && !perf_guest) - filename = strdup("perf.data.host"); - else if (!perf_host && perf_guest) - filename = strdup("perf.data.guest"); - else - filename = strdup("perf.data.kvm"); - - return filename; -} - int perf_event_paranoid(void) { int value; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index dc8eb942f92b..c3f6d0de69c5 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -100,7 +100,6 @@ int perf_event_paranoid(void); void mem_bswap_64(void *src, int byte_size); void mem_bswap_32(void *src, int byte_size); -const char *get_filename_for_perf_kvm(void); bool find_process(const char *name); #ifdef HAVE_ZLIB_SUPPORT -- cgit v1.2.3 From 8c2b7cac78e17886e8089389a570a290c9b5ca67 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 
16:21:59 -0300 Subject: perf debug: Move dump_stack() and sighandler_dump_stack() to debug.h Two more out of util.h. Link: http://lkml.kernel.org/n/tip-polkuxm1cpr06lbgue5pyqum@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/debug.c | 32 +++++++++++++++++++++++++++++++- tools/perf/util/debug.h | 3 +++ tools/perf/util/util.c | 31 ------------------------------- tools/perf/util/util.h | 3 --- 4 files changed, 34 insertions(+), 35 deletions(-) diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 6e1d7e159649..9eaf86f4003b 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -8,7 +8,9 @@ #include #include #include - +#ifdef HAVE_BACKTRACE_SUPPORT +#include +#endif #include "cache.h" #include "color.h" #include "event.h" @@ -248,3 +250,31 @@ void perf_debug_setup(void) { libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper); } + +/* Obtain a backtrace and print it to stdout. */ +#ifdef HAVE_BACKTRACE_SUPPORT +void dump_stack(void) +{ + void *array[16]; + size_t size = backtrace(array, ARRAY_SIZE(array)); + char **strings = backtrace_symbols(array, size); + size_t i; + + printf("Obtained %zd stack frames.\n", size); + + for (i = 0; i < size; i++) + printf("%s\n", strings[i]); + + free(strings); +} +#else +void dump_stack(void) {} +#endif + +void sighandler_dump_stack(int sig) +{ + psignal(sig, "perf"); + dump_stack(); + signal(sig, SIG_DFL); + raise(sig); +} diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 98832f5531d3..8a23ea1a71c7 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -56,4 +56,7 @@ int perf_debug_option(const char *str); void perf_debug_setup(void); int perf_quiet_option(void); +void dump_stack(void); +void sighandler_dump_stack(int sig); + #endif /* __PERF_DEBUG_H */ diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index eb49330c77d4..ae8036f06329 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -4,9 +4,6 
@@ #include #include #include -#ifdef HAVE_BACKTRACE_SUPPORT -#include -#endif #include #include #include @@ -353,34 +350,6 @@ int hex2u64(const char *ptr, u64 *long_val) return p - ptr; } -/* Obtain a backtrace and print it to stdout. */ -#ifdef HAVE_BACKTRACE_SUPPORT -void dump_stack(void) -{ - void *array[16]; - size_t size = backtrace(array, ARRAY_SIZE(array)); - char **strings = backtrace_symbols(array, size); - size_t i; - - printf("Obtained %zd stack frames.\n", size); - - for (i = 0; i < size; i++) - printf("%s\n", strings[i]); - - free(strings); -} -#else -void dump_stack(void) {} -#endif - -void sighandler_dump_stack(int sig) -{ - psignal(sig, "perf"); - dump_stack(); - signal(sig, SIG_DFL); - raise(sig); -} - unsigned long parse_tag_value(const char *str, struct parse_tag *tags) { struct parse_tag *i = tags; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index c3f6d0de69c5..07c4293742e7 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -80,9 +80,6 @@ void event_attr_init(struct perf_event_attr *attr); size_t hex_width(u64 v); int hex2u64(const char *ptr, u64 *val); -void dump_stack(void); -void sighandler_dump_stack(int sig); - extern unsigned int page_size; extern int cacheline_size; extern int sysctl_perf_event_max_stack; -- cgit v1.2.3 From 166ebdd2442660e7f942d657fc5e629000e58ec3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 19 Apr 2017 10:49:40 -0700 Subject: perf mem: Fix display of data source snoop indication 'perf mem report' doesn't display the data source snoop indication correctly. In the kernel API the definition is: #define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ #define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */ #define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */ but the table used by the perf tools exchanged "Hit" and "Miss": "None", "Miss", "Hit", Fix the table in perf. 
Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170419174940.13641-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index c56d52f90b54..06f5a3a4295c 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -206,8 +206,8 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) static const char * const snoop_access[] = { "N/A", "None", - "Miss", "Hit", + "Miss", "HitM", }; -- cgit v1.2.3 From 611f0afee0e87eb6d184e7f58aa20d18d291d169 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 16:29:38 -0300 Subject: perf tools: Add compress.h for the *_decompress_to_file() headers Out of util.h, the implementations were already in separate files, that are built conditionally. Link: http://lkml.kernel.org/n/tip-0ur7szxsb59f8758kfe63prb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/compress.h | 12 ++++++++++++ tools/perf/util/dso.c | 1 + tools/perf/util/lzma.c | 1 + tools/perf/util/util.h | 8 -------- tools/perf/util/zlib.c | 1 + 5 files changed, 15 insertions(+), 8 deletions(-) create mode 100644 tools/perf/util/compress.h diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h new file mode 100644 index 000000000000..67fd1bb7c2b7 --- /dev/null +++ b/tools/perf/util/compress.h @@ -0,0 +1,12 @@ +#ifndef PERF_COMPRESS_H +#define PERF_COMPRESS_H + +#ifdef HAVE_ZLIB_SUPPORT +int gzip_decompress_to_file(const char *input, int output_fd); +#endif + +#ifdef HAVE_LZMA_SUPPORT +int lzma_decompress_to_file(const char *input, int output_fd); +#endif + +#endif /* PERF_COMPRESS_H */ diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index cbfe17f5168a..3339ab7cabc5 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -3,6 +3,7 @@ #include 
#include #include +#include "compress.h" #include "path.h" #include "symbol.h" #include "dso.h" diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c index 5b73b268c169..4ca7c5c6cdcd 100644 --- a/tools/perf/util/lzma.c +++ b/tools/perf/util/lzma.c @@ -2,6 +2,7 @@ #include #include #include +#include "compress.h" #include "util.h" #include "debug.h" diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 07c4293742e7..5dea8a96cf84 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -99,14 +99,6 @@ void mem_bswap_32(void *src, int byte_size); bool find_process(const char *name); -#ifdef HAVE_ZLIB_SUPPORT -int gzip_decompress_to_file(const char *input, int output_fd); -#endif - -#ifdef HAVE_LZMA_SUPPORT -int lzma_decompress_to_file(const char *input, int output_fd); -#endif - int get_stack_size(const char *str, unsigned long *_size); int fetch_kernel_version(unsigned int *puint, diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c index 495a449fc25c..1329d843eb7b 100644 --- a/tools/perf/util/zlib.c +++ b/tools/perf/util/zlib.c @@ -4,6 +4,7 @@ #include #include +#include "util/compress.h" #include "util/util.h" #include "util/debug.h" -- cgit v1.2.3 From 56e2e05644a9494e8ba3165182dcdf43d40cc6a2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 18:38:33 -0300 Subject: perf callchain: Move callchain specific routines from util.[ch] Where they belong, no point in leaving those in the generic "util" files. 
Link: http://lkml.kernel.org/n/tip-ljx3iiip1hlfa7a7apjem7ph@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 103 +++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/python.c | 13 ++++++ tools/perf/util/util.c | 104 -------------------------------------------- tools/perf/util/util.h | 4 -- 4 files changed, 116 insertions(+), 108 deletions(-) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 0096d45a06b3..81fc29ac798f 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -24,6 +24,21 @@ #include "machine.h" #include "callchain.h" +#define CALLCHAIN_PARAM_DEFAULT \ + .mode = CHAIN_GRAPH_ABS, \ + .min_percent = 0.5, \ + .order = ORDER_CALLEE, \ + .key = CCKEY_FUNCTION, \ + .value = CCVAL_PERCENT, \ + +struct callchain_param callchain_param = { + CALLCHAIN_PARAM_DEFAULT +}; + +struct callchain_param callchain_param_default = { + CALLCHAIN_PARAM_DEFAULT +}; + __thread struct callchain_cursor callchain_cursor; int parse_callchain_record_opt(const char *arg, struct callchain_param *param) @@ -113,6 +128,32 @@ static int parse_callchain_value(const char *value) return -1; } +static int get_stack_size(const char *str, unsigned long *_size) +{ + char *endptr; + unsigned long size; + unsigned long max_size = round_down(USHRT_MAX, sizeof(u64)); + + size = strtoul(str, &endptr, 0); + + do { + if (*endptr) + break; + + size = round_up(size, sizeof(u64)); + if (!size || size > max_size) + break; + + *_size = size; + return 0; + + } while (0); + + pr_err("callchain: Incorrect stack dump size (max %ld): %s\n", + max_size, str); + return -1; +} + static int __parse_callchain_report_opt(const char *arg, bool allow_record_opt) { @@ -196,6 +237,68 @@ int parse_callchain_top_opt(const char *arg) return __parse_callchain_report_opt(arg, true); } +int parse_callchain_record(const char *arg, struct callchain_param *param) +{ + char *tok, *name, *saveptr = NULL; + char *buf; + int ret = -1; + + 
/* We need buffer that we know we can write to. */ + buf = malloc(strlen(arg) + 1); + if (!buf) + return -ENOMEM; + + strcpy(buf, arg); + + tok = strtok_r((char *)buf, ",", &saveptr); + name = tok ? : (char *)buf; + + do { + /* Framepointer style */ + if (!strncmp(name, "fp", sizeof("fp"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + param->record_mode = CALLCHAIN_FP; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for --call-graph fp\n"); + break; + + /* Dwarf style */ + } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { + const unsigned long default_stack_dump_size = 8192; + + ret = 0; + param->record_mode = CALLCHAIN_DWARF; + param->dump_size = default_stack_dump_size; + + tok = strtok_r(NULL, ",", &saveptr); + if (tok) { + unsigned long size = 0; + + ret = get_stack_size(tok, &size); + param->dump_size = size; + } + } else if (!strncmp(name, "lbr", sizeof("lbr"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + param->record_mode = CALLCHAIN_LBR; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for --call-graph lbr\n"); + break; + } else { + pr_err("callchain: Unknown --call-graph option " + "value: %s\n", arg); + break; + } + + } while (0); + + free(buf); + return ret; +} + int perf_callchain_config(const char *var, const char *value) { char *endptr; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 0533711af44d..c129e99114ae 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -4,12 +4,25 @@ #include #include #include "evlist.h" +#include "callchain.h" #include "evsel.h" #include "event.h" #include "cpumap.h" #include "print_binary.h" #include "thread_map.h" +/* + * Provide these two so that we don't have to link against callchain.c and + * start dragging hist.c, etc. 
+ */ +struct callchain_param callchain_param; + +int parse_callchain_record(const char *arg __maybe_unused, + struct callchain_param *param __maybe_unused) +{ + return 0; +} + /* * Support debug printing even though util/debug.c is not linked. That means * implementing 'verbose' and 'eprintf'. diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index ae8036f06329..131d21a659fb 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -17,24 +17,8 @@ #include #include #include -#include "callchain.h" #include "strlist.h" -#define CALLCHAIN_PARAM_DEFAULT \ - .mode = CHAIN_GRAPH_ABS, \ - .min_percent = 0.5, \ - .order = ORDER_CALLEE, \ - .key = CCKEY_FUNCTION, \ - .value = CCVAL_PERCENT, \ - -struct callchain_param callchain_param = { - CALLCHAIN_PARAM_DEFAULT -}; - -struct callchain_param callchain_param_default = { - CALLCHAIN_PARAM_DEFAULT -}; - /* * XXX We need to find a better place for these things... */ @@ -377,94 +361,6 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags) return (unsigned long) -1; } -int get_stack_size(const char *str, unsigned long *_size) -{ - char *endptr; - unsigned long size; - unsigned long max_size = round_down(USHRT_MAX, sizeof(u64)); - - size = strtoul(str, &endptr, 0); - - do { - if (*endptr) - break; - - size = round_up(size, sizeof(u64)); - if (!size || size > max_size) - break; - - *_size = size; - return 0; - - } while (0); - - pr_err("callchain: Incorrect stack dump size (max %ld): %s\n", - max_size, str); - return -1; -} - -int parse_callchain_record(const char *arg, struct callchain_param *param) -{ - char *tok, *name, *saveptr = NULL; - char *buf; - int ret = -1; - - /* We need buffer that we know we can write to. */ - buf = malloc(strlen(arg) + 1); - if (!buf) - return -ENOMEM; - - strcpy(buf, arg); - - tok = strtok_r((char *)buf, ",", &saveptr); - name = tok ? 
: (char *)buf; - - do { - /* Framepointer style */ - if (!strncmp(name, "fp", sizeof("fp"))) { - if (!strtok_r(NULL, ",", &saveptr)) { - param->record_mode = CALLCHAIN_FP; - ret = 0; - } else - pr_err("callchain: No more arguments " - "needed for --call-graph fp\n"); - break; - - /* Dwarf style */ - } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { - const unsigned long default_stack_dump_size = 8192; - - ret = 0; - param->record_mode = CALLCHAIN_DWARF; - param->dump_size = default_stack_dump_size; - - tok = strtok_r(NULL, ",", &saveptr); - if (tok) { - unsigned long size = 0; - - ret = get_stack_size(tok, &size); - param->dump_size = size; - } - } else if (!strncmp(name, "lbr", sizeof("lbr"))) { - if (!strtok_r(NULL, ",", &saveptr)) { - param->record_mode = CALLCHAIN_LBR; - ret = 0; - } else - pr_err("callchain: No more arguments " - "needed for --call-graph lbr\n"); - break; - } else { - pr_err("callchain: Unknown --call-graph option " - "value: %s\n", arg); - break; - } - - } while (0); - - free(buf); - return ret; -} - int perf_event_paranoid(void) { int value; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 5dea8a96cf84..fcad17ce5c19 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -25,7 +24,6 @@ #include #include #include -#include #include extern char buildid_dir[]; @@ -99,8 +97,6 @@ void mem_bswap_32(void *src, int byte_size); bool find_process(const char *name); -int get_stack_size(const char *str, unsigned long *_size); - int fetch_kernel_version(unsigned int *puint, char *str, size_t str_sz); #define KVER_VERSION(x) (((x) >> 16) & 0xff) -- cgit v1.2.3 From 391e42060098a743a87380ed02bf4c8126742b04 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 18:51:14 -0300 Subject: perf tools: Include sys/param.h where needed As it is going away from util.h, where it is not needed. 
This is mostly for things like MAXPATHLEN, MAX() and MIN(), these latter two probably should go away in favor of their kernel sources replacements. Link: http://lkml.kernel.org/n/tip-z1666f3fl3fqobxvjr5o2r39@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 1 + tools/perf/builtin-script.c | 1 + tools/perf/tests/attr.c | 1 + tools/perf/tests/code-reading.c | 1 + tools/perf/util/config.c | 1 + tools/perf/util/header.c | 1 + tools/perf/util/hist.c | 1 + tools/perf/util/parse-events.c | 1 + tools/perf/util/util.h | 1 - 9 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index a90c1260f49e..a14be1cd3d70 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "util.h" #include "debug.h" #include "builtin.h" diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 76a88bdeebe4..b093a3c21e40 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "sane_ctype.h" diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index ba87cd529bfc..c19e0da54337 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "../perf.h" #include "util.h" #include diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 3a8bf1565493..1f14e7612cbb 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "parse-events.h" #include "evlist.h" diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index f5604039cbe4..07d87d2dbee7 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -9,6 +9,7 @@ * */ #include +#include #include "util.h" #include "cache.h" #include diff --git a/tools/perf/util/header.c
b/tools/perf/util/header.c index 28a3acb7b313..915bc4f39482 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2,6 +2,7 @@ #include #include "util.h" #include "string2.h" +#include #include #include #include diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 65d42758aadd..2944458b9edf 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -12,6 +12,7 @@ #include "ui/progress.h" #include #include +#include static bool hists__filter_entry_by_dso(struct hists *hists, struct hist_entry *he); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 7d84338b19ee..4f7e42e18f8a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "term.h" #include "../perf.h" #include "evlist.h" diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index fcad17ce5c19..4e2afd6427cd 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From bf6733432dd8d92ffd687d6ce014a37923ba8105 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 18:58:26 -0300 Subject: perf tools: Remove a few more needless includes from util.h Link: http://lkml.kernel.org/n/tip-sb2zu21d6h42e5qnsrtl6wuu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 4e2afd6427cd..0df20a3973da 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -10,18 +10,14 @@ #include #include #include -#include #include #include #include #include #include #include -#include -#include #include #include -#include #include #include -- cgit v1.2.3 From 86a5e0c2028899465a129d87f60fe4018c5ae839 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 19:03:14 -0300 Subject: perf 
tools: Remove sys/ioctl.h from util.h Not needed in this header, added to the places that need 'struct winsize' and the ioctl defines. Link: http://lkml.kernel.org/n/tip-2pznlli3146y4242otlcm70m@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 1 + tools/perf/util/evsel.c | 1 + tools/perf/util/parse-events.c | 1 + tools/perf/util/top.h | 2 +- tools/perf/util/util.h | 1 - 5 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5eb638fd003f..46c0faf6c502 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -25,6 +25,7 @@ #include "parse-events.h" #include +#include #include #include diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 44a7aef3911b..0e879097adfb 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "asm/bug.h" #include "callchain.h" diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4f7e42e18f8a..01e779b91c8e 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "term.h" #include "../perf.h" diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index b2940c88734a..9bdfb78a9a35 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include struct perf_evlist; struct perf_evsel; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 0df20a3973da..c014b2fc22b3 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -18,7 +18,6 @@ #include #include #include -#include #include extern char buildid_dir[]; -- cgit v1.2.3 From 72f7c4d22ccf7c7e78aa80ea3285e77404b31913 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 19:06:30 -0300 Subject: perf tools: Remove string.h from util.h Not needed in this 
header, added to the places that need strdup, strcmp and a few other prototypes. Link: http://lkml.kernel.org/n/tip-t24yy85xnlv55kyosrum2ubs@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/comm.c | 1 + tools/perf/util/namespaces.c | 1 + tools/perf/util/util.h | 1 - tools/perf/util/xyarray.c | 2 ++ 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 530a62a7b51e..7bc981b6bf29 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -3,6 +3,7 @@ #include #include #include +#include #include struct comm_str { diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index 2de8da64d90c..67dcbcc73c7d 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -11,6 +11,7 @@ #include "event.h" #include #include +#include struct namespaces *namespaces__new(struct namespaces_event *event) { diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index c014b2fc22b3..b8dfbe1d9670 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c index c10ba41ef3f6..7251fdbabced 100644 --- a/tools/perf/util/xyarray.c +++ b/tools/perf/util/xyarray.c @@ -1,5 +1,7 @@ #include "xyarray.h" #include "util.h" +#include +#include struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size) { -- cgit v1.2.3 From a3b70b3bb34296a63b43614f13991111eccbb44a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 20:46:41 -0300 Subject: perf tools: Remove stale prototypes from builtin.h Some, like prune_packed_objects() are clearly git specific, others don't have implementations and some are used in just one place, make them static. 
Link: http://lkml.kernel.org/n/tip-faj3c5dnttf3hurv4pujut8n@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin.h | 4 ---- tools/perf/perf.c | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 26669bf9129c..d4d19fe3d050 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -2,16 +2,12 @@ #define BUILTIN_H #include "util/util.h" -#include "util/strbuf.h" extern const char perf_usage_string[]; extern const char perf_more_info_string[]; void list_common_cmds_help(void); const char *help_unknown_cmd(const char *cmd); -void prune_packed_objects(int); -int read_line_with_nul(char *buf, int size, FILE *file); -int check_pager_config(const char *cmd); int cmd_annotate(int argc, const char **argv); int cmd_bench(int argc, const char **argv); diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 356588982d08..0b2cad0fb3f9 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -91,7 +91,7 @@ static int pager_command_config(const char *var, const char *value, void *data) } /* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */ -int check_pager_config(const char *cmd) +static int check_pager_config(const char *cmd) { int err; struct pager_config c; -- cgit v1.2.3 From 7a8ef4c4b5fd5c578da4dadbcb1c5da650426c74 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 20:57:47 -0300 Subject: perf tools: Remove string.h, unistd.h and sys/stat.h from util.h Not needed in this header, added to the places that need FILE, putchar(), access() and a few other prototypes. 
Link: http://lkml.kernel.org/n/tip-xxtdsl6nsna82j7puwbdjqhs@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/subcmd/help.h | 1 + tools/perf/arch/arm/util/cs-etm.c | 1 + tools/perf/arch/arm64/util/dwarf-regs.c | 1 + tools/perf/builtin-help.c | 4 ++++ tools/perf/builtin-mem.c | 3 +++ tools/perf/builtin-report.c | 3 +++ tools/perf/builtin-script.c | 3 +++ tools/perf/builtin-stat.c | 3 +++ tools/perf/builtin-version.c | 3 ++- tools/perf/perf.c | 3 +++ tools/perf/tests/attr.c | 3 +++ tools/perf/tests/bpf.c | 2 ++ tools/perf/tests/parse-events.c | 3 +++ tools/perf/util/build-id.c | 2 ++ tools/perf/util/color.h | 2 ++ tools/perf/util/config.c | 3 +++ tools/perf/util/dso.c | 3 +++ tools/perf/util/event.c | 3 +++ tools/perf/util/header.c | 3 +++ tools/perf/util/machine.c | 3 +++ tools/perf/util/pmu.c | 1 + tools/perf/util/probe-file.c | 3 +++ tools/perf/util/strlist.c | 1 + tools/perf/util/util.c | 1 + tools/perf/util/util.h | 3 --- 25 files changed, 57 insertions(+), 4 deletions(-) diff --git a/tools/lib/subcmd/help.h b/tools/lib/subcmd/help.h index e145a020780c..9bd4223dc722 100644 --- a/tools/lib/subcmd/help.h +++ b/tools/lib/subcmd/help.h @@ -2,6 +2,7 @@ #define __SUBCMD_HELP_H #include +#include struct cmdnames { size_t alloc; diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index dfea6b635525..29361d9b635a 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -33,6 +33,7 @@ #include "../../util/cs-etm.h" #include +#include #define ENABLE_SINK_MAX 128 #define CS_BUS_DEVICE_PATH "/bus/coresight/devices/" diff --git a/tools/perf/arch/arm64/util/dwarf-regs.c b/tools/perf/arch/arm64/util/dwarf-regs.c index f268720ff021..cd764a9fd098 100644 --- a/tools/perf/arch/arm64/util/dwarf-regs.c +++ b/tools/perf/arch/arm64/util/dwarf-regs.c @@ -10,6 +10,7 @@ #include #include +#include #include #include /* for struct user_pt_regs */ #include diff --git a/tools/perf/builtin-help.c 
b/tools/perf/builtin-help.c index 7bde2f59dac2..492f8e14ab09 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -14,6 +14,10 @@ #include "util/debug.h" #include #include +#include +#include +#include +#include static struct man_viewer_list { struct man_viewer_list *next; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 1ebc67390898..2e5be1d63af6 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -1,4 +1,7 @@ #include +#include +#include +#include #include "builtin.h" #include "perf.h" diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3f89e0eaf0d4..22478ff2b706 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -46,6 +46,9 @@ #include #include #include +#include +#include +#include struct report { struct perf_tool tool; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index b093a3c21e40..d05aec491cff 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -37,6 +37,9 @@ #include #include #include +#include +#include +#include #include "sane_ctype.h" diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e3837febb4ff..eb3cc0b9a9e4 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -76,6 +76,9 @@ #include #include #include +#include +#include +#include #include "sane_ctype.h" diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index b9a095b1db99..d25149456a2f 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -1,6 +1,7 @@ -#include "util/util.h" #include "builtin.h" #include "perf.h" +#include +#include int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused) { diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 0b2cad0fb3f9..4cc6960f6226 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -24,6 +24,9 @@ #include #include #include +#include +#include +#include #include const char 
perf_usage_string[] = diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index c19e0da54337..0dd77494bb58 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -25,6 +25,9 @@ #include #include #include +#include +#include +#include #include "../perf.h" #include "util.h" #include diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index b78fbd611a7c..5876da126b58 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -1,6 +1,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 981d2bf9914f..7fad885491c5 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -7,6 +7,9 @@ #include "util.h" #include #include +#include +#include +#include #include #include #include diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 923ea290bb6e..687b5add4bde 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include "build-id.h" #include "event.h" #include "symbol.h" diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index a93997f16dec..52122bcc3170 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -1,6 +1,8 @@ #ifndef __PERF_COLOR_H #define __PERF_COLOR_H +#include + /* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */ #define COLOR_MAXLEN 24 diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 07d87d2dbee7..8d724f0fa5a8 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -16,6 +16,9 @@ #include "util/hist.h" /* perf_hist_config */ #include "util/llvm-utils.h" /* perf_llvm_config */ #include "config.h" +#include +#include +#include #include "sane_ctype.h" diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 3339ab7cabc5..a96a99d2369f 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -2,6 +2,9 @@ 
#include #include #include +#include +#include +#include #include #include "compress.h" #include "path.h" diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index cf457ef534da..2e829ac0f615 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -3,6 +3,9 @@ #include #include #include +#include +#include +#include #include /* To get things like MAP_HUGETLB even on older libc headers */ #include #include "event.h" diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 915bc4f39482..948b2c5efb65 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -11,7 +11,10 @@ #include #include #include +#include +#include #include +#include #include "evlist.h" #include "evsel.h" diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 988e84ce6f88..7a47f52ccfcc 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -14,6 +14,9 @@ #include "thread.h" #include "vdso.h" #include +#include +#include +#include #include "unwind.h" #include "linux/hash.h" #include "asm/bug.h" diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index bca1844594d0..ac16a9db1fb5 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 685653f2bc32..d679389e627c 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -15,7 +15,10 @@ * */ #include +#include +#include #include +#include #include "util.h" #include "event.h" #include "strlist.h" diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c index 0d3dfcb919b4..9de5434bb49e 100644 --- a/tools/perf/util/strlist.c +++ b/tools/perf/util/strlist.c @@ -10,6 +10,7 @@ #include #include #include +#include static struct rb_node *strlist__node_new(struct rblist *rblist, const void *entry) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 
131d21a659fb..6450c75a6f5b 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -3,6 +3,7 @@ #include "debug.h" #include #include +#include #include #include #include diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b8dfbe1d9670..bd883f25b76e 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -7,9 +7,6 @@ #define _DEFAULT_SOURCE 1 #define HAS_BOOL -#include -#include -#include #include #include #include -- cgit v1.2.3 From 4208735d8de58f1cbc2e0009d87514ce06681e5a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 19:06:30 -0300 Subject: perf tools: Remove poll.h and wait.h from util.h Not needed in this header, added to the places that need poll(), wait() and a few other prototypes. Link: http://lkml.kernel.org/n/tip-i39c7b6xmo1vwd9wxp6fmkl0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/intel-cqm.c | 1 + tools/perf/builtin-ftrace.c | 1 + tools/perf/builtin-kvm.c | 1 + tools/perf/builtin-record.c | 2 ++ tools/perf/builtin-stat.c | 1 + tools/perf/builtin-trace.c | 1 + tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/event-times.c | 1 + tools/perf/util/debug.c | 1 + tools/perf/util/help-unknown-cmd.c | 1 + tools/perf/util/llvm-utils.c | 1 + tools/perf/util/util.h | 2 -- 12 files changed, 12 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c index befde6708c33..f9713a71d77e 100644 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -8,6 +8,7 @@ #include #include +#include #include #include diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 0f34ab7a9ec1..9e0b35cd0eea 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "debug.h" #include diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 
129af3e9c728..f309c3773522 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 32a9a68d38a2..ee7d0a82ccd0 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -44,10 +44,12 @@ #include #include +#include #include #include #include #include +#include #include #include diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index eb3cc0b9a9e4..a935b5023732 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -78,6 +78,7 @@ #include #include #include +#include #include #include "sane_ctype.h" diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ef3613f2fe6a..eaa66fb57347 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -45,6 +45,7 @@ #include #include #include /* FIXME: Still needed for audit_errno_to_name */ +#include #include #include #include diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 552fd9aca08d..9e08d297f1a9 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "builtin.h" #include "hist.h" #include "intlist.h" diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index 4683514751d6..634f20c631d8 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "tests.h" #include "evlist.h" #include "evsel.h" diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 9eaf86f4003b..a5b3777ffee6 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #ifdef HAVE_BACKTRACE_SUPPORT diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c index 
34201440ac03..1c88ad6425b8 100644 --- a/tools/perf/util/help-unknown-cmd.c +++ b/tools/perf/util/help-unknown-cmd.c @@ -1,5 +1,6 @@ #include "cache.h" #include "config.h" +#include #include #include #include "../builtin.h" diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 824356488ce6..c6a15f204c03 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -12,6 +12,7 @@ #include "llvm-utils.h" #include "config.h" #include "util.h" +#include #define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index bd883f25b76e..3852b6d3270a 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -12,8 +12,6 @@ #include #include #include -#include -#include #include extern char buildid_dir[]; -- cgit v1.2.3 From e8b3ae40151e94e30a82c2cf8efa5ab4f99c8abf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 21:33:07 -0300 Subject: perf tools: Add the right header to obtain PERF_ALIGN() The util/event.h header needs PERF_ALIGN(), but wasn't including linux/kernel.h, where it is defined, instead it was getting it by luck by including map.h, which it doesn't need at all. Fix it by including the right header. 
Link: http://lkml.kernel.org/n/tip-nf3t9blzm5ncoxsczi8oy9mx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index eb7a7b200737..db2de6413518 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -3,9 +3,9 @@ #include #include +#include #include "../perf.h" -#include "map.h" #include "build-id.h" #include "perf_regs.h" -- cgit v1.2.3 From e7ff8920e62f0188e77b1dc5dc5d9a7322c342d7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Apr 2017 21:34:35 -0300 Subject: perf tools: Use just forward declarations for struct thread where possible Removing various instances of unnecessary includes, reducing the maze of header dependencies. Link: http://lkml.kernel.org/n/tip-hwu6eyuok9pc57alookyzmsf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/unwind-libdw.c | 1 + tools/perf/arch/x86/util/unwind-libdw.c | 1 + tools/perf/builtin-c2c.c | 1 + tools/perf/builtin-inject.c | 1 + tools/perf/builtin-mem.c | 1 + tools/perf/builtin-timechart.c | 1 + tools/perf/ui/browsers/hists.c | 1 + tools/perf/ui/stdio/hist.c | 1 + tools/perf/util/build-id.c | 1 + tools/perf/util/hist.c | 1 + tools/perf/util/session.c | 1 + tools/perf/util/session.h | 3 +-- tools/perf/util/sort.c | 1 + tools/perf/util/sort.h | 3 ++- tools/perf/util/unwind-libdw.h | 6 ++++-- 15 files changed, 19 insertions(+), 5 deletions(-) diff --git a/tools/perf/arch/arm/util/unwind-libdw.c b/tools/perf/arch/arm/util/unwind-libdw.c index b4176c60117a..bacfa00fca39 100644 --- a/tools/perf/arch/arm/util/unwind-libdw.c +++ b/tools/perf/arch/arm/util/unwind-libdw.c @@ -1,6 +1,7 @@ #include #include "../../util/unwind-libdw.h" #include "../../util/perf_regs.h" +#include "../../util/event.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/arch/x86/util/unwind-libdw.c 
b/tools/perf/arch/x86/util/unwind-libdw.c index c4b72176ca83..38dc9bb2a7c9 100644 --- a/tools/perf/arch/x86/util/unwind-libdw.c +++ b/tools/perf/arch/x86/util/unwind-libdw.c @@ -1,6 +1,7 @@ #include #include "../../util/unwind-libdw.h" #include "../../util/perf_regs.h" +#include "../../util/event.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index a14be1cd3d70..e33b4acece90 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -32,6 +32,7 @@ #include #include "ui/browsers/hists.h" #include "evlist.h" +#include "thread.h" struct c2c_hists { struct hists hists; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 8bd791cca008..ea8db38eedd1 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -18,6 +18,7 @@ #include "util/data.h" #include "util/auxtrace.h" #include "util/jit.h" +#include "util/thread.h" #include diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 2e5be1d63af6..e001c0290793 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -12,6 +12,7 @@ #include "util/data.h" #include "util/mem-events.h" #include "util/debug.h" +#include "util/symbol.h" #define MEM_OPERATION_LOAD 0x1 #define MEM_OPERATION_STORE 0x2 diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 38e2c437b7b3..4e2e61695986 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -29,6 +29,7 @@ #include #include #include "util/symbol.h" +#include "util/thread.h" #include "util/callchain.h" #include "perf.h" diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 1b12a69740b3..69f4570bd4f9 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -14,6 +14,7 @@ #include "../../util/sort.h" #include "../../util/util.h" #include "../../util/top.h" +#include "../../util/thread.h" #include 
"../../arch/common.h" #include "../browsers/hists.h" diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 5565105c9688..42e432bd2eb4 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -6,6 +6,7 @@ #include "../../util/evsel.h" #include "../../util/srcline.h" #include "../../util/string2.h" +#include "../../util/thread.h" #include "../../util/sane_ctype.h" static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 687b5add4bde..168cc49654e7 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -15,6 +15,7 @@ #include "build-id.h" #include "event.h" #include "symbol.h" +#include "thread.h" #include #include "debug.h" #include "session.h" diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 2944458b9edf..cf0186a088c1 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -9,6 +9,7 @@ #include "evsel.h" #include "annotate.h" #include "srcline.h" +#include "thread.h" #include "ui/progress.h" #include #include diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 19d993f2a305..3041c6b98191 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -19,6 +19,7 @@ #include "perf_regs.h" #include "asm/bug.h" #include "auxtrace.h" +#include "thread.h" #include "thread-stack.h" #include "stat.h" diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 1ffae42f76a1..47b5e7dbcb18 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -5,8 +5,6 @@ #include "event.h" #include "header.h" #include "machine.h" -#include "symbol.h" -#include "thread.h" #include "data.h" #include "ordered-events.h" #include @@ -14,6 +12,7 @@ #include struct ip_callchain; +struct symbol; struct thread; struct auxtrace; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index fe4fd7b5f8e0..5762ae4e9e91 100644 --- a/tools/perf/util/sort.c +++ 
b/tools/perf/util/sort.c @@ -6,6 +6,7 @@ #include "hist.h" #include "comm.h" #include "symbol.h" +#include "thread.h" #include "evsel.h" #include "evlist.h" #include "strlist.h" diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 8bcec05ee578..b7c75597e18f 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -21,7 +21,8 @@ #include "parse-events.h" #include "hist.h" #include "srcline.h" -#include "thread.h" + +struct thread; extern regex_t parent_regex; extern const char *sort_order; diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h index 58328669ed16..4a2b269a7b3b 100644 --- a/tools/perf/util/unwind-libdw.h +++ b/tools/perf/util/unwind-libdw.h @@ -2,10 +2,12 @@ #define __PERF_UNWIND_LIBDW_H #include -#include "event.h" -#include "thread.h" #include "unwind.h" +struct machine; +struct perf_sample; +struct thread; + bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg); struct unwind_info { -- cgit v1.2.3 From 04ec75e0befbc60ff473ca1a593cad7501729526 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Oct 2016 17:02:11 -0300 Subject: tools: Update asm-generic/mman-common.h copy from the kernel To get the changes in the commit Fixes: 3209f68b3ca4 ("statx: Include a mask for stx_attributes in struct statx") Silencing this perf build warning: Warning: tools/include/uapi/linux/stat.h differs from kernel No need to change the statx syscall beautifiers in 'perf trace' at this time. 
Cc: Adrian Hunter Cc: Al Viro Cc: David Ahern Cc: David Howells Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-y8bgiyzuvura62lffvh1zbg9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/stat.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 51a6b86e3700..d538897b8e08 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -114,7 +114,7 @@ struct statx { __u64 stx_ino; /* Inode number */ __u64 stx_size; /* File size */ __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 __spare1[1]; + __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ /* 0x40 */ struct statx_timestamp stx_atime; /* Last access time */ struct statx_timestamp stx_btime; /* File creation time */ @@ -152,9 +152,10 @@ struct statx { #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ #define STATX_ALL 0x00000fffU /* All currently supported flags */ +#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ /* - * Attributes to be found in stx_attributes + * Attributes to be found in stx_attributes and masked in stx_attributes_mask. 
* * These give information about the features or the state of a file that might * be of use to ordinary userspace programs such as GUIs or ls rather than -- cgit v1.2.3 From e883d09c9eb2ffddfd057c17e6a0cef446ec8c9b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 24 Apr 2017 11:58:54 -0300 Subject: tools arch: Sync arch/x86/lib/memcpy_64.S with the kernel Just a minor fix done in: Fixes: 26a37ab319a2 ("x86/mce: Fix copy/paste error in exception table entries") Cc: Tony Luck Link: http://lkml.kernel.org/n/tip-ni9jzdd5yxlail6pq8cuexw2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/lib/memcpy_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 49e6ebac7e73..98dcc112b363 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -286,7 +286,7 @@ ENDPROC(memcpy_mcsafe_unrolled) _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) -- cgit v1.2.3 From fb7b75619641d778c2b778748ca5cdf2718cc024 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 24 Apr 2017 12:19:08 -0300 Subject: tools arch x86: Sync cpufeatures.h To catch changes made in: 90218ac77d05 ("x86/cpufeature: Detect CPUID faulting support") No changes needed in the tools using this file at this time. 
Cc: Kyle Huey Cc: Thomas Gleixner Link: http://lkml.kernel.org/n/tip-qiqsj5qg2ljbsbfre2zaf9v4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index b04bb6dfed7f..0fe00446f9ca 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -187,6 +187,7 @@ * Reuse free bits when adding new feature flags! */ #define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ -- cgit v1.2.3 From 9d43f5e8df6804ae271407500af9062e9278167a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 20 Apr 2017 11:24:30 +0200 Subject: perf tools: Fix the code to strip command name Recent commit broke command name strip in perf_event__get_comm_ids function. It replaced left to right search for '\n' with rtrim, which actually does right to left search. It occasionally caught earlier '\n' and kept trash in the command name. Keeping the ltrim, but moving back the left to right '\n' search instead of the rtrim. 
Signed-off-by: Jiri Olsa Acked-by: Taeung Song Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yao Jin Fixes: bdd97ca63faa ("perf tools: Refactor the code to strip command name with {l,r}trim()") Link: http://lkml.kernel.org/r/20170420092430.29657-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 2e829ac0f615..142835c0ca0a 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -141,8 +141,15 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, ppids = strstr(bf, "PPid:"); if (name) { + char *nl; + name += 5; /* strlen("Name:") */ - name = rtrim(ltrim(name)); + name = ltrim(name); + + nl = strchr(name, '\n'); + if (nl) + *nl = '\0'; + size = strlen(name); if (size >= len) size = len - 1; -- cgit v1.2.3