diff options
Diffstat (limited to 'tools/perf/util')
45 files changed, 1381 insertions, 540 deletions
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 683f8340460c..be1caabb9290 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -239,10 +239,20 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op const char *s = strchr(ops->raw, '+'); const char *c = strchr(ops->raw, ','); - if (c++ != NULL) + /* + * skip over possible up to 2 operands to get to address, e.g.: + * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0> + */ + if (c++ != NULL) { ops->target.addr = strtoull(c, NULL, 16); - else + if (!ops->target.addr) { + c = strchr(c, ','); + if (c++ != NULL) + ops->target.addr = strtoull(c, NULL, 16); + } + } else { ops->target.addr = strtoull(ops->raw, NULL, 16); + } if (s++ != NULL) { ops->target.offset = strtoull(s, NULL, 16); @@ -257,10 +267,27 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op static int jump__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { + const char *c = strchr(ops->raw, ','); + if (!ops->target.addr || ops->target.offset < 0) return ins__raw_scnprintf(ins, bf, size, ops); - return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset); + if (c != NULL) { + const char *c2 = strchr(c + 1, ','); + + /* check for 3-op insn */ + if (c2 != NULL) + c = c2; + c++; + + /* mirror arch objdump's space-after-comma style */ + if (*c == ' ') + c++; + } + + return scnprintf(bf, size, "%-6.6s %.*s%" PRIx64, + ins->name, c ? c - ops->raw : 0, ops->raw, + ops->target.offset); } static struct ins_ops jump_ops = { @@ -1294,6 +1321,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil char linkname[PATH_MAX]; char *build_id_filename; char *build_id_path = NULL; + char *pos; if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS && !dso__is_kcore(dso)) @@ -1313,7 +1341,14 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil if (!build_id_path) return -1; - dirname(build_id_path); + /* + * old style build-id cache has name of XX/XXXXXXX.. while + * new style has XX/XXXXXXX../{elf,kallsyms,vdso}. + * extract the build-id part of dirname in the new style only. + */ + pos = strrchr(build_id_path, '/'); + if (pos && strlen(pos) < SBUILD_ID_SIZE - 2) + dirname(build_id_path); if (dso__is_kcore(dso) || readlink(build_id_path, linkname, sizeof(linkname)) < 0 || @@ -1344,7 +1379,9 @@ static const char *annotate__norm_arch(const char *arch_name) return normalize_arch((char *)arch_name); } -int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize) +int symbol__disassemble(struct symbol *sym, struct map *map, + const char *arch_name, size_t privsize, + struct arch **parch) { struct dso *dso = map->dso; char command[PATH_MAX * 2]; @@ -1370,6 +1407,9 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na if (arch == NULL) return -ENOTSUP; + if (parch) + *parch = arch; + if (arch->init) { err = arch->init(arch); if (err) { @@ -1396,31 +1436,10 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na sizeof(symfs_filename)); } } else if (dso__needs_decompress(dso)) { - char tmp[PATH_MAX]; - struct kmod_path m; - int fd; - bool ret; - - if (kmod_path__parse_ext(&m, symfs_filename)) - goto out; - - snprintf(tmp, PATH_MAX, "/tmp/perf-kmod-XXXXXX"); - - fd = mkstemp(tmp); - if (fd < 0) { - free(m.ext); - goto out; - } - - ret = decompress_to_file(m.ext, symfs_filename, fd); - - if (ret) - pr_err("Cannot decompress %s %s\n", m.ext, symfs_filename); - - free(m.ext); - close(fd); + char tmp[KMOD_DECOMP_LEN]; - if (!ret) + if (dso__decompress_kmodule_path(dso, symfs_filename, + tmp, sizeof(tmp)) < 0) goto out; strcpy(symfs_filename, tmp); @@ -1429,7 +1448,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na snprintf(command, sizeof(command), "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 - " -l -d %s %s -C %s 2>/dev/null|grep -v %s:|expand", + " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand", objdump_path ? objdump_path : "objdump", disassembler_style ? "-M " : "", disassembler_style ? disassembler_style : "", @@ -1887,7 +1906,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct rb_root source_line = RB_ROOT; u64 len; - if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0) < 0) + if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), + 0, NULL) < 0) return -1; len = symbol__size(sym); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 948aa8e6fd39..21055034aedd 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -158,7 +158,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); -int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize); +int symbol__disassemble(struct symbol *sym, struct map *map, + const char *arch_name, size_t privsize, + struct arch **parch); enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 0daf63b9ee3e..5547457566a7 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -322,6 +322,13 @@ static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues, return auxtrace_queues__add_buffer(queues, idx, buffer); } +static bool filter_cpu(struct perf_session *session, int cpu) +{ + unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap; + + return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap); +} + int auxtrace_queues__add_event(struct auxtrace_queues *queues, struct perf_session *session, union perf_event *event, off_t data_offset, @@ -331,6 +338,9 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues, unsigned int idx; int err; + if (filter_cpu(session, event->auxtrace.cpu)) + return 0; + buffer = zalloc(sizeof(struct auxtrace_buffer)); if (!buffer) return -ENOMEM; @@ -947,6 +957,8 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) synth_opts->instructions = true; synth_opts->branches = true; synth_opts->transactions = true; + synth_opts->ptwrites = true; + synth_opts->pwr_events = true; synth_opts->errors = true; synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; @@ -1030,6 +1042,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, case 'x': synth_opts->transactions = true; break; + case 'w': + synth_opts->ptwrites = true; + break; + case 'p': + synth_opts->pwr_events = true; + break; case 'e': synth_opts->errors = true; break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 9f0de72d58e2..33b5e6cdf38c 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -59,6 +59,8 @@ enum itrace_period_type { * @instructions: whether to synthesize 'instructions' events * @branches: whether to synthesize 'branches' events * @transactions: whether to synthesize events for transactions + * @ptwrites: whether to synthesize events for ptwrites + * @pwr_events: whether to synthesize power events * @errors: whether to synthesize decoder error events * @dont_decode: whether to skip decoding entirely * @log: write a decoding log @@ -72,6 +74,7 @@ enum itrace_period_type { * @period: 'instructions' events period * @period_type: 'instructions' events period type * @initial_skip: skip N events at the beginning. + * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all */ struct itrace_synth_opts { bool set; @@ -79,6 +82,8 @@ struct itrace_synth_opts { bool instructions; bool branches; bool transactions; + bool ptwrites; + bool pwr_events; bool errors; bool dont_decode; bool log; @@ -92,6 +97,7 @@ struct itrace_synth_opts { unsigned long long period; enum itrace_period_type period_type; unsigned long initial_skip; + unsigned long *cpu_bitmap; }; /** diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 168cc49654e7..e0148b081bdf 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -278,51 +278,6 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) return bf; } -bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size) -{ - char *id_name = NULL, *ch; - struct stat sb; - char sbuild_id[SBUILD_ID_SIZE]; - - if (!dso->has_build_id) - goto err; - - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); - id_name = build_id_cache__linkname(sbuild_id, NULL, 0); - if (!id_name) - goto err; - if (access(id_name, F_OK)) - goto err; - if (lstat(id_name, &sb) == -1) - goto err; - if ((size_t)sb.st_size > size - 1) - goto err; - if (readlink(id_name, bf, size - 1) < 0) - goto err; - - bf[sb.st_size] = '\0'; - - /* - * link should be: - * ../../lib/modules/4.4.0-rc4/kernel/net/ipv4/netfilter/nf_nat_ipv4.ko/a09fe3eb3147dafa4e3b31dbd6257e4d696bdc92 - */ - ch = strrchr(bf, '/'); - if (!ch) - goto err; - if (ch - 3 < bf) - goto err; - - free(id_name); - return strncmp(".ko", ch - 3, 3) == 0; -err: - pr_err("Invalid build id: %s\n", id_name ? : - dso->long_name ? : - dso->short_name ? : - "[unknown]"); - free(id_name); - return false; -} - #define dsos__for_each_with_build_id(pos, head) \ list_for_each_entry(pos, head, node) \ if (!pos->has_build_id) \ diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 8a89b195c1fc..96690a55c62c 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -17,7 +17,6 @@ char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, size_t size); char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); -bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size); int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 0328f297a748..0175765c05b9 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -5,6 +5,7 @@ #include <subcmd/pager.h> #include "../ui/ui.h" +#include <linux/compiler.h> #include <linux/string.h> #define CMD_EXEC_PATH "--exec-path" @@ -24,6 +25,6 @@ static inline int is_absolute_path(const char *path) return path[0] == '/'; } -char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); +char *mkpath(const char *fmt, ...) __printf(1, 2); #endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 8d724f0fa5a8..31a7dea248d0 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -335,32 +335,42 @@ static int perf_parse_long(const char *value, long *ret) return 0; } -static void die_bad_config(const char *name) +static void bad_config(const char *name) { if (config_file_name) - die("bad config value for '%s' in %s", name, config_file_name); - die("bad config value for '%s'", name); + pr_warning("bad config value for '%s' in %s, ignoring...\n", name, config_file_name); + else + pr_warning("bad config value for '%s', ignoring...\n", name); } -u64 perf_config_u64(const char *name, const char *value) +int perf_config_u64(u64 *dest, const char *name, const char *value) { long long ret = 0; - if (!perf_parse_llong(value, &ret)) - die_bad_config(name); - return (u64) ret; + if (!perf_parse_llong(value, &ret)) { + bad_config(name); + return -1; + } + + *dest = ret; + return 0; } -int perf_config_int(const char *name, const char *value) +int perf_config_int(int *dest, const char *name, const char *value) { long ret = 0; - if (!perf_parse_long(value, &ret)) - die_bad_config(name); - return ret; + if (!perf_parse_long(value, &ret)) { + bad_config(name); + return -1; + } + *dest = ret; + return 0; } static int perf_config_bool_or_int(const char *name, const char *value, int *is_bool) { + int ret; + *is_bool = 1; if (!value) return 1; @@ -371,7 +381,7 @@ static int perf_config_bool_or_int(const char *name, const char *value, int *is_ if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off")) return 0; *is_bool = 0; - return perf_config_int(name, value); + return perf_config_int(&ret, name, value) < 0 ? -1 : ret; } int perf_config_bool(const char *name, const char *value) @@ -657,8 +667,7 @@ static int perf_config_set__init(struct perf_config_set *set) user_config = strdup(mkpath("%s/.perfconfig", home)); if (user_config == NULL) { - warning("Not enough memory to process %s/.perfconfig, " - "ignoring it.", home); + pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home); goto out; } @@ -671,8 +680,7 @@ static int perf_config_set__init(struct perf_config_set *set) ret = 0; if (st.st_uid && (st.st_uid != geteuid())) { - warning("File %s not owned by current user or root, " - "ignoring it.", user_config); + pr_warning("File %s not owned by current user or root, ignoring it.", user_config); goto out_free; } @@ -795,7 +803,8 @@ void perf_config_set__delete(struct perf_config_set *set) */ int config_error_nonbool(const char *var) { - return error("Missing value for '%s'", var); + pr_err("Missing value for '%s'", var); + return -1; } void set_buildid_dir(const char *dir) diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index 1a59a6b43f8b..b6bb11f3f165 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -27,8 +27,8 @@ extern const char *config_exclusive_filename; typedef int (*config_fn_t)(const char *, const char *, void *); int perf_default_config(const char *, const char *, void *); int perf_config(config_fn_t fn, void *); -int perf_config_int(const char *, const char *); -u64 perf_config_u64(const char *, const char *); +int perf_config_int(int *dest, const char *, const char *); +int perf_config_u64(u64 *dest, const char *, const char *); int perf_config_bool(const char *, const char *); int config_error_nonbool(const char *); const char *perf_etc_perfconfig(void); diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 89d50318833d..3149b70799fd 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1444,10 +1444,8 @@ static int convert__config(const char *var, const char *value, void *cb) { struct convert *c = cb; - if (!strcmp(var, "convert.queue-size")) { - c->queue_size = perf_config_u64(var, value); - return 0; - } + if (!strcmp(var, "convert.queue-size")) + return perf_config_u64(&c->queue_size, var, value); return 0; } diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 8a23ea1a71c7..c818bdb1c1ab 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -4,6 +4,7 @@ #include <stdbool.h> #include <string.h> +#include <linux/compiler.h> #include "event.h" #include "../ui/helpline.h" #include "../ui/progress.h" @@ -40,16 +41,16 @@ extern int debug_data_convert; #define STRERR_BUFSIZE 128 /* For the buffer size of str_error_r */ -int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +int dump_printf(const char *fmt, ...) __printf(1, 2); void trace_event(union perf_event *event); -int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2))); -int ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2))); +int ui__error(const char *format, ...) __printf(1, 2); +int ui__warning(const char *format, ...) __printf(1, 2); void pr_stat(const char *fmt, ...); -int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4))); -int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5))); +int eprintf(int level, int var, const char *fmt, ...) __printf(3, 4); +int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5); int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index a96a99d2369f..4e7ab611377a 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -248,6 +248,64 @@ bool dso__needs_decompress(struct dso *dso) dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; } +static int decompress_kmodule(struct dso *dso, const char *name, char *tmpbuf) +{ + int fd = -1; + struct kmod_path m; + + if (!dso__needs_decompress(dso)) + return -1; + + if (kmod_path__parse_ext(&m, dso->long_name)) + return -1; + + if (!m.comp) + goto out; + + fd = mkstemp(tmpbuf); + if (fd < 0) { + dso->load_errno = errno; + goto out; + } + + if (!decompress_to_file(m.ext, name, fd)) { + dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE; + close(fd); + fd = -1; + } + +out: + free(m.ext); + return fd; +} + +int dso__decompress_kmodule_fd(struct dso *dso, const char *name) +{ + char tmpbuf[] = KMOD_DECOMP_NAME; + int fd; + + fd = decompress_kmodule(dso, name, tmpbuf); + unlink(tmpbuf); + return fd; +} + +int dso__decompress_kmodule_path(struct dso *dso, const char *name, + char *pathname, size_t len) +{ + char tmpbuf[] = KMOD_DECOMP_NAME; + int fd; + + fd = decompress_kmodule(dso, name, tmpbuf); + if (fd < 0) { + unlink(tmpbuf); + return -1; + } + + strncpy(pathname, tmpbuf, len); + close(fd); + return 0; +} + /* * Parses kernel module specified in @path and updates * @m argument like: @@ -335,6 +393,21 @@ int __kmod_path__parse(struct kmod_path *m, const char *path, return 0; } +void dso__set_module_info(struct dso *dso, struct kmod_path *m, + struct machine *machine) +{ + if (machine__is_host(machine)) + dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; + else + dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; + + /* _KMODULE_COMP should be next to _KMODULE */ + if (m->kmod && m->comp) + dso->symtab_type++; + + dso__set_short_name(dso, strdup(m->name), true); +} + /* * Global list of open DSOs and the counter. */ @@ -381,7 +454,7 @@ static int do_open(char *name) static int __open_dso(struct dso *dso, struct machine *machine) { - int fd; + int fd = -EINVAL; char *root_dir = (char *)""; char *name = malloc(PATH_MAX); @@ -392,15 +465,30 @@ static int __open_dso(struct dso *dso, struct machine *machine) root_dir = machine->root_dir; if (dso__read_binary_type_filename(dso, dso->binary_type, - root_dir, name, PATH_MAX)) { - free(name); - return -EINVAL; - } + root_dir, name, PATH_MAX)) + goto out; if (!is_regular_file(name)) - return -EINVAL; + goto out; + + if (dso__needs_decompress(dso)) { + char newpath[KMOD_DECOMP_LEN]; + size_t len = sizeof(newpath); + + if (dso__decompress_kmodule_path(dso, name, newpath, len) < 0) { + fd = -dso->load_errno; + goto out; + } + + strcpy(name, newpath); + } fd = do_open(name); + + if (dso__needs_decompress(dso)) + unlink(name); + +out: free(name); return fd; } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 12350b171727..bd061ba7b47c 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -244,6 +244,12 @@ bool is_supported_compression(const char *ext); bool is_kernel_module(const char *pathname, int cpumode); bool decompress_to_file(const char *ext, const char *filename, int output_fd); bool dso__needs_decompress(struct dso *dso); +int dso__decompress_kmodule_fd(struct dso *dso, const char *name); +int dso__decompress_kmodule_path(struct dso *dso, const char *name, + char *pathname, size_t len); + +#define KMOD_DECOMP_NAME "/tmp/perf-kmod-XXXXXX" +#define KMOD_DECOMP_LEN sizeof(KMOD_DECOMP_NAME) struct kmod_path { char *name; @@ -259,6 +265,9 @@ int __kmod_path__parse(struct kmod_path *m, const char *path, #define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true , false) #define kmod_path__parse_ext(__m, __p) __kmod_path__parse(__m, __p, false, true) +void dso__set_module_info(struct dso *dso, struct kmod_path *m, + struct machine *machine); + /* * The dso__data_* external interface provides following functions: * dso__data_get_fd diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 7c3fa1c8cbcd..9967c87af7a6 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -252,6 +252,127 @@ enum auxtrace_error_type { PERF_AUXTRACE_ERROR_MAX }; +/* Attribute type for custom synthesized events */ +#define PERF_TYPE_SYNTH (INT_MAX + 1U) + +/* Attribute config for custom synthesized events */ +enum perf_synth_id { + PERF_SYNTH_INTEL_PTWRITE, + PERF_SYNTH_INTEL_MWAIT, + PERF_SYNTH_INTEL_PWRE, + PERF_SYNTH_INTEL_EXSTOP, + PERF_SYNTH_INTEL_PWRX, + PERF_SYNTH_INTEL_CBR, +}; + +/* + * Raw data formats for synthesized events. Note that 4 bytes of padding are + * present to match the 'size' member of PERF_SAMPLE_RAW data which is always + * 8-byte aligned. That means we must dereference raw_data with an offset of 4. + * Refer perf_sample__synth_ptr() and perf_synth__raw_data(). It also means the + * structure sizes are 4 bytes bigger than the raw_size, refer + * perf_synth__raw_size(). + */ + +struct perf_synth_intel_ptwrite { + u32 padding; + union { + struct { + u32 ip : 1, + reserved : 31; + }; + u32 flags; + }; + u64 payload; +}; + +struct perf_synth_intel_mwait { + u32 padding; + u32 reserved; + union { + struct { + u64 hints : 8, + reserved1 : 24, + extensions : 2, + reserved2 : 30; + }; + u64 payload; + }; +}; + +struct perf_synth_intel_pwre { + u32 padding; + u32 reserved; + union { + struct { + u64 reserved1 : 7, + hw : 1, + subcstate : 4, + cstate : 4, + reserved2 : 48; + }; + u64 payload; + }; +}; + +struct perf_synth_intel_exstop { + u32 padding; + union { + struct { + u32 ip : 1, + reserved : 31; + }; + u32 flags; + }; +}; + +struct perf_synth_intel_pwrx { + u32 padding; + u32 reserved; + union { + struct { + u64 deepest_cstate : 4, + last_cstate : 4, + wake_reason : 4, + reserved1 : 52; + }; + u64 payload; + }; +}; + +struct perf_synth_intel_cbr { + u32 padding; + union { + struct { + u32 cbr : 8, + reserved1 : 8, + max_nonturbo : 8, + reserved2 : 8; + }; + u32 flags; + }; + u32 freq; + u32 reserved3; +}; + +/* + * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get + * 8-byte alignment. + */ +static inline void *perf_sample__synth_ptr(struct perf_sample *sample) +{ + return sample->raw_data - 4; +} + +static inline void *perf_synth__raw_data(void *p) +{ + return p + 4; +} + +#define perf_synth__raw_size(d) (sizeof(d) - 4) + +#define perf_sample__bad_synth_size(s, d) ((s)->raw_size < sizeof(d) - 4) + /* * The kernel collects the number of events it couldn't send in a stretch and * when possible sends this number in a PERF_RECORD_LOST event. The number of diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 94cea4398a13..8d601fbdd8d6 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -1,6 +1,7 @@ #ifndef __PERF_EVLIST_H #define __PERF_EVLIST_H 1 +#include <linux/compiler.h> #include <linux/kernel.h> #include <linux/refcount.h> #include <linux/list.h> @@ -34,7 +35,7 @@ struct perf_mmap { refcount_t refcnt; u64 prev; struct auxtrace_mmap auxtrace_mmap; - char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8))); + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); }; static inline size_t diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index e4f7902d5afa..6f4882f8d61f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -11,13 +11,17 @@ #include <errno.h> #include <inttypes.h> #include <linux/bitops.h> +#include <api/fs/fs.h> #include <api/fs/tracing_path.h> #include <traceevent/event-parse.h> #include <linux/hw_breakpoint.h> #include <linux/perf_event.h> +#include <linux/compiler.h> #include <linux/err.h> #include <sys/ioctl.h> #include <sys/resource.h> +#include <sys/types.h> +#include <dirent.h> #include "asm/bug.h" #include "callchain.h" #include "cgroup.h" @@ -273,8 +277,20 @@ struct perf_evsel *perf_evsel__new_cycles(void) struct perf_evsel *evsel; event_attr_init(&attr); + /* + * Unnamed union member, not supported as struct member named + * initializer in older compilers such as gcc 4.4.7 + * + * Just for probing the precise_ip: + */ + attr.sample_period = 1; perf_event_attr__set_max_precise_ip(&attr); + /* + * Now let the usual logic to set up the perf_event_attr defaults + * to kick in when we return and before perf_evsel__open() is called. + */ + attr.sample_period = 0; evsel = perf_evsel__new(&attr); if (evsel == NULL) @@ -1429,7 +1445,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, } static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, - void *priv __attribute__((unused))) + void *priv __maybe_unused) { return fprintf(fp, " %-32s %s\n", name, val); } @@ -2459,6 +2475,42 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err, return false; } +static bool find_process(const char *name) +{ + size_t len = strlen(name); + DIR *dir; + struct dirent *d; + int ret = -1; + + dir = opendir(procfs__mountpoint()); + if (!dir) + return false; + + /* Walk through the directory. */ + while (ret && (d = readdir(dir)) != NULL) { + char path[PATH_MAX]; + char *data; + size_t size; + + if ((d->d_type != DT_DIR) || + !strcmp(".", d->d_name) || + !strcmp("..", d->d_name)) + continue; + + scnprintf(path, sizeof(path), "%s/%s/comm", + procfs__mountpoint(), d->d_name); + + if (filename__read_str(path, &data, &size)) + continue; + + ret = strncmp(name, data, len); + free(data); + } + + closedir(dir); + return ret ? false : true; +} + int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, int err, char *msg, size_t size) { diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c index 5980f7d256b1..40789d8603d0 100644 --- a/tools/perf/util/genelf_debug.c +++ b/tools/perf/util/genelf_debug.c @@ -11,6 +11,7 @@ * @remark Copyright 2007 OProfile authors * @author Philippe Elie */ +#include <linux/compiler.h> #include <sys/types.h> #include <stdio.h> #include <getopt.h> @@ -125,7 +126,7 @@ struct debug_line_header { * and filesize, last entry is followed by en empty string. */ /* follow the first program statement */ -} __attribute__((packed)); +} __packed; /* DWARF 2 spec talk only about one possible compilation unit header while * binutils can handle two flavours of dwarf 2, 32 and 64 bits, this is not @@ -138,7 +139,7 @@ struct compilation_unit_header { uhalf version; uword debug_abbrev_offset; ubyte pointer_size; -} __attribute__((packed)); +} __packed; #define DW_LNS_num_opcode (DW_LNS_set_isa + 1) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 314a07151fb7..76ed7d03e500 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -8,6 +8,7 @@ #include <unistd.h> #include <stdio.h> #include <stdlib.h> +#include <linux/compiler.h> #include <linux/list.h> #include <linux/kernel.h> #include <linux/bitops.h> @@ -841,7 +842,7 @@ static int write_group_desc(int fd, struct perf_header *h __maybe_unused, /* * default get_cpuid(): nothing gets recorded - * actual implementation must be in arch/$(ARCH)/util/header.c + * actual implementation must be in arch/$(SRCARCH)/util/header.c */ int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused) { @@ -1274,7 +1275,7 @@ error: } static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val, - void *priv __attribute__((unused))) + void *priv __maybe_unused) { return fprintf(fp, ", %s = %s", name, val); } @@ -1469,8 +1470,16 @@ static int __event_process_build_id(struct build_id_event *bev, dso__set_build_id(dso, &bev->build_id); - if (!is_kernel_module(filename, cpumode)) - dso->kernel = dso_type; + if (dso_type != DSO_TYPE_USER) { + struct kmod_path m = { .name = NULL, }; + + if (!kmod_path__parse_name(&m, filename) && m.kmod) + dso__set_module_info(dso, &m, machine); + else + dso->kernel = dso_type; + + free(m.name); + } build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c index 1c88ad6425b8..15b95300d7f3 100644 --- a/tools/perf/util/help-unknown-cmd.c +++ b/tools/perf/util/help-unknown-cmd.c @@ -12,7 +12,7 @@ static int perf_unknown_cmd_config(const char *var, const char *value, void *cb __maybe_unused) { if (!strcmp(var, "help.autocorrect")) - autocorrect = perf_config_int(var,value); + return perf_config_int(&autocorrect, var,value); return 0; } diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index b2834ac7b1f5..218ee2bac9a5 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -866,8 +866,6 @@ static void intel_bts_print_info(u64 *arr, int start, int finish) fprintf(stdout, intel_bts_info_fmts[i], arr[i]); } -u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE]; - int intel_bts_process_auxtrace_info(union perf_event *event, struct perf_session *session) { diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 7cf7f7aca4d2..aa1593ce551d 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -64,6 +64,25 @@ enum intel_pt_pkt_state { INTEL_PT_STATE_FUP_NO_TIP, }; +static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) +{ + switch (pkt_state) { + case INTEL_PT_STATE_NO_PSB: + case INTEL_PT_STATE_NO_IP: + case INTEL_PT_STATE_ERR_RESYNC: + case INTEL_PT_STATE_IN_SYNC: + case INTEL_PT_STATE_TNT: + return true; + case INTEL_PT_STATE_TIP: + case INTEL_PT_STATE_TIP_PGD: + case INTEL_PT_STATE_FUP: + case INTEL_PT_STATE_FUP_NO_TIP: + return false; + default: + return true; + }; +} + #ifdef INTEL_PT_STRICT #define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB #define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB @@ -87,11 +106,13 @@ struct intel_pt_decoder { const unsigned char *buf; size_t len; bool return_compression; + bool branch_enable; bool mtc_insn; bool pge; bool have_tma; bool have_cyc; bool fixup_last_mtc; + bool have_last_ip; uint64_t pos; uint64_t last_ip; uint64_t ip; @@ -99,6 +120,7 @@ struct intel_pt_decoder { uint64_t timestamp; uint64_t tsc_timestamp; uint64_t ref_timestamp; + uint64_t sample_timestamp; uint64_t ret_addr; uint64_t ctc_timestamp; uint64_t ctc_delta; @@ -119,6 +141,7 @@ struct intel_pt_decoder { int pkt_len; int last_packet_type; unsigned int cbr; + unsigned int cbr_seen; unsigned int max_non_turbo_ratio; double max_non_turbo_ratio_fp; double cbr_cyc_to_tsc; @@ -136,9 +159,18 @@ struct intel_pt_decoder { bool continuous_period; bool overflow; bool set_fup_tx_flags; + bool set_fup_ptw; + bool set_fup_mwait; + bool set_fup_pwre; + bool set_fup_exstop; unsigned int fup_tx_flags; unsigned int tx_flags; + uint64_t fup_ptw_payload; + uint64_t fup_mwait_payload; + uint64_t fup_pwre_payload; + uint64_t cbr_payload; uint64_t timestamp_insn_cnt; + uint64_t sample_insn_cnt; uint64_t stuck_ip; int no_progress; int stuck_ip_prd; @@ -192,6 +224,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->pgd_ip = params->pgd_ip; decoder->data = params->data; decoder->return_compression = params->return_compression; + decoder->branch_enable = params->branch_enable; decoder->period = params->period; decoder->period_type = params->period_type; @@ -398,6 +431,7 @@ static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet, static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder) { decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip); + decoder->have_last_ip = true; } static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder) @@ -635,6 +669,8 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) case INTEL_PT_PAD: case INTEL_PT_VMCS: case INTEL_PT_MNT: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: return 0; case INTEL_PT_MTC: @@ -675,6 +711,12 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) break; case INTEL_PT_TSC: + /* + * For now, do not support using TSC packets - refer + * intel_pt_calc_cyc_to_tsc(). + */ + if (data->from_mtc) + return 1; timestamp = pkt_info->packet.payload | (data->timestamp & (0xffULL << 56)); if (data->from_mtc && timestamp < data->timestamp && @@ -733,6 +775,11 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) case INTEL_PT_TIP_PGD: case INTEL_PT_TRACESTOP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: case INTEL_PT_OVF: case INTEL_PT_BAD: /* Does not happen */ default: @@ -787,6 +834,14 @@ static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder, .cbr_cyc_to_tsc = 0, }; + /* + * For now, do not support using TSC packets for at least the reasons: + * 1) timing might have stopped + * 2) TSC packets within PSB+ can slip against CYC packets + */ + if (!from_mtc) + return; + intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data); } @@ -898,6 +953,7 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder, decoder->tot_insn_cnt += insn_cnt; decoder->timestamp_insn_cnt += insn_cnt; + decoder->sample_insn_cnt += insn_cnt; decoder->period_insn_cnt += insn_cnt; if (err) { @@ -990,6 +1046,57 @@ out_no_progress: return err; } +static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) +{ + bool ret = false; + + if (decoder->set_fup_tx_flags) { + decoder->set_fup_tx_flags = false; + decoder->tx_flags = decoder->fup_tx_flags; + decoder->state.type = INTEL_PT_TRANSACTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.flags = decoder->fup_tx_flags; + return true; + } + if (decoder->set_fup_ptw) { + decoder->set_fup_ptw = false; + decoder->state.type = INTEL_PT_PTW; + decoder->state.flags |= INTEL_PT_FUP_IP; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.ptw_payload = decoder->fup_ptw_payload; + return true; + } + if (decoder->set_fup_mwait) { + decoder->set_fup_mwait = false; + decoder->state.type = INTEL_PT_MWAIT_OP; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.mwait_payload = decoder->fup_mwait_payload; + ret = true; + } + if (decoder->set_fup_pwre) { + decoder->set_fup_pwre = false; + decoder->state.type |= INTEL_PT_PWR_ENTRY; + decoder->state.type &= ~INTEL_PT_BRANCH; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.pwre_payload = decoder->fup_pwre_payload; + ret = true; + } + if (decoder->set_fup_exstop) { + decoder->set_fup_exstop = false; + decoder->state.type |= INTEL_PT_EX_STOP; + decoder->state.type &= ~INTEL_PT_BRANCH; + decoder->state.flags |= INTEL_PT_FUP_IP; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + ret = true; + } + return ret; +} + static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) { struct intel_pt_insn intel_pt_insn; @@ -1003,15 +1110,8 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) if (err == INTEL_PT_RETURN) return 0; if (err == -EAGAIN) { - if (decoder->set_fup_tx_flags) { - decoder->set_fup_tx_flags = false; - decoder->tx_flags = decoder->fup_tx_flags; - decoder->state.type = INTEL_PT_TRANSACTION; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; - decoder->state.flags = decoder->fup_tx_flags; + if (intel_pt_fup_event(decoder)) return 0; - } return err; } decoder->set_fup_tx_flags = false; @@ -1360,7 +1460,9 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) { - unsigned int cbr = decoder->packet.payload; + unsigned int cbr = decoder->packet.payload & 0xff; + + decoder->cbr_payload = decoder->packet.payload; if (decoder->cbr == cbr) return; @@ -1417,6 +1519,13 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_TRACESTOP: case INTEL_PT_BAD: case INTEL_PT_PSB: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: decoder->have_tma = false; intel_pt_log("ERROR: Unexpected packet\n"); return -EAGAIN; @@ -1446,7 +1555,8 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_FUP: decoder->pge = true; - intel_pt_set_last_ip(decoder); + if (decoder->packet.count) + intel_pt_set_last_ip(decoder); break; case INTEL_PT_MODE_TSX: @@ -1497,6 +1607,13 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) case INTEL_PT_MODE_TSX: case INTEL_PT_BAD: case INTEL_PT_PSBEND: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: intel_pt_log("ERROR: Missing TIP after FUP\n"); decoder->pkt_state = INTEL_PT_STATE_ERR3; return -ENOENT; @@ -1625,6 +1742,15 @@ next: break; } intel_pt_set_last_ip(decoder); + if (!decoder->branch_enable) { + decoder->ip = decoder->last_ip; + if (intel_pt_fup_event(decoder)) + return 0; + no_tip = false; + break; + } + if (decoder->set_fup_mwait) + no_tip = true; err = intel_pt_walk_fup(decoder); if (err != -EAGAIN) { if (err) @@ -1650,6 +1776,8 @@ next: break; case INTEL_PT_PSB: + decoder->last_ip = 0; + decoder->have_last_ip = true; intel_pt_clear_stack(&decoder->stack); err = intel_pt_walk_psbend(decoder); if (err == -EAGAIN) @@ -1696,6 +1824,16 @@ next: case INTEL_PT_CBR: intel_pt_calc_cbr(decoder); + if (!decoder->branch_enable && + decoder->cbr != decoder->cbr_seen) { + decoder->cbr_seen = decoder->cbr; + decoder->state.type = INTEL_PT_CBR_CHG; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.cbr_payload = + decoder->packet.payload; + return 0; + } break; case INTEL_PT_MODE_EXEC: @@ -1722,6 +1860,71 @@ next: case INTEL_PT_PAD: break; + case INTEL_PT_PTWRITE_IP: + decoder->fup_ptw_payload = decoder->packet.payload; + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + if (decoder->packet.type == INTEL_PT_FUP) { + decoder->set_fup_ptw = true; + no_tip = true; + } else { + intel_pt_log_at("ERROR: Missing FUP after PTWRITE", + decoder->pos); + } + goto next; + + case INTEL_PT_PTWRITE: + decoder->state.type = INTEL_PT_PTW; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.ptw_payload = decoder->packet.payload; + return 0; + + case INTEL_PT_MWAIT: + decoder->fup_mwait_payload = decoder->packet.payload; + decoder->set_fup_mwait = true; + break; + + case INTEL_PT_PWRE: + if (decoder->set_fup_mwait) { + decoder->fup_pwre_payload = + decoder->packet.payload; + decoder->set_fup_pwre = true; + break; + } + decoder->state.type = INTEL_PT_PWR_ENTRY; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.pwrx_payload = decoder->packet.payload; + return 0; + + case INTEL_PT_EXSTOP_IP: + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + if (decoder->packet.type == INTEL_PT_FUP) { + decoder->set_fup_exstop = true; + no_tip = true; + } else { + intel_pt_log_at("ERROR: Missing FUP after EXSTOP", + decoder->pos); + } + goto next; + + case INTEL_PT_EXSTOP: + decoder->state.type = INTEL_PT_EX_STOP; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return 0; + + case INTEL_PT_PWRX: + decoder->state.type = INTEL_PT_PWR_EXIT; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.pwrx_payload = decoder->packet.payload; + return 0; + default: return intel_pt_bug(decoder); } @@ -1730,8 +1933,9 @@ next: static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder) { - return decoder->last_ip || decoder->packet.count == 0 || - decoder->packet.count == 3 || decoder->packet.count == 6; + return decoder->packet.count && + (decoder->have_last_ip || decoder->packet.count == 3 || + decoder->packet.count == 6); } /* Walk PSB+ packets to get in sync. */ @@ -1750,6 +1954,13 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) __fallthrough; case INTEL_PT_TIP_PGE: case INTEL_PT_TIP: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: intel_pt_log("ERROR: Unexpected packet\n"); return -ENOENT; @@ -1854,14 +2065,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) break; case INTEL_PT_FUP: - if (decoder->overflow) { - if (intel_pt_have_ip(decoder)) - intel_pt_set_ip(decoder); - if (decoder->ip) - return 0; - } - if (decoder->packet.count) - intel_pt_set_last_ip(decoder); + if (intel_pt_have_ip(decoder)) + intel_pt_set_ip(decoder); + if (decoder->ip) + return 0; break; case INTEL_PT_MTC: @@ -1910,6 +2117,9 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) break; case INTEL_PT_PSB: + decoder->last_ip = 0; + decoder->have_last_ip = true; + intel_pt_clear_stack(&decoder->stack); err = intel_pt_walk_psb(decoder); if (err) return err; @@ -1925,6 +2135,13 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_VMCS: case INTEL_PT_MNT: case INTEL_PT_PAD: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: default: break; } @@ -1935,6 +2152,19 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) { int err; + decoder->set_fup_tx_flags = false; + decoder->set_fup_ptw = false; + decoder->set_fup_mwait = false; + decoder->set_fup_pwre = false; + decoder->set_fup_exstop = false; + + if (!decoder->branch_enable) { + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->overflow = false; + decoder->state.type = 0; /* Do not have a sample */ + return 0; + } + intel_pt_log("Scanning for full IP\n"); err = intel_pt_walk_to_ip(decoder); if (err) @@ -2043,6 +2273,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) decoder->pge = false; decoder->continuous_period = false; + decoder->have_last_ip = false; decoder->last_ip = 0; decoder->ip = 0; intel_pt_clear_stack(&decoder->stack); @@ -2051,6 +2282,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) if (err) return err; + decoder->have_last_ip = true; decoder->pkt_state = INTEL_PT_STATE_NO_IP; err = intel_pt_walk_psb(decoder); @@ -2069,7 +2301,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) { - uint64_t est = decoder->timestamp_insn_cnt << 1; + uint64_t est = decoder->sample_insn_cnt << 1; if (!decoder->cbr || !decoder->max_non_turbo_ratio) goto out; @@ -2077,7 +2309,7 @@ static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) est *= decoder->max_non_turbo_ratio; est /= decoder->cbr; out: - return decoder->timestamp + est; + return decoder->sample_timestamp + est; } const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) @@ -2093,8 +2325,10 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) err = intel_pt_sync(decoder); break; case INTEL_PT_STATE_NO_IP: + decoder->have_last_ip = false; decoder->last_ip = 0; - /* Fall through */ + decoder->ip = 0; + __fallthrough; case INTEL_PT_STATE_ERR_RESYNC: err = intel_pt_sync_ip(decoder); break; @@ -2130,15 +2364,29 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) } } while (err == -ENOLINK); - decoder->state.err = err ? intel_pt_ext_err(err) : 0; - decoder->state.timestamp = decoder->timestamp; + if (err) { + decoder->state.err = intel_pt_ext_err(err); + decoder->state.from_ip = decoder->ip; + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + } else { + decoder->state.err = 0; + if (decoder->cbr != decoder->cbr_seen && decoder->state.type) { + decoder->cbr_seen = decoder->cbr; + decoder->state.type |= INTEL_PT_CBR_CHG; + decoder->state.cbr_payload = decoder->cbr_payload; + } + if (intel_pt_sample_time(decoder->pkt_state)) { + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + } + } + + decoder->state.timestamp = decoder->sample_timestamp; decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); decoder->state.cr3 = decoder->cr3; decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; - if (err) - decoder->state.from_ip = decoder->ip; - return &decoder->state; } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index e90619a43c0c..921b22e8ca0e 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -25,11 +25,18 @@ #define INTEL_PT_IN_TX (1 << 0) #define INTEL_PT_ABORT_TX (1 << 1) #define INTEL_PT_ASYNC (1 << 2) +#define INTEL_PT_FUP_IP (1 << 3) enum intel_pt_sample_type { INTEL_PT_BRANCH = 1 << 0, INTEL_PT_INSTRUCTION = 1 << 1, INTEL_PT_TRANSACTION = 1 << 2, + INTEL_PT_PTW = 1 << 3, + INTEL_PT_MWAIT_OP = 1 << 4, + INTEL_PT_PWR_ENTRY = 1 << 5, + INTEL_PT_EX_STOP = 1 << 6, + INTEL_PT_PWR_EXIT = 1 << 7, + INTEL_PT_CBR_CHG = 1 << 8, }; enum intel_pt_period_type { @@ -63,6 +70,11 @@ struct intel_pt_state { uint64_t timestamp; uint64_t est_timestamp; uint64_t trace_nr; + uint64_t ptw_payload; + uint64_t mwait_payload; + uint64_t pwre_payload; + uint64_t pwrx_payload; + uint64_t cbr_payload; uint32_t flags; enum intel_pt_insn_op insn_op; int insn_len; @@ -87,6 +99,7 @@ struct intel_pt_params { bool (*pgd_ip)(uint64_t ip, void *data); void *data; bool return_compression; + bool branch_enable; uint64_t period; enum intel_pt_period_type period_type; unsigned max_non_turbo_ratio; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h index debe751dc3d6..45b64f93f358 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -16,6 +16,7 @@ #ifndef INCLUDE__INTEL_PT_LOG_H__ #define INCLUDE__INTEL_PT_LOG_H__ +#include <linux/compiler.h> #include <stdint.h> #include <inttypes.h> @@ -34,8 +35,7 @@ void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip); void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip); -__attribute__((format(printf, 1, 2))) -void __intel_pt_log(const char *fmt, ...); +void __intel_pt_log(const char *fmt, ...) __printf(1, 2); #define intel_pt_log(fmt, ...) \ do { \ diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index 7528ae4f7e28..ba4c9dd18643 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -64,6 +64,13 @@ static const char * const packet_name[] = { [INTEL_PT_PIP] = "PIP", [INTEL_PT_OVF] = "OVF", [INTEL_PT_MNT] = "MNT", + [INTEL_PT_PTWRITE] = "PTWRITE", + [INTEL_PT_PTWRITE_IP] = "PTWRITE", + [INTEL_PT_EXSTOP] = "EXSTOP", + [INTEL_PT_EXSTOP_IP] = "EXSTOP", + [INTEL_PT_MWAIT] = "MWAIT", + [INTEL_PT_PWRE] = "PWRE", + [INTEL_PT_PWRX] = "PWRX", }; const char *intel_pt_pkt_name(enum intel_pt_pkt_type type) @@ -123,7 +130,7 @@ static int intel_pt_get_cbr(const unsigned char *buf, size_t len, if (len < 4) return INTEL_PT_NEED_MORE_BYTES; packet->type = INTEL_PT_CBR; - packet->payload = buf[2]; + packet->payload = le16_to_cpu(*(uint16_t *)(buf + 2)); return 4; } @@ -217,12 +224,80 @@ static int intel_pt_get_3byte(const unsigned char *buf, size_t len, } } +static int intel_pt_get_ptwrite(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + packet->count = (buf[1] >> 5) & 0x3; + packet->type = buf[1] & BIT(7) ? INTEL_PT_PTWRITE_IP : + INTEL_PT_PTWRITE; + + switch (packet->count) { + case 0: + if (len < 6) + return INTEL_PT_NEED_MORE_BYTES; + packet->payload = le32_to_cpu(*(uint32_t *)(buf + 2)); + return 6; + case 1: + if (len < 10) + return INTEL_PT_NEED_MORE_BYTES; + packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2)); + return 10; + default: + return INTEL_PT_BAD_PACKET; + } +} + +static int intel_pt_get_exstop(struct intel_pt_pkt *packet) +{ + packet->type = INTEL_PT_EXSTOP; + return 2; +} + +static int intel_pt_get_exstop_ip(struct intel_pt_pkt *packet) +{ + packet->type = INTEL_PT_EXSTOP_IP; + return 2; +} + +static int intel_pt_get_mwait(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 10) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_MWAIT; + packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2)); + return 10; +} + +static int intel_pt_get_pwre(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 4) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_PWRE; + memcpy_le64(&packet->payload, buf + 2, 2); + return 4; +} + +static int intel_pt_get_pwrx(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 7) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_PWRX; + memcpy_le64(&packet->payload, buf + 2, 5); + return 7; +} + static int intel_pt_get_ext(const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) { if (len < 2) return INTEL_PT_NEED_MORE_BYTES; + if ((buf[1] & 0x1f) == 0x12) + return intel_pt_get_ptwrite(buf, len, packet); + switch (buf[1]) { case 0xa3: /* Long TNT */ return intel_pt_get_long_tnt(buf, len, packet); @@ -244,6 +319,16 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len, return intel_pt_get_tma(buf, len, packet); case 0xC3: /* 3-byte header */ return intel_pt_get_3byte(buf, len, packet); + case 0x62: /* EXSTOP no IP */ + return intel_pt_get_exstop(packet); + case 0xE2: /* EXSTOP with IP */ + return intel_pt_get_exstop_ip(packet); + case 0xC2: /* MWAIT */ + return intel_pt_get_mwait(buf, len, packet); + case 0x22: /* PWRE */ + return intel_pt_get_pwre(buf, len, packet); + case 0xA2: /* PWRX */ + return intel_pt_get_pwrx(buf, len, packet); default: return INTEL_PT_BAD_PACKET; } @@ -522,6 +607,29 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)", name, payload, nr); return ret; + case INTEL_PT_PTWRITE: + return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload); + case INTEL_PT_PTWRITE_IP: + return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload); + case INTEL_PT_EXSTOP: + return snprintf(buf, buf_len, "%s IP:0", name); + case INTEL_PT_EXSTOP_IP: + return snprintf(buf, buf_len, "%s IP:1", name); + case INTEL_PT_MWAIT: + return snprintf(buf, buf_len, "%s 0x%llx Hints 0x%x Extensions 0x%x", + name, payload, (unsigned int)(payload & 0xff), + (unsigned int)((payload >> 32) & 0x3)); + case INTEL_PT_PWRE: + return snprintf(buf, buf_len, "%s 0x%llx HW:%u CState:%u Sub-CState:%u", + name, payload, !!(payload & 0x80), + (unsigned int)((payload >> 12) & 0xf), + (unsigned int)((payload >> 8) & 0xf)); + case INTEL_PT_PWRX: + return snprintf(buf, buf_len, "%s 0x%llx Last CState:%u Deepest CState:%u Wake Reason 0x%x", + name, payload, + (unsigned int)((payload >> 4) & 0xf), + (unsigned int)(payload & 0xf), + (unsigned int)((payload >> 8) & 0xf)); default: break; } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h index 781bb79883bd..73ddc3a88d07 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h @@ -52,6 +52,13 @@ enum intel_pt_pkt_type { INTEL_PT_PIP, INTEL_PT_OVF, INTEL_PT_MNT, + INTEL_PT_PTWRITE, + INTEL_PT_PTWRITE_IP, + INTEL_PT_EXSTOP, + INTEL_PT_EXSTOP_IP, + INTEL_PT_MWAIT, + INTEL_PT_PWRE, + INTEL_PT_PWRX, }; struct intel_pt_pkt { diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt index 767be7c76034..12e377184ee4 100644 --- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt +++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt @@ -1009,7 +1009,7 @@ GrpTable: Grp15 1: fxstor | RDGSBASE Ry (F3),(11B) 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) -4: XSAVE +4: XSAVE | ptwrite Ey (F3),(11B) 5: XRSTOR | lfence (11B) 6: XSAVEOPT | clwb (66) | mfence (11B) 7: clflush | clflushopt (66) | sfence (11B) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 4c7718f87a08..b58f9fd1e2ee 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -81,7 +81,6 @@ struct intel_pt { bool sample_instructions; u64 instructions_sample_type; - u64 instructions_sample_period; u64 instructions_id; bool sample_branches; @@ -93,6 +92,18 @@ struct intel_pt { u64 transactions_sample_type; u64 transactions_id; + bool sample_ptwrites; + u64 ptwrites_sample_type; + u64 ptwrites_id; + + bool sample_pwr_events; + u64 pwr_events_sample_type; + u64 mwait_id; + u64 pwre_id; + u64 exstop_id; + u64 pwrx_id; + u64 cbr_id; + bool synth_needs_swap; u64 tsc_bit; @@ -103,6 +114,7 @@ struct intel_pt { u64 cyc_bit; u64 noretcomp_bit; unsigned max_non_turbo_ratio; + unsigned cbr2khz; unsigned long num_events; @@ -668,6 +680,19 @@ static bool intel_pt_return_compression(struct intel_pt *pt) return true; } +static bool intel_pt_branch_enable(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + u64 config; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, &config) && + (config & 1) && !(config & 0x2000)) + return false; + } + return true; +} + static unsigned int intel_pt_mtc_period(struct intel_pt *pt) { struct perf_evsel *evsel; @@ -799,6 +824,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.walk_insn = intel_pt_walk_next_insn; params.data = ptq; params.return_compression = intel_pt_return_compression(pt); + params.branch_enable = intel_pt_branch_enable(pt); params.max_non_turbo_ratio = pt->max_non_turbo_ratio; params.mtc_period = intel_pt_mtc_period(pt); params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; @@ -1044,6 +1070,36 @@ static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) bs->nr += 1; } +static inline bool intel_pt_skip_event(struct intel_pt *pt) +{ + return pt->synth_opts.initial_skip && + pt->num_events++ < pt->synth_opts.initial_skip; +} + +static void intel_pt_prep_b_sample(struct intel_pt *pt, + struct intel_pt_queue *ptq, + union perf_event *event, + struct perf_sample *sample) +{ + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + if (!pt->timeless_decoding) + sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + + sample->cpumode = PERF_RECORD_MISC_USER; + sample->ip = ptq->state->from_ip; + sample->pid = ptq->pid; + sample->tid = ptq->tid; + sample->addr = ptq->state->to_ip; + sample->period = 1; + sample->cpu = ptq->cpu; + sample->flags = ptq->flags; + sample->insn_len = ptq->insn_len; + memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); +} + static int intel_pt_inject_event(union perf_event *event, struct perf_sample *sample, u64 type, bool swapped) @@ -1052,9 +1108,35 @@ static int intel_pt_inject_event(union perf_event *event, return perf_event__synthesize_sample(event, type, 0, sample, swapped); } -static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) +static inline int intel_pt_opt_inject(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample, u64 type) +{ + if (!pt->synth_opts.inject) + return 0; + + return intel_pt_inject_event(event, sample, type, pt->synth_needs_swap); +} + +static int intel_pt_deliver_synth_b_event(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample, u64 type) { int ret; + + ret = intel_pt_opt_inject(pt, event, sample, type); + if (ret) + return ret; + + ret = perf_session__deliver_synth_event(pt->session, event, sample); + if (ret) + pr_err("Intel PT: failed to deliver event, error %d\n", ret); + + return ret; +} + +static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) +{ struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; struct perf_sample sample = { .ip = 0, }; @@ -1066,29 +1148,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) return 0; - if (pt->synth_opts.initial_skip && - pt->num_events++ < pt->synth_opts.initial_skip) + if (intel_pt_skip_event(pt)) return 0; - event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; - event->sample.header.size = sizeof(struct perf_event_header); + intel_pt_prep_b_sample(pt, ptq, event, &sample); - if (!pt->timeless_decoding) - sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); - - sample.cpumode = PERF_RECORD_MISC_USER; - sample.ip = ptq->state->from_ip; - sample.pid = ptq->pid; - sample.tid = ptq->tid; - sample.addr = ptq->state->to_ip; sample.id = ptq->pt->branches_id; sample.stream_id = ptq->pt->branches_id; - sample.period = 1; - sample.cpu = ptq->cpu; - sample.flags = ptq->flags; - sample.insn_len = ptq->insn_len; - memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); /* * perf report cannot handle events without a branch stack when using @@ -1105,144 +1171,251 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.branch_stack = (struct branch_stack *)&dummy_bs; } - if (pt->synth_opts.inject) { - ret = intel_pt_inject_event(event, &sample, - pt->branches_sample_type, - pt->synth_needs_swap); - if (ret) - return ret; + return intel_pt_deliver_synth_b_event(pt, event, &sample, + pt->branches_sample_type); +} + +static void intel_pt_prep_sample(struct intel_pt *pt, + struct intel_pt_queue *ptq, + union perf_event *event, + struct perf_sample *sample) +{ + intel_pt_prep_b_sample(pt, ptq, event, sample); + + if (pt->synth_opts.callchain) { + thread_stack__sample(ptq->thread, ptq->chain, + pt->synth_opts.callchain_sz, sample->ip); + sample->callchain = ptq->chain; } - ret = perf_session__deliver_synth_event(pt->session, event, &sample); - if (ret) - pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n", - ret); + if (pt->synth_opts.last_branch) { + intel_pt_copy_last_branch_rb(ptq); + sample->branch_stack = ptq->last_branch; + } +} + +static inline int intel_pt_deliver_synth_event(struct intel_pt *pt, + struct intel_pt_queue *ptq, + union perf_event *event, + struct perf_sample *sample, + u64 type) +{ + int ret; + + ret = intel_pt_deliver_synth_b_event(pt, event, sample, type); + + if (pt->synth_opts.last_branch) + intel_pt_reset_last_branch_rb(ptq); return ret; } static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) { - int ret; struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; struct perf_sample sample = { .ip = 0, }; - if (pt->synth_opts.initial_skip && - pt->num_events++ < pt->synth_opts.initial_skip) + if (intel_pt_skip_event(pt)) return 0; - event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; - event->sample.header.size = sizeof(struct perf_event_header); - - if (!pt->timeless_decoding) - sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + intel_pt_prep_sample(pt, ptq, event, &sample); - sample.cpumode = PERF_RECORD_MISC_USER; - sample.ip = ptq->state->from_ip; - sample.pid = ptq->pid; - sample.tid = ptq->tid; - sample.addr = ptq->state->to_ip; sample.id = ptq->pt->instructions_id; sample.stream_id = ptq->pt->instructions_id; sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; - sample.cpu = ptq->cpu; - sample.flags = ptq->flags; - sample.insn_len = ptq->insn_len; - memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); ptq->last_insn_cnt = ptq->state->tot_insn_cnt; - if (pt->synth_opts.callchain) { - thread_stack__sample(ptq->thread, ptq->chain, - pt->synth_opts.callchain_sz, sample.ip); - sample.callchain = ptq->chain; - } + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + pt->instructions_sample_type); +} - if (pt->synth_opts.last_branch) { - intel_pt_copy_last_branch_rb(ptq); - sample.branch_stack = ptq->last_branch; - } +static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; - if (pt->synth_opts.inject) { - ret = intel_pt_inject_event(event, &sample, - pt->instructions_sample_type, - pt->synth_needs_swap); - if (ret) - return ret; - } + if (intel_pt_skip_event(pt)) + return 0; - ret = perf_session__deliver_synth_event(pt->session, event, &sample); - if (ret) - pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n", - ret); + intel_pt_prep_sample(pt, ptq, event, &sample); - if (pt->synth_opts.last_branch) - intel_pt_reset_last_branch_rb(ptq); + sample.id = ptq->pt->transactions_id; + sample.stream_id = ptq->pt->transactions_id; - return ret; + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + pt->transactions_sample_type); } -static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) +static void intel_pt_prep_p_sample(struct intel_pt *pt, + struct intel_pt_queue *ptq, + union perf_event *event, + struct perf_sample *sample) +{ + intel_pt_prep_sample(pt, ptq, event, sample); + + /* + * Zero IP is used to mean "trace start" but that is not the case for + * power or PTWRITE events with no IP, so clear the flags. + */ + if (!sample->ip) + sample->flags = 0; +} + +static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq) { - int ret; struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; struct perf_sample sample = { .ip = 0, }; + struct perf_synth_intel_ptwrite raw; - if (pt->synth_opts.initial_skip && - pt->num_events++ < pt->synth_opts.initial_skip) + if (intel_pt_skip_event(pt)) return 0; - event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; - event->sample.header.size = sizeof(struct perf_event_header); + intel_pt_prep_p_sample(pt, ptq, event, &sample); - if (!pt->timeless_decoding) - sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + sample.id = ptq->pt->ptwrites_id; + sample.stream_id = ptq->pt->ptwrites_id; - sample.cpumode = PERF_RECORD_MISC_USER; - sample.ip = ptq->state->from_ip; - sample.pid = ptq->pid; - sample.tid = ptq->tid; - sample.addr = ptq->state->to_ip; - sample.id = ptq->pt->transactions_id; - sample.stream_id = ptq->pt->transactions_id; - sample.period = 1; - sample.cpu = ptq->cpu; - sample.flags = ptq->flags; - sample.insn_len = ptq->insn_len; - memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); + raw.flags = 0; + raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP); + raw.payload = cpu_to_le64(ptq->state->ptw_payload); - if (pt->synth_opts.callchain) { - thread_stack__sample(ptq->thread, ptq->chain, - pt->synth_opts.callchain_sz, sample.ip); - sample.callchain = ptq->chain; - } + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); - if (pt->synth_opts.last_branch) { - intel_pt_copy_last_branch_rb(ptq); - sample.branch_stack = ptq->last_branch; - } + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + pt->ptwrites_sample_type); +} - if (pt->synth_opts.inject) { - ret = intel_pt_inject_event(event, &sample, - pt->transactions_sample_type, - pt->synth_needs_swap); - if (ret) - return ret; - } +static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + struct perf_synth_intel_cbr raw; + u32 flags; - ret = perf_session__deliver_synth_event(pt->session, event, &sample); - if (ret) - pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", - ret); + if (intel_pt_skip_event(pt)) + return 0; - if (pt->synth_opts.last_branch) - intel_pt_reset_last_branch_rb(ptq); + intel_pt_prep_p_sample(pt, ptq, event, &sample); - return ret; + sample.id = ptq->pt->cbr_id; + sample.stream_id = ptq->pt->cbr_id; + + flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16); + raw.flags = cpu_to_le32(flags); + raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz); + raw.reserved3 = 0; + + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); + + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + pt->pwr_events_sample_type); +} + +static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + struct perf_synth_intel_mwait raw; + + if (intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_p_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->mwait_id; + sample.stream_id = ptq->pt->mwait_id; + + raw.reserved = 0; + raw.payload = cpu_to_le64(ptq->state->mwait_payload); + + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); + + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + pt->pwr_events_sample_type); +} + +static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + struct perf_synth_intel_pwre raw; + + if (intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_p_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->pwre_id; + sample.stream_id = ptq->pt->pwre_id; + + raw.reserved = 0; + raw.payload = cpu_to_le64(ptq->state->pwre_payload); + + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); + + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + pt->pwr_events_sample_type); +} + +static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + struct perf_synth_intel_exstop raw; + + if (intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_p_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->exstop_id; + sample.stream_id = ptq->pt->exstop_id; + + raw.flags = 0; + raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP); + + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); + + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + pt->pwr_events_sample_type); +} + +static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + struct perf_synth_intel_pwrx raw; + + if (intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_p_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->pwrx_id; + sample.stream_id = ptq->pt->pwrx_id; + + raw.reserved = 0; + raw.payload = cpu_to_le64(ptq->state->pwrx_payload); + + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); + + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + pt->pwr_events_sample_type); } static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, @@ -1296,6 +1469,10 @@ static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip) PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT)); } +#define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \ + INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \ + INTEL_PT_CBR_CHG) + static int intel_pt_sample(struct intel_pt_queue *ptq) { const struct intel_pt_state *state = ptq->state; @@ -1307,24 +1484,52 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; - if (pt->sample_instructions && - (state->type & INTEL_PT_INSTRUCTION) && - (!pt->synth_opts.initial_skip || - pt->num_events++ >= pt->synth_opts.initial_skip)) { + if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) { + if (state->type & INTEL_PT_CBR_CHG) { + err = intel_pt_synth_cbr_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_MWAIT_OP) { + err = intel_pt_synth_mwait_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_PWR_ENTRY) { + err = intel_pt_synth_pwre_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_EX_STOP) { + err = intel_pt_synth_exstop_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_PWR_EXIT) { + err = intel_pt_synth_pwrx_sample(ptq); + if (err) + return err; + } + } + + if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) { err = intel_pt_synth_instruction_sample(ptq); if (err) return err; } - if (pt->sample_transactions && - (state->type & INTEL_PT_TRANSACTION) && - (!pt->synth_opts.initial_skip || - pt->num_events++ >= pt->synth_opts.initial_skip)) { + if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) { err = intel_pt_synth_transaction_sample(ptq); if (err) return err; } + if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) { + err = intel_pt_synth_ptwrite_sample(ptq); + if (err) + return err; + } + if (!(state->type & INTEL_PT_BRANCH)) return 0; @@ -1925,36 +2130,65 @@ static int intel_pt_event_synth(struct perf_tool *tool, NULL); } -static int intel_pt_synth_event(struct perf_session *session, +static int intel_pt_synth_event(struct perf_session *session, const char *name, struct perf_event_attr *attr, u64 id) { struct intel_pt_synth intel_pt_synth; + int err; + + pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n", + name, id, (u64)attr->sample_type); memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth)); intel_pt_synth.session = session; - return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, - &id, intel_pt_event_synth); + err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, + &id, intel_pt_event_synth); + if (err) + pr_err("%s: failed to synthesize '%s' event type\n", + __func__, name); + + return err; } -static int intel_pt_synth_events(struct intel_pt *pt, - struct perf_session *session) +static void intel_pt_set_event_name(struct perf_evlist *evlist, u64 id, + const char *name) { - struct perf_evlist *evlist = session->evlist; struct perf_evsel *evsel; - struct perf_event_attr attr; - bool found = false; - u64 id; - int err; evlist__for_each_entry(evlist, evsel) { - if (evsel->attr.type == pt->pmu_type && evsel->ids) { - found = true; + if (evsel->id && evsel->id[0] == id) { + if (evsel->name) + zfree(&evsel->name); + evsel->name = strdup(name); break; } } +} - if (!found) { +static struct perf_evsel *intel_pt_evsel(struct intel_pt *pt, + struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->attr.type == pt->pmu_type && evsel->ids) + return evsel; + } + + return NULL; +} + +static int intel_pt_synth_events(struct intel_pt *pt, + struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel = intel_pt_evsel(pt, evlist); + struct perf_event_attr attr; + u64 id; + int err; + + if (!evsel) { pr_debug("There are no selected events with Intel Processor Trace data\n"); return 0; } @@ -1983,6 +2217,25 @@ static int intel_pt_synth_events(struct intel_pt *pt, if (!id) id = 1; + if (pt->synth_opts.branches) { + attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; + attr.sample_period = 1; + attr.sample_type |= PERF_SAMPLE_ADDR; + err = intel_pt_synth_event(session, "branches", &attr, id); + if (err) + return err; + pt->sample_branches = true; + pt->branches_sample_type = attr.sample_type; + pt->branches_id = id; + id += 1; + attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; + } + + if (pt->synth_opts.callchain) + attr.sample_type |= PERF_SAMPLE_CALLCHAIN; + if (pt->synth_opts.last_branch) + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + if (pt->synth_opts.instructions) { attr.config = PERF_COUNT_HW_INSTRUCTIONS; if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS) @@ -1990,70 +2243,90 @@ static int intel_pt_synth_events(struct intel_pt *pt, intel_pt_ns_to_ticks(pt, pt->synth_opts.period); else attr.sample_period = pt->synth_opts.period; - pt->instructions_sample_period = attr.sample_period; - if (pt->synth_opts.callchain) - attr.sample_type |= PERF_SAMPLE_CALLCHAIN; - if (pt->synth_opts.last_branch) - attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; - pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", - id, (u64)attr.sample_type); - err = intel_pt_synth_event(session, &attr, id); - if (err) { - pr_err("%s: failed to synthesize 'instructions' event type\n", - __func__); + err = intel_pt_synth_event(session, "instructions", &attr, id); + if (err) return err; - } pt->sample_instructions = true; pt->instructions_sample_type = attr.sample_type; pt->instructions_id = id; id += 1; } + attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD; + attr.sample_period = 1; + if (pt->synth_opts.transactions) { attr.config = PERF_COUNT_HW_INSTRUCTIONS; - attr.sample_period = 1; - if (pt->synth_opts.callchain) - attr.sample_type |= PERF_SAMPLE_CALLCHAIN; - if (pt->synth_opts.last_branch) - attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; - pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", - id, (u64)attr.sample_type); - err = intel_pt_synth_event(session, &attr, id); - if (err) { - pr_err("%s: failed to synthesize 'transactions' event type\n", - __func__); + err = intel_pt_synth_event(session, "transactions", &attr, id); + if (err) return err; - } pt->sample_transactions = true; + pt->transactions_sample_type = attr.sample_type; pt->transactions_id = id; + intel_pt_set_event_name(evlist, id, "transactions"); id += 1; - evlist__for_each_entry(evlist, evsel) { - if (evsel->id && evsel->id[0] == pt->transactions_id) { - if (evsel->name) - zfree(&evsel->name); - evsel->name = strdup("transactions"); - break; - } - } } - if (pt->synth_opts.branches) { - attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; - attr.sample_period = 1; - attr.sample_type |= PERF_SAMPLE_ADDR; - attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN; - attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK; - pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", - id, (u64)attr.sample_type); - err = intel_pt_synth_event(session, &attr, id); - if (err) { - pr_err("%s: failed to synthesize 'branches' event type\n", - __func__); + attr.type = PERF_TYPE_SYNTH; + attr.sample_type |= PERF_SAMPLE_RAW; + + if (pt->synth_opts.ptwrites) { + attr.config = PERF_SYNTH_INTEL_PTWRITE; + err = intel_pt_synth_event(session, "ptwrite", &attr, id); + if (err) return err; - } - pt->sample_branches = true; - pt->branches_sample_type = attr.sample_type; - pt->branches_id = id; + pt->sample_ptwrites = true; + pt->ptwrites_sample_type = attr.sample_type; + pt->ptwrites_id = id; + intel_pt_set_event_name(evlist, id, "ptwrite"); + id += 1; + } + + if (pt->synth_opts.pwr_events) { + pt->sample_pwr_events = true; + pt->pwr_events_sample_type = attr.sample_type; + + attr.config = PERF_SYNTH_INTEL_CBR; + err = intel_pt_synth_event(session, "cbr", &attr, id); + if (err) + return err; + pt->cbr_id = id; + intel_pt_set_event_name(evlist, id, "cbr"); + id += 1; + } + + if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) { + attr.config = PERF_SYNTH_INTEL_MWAIT; + err = intel_pt_synth_event(session, "mwait", &attr, id); + if (err) + return err; + pt->mwait_id = id; + intel_pt_set_event_name(evlist, id, "mwait"); + id += 1; + + attr.config = PERF_SYNTH_INTEL_PWRE; + err = intel_pt_synth_event(session, "pwre", &attr, id); + if (err) + return err; + pt->pwre_id = id; + intel_pt_set_event_name(evlist, id, "pwre"); + id += 1; + + attr.config = PERF_SYNTH_INTEL_EXSTOP; + err = intel_pt_synth_event(session, "exstop", &attr, id); + if (err) + return err; + pt->exstop_id = id; + intel_pt_set_event_name(evlist, id, "exstop"); + id += 1; + + attr.config = PERF_SYNTH_INTEL_PWRX; + err = intel_pt_synth_event(session, "pwrx", &attr, id); + if (err) + return err; + pt->pwrx_id = id; + intel_pt_set_event_name(evlist, id, "pwrx"); + id += 1; } pt->synth_needs_swap = evsel->needs_swap; @@ -2322,6 +2595,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq); intel_pt_log("Maximum non-turbo ratio %u\n", pt->max_non_turbo_ratio); + pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000; } if (pt->synth_opts.calls) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index d97e014c3df3..5de2b86b9880 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -572,16 +572,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine, if (dso == NULL) goto out_unlock; - if (machine__is_host(machine)) - dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; - else - dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; - - /* _KMODULE_COMP should be next to _KMODULE */ - if (m->kmod && m->comp) - dso->symtab_type++; - - dso__set_short_name(dso, strdup(m->name), true); + dso__set_module_info(dso, m, machine); dso__set_long_name(dso, strdup(filename), true); } @@ -1218,10 +1209,12 @@ int machine__create_kernel_maps(struct machine *machine) */ map_groups__fixup_end(&machine->kmaps); - if (machine__get_running_kernel_start(machine, &name, &addr)) { - } else if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) { - machine__destroy_kernel_maps(machine); - return -1; + if (!machine__get_running_kernel_start(machine, &name, &addr)) { + if (name && + maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) { + machine__destroy_kernel_maps(machine); + return -1; + } } return 0; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index ea7f450dc609..389e9729331f 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -2,6 +2,7 @@ #define __PMU_H #include <linux/bitmap.h> +#include <linux/compiler.h> #include <linux/perf_event.h> #include <stdbool.h> #include "evsel.h" @@ -83,8 +84,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet, bool long_desc, bool details_flag); bool pmu_have_event(const char *pname, const char *name); -int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, - ...) __attribute__((format(scanf, 3, 4))); +int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4); int perf_pmu__test(void); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 84e7e698411e..a2670e9d652d 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -619,7 +619,7 @@ static int post_process_probe_trace_point(struct probe_trace_point *tp, struct map *map, unsigned long offs) { struct symbol *sym; - u64 addr = tp->address + tp->offset - offs; + u64 addr = tp->address - offs; sym = map__find_symbol(map, addr); if (!sym) diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 373842656fb6..5812947418dd 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -1,6 +1,7 @@ #ifndef _PROBE_EVENT_H #define _PROBE_EVENT_H +#include <linux/compiler.h> #include <stdbool.h> #include "intlist.h" @@ -171,8 +172,7 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev, struct symbol *sym); /* If there is no space to write, returns -E2BIG. */ -int e_snprintf(char *str, size_t size, const char *format, ...) - __attribute__((format(printf, 3, 4))); +int e_snprintf(char *str, size_t size, const char *format, ...) __printf(3, 4); /* Maximum index number of event-name postfix */ #define MAX_EVENT_INDEX 1024 diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 9d92af7d0718..57b7a00e6f16 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -28,6 +28,7 @@ #include <stdbool.h> #include <errno.h> #include <linux/bitmap.h> +#include <linux/compiler.h> #include <linux/time64.h> #include "../../perf.h" @@ -84,7 +85,7 @@ struct tables { static struct tables tables_global; -static void handler_call_die(const char *handler_name) NORETURN; +static void handler_call_die(const char *handler_name) __noreturn; static void handler_call_die(const char *handler_name) { PyErr_Print(); @@ -1219,7 +1220,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "# be retrieved using Python functions of the form " "common_*(context).\n"); - fprintf(ofp, "# See the perf-trace-python Documentation for the list " + fprintf(ofp, "# See the perf-script-python Documentation for the list " "of available functions.\n\n"); fprintf(ofp, "import os\n"); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 7dc1096264c5..d19c40a81040 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2035,7 +2035,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) { pr_err("File does not contain CPU events. " - "Remove -c option to proceed.\n"); + "Remove -C option to proceed.\n"); return -1; } } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 5762ae4e9e91..8b327c955a4f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2532,12 +2532,12 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str, ret = sort_dimension__add(list, tok, evlist, level); if (ret == -EINVAL) { if (!cacheline_size && !strncasecmp(tok, "dcacheline", strlen(tok))) - error("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system"); + pr_err("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system"); else - error("Invalid --sort key: `%s'", tok); + pr_err("Invalid --sort key: `%s'", tok); break; } else if (ret == -ESRCH) { - error("Unknown --sort key: `%s'", tok); + pr_err("Unknown --sort key: `%s'", tok); break; } } @@ -2594,7 +2594,7 @@ static int setup_sort_order(struct perf_evlist *evlist) return 0; if (sort_order[1] == '\0') { - error("Invalid --sort key: `+'"); + pr_err("Invalid --sort key: `+'"); return -EINVAL; } @@ -2604,7 +2604,7 @@ static int setup_sort_order(struct perf_evlist *evlist) */ if (asprintf(&new_sort_order, "%s,%s", get_default_sort_order(evlist), sort_order + 1) < 0) { - error("Not enough memory to set up --sort"); + pr_err("Not enough memory to set up --sort"); return -ENOMEM; } @@ -2668,7 +2668,7 @@ static int __setup_sorting(struct perf_evlist *evlist) str = strdup(sort_keys); if (str == NULL) { - error("Not enough memory to setup sort keys"); + pr_err("Not enough memory to setup sort keys"); return -ENOMEM; } @@ -2678,7 +2678,7 @@ static int __setup_sorting(struct perf_evlist *evlist) if (!is_strict_order(field_order)) { str = setup_overhead(str); if (str == NULL) { - error("Not enough memory to setup overhead keys"); + pr_err("Not enough memory to setup overhead keys"); return -ENOMEM; } } @@ -2834,10 +2834,10 @@ static int setup_output_list(struct perf_hpp_list *list, char *str) tok; tok = strtok_r(NULL, ", ", &tmp)) { ret = output_field_add(list, tok); if (ret == -EINVAL) { - error("Invalid --fields key: `%s'", tok); + pr_err("Invalid --fields key: `%s'", tok); break; } else if (ret == -ESRCH) { - error("Unknown --fields key: `%s'", tok); + pr_err("Unknown --fields key: `%s'", tok); break; } } @@ -2877,7 +2877,7 @@ static int __setup_output_field(void) strp = str = strdup(field_order); if (str == NULL) { - error("Not enough memory to setup output fields"); + pr_err("Not enough memory to setup output fields"); return -ENOMEM; } @@ -2885,7 +2885,7 @@ static int __setup_output_field(void) strp++; if (!strlen(strp)) { - error("Invalid --fields key: `+'"); + pr_err("Invalid --fields key: `+'"); goto out; } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index ac10cc675d39..719d6cb86952 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS]; static struct rblist runtime_saved_values; static bool have_frontend_stalled; @@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void) memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); + memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats)); + memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats)); next = rb_first(&runtime_saved_values.entries); while (next) { @@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, SMI_NUM)) + update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, APERF)) + update_stats(&runtime_aperf_stats[ctx][cpu], count[0]); if (counter->collect_stat) { struct saved_value *v = saved_value_lookup(counter, cpu, ctx, @@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu) return sanitize_val(1.0 - sum); } +static void print_smi_cost(int cpu, struct perf_evsel *evsel, + struct perf_stat_output_ctx *out) +{ + double smi_num, aperf, cycles, cost = 0.0; + int ctx = evsel_context(evsel); + const char *color = NULL; + + smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]); + aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]); + cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]); + + if ((cycles == 0) || (aperf == 0)) + return; + + if (smi_num) + cost = (aperf - cycles) / aperf * 100.00; + + if (cost > 10) + color = PERF_COLOR_RED; + out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost); + out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num); +} + void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out) @@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, } snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); + } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { + print_smi_cost(cpu, evsel, out); } else { print_metric(ctxp, NULL, NULL, NULL, 0); } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index c58174443dc1..53b9a994a3dc 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired), ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles), ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles), + ID(SMI_NUM, msr/smi/), + ID(APERF, msr/aperf/), }; #undef ID diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 0a65ae23f495..7522bf10b03e 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -22,6 +22,8 @@ enum perf_stat_evsel_id { PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED, PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES, PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES, + PERF_STAT_EVSEL_ID__SMI_NUM, + PERF_STAT_EVSEL_ID__APERF, PERF_STAT_EVSEL_ID__MAX, }; diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index 318424ea561d..802d743378af 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -42,6 +42,7 @@ #include <stdarg.h> #include <stddef.h> #include <string.h> +#include <linux/compiler.h> #include <sys/types.h> extern char strbuf_slopbuf[]; @@ -85,8 +86,7 @@ static inline int strbuf_addstr(struct strbuf *sb, const char *s) { return strbuf_add(sb, s, strlen(s)); } -__attribute__((format(printf,2,3))) -int strbuf_addf(struct strbuf *sb, const char *fmt, ...); +int strbuf_addf(struct strbuf *sb, const char *fmt, ...) __printf(2, 3); /* XXX: if read fails, any partial read is undone */ ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index e7ee47f7377a..502505cf236a 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -637,43 +637,6 @@ static int dso__swap_init(struct dso *dso, unsigned char eidata) return 0; } -static int decompress_kmodule(struct dso *dso, const char *name, - enum dso_binary_type type) -{ - int fd = -1; - char tmpbuf[] = "/tmp/perf-kmod-XXXXXX"; - struct kmod_path m; - - if (type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP && - type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP && - type != DSO_BINARY_TYPE__BUILD_ID_CACHE) - return -1; - - if (type == DSO_BINARY_TYPE__BUILD_ID_CACHE) - name = dso->long_name; - - if (kmod_path__parse_ext(&m, name) || !m.comp) - return -1; - - fd = mkstemp(tmpbuf); - if (fd < 0) { - dso->load_errno = errno; - goto out; - } - - if (!decompress_to_file(m.ext, name, fd)) { - dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE; - close(fd); - fd = -1; - } - - unlink(tmpbuf); - -out: - free(m.ext); - return fd; -} - bool symsrc__possibly_runtime(struct symsrc *ss) { return ss->dynsym || ss->opdsec; @@ -705,9 +668,11 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, int fd; if (dso__needs_decompress(dso)) { - fd = decompress_kmodule(dso, name, type); + fd = dso__decompress_kmodule_fd(dso, name); if (fd < 0) return -1; + + type = dso->symtab_type; } else { fd = open(name, O_RDONLY); if (fd < 0) { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 8f2b068ff756..e7a98dbd2aed 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1562,10 +1562,6 @@ int dso__load(struct dso *dso, struct map *map) if (!runtime_ss && syms_ss) runtime_ss = syms_ss; - if (syms_ss && syms_ss->type == DSO_BINARY_TYPE__BUILD_ID_CACHE) - if (dso__build_id_is_kmod(dso, name, PATH_MAX)) - kmod = true; - if (syms_ss) ret = dso__load_sym(dso, map, syms_ss, runtime_ss, kmod); else diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 746bbee645d9..e0a6e9a6a053 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -24,7 +24,7 @@ #include <errno.h> #include "../perf.h" -#include "util.h" +#include "debug.h" #include "trace-event.h" #include "sane_ctype.h" @@ -150,7 +150,7 @@ void parse_ftrace_printk(struct pevent *pevent, while (line) { addr_str = strtok_r(line, ":", &fmt); if (!addr_str) { - warning("printk format with empty entry"); + pr_warning("printk format with empty entry"); break; } addr = strtoull(addr_str, NULL, 16); diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 943a06291587..7755a5e0fe5e 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -39,6 +39,14 @@ static int __report_module(struct addr_location *al, u64 ip, return 0; mod = dwfl_addrmodule(ui->dwfl, ip); + if (mod) { + Dwarf_Addr s; + + dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); + if (s != al->map->start) + mod = 0; + } + if (!mod) mod = dwfl_report_elf(ui->dwfl, dso->short_name, dso->long_name, -1, al->map->start, @@ -170,6 +178,14 @@ frame_callback(Dwfl_Frame *state, void *arg) Dwarf_Addr pc; bool isactivation; + if (!dwfl_frame_pc(state, &pc, NULL)) { + pr_err("%s", dwfl_errmsg(-1)); + return DWARF_CB_ABORT; + } + + // report the module before we query for isactivation + report_module(pc, ui); + if (!dwfl_frame_pc(state, &pc, &isactivation)) { pr_err("%s", dwfl_errmsg(-1)); return DWARF_CB_ABORT; @@ -224,7 +240,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, err = dwfl_getthread_frames(ui->dwfl, thread->tid, frame_callback, ui); - if (err && !ui->max_stack) + if (err && ui->max_stack != max_stack) err = 0; /* diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index 996046a66fe5..6cc9d9888ce0 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c @@ -9,75 +9,17 @@ #include "util.h" #include "debug.h" -static void report(const char *prefix, const char *err, va_list params) -{ - char msg[1024]; - vsnprintf(msg, sizeof(msg), err, params); - fprintf(stderr, " %s%s\n", prefix, msg); -} - -static NORETURN void usage_builtin(const char *err) +static __noreturn void usage_builtin(const char *err) { fprintf(stderr, "\n Usage: %s\n", err); exit(129); } -static NORETURN void die_builtin(const char *err, va_list params) -{ - report(" Fatal: ", err, params); - exit(128); -} - -static void error_builtin(const char *err, va_list params) -{ - report(" Error: ", err, params); -} - -static void warn_builtin(const char *warn, va_list params) -{ - report(" Warning: ", warn, params); -} - /* If we are in a dlopen()ed .so write to a global variable would segfault * (ugh), so keep things static. */ -static void (*usage_routine)(const char *err) NORETURN = usage_builtin; -static void (*error_routine)(const char *err, va_list params) = error_builtin; -static void (*warn_routine)(const char *err, va_list params) = warn_builtin; - -void set_warning_routine(void (*routine)(const char *err, va_list params)) -{ - warn_routine = routine; -} +static void (*usage_routine)(const char *err) __noreturn = usage_builtin; void usage(const char *err) { usage_routine(err); } - -void die(const char *err, ...) -{ - va_list params; - - va_start(params, err); - die_builtin(err, params); - va_end(params); -} - -int error(const char *err, ...) -{ - va_list params; - - va_start(params, err); - error_routine(err, params); - va_end(params); - return -1; -} - -void warning(const char *warn, ...) -{ - va_list params; - - va_start(params, warn); - warn_routine(warn, params); - va_end(params); -} diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 28c9f335006c..988111e0bab5 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -343,43 +343,6 @@ int perf_event_paranoid(void) return value; } - -bool find_process(const char *name) -{ - size_t len = strlen(name); - DIR *dir; - struct dirent *d; - int ret = -1; - - dir = opendir(procfs__mountpoint()); - if (!dir) - return false; - - /* Walk through the directory. */ - while (ret && (d = readdir(dir)) != NULL) { - char path[PATH_MAX]; - char *data; - size_t size; - - if ((d->d_type != DT_DIR) || - !strcmp(".", d->d_name) || - !strcmp("..", d->d_name)) - continue; - - scnprintf(path, sizeof(path), "%s/%s/comm", - procfs__mountpoint(), d->d_name); - - if (filename__read_str(path, &data, &size)) - continue; - - ret = strncmp(name, data, len); - free(data); - } - - closedir(dir); - return ret ? false : true; -} - static int fetch_ubuntu_kernel_version(unsigned int *puint) { @@ -387,8 +350,12 @@ fetch_ubuntu_kernel_version(unsigned int *puint) size_t line_len = 0; char *ptr, *line = NULL; int version, patchlevel, sublevel, err; - FILE *vsig = fopen("/proc/version_signature", "r"); + FILE *vsig; + + if (!puint) + return 0; + vsig = fopen("/proc/version_signature", "r"); if (!vsig) { pr_debug("Open /proc/version_signature failed: %s\n", strerror(errno)); @@ -418,8 +385,7 @@ fetch_ubuntu_kernel_version(unsigned int *puint) goto errout; } - if (puint) - *puint = (version << 16) + (patchlevel << 8) + sublevel; + *puint = (version << 16) + (patchlevel << 8) + sublevel; err = 0; errout: free(line); @@ -446,6 +412,9 @@ fetch_kernel_version(unsigned int *puint, char *str, str[str_size - 1] = '\0'; } + if (!puint || int_ver_ready) + return 0; + err = sscanf(utsname.release, "%d.%d.%d", &version, &patchlevel, &sublevel); @@ -455,8 +424,7 @@ fetch_kernel_version(unsigned int *puint, char *str, return -1; } - if (puint && !int_ver_ready) - *puint = (version << 16) + (patchlevel << 8) + sublevel; + *puint = (version << 16) + (patchlevel << 8) + sublevel; return 0; } diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 5dfb9bb6482d..2c9e58a45310 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -1,7 +1,6 @@ #ifndef GIT_COMPAT_UTIL_H #define GIT_COMPAT_UTIL_H -#define _ALL_SOURCE 1 #define _BSD_SOURCE 1 /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */ #define _DEFAULT_SOURCE 1 @@ -11,24 +10,12 @@ #include <stddef.h> #include <stdlib.h> #include <stdarg.h> +#include <linux/compiler.h> #include <linux/types.h> -#ifdef __GNUC__ -#define NORETURN __attribute__((__noreturn__)) -#else -#define NORETURN -#ifndef __attribute__ -#define __attribute__(x) -#endif -#endif - /* General helper functions */ -void usage(const char *err) NORETURN; -void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); -int error(const char *err, ...) __attribute__((format (printf, 1, 2))); -void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); - -void set_warning_routine(void (*routine)(const char *err, va_list params)); +void usage(const char *err) __noreturn; +void die(const char *err, ...) __noreturn __printf(1, 2); static inline void *zalloc(size_t size) { @@ -57,8 +44,6 @@ int hex2u64(const char *ptr, u64 *val); extern unsigned int page_size; extern int cacheline_size; -bool find_process(const char *name); - int fetch_kernel_version(unsigned int *puint, char *str, size_t str_sz); #define KVER_VERSION(x) (((x) >> 16) & 0xff) |