diff options
Diffstat (limited to 'tools/perf/util')
93 files changed, 11818 insertions, 891 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 601d11440596..e912856cc4e5 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -14,6 +14,7 @@ libperf-y += find_next_bit.o libperf-y += help.o libperf-y += kallsyms.o libperf-y += levenshtein.o +libperf-y += llvm-utils.o libperf-y += parse-options.o libperf-y += parse-events.o libperf-y += path.o @@ -67,6 +68,7 @@ libperf-y += target.o libperf-y += rblist.o libperf-y += intlist.o libperf-y += vdso.o +libperf-y += counts.o libperf-y += stat.o libperf-y += stat-shadow.o libperf-y += record.o @@ -76,9 +78,13 @@ libperf-$(CONFIG_X86) += tsc.o libperf-y += cloexec.o libperf-y += thread-stack.o libperf-$(CONFIG_AUXTRACE) += auxtrace.o +libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ +libperf-$(CONFIG_AUXTRACE) += intel-pt.o +libperf-$(CONFIG_AUXTRACE) += intel-bts.o libperf-y += parse-branch-options.o libperf-$(CONFIG_LIBELF) += symbol-elf.o +libperf-$(CONFIG_LIBELF) += probe-file.o libperf-$(CONFIG_LIBELF) += probe-event.o ifndef CONFIG_LIBELF @@ -143,6 +149,6 @@ $(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) -$(OUTPUT)util/hweight.o: ../../lib/hweight.c FORCE +$(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 03b7bc70eb66..d1eece70b84d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -473,17 +473,73 @@ int symbol__alloc_hist(struct symbol *sym) return 0; } +/* The cycles histogram is lazily allocated. 
*/ +static int symbol__alloc_hist_cycles(struct symbol *sym) +{ + struct annotation *notes = symbol__annotation(sym); + const size_t size = symbol__size(sym); + + notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist)); + if (notes->src->cycles_hist == NULL) + return -1; + return 0; +} + void symbol__annotate_zero_histograms(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); pthread_mutex_lock(&notes->lock); - if (notes->src != NULL) + if (notes->src != NULL) { memset(notes->src->histograms, 0, notes->src->nr_histograms * notes->src->sizeof_sym_hist); + if (notes->src->cycles_hist) + memset(notes->src->cycles_hist, 0, + symbol__size(sym) * sizeof(struct cyc_hist)); + } pthread_mutex_unlock(&notes->lock); } +static int __symbol__account_cycles(struct annotation *notes, + u64 start, + unsigned offset, unsigned cycles, + unsigned have_start) +{ + struct cyc_hist *ch; + + ch = notes->src->cycles_hist; + /* + * For now we can only account one basic block per + * final jump. But multiple could be overlapping. + * Always account the longest one. So when + * a shorter one has been already seen throw it away. + * + * We separately always account the full cycles. 
+ */ + ch[offset].num_aggr++; + ch[offset].cycles_aggr += cycles; + + if (!have_start && ch[offset].have_start) + return 0; + if (ch[offset].num) { + if (have_start && (!ch[offset].have_start || + ch[offset].start > start)) { + ch[offset].have_start = 0; + ch[offset].cycles = 0; + ch[offset].num = 0; + if (ch[offset].reset < 0xffff) + ch[offset].reset++; + } else if (have_start && + ch[offset].start < start) + return 0; + } + ch[offset].have_start = have_start; + ch[offset].start = start; + ch[offset].cycles += cycles; + ch[offset].num++; + return 0; +} + static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, struct annotation *notes, int evidx, u64 addr) { @@ -506,7 +562,7 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, return 0; } -static struct annotation *symbol__get_annotation(struct symbol *sym) +static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles) { struct annotation *notes = symbol__annotation(sym); @@ -514,6 +570,10 @@ static struct annotation *symbol__get_annotation(struct symbol *sym) if (symbol__alloc_hist(sym) < 0) return NULL; } + if (!notes->src->cycles_hist && cycles) { + if (symbol__alloc_hist_cycles(sym) < 0) + return NULL; + } return notes; } @@ -524,12 +584,73 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, if (sym == NULL) return 0; - notes = symbol__get_annotation(sym); + notes = symbol__get_annotation(sym, false); if (notes == NULL) return -ENOMEM; return __symbol__inc_addr_samples(sym, map, notes, evidx, addr); } +static int symbol__account_cycles(u64 addr, u64 start, + struct symbol *sym, unsigned cycles) +{ + struct annotation *notes; + unsigned offset; + + if (sym == NULL) + return 0; + notes = symbol__get_annotation(sym, true); + if (notes == NULL) + return -ENOMEM; + if (addr < sym->start || addr >= sym->end) + return -ERANGE; + + if (start) { + if (start < sym->start || start >= sym->end) + return -ERANGE; + if (start >= addr) + 
start = 0; + } + offset = addr - sym->start; + return __symbol__account_cycles(notes, + start ? start - sym->start : 0, + offset, cycles, + !!start); +} + +int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, + struct addr_map_symbol *start, + unsigned cycles) +{ + u64 saddr = 0; + int err; + + if (!cycles) + return 0; + + /* + * Only set start when IPC can be computed. We can only + * compute it when the basic block is completely in a single + * function. + * Special case the case when the jump is elsewhere, but + * it starts on the function start. + */ + if (start && + (start->sym == ams->sym || + (ams->sym && + start->addr == ams->sym->start + ams->map->start))) + saddr = start->al_addr; + if (saddr == 0) + pr_debug2("BB with bad start: addr %"PRIx64" start %"PRIx64" sym %"PRIx64" saddr %"PRIx64"\n", + ams->addr, + start ? start->addr : 0, + ams->sym ? ams->sym->start + ams->map->start : 0, + saddr); + err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles); + if (err) + pr_debug2("account_cycles failed %d\n", err); + return err; +} + int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx) { return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr); @@ -1005,6 +1126,7 @@ fallback: dso->annotate_warned = 1; pr_err("Can't annotate %s:\n\n" "No vmlinux file%s\nwas found in the path.\n\n" + "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n" "Please use:\n\n" " perf buildid-cache -vu vmlinux\n\n" "or:\n\n" diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 7e78e6c27078..e9996092a093 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -59,6 +59,8 @@ struct disasm_line { char *name; struct ins *ins; int line_nr; + float ipc; + u64 cycles; struct ins_operands ops; }; @@ -79,6 +81,17 @@ struct sym_hist { u64 addr[0]; }; +struct cyc_hist { + u64 start; + u64 cycles; + u64 cycles_aggr; + u32 num; + u32 num_aggr; + u8 have_start; + /* 1 byte 
padding */ + u16 reset; +}; + struct source_line_samples { double percent; double percent_sum; @@ -97,6 +110,7 @@ struct source_line { * @histogram: Array of addr hit histograms per event being monitored * @lines: If 'print_lines' is specified, per source code line percentages * @source: source parsed from a disassembler like objdump -dS + * @cyc_hist: Average cycles per basic block * * lines is allocated, percentages calculated and all sorted by percentage * when the annotation is about to be presented, so the percentages are for @@ -109,6 +123,7 @@ struct annotated_source { struct source_line *lines; int nr_histograms; int sizeof_sym_hist; + struct cyc_hist *cycles_hist; struct sym_hist histograms[0]; }; @@ -130,6 +145,10 @@ static inline struct annotation *symbol__annotation(struct symbol *sym) int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx); +int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, + struct addr_map_symbol *start, + unsigned cycles); + int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); int symbol__alloc_hist(struct symbol *sym); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 7e7405c9b936..a980e7c50ee0 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -47,17 +47,15 @@ #include "debug.h" #include "parse-options.h" +#include "intel-pt.h" +#include "intel-bts.h" + int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, struct auxtrace_mmap_params *mp, void *userpg, int fd) { struct perf_event_mmap_page *pc = userpg; -#if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) - pr_err("Cannot use AUX area tracing mmaps\n"); - return -1; -#endif - WARN_ONCE(mm->base, "Uninitialized auxtrace_mmap\n"); mm->userpg = userpg; @@ -73,6 +71,11 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, return 0; } +#if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) + pr_err("Cannot use AUX area tracing mmaps\n"); + return 
-1; +#endif + pc->aux_offset = mp->offset; pc->aux_size = mp->len; @@ -876,7 +879,7 @@ static bool auxtrace__dont_decode(struct perf_session *session) int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_session *session __maybe_unused) + struct perf_session *session) { enum auxtrace_type type = event->auxtrace_info.type; @@ -884,6 +887,10 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, fprintf(stdout, " type: %u\n", type); switch (type) { + case PERF_AUXTRACE_INTEL_PT: + return intel_pt_process_auxtrace_info(event, session); + case PERF_AUXTRACE_INTEL_BTS: + return intel_bts_process_auxtrace_info(event, session); case PERF_AUXTRACE_UNKNOWN: default: return -EINVAL; @@ -942,6 +949,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, struct itrace_synth_opts *synth_opts = opt->value; const char *p; char *endptr; + bool period_type_set = false; synth_opts->set = true; @@ -970,10 +978,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, case 'i': synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS; + period_type_set = true; break; case 't': synth_opts->period_type = PERF_ITRACE_PERIOD_TICKS; + period_type_set = true; break; case 'm': synth_opts->period *= 1000; @@ -986,6 +996,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, goto out_err; synth_opts->period_type = PERF_ITRACE_PERIOD_NANOSECS; + period_type_set = true; break; case '\0': goto out; @@ -1039,7 +1050,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, } out: if (synth_opts->instructions) { - if (!synth_opts->period_type) + if (!period_type_set) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; if (!synth_opts->period) diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 471aecbc4d68..bf72b77a588a 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -39,6 
+39,8 @@ struct events_stats; enum auxtrace_type { PERF_AUXTRACE_UNKNOWN, + PERF_AUXTRACE_INTEL_PT, + PERF_AUXTRACE_INTEL_BTS, }; enum itrace_period_type { diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 1f6fc2323ef9..d909459fb54c 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -93,6 +93,38 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf) return raw - build_id; } +int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) +{ + char notes[PATH_MAX]; + u8 build_id[BUILD_ID_SIZE]; + int ret; + + if (!root_dir) + root_dir = ""; + + scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir); + + ret = sysfs__read_build_id(notes, build_id, sizeof(build_id)); + if (ret < 0) + return ret; + + return build_id__sprintf(build_id, sizeof(build_id), sbuild_id); +} + +int filename__sprintf_build_id(const char *pathname, char *sbuild_id) +{ + u8 build_id[BUILD_ID_SIZE]; + int ret; + + ret = filename__read_build_id(pathname, build_id, sizeof(build_id)); + if (ret < 0) + return ret; + else if (ret != sizeof(build_id)) + return -EINVAL; + + return build_id__sprintf(build_id, sizeof(build_id), sbuild_id); +} + /* asnprintf consolidates asprintf and snprintf */ static int asnprintf(char **strp, size_t size, const char *fmt, ...) 
{ @@ -124,7 +156,7 @@ static char *build_id__filename(const char *sbuild_id, char *bf, size_t size) char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) { - char build_id_hex[BUILD_ID_SIZE * 2 + 1]; + char build_id_hex[SBUILD_ID_SIZE]; if (!dso->has_build_id) return NULL; @@ -291,7 +323,7 @@ int build_id_cache__list_build_ids(const char *pathname, struct dirent *d; int ret = 0; - list = strlist__new(true, NULL); + list = strlist__new(NULL, NULL); dir_name = build_id_cache__dirname_from_path(pathname, false, false); if (!list || !dir_name) { ret = -ENOMEM; @@ -384,7 +416,7 @@ static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, const char *name, bool is_kallsyms, bool is_vdso) { - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; build_id__sprintf(build_id, build_id_size, sbuild_id); diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 85011222cc14..27a14a8a945b 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -1,7 +1,8 @@ #ifndef PERF_BUILD_ID_H_ #define PERF_BUILD_ID_H_ 1 -#define BUILD_ID_SIZE 20 +#define BUILD_ID_SIZE 20 +#define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1) #include "tool.h" #include "strlist.h" @@ -11,6 +12,9 @@ extern struct perf_tool build_id__mark_dso_hit_ops; struct dso; int build_id__sprintf(const u8 *build_id, int len, char *bf); +int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); +int filename__sprintf_build_id(const char *pathname, char *sbuild_id); + char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 9f643ee77001..773fe13ce627 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -25,96 +25,9 @@ __thread struct callchain_cursor callchain_cursor; -#ifdef HAVE_DWARF_UNWIND_SUPPORT -static int get_stack_size(const 
char *str, unsigned long *_size) -{ - char *endptr; - unsigned long size; - unsigned long max_size = round_down(USHRT_MAX, sizeof(u64)); - - size = strtoul(str, &endptr, 0); - - do { - if (*endptr) - break; - - size = round_up(size, sizeof(u64)); - if (!size || size > max_size) - break; - - *_size = size; - return 0; - - } while (0); - - pr_err("callchain: Incorrect stack dump size (max %ld): %s\n", - max_size, str); - return -1; -} -#endif /* HAVE_DWARF_UNWIND_SUPPORT */ - -int parse_callchain_record_opt(const char *arg) +int parse_callchain_record_opt(const char *arg, struct callchain_param *param) { - char *tok, *name, *saveptr = NULL; - char *buf; - int ret = -1; - - /* We need buffer that we know we can write to. */ - buf = malloc(strlen(arg) + 1); - if (!buf) - return -ENOMEM; - - strcpy(buf, arg); - - tok = strtok_r((char *)buf, ",", &saveptr); - name = tok ? : (char *)buf; - - do { - /* Framepointer style */ - if (!strncmp(name, "fp", sizeof("fp"))) { - if (!strtok_r(NULL, ",", &saveptr)) { - callchain_param.record_mode = CALLCHAIN_FP; - ret = 0; - } else - pr_err("callchain: No more arguments " - "needed for --call-graph fp\n"); - break; - -#ifdef HAVE_DWARF_UNWIND_SUPPORT - /* Dwarf style */ - } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { - const unsigned long default_stack_dump_size = 8192; - - ret = 0; - callchain_param.record_mode = CALLCHAIN_DWARF; - callchain_param.dump_size = default_stack_dump_size; - - tok = strtok_r(NULL, ",", &saveptr); - if (tok) { - unsigned long size = 0; - - ret = get_stack_size(tok, &size); - callchain_param.dump_size = size; - } -#endif /* HAVE_DWARF_UNWIND_SUPPORT */ - } else if (!strncmp(name, "lbr", sizeof("lbr"))) { - if (!strtok_r(NULL, ",", &saveptr)) { - callchain_param.record_mode = CALLCHAIN_LBR; - ret = 0; - } else - pr_err("callchain: No more arguments " - "needed for --call-graph lbr\n"); - break; - } else { - pr_err("callchain: Unknown --call-graph option " - "value: %s\n", arg); - break; - } - - } 
while (0); - - free(buf); - return ret; + return parse_callchain_record(arg, param); } static int parse_callchain_mode(const char *value) @@ -219,7 +132,7 @@ int perf_callchain_config(const char *var, const char *value) var += sizeof("call-graph.") - 1; if (!strcmp(var, "record-mode")) - return parse_callchain_record_opt(value); + return parse_callchain_record_opt(value, &callchain_param); #ifdef HAVE_DWARF_UNWIND_SUPPORT if (!strcmp(var, "dump-size")) { unsigned long size = 0; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 679c2c6d8ade..acee2b3cd801 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -177,7 +177,8 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * bool hide_unresolved); extern const char record_callchain_help[]; -int parse_callchain_record_opt(const char *arg); +extern int parse_callchain_record(const char *arg, struct callchain_param *param); +int parse_callchain_record_opt(const char *arg, struct callchain_param *param); int parse_callchain_report_opt(const char *arg); int perf_callchain_config(const char *var, const char *value); diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h index 68888c29b04a..3bee6773ddb0 100644 --- a/tools/perf/util/cloexec.h +++ b/tools/perf/util/cloexec.h @@ -4,7 +4,7 @@ unsigned long perf_event_open_cloexec_flag(void); #ifdef __GLIBC_PREREQ -#if !__GLIBC_PREREQ(2, 6) +#if !__GLIBC_PREREQ(2, 6) && !defined(__UCLIBC__) extern int sched_getcpu(void) __THROW; #endif #endif diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index 55355b3d4f85..9b9565416f90 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -67,8 +67,9 @@ static int __color_vsnprintf(char *bf, size_t size, const char *color, return r; } +/* Colors are not included in return value */ static int __color_vfprintf(FILE *fp, const char *color, const char *fmt, - va_list args, const char *trail) + va_list args) { int r = 0; @@ 
-83,12 +84,10 @@ static int __color_vfprintf(FILE *fp, const char *color, const char *fmt, } if (perf_use_color_default && *color) - r += fprintf(fp, "%s", color); + fprintf(fp, "%s", color); r += vfprintf(fp, fmt, args); if (perf_use_color_default && *color) - r += fprintf(fp, "%s", PERF_COLOR_RESET); - if (trail) - r += fprintf(fp, "%s", trail); + fprintf(fp, "%s", PERF_COLOR_RESET); return r; } @@ -100,7 +99,7 @@ int color_vsnprintf(char *bf, size_t size, const char *color, int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args) { - return __color_vfprintf(fp, color, fmt, args, NULL); + return __color_vfprintf(fp, color, fmt, args); } int color_snprintf(char *bf, size_t size, const char *color, @@ -126,16 +125,6 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) return r; } -int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...) -{ - va_list args; - int r; - va_start(args, fmt); - r = __color_vfprintf(fp, color, fmt, args, "\n"); - va_end(args); - return r; -} - /* * This function splits the buffer by newlines and colors the lines individually. 
* diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 38146f922c54..a93997f16dec 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -35,7 +35,6 @@ int color_vsnprintf(char *bf, size_t size, const char *color, int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args); int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...); -int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...); int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); int value_color_snprintf(char *bf, size_t size, const char *fmt, double value); int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index e18f653cd7db..2e452ac1353d 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -12,6 +12,7 @@ #include "cache.h" #include "exec_cmd.h" #include "util/hist.h" /* perf_hist_config */ +#include "util/llvm-utils.h" /* perf_llvm_config */ #define MAXNAME (256) @@ -408,6 +409,9 @@ int perf_default_config(const char *var, const char *value, if (!prefixcmp(var, "call-graph.")) return perf_callchain_config(var, value); + if (!prefixcmp(var, "llvm.")) + return perf_llvm_config(var, value); + /* Add other config variables here. 
*/ return 0; } diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c new file mode 100644 index 000000000000..e3fde313deb2 --- /dev/null +++ b/tools/perf/util/counts.c @@ -0,0 +1,52 @@ +#include <stdlib.h> +#include "evsel.h" +#include "counts.h" + +struct perf_counts *perf_counts__new(int ncpus, int nthreads) +{ + struct perf_counts *counts = zalloc(sizeof(*counts)); + + if (counts) { + struct xyarray *values; + + values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values)); + if (!values) { + free(counts); + return NULL; + } + + counts->values = values; + } + + return counts; +} + +void perf_counts__delete(struct perf_counts *counts) +{ + if (counts) { + xyarray__delete(counts->values); + free(counts); + } +} + +static void perf_counts__reset(struct perf_counts *counts) +{ + xyarray__reset(counts->values); +} + +void perf_evsel__reset_counts(struct perf_evsel *evsel) +{ + perf_counts__reset(evsel->counts); +} + +int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads) +{ + evsel->counts = perf_counts__new(ncpus, nthreads); + return evsel->counts != NULL ? 
0 : -ENOMEM; +} + +void perf_evsel__free_counts(struct perf_evsel *evsel) +{ + perf_counts__delete(evsel->counts); + evsel->counts = NULL; +} diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h new file mode 100644 index 000000000000..34d8baaf558a --- /dev/null +++ b/tools/perf/util/counts.h @@ -0,0 +1,37 @@ +#ifndef __PERF_COUNTS_H +#define __PERF_COUNTS_H + +#include "xyarray.h" + +struct perf_counts_values { + union { + struct { + u64 val; + u64 ena; + u64 run; + }; + u64 values[3]; + }; +}; + +struct perf_counts { + s8 scaled; + struct perf_counts_values aggr; + struct xyarray *values; +}; + + +static inline struct perf_counts_values* +perf_counts(struct perf_counts *counts, int cpu, int thread) +{ + return xyarray__entry(counts->values, cpu, thread); +} + +struct perf_counts *perf_counts__new(int ncpus, int nthreads); +void perf_counts__delete(struct perf_counts *counts); + +void perf_evsel__reset_counts(struct perf_evsel *evsel); +int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads); +void perf_evsel__free_counts(struct perf_evsel *evsel); + +#endif /* __PERF_COUNTS_H */ diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 2da5581ec74d..86d9c7302598 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -36,6 +36,11 @@ static int _eprintf(int level, int var, const char *fmt, va_list args) return ret; } +int veprintf(int level, int var, const char *fmt, va_list args) +{ + return _eprintf(level, var, fmt, args); +} + int eprintf(int level, int var, const char *fmt, ...) { va_list args; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index caac2fdc6105..8b9a088c32ab 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -50,6 +50,7 @@ void pr_stat(const char *fmt, ...); int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4))); int eprintf_time(int level, int var, u64 t, const char *fmt, ...) 
__attribute__((format(printf, 4, 5))); +int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 2fe98bb0e95b..fc8db9c764ac 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -137,6 +137,10 @@ struct dso { struct rb_node rb_node; /* rbtree node sorted by long name */ struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; + struct { + u64 addr; + struct symbol *symbol; + } last_find_result[MAP__NR_TYPES]; void *a2l; char *symsrc_filename; unsigned int a2l_fails; @@ -320,6 +324,8 @@ struct dso *__dsos__findnew(struct dsos *dsos, const char *name); struct dso *dsos__findnew(struct dsos *dsos, const char *name); bool __dsos__read_build_ids(struct list_head *head, bool with_hits); +void dso__reset_find_symbol_cache(struct dso *dso); + size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm); size_t __dsos__fprintf(struct list_head *head, FILE *fp); diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 57f3ef41c2bc..a509aa8433a1 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -734,15 +734,18 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) Dwarf_Lines *lines; Dwarf_Line *line; Dwarf_Addr addr; - const char *fname; + const char *fname, *decf = NULL; int lineno, ret = 0; + int decl = 0, inl; Dwarf_Die die_mem, *cu_die; size_t nlines, i; /* Get the CU die */ - if (dwarf_tag(rt_die) != DW_TAG_compile_unit) + if (dwarf_tag(rt_die) != DW_TAG_compile_unit) { cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL); - else + dwarf_decl_line(rt_die, &decl); + decf = dwarf_decl_file(rt_die); + } else cu_die = rt_die; if (!cu_die) { pr_debug2("Failed to get CU from given DIE.\n"); @@ -767,15 +770,21 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) continue; 
} /* Filter lines based on address */ - if (rt_die != cu_die) + if (rt_die != cu_die) { /* * Address filtering * The line is included in given function, and * no inline block includes it. */ - if (!dwarf_haspc(rt_die, addr) || - die_find_inlinefunc(rt_die, addr, &die_mem)) + if (!dwarf_haspc(rt_die, addr)) continue; + if (die_find_inlinefunc(rt_die, addr, &die_mem)) { + dwarf_decl_line(&die_mem, &inl); + if (inl != decl || + decf != dwarf_decl_file(&die_mem)) + continue; + } + } /* Get source line */ fname = dwarf_linesrc(line, NULL, NULL); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 67a977e5d0ab..7ff61274ed57 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -26,6 +26,8 @@ static const char *perf_event__names[] = { [PERF_RECORD_AUX] = "AUX", [PERF_RECORD_ITRACE_START] = "ITRACE_START", [PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES", + [PERF_RECORD_SWITCH] = "SWITCH", + [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -749,6 +751,14 @@ int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused, return machine__process_lost_samples_event(machine, event, sample); } +int perf_event__process_switch(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + return machine__process_switch_event(machine, event); +} + size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) { return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n", @@ -827,6 +837,20 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) event->itrace_start.pid, event->itrace_start.tid); } +size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) +{ + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + const char *in_out = out ? 
"OUT" : "IN "; + + if (event->header.type == PERF_RECORD_SWITCH) + return fprintf(fp, " %s\n", in_out); + + return fprintf(fp, " %s %s pid/tid: %5u/%-5u\n", + in_out, out ? "next" : "prev", + event->context_switch.next_prev_pid, + event->context_switch.next_prev_tid); +} + size_t perf_event__fprintf(union perf_event *event, FILE *fp) { size_t ret = fprintf(fp, "PERF_RECORD_%s", @@ -852,6 +876,10 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_ITRACE_START: ret += perf_event__fprintf_itrace_start(event, fp); break; + case PERF_RECORD_SWITCH: + case PERF_RECORD_SWITCH_CPU_WIDE: + ret += perf_event__fprintf_switch(event, fp); + break; default: ret += fprintf(fp, "\n"); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c53f36384b64..f729df5e25e6 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -134,7 +134,8 @@ struct branch_flags { u64 predicted:1; u64 in_tx:1; u64 abort:1; - u64 reserved:60; + u64 cycles:16; + u64 reserved:44; }; struct branch_entry { @@ -348,6 +349,12 @@ struct itrace_start_event { u32 pid, tid; }; +struct context_switch_event { + struct perf_event_header header; + u32 next_prev_pid; + u32 next_prev_tid; +}; + union perf_event { struct perf_event_header header; struct mmap_event mmap; @@ -369,6 +376,7 @@ union perf_event { struct auxtrace_error_event auxtrace_error; struct aux_event aux; struct itrace_start_event itrace_start; + struct context_switch_event context_switch; }; void perf_event__print_totals(void); @@ -418,6 +426,10 @@ int perf_event__process_itrace_start(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_switch(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process_mmap(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -480,6 +492,7 @@ size_t perf_event__fprintf_mmap2(union 
perf_event *event, FILE *fp); size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp); size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); u64 kallsyms__get_function_start(const char *kallsyms_filename, diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6cfdee68e763..8d00039d6a20 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -98,6 +98,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist) evlist__for_each_safe(evlist, n, pos) { list_del_init(&pos->node); + pos->evlist = NULL; perf_evsel__delete(pos); } @@ -125,6 +126,7 @@ void perf_evlist__delete(struct perf_evlist *evlist) void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) { + entry->evlist = evlist; list_add_tail(&entry->node, &evlist->entries); entry->idx = evlist->nr_entries; entry->tracking = !entry->idx; @@ -573,7 +575,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) { struct perf_sample_id *sid; - if (evlist->nr_entries == 1) + if (evlist->nr_entries == 1 || !id) return perf_evlist__first(evlist); sid = perf_evlist__id2sid(evlist, id); @@ -1102,7 +1104,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, } static int perf_evlist__propagate_maps(struct perf_evlist *evlist, - struct target *target) + bool has_user_cpus) { struct perf_evsel *evsel; @@ -1111,15 +1113,16 @@ static int perf_evlist__propagate_maps(struct perf_evlist *evlist, * We already have cpus for evsel (via PMU sysfs) so * keep it, if there's no target cpu list defined. 
*/ - if (evsel->cpus && target->cpu_list) + if (evsel->cpus && has_user_cpus) cpu_map__put(evsel->cpus); - if (!evsel->cpus || target->cpu_list) + if (!evsel->cpus || has_user_cpus) evsel->cpus = cpu_map__get(evlist->cpus); evsel->threads = thread_map__get(evlist->threads); - if (!evsel->cpus || !evsel->threads) + if ((evlist->cpus && !evsel->cpus) || + (evlist->threads && !evsel->threads)) return -ENOMEM; } @@ -1142,7 +1145,7 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) if (evlist->cpus == NULL) goto out_delete_threads; - return perf_evlist__propagate_maps(evlist, target); + return perf_evlist__propagate_maps(evlist, !!target->cpu_list); out_delete_threads: thread_map__put(evlist->threads); @@ -1150,6 +1153,23 @@ out_delete_threads: return -1; } +int perf_evlist__set_maps(struct perf_evlist *evlist, + struct cpu_map *cpus, + struct thread_map *threads) +{ + if (evlist->cpus) + cpu_map__put(evlist->cpus); + + evlist->cpus = cpus; + + if (evlist->threads) + thread_map__put(evlist->threads); + + evlist->threads = threads; + + return perf_evlist__propagate_maps(evlist, false); +} + int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) { struct perf_evsel *evsel; @@ -1161,7 +1181,7 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e if (evsel->filter == NULL) continue; - err = perf_evsel__set_filter(evsel, ncpus, nthreads, evsel->filter); + err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter); if (err) { *err_evsel = evsel; break; @@ -1175,11 +1195,9 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) { struct perf_evsel *evsel; int err = 0; - const int ncpus = cpu_map__nr(evlist->cpus), - nthreads = thread_map__nr(evlist->threads); evlist__for_each(evlist, evsel) { - err = perf_evsel__set_filter(evsel, ncpus, nthreads, filter); + err = perf_evsel__set_filter(evsel, filter); if (err) break; } @@ -1257,6 +1275,16 @@ u64 
perf_evlist__combined_sample_type(struct perf_evlist *evlist) return __perf_evlist__combined_sample_type(evlist); } +u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + u64 branch_type = 0; + + evlist__for_each(evlist, evsel) + branch_type |= evsel->attr.branch_sample_type; + return branch_type; +} + bool perf_evlist__valid_read_format(struct perf_evlist *evlist) { struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 037633c1da9d..b39a6198f4ac 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -56,6 +56,7 @@ struct perf_evlist { struct cpu_map *cpus; struct perf_evsel *selected; struct events_stats stats; + struct perf_env *env; }; struct perf_evsel_str_handler { @@ -114,6 +115,8 @@ void perf_evlist__close(struct perf_evlist *evlist); void perf_evlist__set_id_pos(struct perf_evlist *evlist); bool perf_can_sample_identifier(void); +bool perf_can_record_switch_events(void); +bool perf_can_record_cpu_wide(void); void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts); int record_opts__config(struct record_opts *opts); @@ -152,14 +155,9 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist, void perf_evlist__set_selected(struct perf_evlist *evlist, struct perf_evsel *evsel); -static inline void perf_evlist__set_maps(struct perf_evlist *evlist, - struct cpu_map *cpus, - struct thread_map *threads) -{ - evlist->cpus = cpus; - evlist->threads = threads; -} - +int perf_evlist__set_maps(struct perf_evlist *evlist, + struct cpu_map *cpus, + struct thread_map *threads); int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target); int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel); @@ -169,6 +167,7 @@ void perf_evlist__set_leader(struct perf_evlist *evlist); u64 perf_evlist__read_format(struct perf_evlist *evlist); u64 
__perf_evlist__combined_sample_type(struct perf_evlist *evlist); u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist); +u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist); bool perf_evlist__sample_id_all(struct perf_evlist *evlist); u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2936b3080722..bac25f41a751 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -206,10 +206,13 @@ void perf_evsel__init(struct perf_evsel *evsel, evsel->leader = evsel; evsel->unit = ""; evsel->scale = 1.0; + evsel->evlist = NULL; INIT_LIST_HEAD(&evsel->node); + INIT_LIST_HEAD(&evsel->config_terms); perf_evsel__object.init(evsel); evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); perf_evsel__calc_id_pos(evsel); + evsel->cmdline_group_boundary = false; } struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) @@ -543,14 +546,15 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) static void perf_evsel__config_callgraph(struct perf_evsel *evsel, - struct record_opts *opts) + struct record_opts *opts, + struct callchain_param *param) { bool function = perf_evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->attr; perf_evsel__set_sample_bit(evsel, CALLCHAIN); - if (callchain_param.record_mode == CALLCHAIN_LBR) { + if (param->record_mode == CALLCHAIN_LBR) { if (!opts->branch_stack) { if (attr->exclude_user) { pr_warning("LBR callstack option is only available " @@ -566,12 +570,12 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel, "Falling back to framepointers.\n"); } - if (callchain_param.record_mode == CALLCHAIN_DWARF) { + if (param->record_mode == CALLCHAIN_DWARF) { if (!function) { perf_evsel__set_sample_bit(evsel, REGS_USER); perf_evsel__set_sample_bit(evsel, STACK_USER); attr->sample_regs_user = PERF_REGS_MASK; - attr->sample_stack_user = callchain_param.dump_size; + 
attr->sample_stack_user = param->dump_size; attr->exclude_callchain_user = 1; } else { pr_info("Cannot use DWARF unwind for function trace event," @@ -585,6 +589,97 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel, } } +static void +perf_evsel__reset_callgraph(struct perf_evsel *evsel, + struct callchain_param *param) +{ + struct perf_event_attr *attr = &evsel->attr; + + perf_evsel__reset_sample_bit(evsel, CALLCHAIN); + if (param->record_mode == CALLCHAIN_LBR) { + perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); + attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER | + PERF_SAMPLE_BRANCH_CALL_STACK); + } + if (param->record_mode == CALLCHAIN_DWARF) { + perf_evsel__reset_sample_bit(evsel, REGS_USER); + perf_evsel__reset_sample_bit(evsel, STACK_USER); + } +} + +static void apply_config_terms(struct perf_evsel *evsel, + struct record_opts *opts) +{ + struct perf_evsel_config_term *term; + struct list_head *config_terms = &evsel->config_terms; + struct perf_event_attr *attr = &evsel->attr; + struct callchain_param param; + u32 dump_size = 0; + char *callgraph_buf = NULL; + + /* callgraph default */ + param.record_mode = callchain_param.record_mode; + + list_for_each_entry(term, config_terms, list) { + switch (term->type) { + case PERF_EVSEL__CONFIG_TERM_PERIOD: + attr->sample_period = term->val.period; + attr->freq = 0; + break; + case PERF_EVSEL__CONFIG_TERM_FREQ: + attr->sample_freq = term->val.freq; + attr->freq = 1; + break; + case PERF_EVSEL__CONFIG_TERM_TIME: + if (term->val.time) + perf_evsel__set_sample_bit(evsel, TIME); + else + perf_evsel__reset_sample_bit(evsel, TIME); + break; + case PERF_EVSEL__CONFIG_TERM_CALLGRAPH: + callgraph_buf = term->val.callgraph; + break; + case PERF_EVSEL__CONFIG_TERM_STACK_USER: + dump_size = term->val.stack_user; + break; + default: + break; + } + } + + /* User explicitly set per-event callgraph, clear the old setting and reset. 
*/ + if ((callgraph_buf != NULL) || (dump_size > 0)) { + + /* parse callgraph parameters */ + if (callgraph_buf != NULL) { + if (!strcmp(callgraph_buf, "no")) { + param.enabled = false; + param.record_mode = CALLCHAIN_NONE; + } else { + param.enabled = true; + if (parse_callchain_record(callgraph_buf, &param)) { + pr_err("per-event callgraph setting for %s failed. " + "Apply callgraph global setting for it\n", + evsel->name); + return; + } + } + } + if (dump_size > 0) { + dump_size = round_up(dump_size, sizeof(u64)); + param.dump_size = dump_size; + } + + /* If global callgraph set, clear it */ + if (callchain_param.enabled) + perf_evsel__reset_callgraph(evsel, &callchain_param); + + /* set perf-event callgraph */ + if (param.enabled) + perf_evsel__config_callgraph(evsel, opts, &param); + } +} + /* * The enable_on_exec/disabled value strategy: * @@ -689,7 +784,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) evsel->attr.exclude_callchain_user = 1; if (callchain_param.enabled && !evsel->no_aux_samples) - perf_evsel__config_callgraph(evsel, opts); + perf_evsel__config_callgraph(evsel, opts, &callchain_param); if (opts->sample_intr_regs) { attr->sample_regs_intr = PERF_REGS_MASK; @@ -707,7 +802,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) */ if (opts->sample_time && (!perf_missing_features.sample_id_all && - (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu))) + (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu || + opts->sample_time_set))) perf_evsel__set_sample_bit(evsel, TIME); if (opts->raw_samples && !evsel->no_aux_samples) { @@ -736,6 +832,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; + if (opts->record_switch_events) + attr->context_switch = track; + if (opts->sample_transaction) perf_evsel__set_sample_bit(evsel, TRANSACTION); @@ -772,6 +871,12 @@ void 
perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) attr->use_clockid = 1; attr->clockid = opts->clockid; } + + /* + * Apply event specific term settings, + * it overloads any global configuration. + */ + apply_config_terms(evsel, opts); } static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -815,14 +920,44 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ncpus, int nthrea return 0; } -int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, - const char *filter) +int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads, + const char *filter) { return perf_evsel__run_ioctl(evsel, ncpus, nthreads, PERF_EVENT_IOC_SET_FILTER, (void *)filter); } +int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter) +{ + char *new_filter = strdup(filter); + + if (new_filter != NULL) { + free(evsel->filter); + evsel->filter = new_filter; + return 0; + } + + return -1; +} + +int perf_evsel__append_filter(struct perf_evsel *evsel, + const char *op, const char *filter) +{ + char *new_filter; + + if (evsel->filter == NULL) + return perf_evsel__set_filter(evsel, filter); + + if (asprintf(&new_filter,"(%s) %s (%s)", evsel->filter, op, filter) > 0) { + free(evsel->filter); + evsel->filter = new_filter; + return 0; + } + + return -1; +} + int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads) { return perf_evsel__run_ioctl(evsel, ncpus, nthreads, @@ -865,6 +1000,16 @@ static void perf_evsel__free_id(struct perf_evsel *evsel) zfree(&evsel->id); } +static void perf_evsel__free_config_terms(struct perf_evsel *evsel) +{ + struct perf_evsel_config_term *term, *h; + + list_for_each_entry_safe(term, h, &evsel->config_terms, list) { + list_del(&term->list); + free(term); + } +} + void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { int cpu, thread; @@ -882,8 +1027,10 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int 
ncpus, int nthreads) void perf_evsel__exit(struct perf_evsel *evsel) { assert(list_empty(&evsel->node)); + assert(evsel->evlist == NULL); perf_evsel__free_fd(evsel); perf_evsel__free_id(evsel); + perf_evsel__free_config_terms(evsel); close_cgroup(evsel->cgrp); cpu_map__put(evsel->cpus); thread_map__put(evsel->threads); @@ -1095,6 +1242,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(mmap2, p_unsigned); PRINT_ATTRf(comm_exec, p_unsigned); PRINT_ATTRf(use_clockid, p_unsigned); + PRINT_ATTRf(context_switch, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); @@ -2075,8 +2223,13 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, printed += perf_event_attr__fprintf(fp, &evsel->attr, __print_attr__fprintf, &first); } else if (details->freq) { - printed += comma_fprintf(fp, &first, " sample_freq=%" PRIu64, - (u64)evsel->attr.sample_freq); + const char *term = "sample_freq"; + + if (!evsel->attr.freq) + term = "sample_period"; + + printed += comma_fprintf(fp, &first, " %s=%" PRIu64, + term, (u64)evsel->attr.sample_freq); } out: fputc('\n', fp); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4a7ed5656cf0..298e6bbca200 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -9,7 +9,7 @@ #include "xyarray.h" #include "symbol.h" #include "cpumap.h" -#include "stat.h" +#include "counts.h" struct perf_evsel; @@ -31,8 +31,38 @@ struct perf_sample_id { struct cgroup_sel; +/* + * The 'struct perf_evsel_config_term' is used to pass event + * specific configuration data to perf_evsel__config routine. + * It is allocated within event parsing and attached to + * perf_evsel::config_terms list head. 
+*/ +enum { + PERF_EVSEL__CONFIG_TERM_PERIOD, + PERF_EVSEL__CONFIG_TERM_FREQ, + PERF_EVSEL__CONFIG_TERM_TIME, + PERF_EVSEL__CONFIG_TERM_CALLGRAPH, + PERF_EVSEL__CONFIG_TERM_STACK_USER, + PERF_EVSEL__CONFIG_TERM_MAX, +}; + +struct perf_evsel_config_term { + struct list_head list; + int type; + union { + u64 period; + u64 freq; + bool time; + char *callgraph; + u64 stack_user; + } val; +}; + /** struct perf_evsel - event selector * + * @evlist - evlist this evsel is in, if it is in one. + * @node - To insert it into evlist->entries or in other list_heads, say in + * the event parsing routines. * @name - Can be set to retain the original event name passed by the user, * so that when showing results in tools such as 'perf stat', we * show the name used, not some alias. @@ -46,6 +76,7 @@ struct cgroup_sel; */ struct perf_evsel { struct list_head node; + struct perf_evlist *evlist; struct perf_event_attr attr; char *filter; struct xyarray *fd; @@ -86,6 +117,8 @@ struct perf_evsel { unsigned long *per_pkg_mask; struct perf_evsel *leader; char *group_name; + bool cmdline_group_boundary; + struct list_head config_terms; }; union u64_swap { @@ -182,8 +215,11 @@ void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel, void perf_evsel__set_sample_id(struct perf_evsel *evsel, bool use_sample_identifier); -int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, - const char *filter); +int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter); +int perf_evsel__append_filter(struct perf_evsel *evsel, + const char *op, const char *filter); +int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads, + const char *filter); int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__open_per_cpu(struct perf_evsel *evsel, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 03ace57a800c..41814547da15 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ 
-923,17 +923,13 @@ static void print_cmdline(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { int nr, i; - char *str; nr = ph->env.nr_cmdline; - str = ph->env.cmdline; fprintf(fp, "# cmdline : "); - for (i = 0; i < nr; i++) { - fprintf(fp, "%s ", str); - str += strlen(str) + 1; - } + for (i = 0; i < nr; i++) + fprintf(fp, "%s ", ph->env.cmdline_argv[i]); fputc('\n', fp); } @@ -1541,14 +1537,13 @@ process_event_desc(struct perf_file_section *section __maybe_unused, return 0; } -static int process_cmdline(struct perf_file_section *section __maybe_unused, +static int process_cmdline(struct perf_file_section *section, struct perf_header *ph, int fd, void *data __maybe_unused) { ssize_t ret; - char *str; - u32 nr, i; - struct strbuf sb; + char *str, *cmdline = NULL, **argv = NULL; + u32 nr, i, len = 0; ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) @@ -1558,22 +1553,32 @@ static int process_cmdline(struct perf_file_section *section __maybe_unused, nr = bswap_32(nr); ph->env.nr_cmdline = nr; - strbuf_init(&sb, 128); + + cmdline = zalloc(section->size + nr + 1); + if (!cmdline) + return -1; + + argv = zalloc(sizeof(char *) * (nr + 1)); + if (!argv) + goto error; for (i = 0; i < nr; i++) { str = do_read_string(fd, ph); if (!str) goto error; - /* include a NULL character at the end */ - strbuf_add(&sb, str, strlen(str) + 1); + argv[i] = cmdline + len; + memcpy(argv[i], str, strlen(str) + 1); + len += strlen(str) + 1; free(str); } - ph->env.cmdline = strbuf_detach(&sb, NULL); + ph->env.cmdline = cmdline; + ph->env.cmdline_argv = (const char **) argv; return 0; error: - strbuf_release(&sb); + free(argv); + free(cmdline); return -1; } @@ -2509,6 +2514,7 @@ int perf_session__read_header(struct perf_session *session) if (session->evlist == NULL) return -ENOMEM; + session->evlist->env = &header->env; if (perf_data_file__is_pipe(file)) return perf_header__read_pipe(session); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 
d4d57962c591..396e4965f0c9 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -66,7 +66,7 @@ struct perf_header; int perf_file_header__read(struct perf_file_header *header, struct perf_header *ph, int fd); -struct perf_session_env { +struct perf_env { char *hostname; char *os_release; char *version; @@ -84,6 +84,7 @@ struct perf_session_env { int nr_pmu_mappings; int nr_groups; char *cmdline; + const char **cmdline_argv; char *sibling_cores; char *sibling_threads; char *numa_nodes; @@ -97,7 +98,7 @@ struct perf_header { u64 data_size; u64 feat_offset; DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); - struct perf_session_env env; + struct perf_env env; }; struct perf_evlist; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6f28d53d4e46..08b6cd945f1e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -151,6 +151,12 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); + if (h->srcline) + hists__new_col_len(hists, HISTC_SRCLINE, strlen(h->srcline)); + + if (h->srcfile) + hists__new_col_len(hists, HISTC_SRCFILE, strlen(h->srcfile)); + if (h->transaction) hists__new_col_len(hists, HISTC_TRANSACTION, hist_entry__transaction_len()); @@ -618,7 +624,8 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a * and not events sampled. Thus we use a pseudo period of 1. */ he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL, - 1, 1, 0, true); + 1, bi->flags.cycles ? 
bi->flags.cycles : 1, + 0, true); if (he == NULL) return -ENOMEM; @@ -760,6 +767,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, struct hist_entry **he_cache = iter->priv; struct hist_entry *he; struct hist_entry he_tmp = { + .hists = evsel__hists(evsel), .cpu = al->cpu, .thread = al->thread, .comm = thread__comm(al->thread), @@ -944,6 +952,8 @@ void hist_entry__delete(struct hist_entry *he) zfree(&he->stat_acc); free_srcline(he->srcline); + if (he->srcfile && he->srcfile[0]) + free(he->srcfile); free_callchain(he->callchain); free(he); } @@ -1099,13 +1109,14 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h) static void __hists__insert_output_entry(struct rb_root *entries, struct hist_entry *he, - u64 min_callchain_hits) + u64 min_callchain_hits, + bool use_callchain) { struct rb_node **p = &entries->rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; - if (symbol_conf.use_callchain) + if (use_callchain) callchain_param.sort(&he->sorted_chain, he->callchain, min_callchain_hits, &callchain_param); @@ -1129,6 +1140,13 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) struct rb_node *next; struct hist_entry *n; u64 min_callchain_hits; + struct perf_evsel *evsel = hists_to_evsel(hists); + bool use_callchain; + + if (evsel && !symbol_conf.show_ref_callgraph) + use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; + else + use_callchain = symbol_conf.use_callchain; min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); @@ -1147,7 +1165,7 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) n = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&n->rb_node_in); - __hists__insert_output_entry(&hists->entries, n, min_callchain_hits); + __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain); hists__inc_stats(hists, n); if (!n->filtered) @@ -1414,6 +1432,39 @@ int hists__link(struct hists *leader, 
struct hists *other) return 0; } +void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, + struct perf_sample *sample, bool nonany_branch_mode) +{ + struct branch_info *bi; + + /* If we have branch cycles always annotate them. */ + if (bs && bs->nr && bs->entries[0].flags.cycles) { + int i; + + bi = sample__resolve_bstack(sample, al); + if (bi) { + struct addr_map_symbol *prev = NULL; + + /* + * Ignore errors, still want to process the + * other entries. + * + * For non standard branch modes always + * force no IPC (prev == NULL) + * + * Note that perf stores branches reversed from + * program order! + */ + for (i = bs->nr - 1; i >= 0; i--) { + addr_map_symbol__account_cycles(&bi[i].from, + nonany_branch_mode ? NULL : prev, + bi[i].flags.cycles); + prev = &bi[i].to; + } + free(bi); + } + } +} size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 5ed8d9c22981..de6d58e7f0d5 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -30,6 +30,7 @@ enum hist_column { HISTC_PARENT, HISTC_CPU, HISTC_SRCLINE, + HISTC_SRCFILE, HISTC_MISPREDICT, HISTC_IN_TX, HISTC_ABORT, @@ -47,6 +48,7 @@ enum hist_column { HISTC_MEM_SNOOP, HISTC_MEM_DCACHELINE, HISTC_TRANSACTION, + HISTC_CYCLES, HISTC_NR_COLS, /* Last entry */ }; @@ -311,7 +313,7 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_session_env *env); + struct perf_env *env); int script_browse(const char *script_opt); #else static inline @@ -319,7 +321,7 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, const char *help __maybe_unused, struct hist_browser_timer *hbt __maybe_unused, float min_pcnt __maybe_unused, - struct perf_session_env *env __maybe_unused) + struct perf_env *env __maybe_unused) { return 0; 
} @@ -349,6 +351,9 @@ static inline int script_browse(const char *script_opt __maybe_unused) unsigned int hists__sort_list_width(struct hists *hists); +void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, + struct perf_sample *sample, bool nonany_branch_mode); + struct option; int parse_filter_percentage(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused); diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c new file mode 100644 index 000000000000..ea768625ab5b --- /dev/null +++ b/tools/perf/util/intel-bts.c @@ -0,0 +1,933 @@ +/* + * intel-bts.c: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#include <endian.h> +#include <byteswap.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> + +#include "cpumap.h" +#include "color.h" +#include "evsel.h" +#include "evlist.h" +#include "machine.h" +#include "session.h" +#include "util.h" +#include "thread.h" +#include "thread-stack.h" +#include "debug.h" +#include "tsc.h" +#include "auxtrace.h" +#include "intel-pt-decoder/intel-pt-insn-decoder.h" +#include "intel-bts.h" + +#define MAX_TIMESTAMP (~0ULL) + +#define INTEL_BTS_ERR_NOINSN 5 +#define INTEL_BTS_ERR_LOST 9 + +#if __BYTE_ORDER == __BIG_ENDIAN +#define le64_to_cpu bswap_64 +#else +#define le64_to_cpu +#endif + +struct intel_bts { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + u32 auxtrace_type; + struct perf_session *session; + struct machine *machine; + bool sampling_mode; + bool snapshot_mode; + bool data_queued; + u32 pmu_type; + struct perf_tsc_conversion tc; + bool cap_user_time_zero; + struct itrace_synth_opts synth_opts; + bool sample_branches; + u32 branches_filter; + u64 branches_sample_type; + u64 branches_id; + size_t branches_event_size; + bool synth_needs_swap; +}; + +struct intel_bts_queue { + struct intel_bts *bts; + unsigned int queue_nr; + struct auxtrace_buffer *buffer; + bool on_heap; + bool done; + pid_t pid; + pid_t tid; + int cpu; + u64 time; + struct intel_pt_insn intel_pt_insn; + u32 sample_flags; +}; + +struct branch { + u64 from; + u64 to; + u64 misc; +}; + +static void intel_bts_dump(struct intel_bts *bts __maybe_unused, + unsigned char *buf, size_t len) +{ + struct branch *branch; + size_t i, pos = 0, br_sz = sizeof(struct branch), sz; + const char *color = PERF_COLOR_BLUE; + + color_fprintf(stdout, color, + ". ... 
Intel BTS data: size %zu bytes\n", + len); + + while (len) { + if (len >= br_sz) + sz = br_sz; + else + sz = len; + printf("."); + color_fprintf(stdout, color, " %08x: ", pos); + for (i = 0; i < sz; i++) + color_fprintf(stdout, color, " %02x", buf[i]); + for (; i < br_sz; i++) + color_fprintf(stdout, color, " "); + if (len >= br_sz) { + branch = (struct branch *)buf; + color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n", + le64_to_cpu(branch->from), + le64_to_cpu(branch->to), + le64_to_cpu(branch->misc) & 0x10 ? + "pred" : "miss"); + } else { + color_fprintf(stdout, color, " Bad record!\n"); + } + pos += sz; + buf += sz; + len -= sz; + } +} + +static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf, + size_t len) +{ + printf(".\n"); + intel_bts_dump(bts, buf, len); +} + +static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample) +{ + union perf_event event; + int err; + + auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, + INTEL_BTS_ERR_LOST, sample->cpu, sample->pid, + sample->tid, 0, "Lost trace data"); + + err = perf_session__deliver_synth_event(bts->session, &event, NULL); + if (err) + pr_err("Intel BTS: failed to deliver error event, error %d\n", + err); + + return err; +} + +static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts, + unsigned int queue_nr) +{ + struct intel_bts_queue *btsq; + + btsq = zalloc(sizeof(struct intel_bts_queue)); + if (!btsq) + return NULL; + + btsq->bts = bts; + btsq->queue_nr = queue_nr; + btsq->pid = -1; + btsq->tid = -1; + btsq->cpu = -1; + + return btsq; +} + +static int intel_bts_setup_queue(struct intel_bts *bts, + struct auxtrace_queue *queue, + unsigned int queue_nr) +{ + struct intel_bts_queue *btsq = queue->priv; + + if (list_empty(&queue->head)) + return 0; + + if (!btsq) { + btsq = intel_bts_alloc_queue(bts, queue_nr); + if (!btsq) + return -ENOMEM; + queue->priv = btsq; + + if (queue->cpu != -1) + btsq->cpu = queue->cpu; + btsq->tid = 
queue->tid; + } + + if (bts->sampling_mode) + return 0; + + if (!btsq->on_heap && !btsq->buffer) { + int ret; + + btsq->buffer = auxtrace_buffer__next(queue, NULL); + if (!btsq->buffer) + return 0; + + ret = auxtrace_heap__add(&bts->heap, queue_nr, + btsq->buffer->reference); + if (ret) + return ret; + btsq->on_heap = true; + } + + return 0; +} + +static int intel_bts_setup_queues(struct intel_bts *bts) +{ + unsigned int i; + int ret; + + for (i = 0; i < bts->queues.nr_queues; i++) { + ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i], + i); + if (ret) + return ret; + } + return 0; +} + +static inline int intel_bts_update_queues(struct intel_bts *bts) +{ + if (bts->queues.new_data) { + bts->queues.new_data = false; + return intel_bts_setup_queues(bts); + } + return 0; +} + +static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a, + unsigned char *buf_b, size_t len_b) +{ + size_t offs, len; + + if (len_a > len_b) + offs = len_a - len_b; + else + offs = 0; + + for (; offs < len_a; offs += sizeof(struct branch)) { + len = len_a - offs; + if (!memcmp(buf_a + offs, buf_b, len)) + return buf_b + len; + } + + return buf_b; +} + +static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue, + struct auxtrace_buffer *b) +{ + struct auxtrace_buffer *a; + void *start; + + if (b->list.prev == &queue->head) + return 0; + a = list_entry(b->list.prev, struct auxtrace_buffer, list); + start = intel_bts_find_overlap(a->data, a->size, b->data, b->size); + if (!start) + return -EINVAL; + b->use_size = b->data + b->size - start; + b->use_data = start; + return 0; +} + +static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, + struct branch *branch) +{ + int ret; + struct intel_bts *bts = btsq->bts; + union perf_event event; + struct perf_sample sample = { .ip = 0, }; + + event.sample.header.type = PERF_RECORD_SAMPLE; + event.sample.header.misc = PERF_RECORD_MISC_USER; + event.sample.header.size = sizeof(struct perf_event_header); + 
+ sample.ip = le64_to_cpu(branch->from); + sample.pid = btsq->pid; + sample.tid = btsq->tid; + sample.addr = le64_to_cpu(branch->to); + sample.id = btsq->bts->branches_id; + sample.stream_id = btsq->bts->branches_id; + sample.period = 1; + sample.cpu = btsq->cpu; + sample.flags = btsq->sample_flags; + sample.insn_len = btsq->intel_pt_insn.length; + + if (bts->synth_opts.inject) { + event.sample.header.size = bts->branches_event_size; + ret = perf_event__synthesize_sample(&event, + bts->branches_sample_type, + 0, &sample, + bts->synth_needs_swap); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(bts->session, &event, &sample); + if (ret) + pr_err("Intel BTS: failed to deliver branch event, error %d\n", + ret); + + return ret; +} + +static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip) +{ + struct machine *machine = btsq->bts->machine; + struct thread *thread; + struct addr_location al; + unsigned char buf[1024]; + size_t bufsz; + ssize_t len; + int x86_64; + uint8_t cpumode; + int err = -1; + + bufsz = intel_pt_insn_max_size(); + + if (machine__kernel_ip(machine, ip)) + cpumode = PERF_RECORD_MISC_KERNEL; + else + cpumode = PERF_RECORD_MISC_USER; + + thread = machine__find_thread(machine, -1, btsq->tid); + if (!thread) + return -1; + + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al); + if (!al.map || !al.map->dso) + goto out_put; + + len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, bufsz); + if (len <= 0) + goto out_put; + + /* Load maps to ensure dso->is_64_bit has been updated */ + map__load(al.map, machine->symbol_filter); + + x86_64 = al.map->dso->is_64_bit; + + if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn)) + goto out_put; + + err = 0; +out_put: + thread__put(thread); + return err; +} + +static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid, + pid_t tid, u64 ip) +{ + union perf_event event; + int err; + + auxtrace_synth_error(&event.auxtrace_error, 
PERF_AUXTRACE_ERROR_ITRACE, + INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip, + "Failed to get instruction"); + + err = perf_session__deliver_synth_event(bts->session, &event, NULL); + if (err) + pr_err("Intel BTS: failed to deliver error event, error %d\n", + err); + + return err; +} + +static int intel_bts_get_branch_type(struct intel_bts_queue *btsq, + struct branch *branch) +{ + int err; + + if (!branch->from) { + if (branch->to) + btsq->sample_flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_BEGIN; + else + btsq->sample_flags = 0; + btsq->intel_pt_insn.length = 0; + } else if (!branch->to) { + btsq->sample_flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_END; + btsq->intel_pt_insn.length = 0; + } else { + err = intel_bts_get_next_insn(btsq, branch->from); + if (err) { + btsq->sample_flags = 0; + btsq->intel_pt_insn.length = 0; + if (!btsq->bts->synth_opts.errors) + return 0; + err = intel_bts_synth_error(btsq->bts, btsq->cpu, + btsq->pid, btsq->tid, + branch->from); + return err; + } + btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op); + /* Check for an async branch into the kernel */ + if (!machine__kernel_ip(btsq->bts->machine, branch->from) && + machine__kernel_ip(btsq->bts->machine, branch->to) && + btsq->sample_flags != (PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_CALL | + PERF_IP_FLAG_SYSCALLRET)) + btsq->sample_flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_CALL | + PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_INTERRUPT; + } + + return 0; +} + +static int intel_bts_process_buffer(struct intel_bts_queue *btsq, + struct auxtrace_buffer *buffer) +{ + struct branch *branch; + size_t sz, bsz = sizeof(struct branch); + u32 filter = btsq->bts->branches_filter; + int err = 0; + + if (buffer->use_data) { + sz = buffer->use_size; + branch = buffer->use_data; + } else { + sz = buffer->size; + branch = buffer->data; + } + + if (!btsq->bts->sample_branches) + return 0; + + for (; sz > bsz; branch += 1, sz -= bsz) { + if (!branch->from && !branch->to) + continue; + 
intel_bts_get_branch_type(btsq, branch); + if (filter && !(filter & btsq->sample_flags)) + continue; + err = intel_bts_synth_branch_sample(btsq, branch); + if (err) + break; + } + return err; +} + +static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp) +{ + struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer; + struct auxtrace_queue *queue; + struct thread *thread; + int err; + + if (btsq->done) + return 1; + + if (btsq->pid == -1) { + thread = machine__find_thread(btsq->bts->machine, -1, + btsq->tid); + if (thread) + btsq->pid = thread->pid_; + } else { + thread = machine__findnew_thread(btsq->bts->machine, btsq->pid, + btsq->tid); + } + + queue = &btsq->bts->queues.queue_array[btsq->queue_nr]; + + if (!buffer) + buffer = auxtrace_buffer__next(queue, NULL); + + if (!buffer) { + if (!btsq->bts->sampling_mode) + btsq->done = 1; + err = 1; + goto out_put; + } + + /* Currently there is no support for split buffers */ + if (buffer->consecutive) { + err = -EINVAL; + goto out_put; + } + + if (!buffer->data) { + int fd = perf_data_file__fd(btsq->bts->session->file); + + buffer->data = auxtrace_buffer__get_data(buffer, fd); + if (!buffer->data) { + err = -ENOMEM; + goto out_put; + } + } + + if (btsq->bts->snapshot_mode && !buffer->consecutive && + intel_bts_do_fix_overlap(queue, buffer)) { + err = -ENOMEM; + goto out_put; + } + + if (!btsq->bts->synth_opts.callchain && thread && + (!old_buffer || btsq->bts->sampling_mode || + (btsq->bts->snapshot_mode && !buffer->consecutive))) + thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1); + + err = intel_bts_process_buffer(btsq, buffer); + + auxtrace_buffer__drop_data(buffer); + + btsq->buffer = auxtrace_buffer__next(queue, buffer); + if (btsq->buffer) { + if (timestamp) + *timestamp = btsq->buffer->reference; + } else { + if (!btsq->bts->sampling_mode) + btsq->done = 1; + } +out_put: + thread__put(thread); + return err; +} + +static int intel_bts_flush_queue(struct intel_bts_queue 
*btsq) +{ + u64 ts = 0; + int ret; + + while (1) { + ret = intel_bts_process_queue(btsq, &ts); + if (ret < 0) + return ret; + if (ret) + break; + } + return 0; +} + +static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid) +{ + struct auxtrace_queues *queues = &bts->queues; + unsigned int i; + + for (i = 0; i < queues->nr_queues; i++) { + struct auxtrace_queue *queue = &bts->queues.queue_array[i]; + struct intel_bts_queue *btsq = queue->priv; + + if (btsq && btsq->tid == tid) + return intel_bts_flush_queue(btsq); + } + return 0; +} + +static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp) +{ + while (1) { + unsigned int queue_nr; + struct auxtrace_queue *queue; + struct intel_bts_queue *btsq; + u64 ts = 0; + int ret; + + if (!bts->heap.heap_cnt) + return 0; + + if (bts->heap.heap_array[0].ordinal > timestamp) + return 0; + + queue_nr = bts->heap.heap_array[0].queue_nr; + queue = &bts->queues.queue_array[queue_nr]; + btsq = queue->priv; + + auxtrace_heap__pop(&bts->heap); + + ret = intel_bts_process_queue(btsq, &ts); + if (ret < 0) { + auxtrace_heap__add(&bts->heap, queue_nr, ts); + return ret; + } + + if (!ret) { + ret = auxtrace_heap__add(&bts->heap, queue_nr, ts); + if (ret < 0) + return ret; + } else { + btsq->on_heap = false; + } + } + + return 0; +} + +static int intel_bts_process_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + u64 timestamp; + int err; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) { + pr_err("Intel BTS requires ordered events\n"); + return -EINVAL; + } + + if (sample->time && sample->time != (u64)-1) + timestamp = perf_time_to_tsc(sample->time, &bts->tc); + else + timestamp = 0; + + err = intel_bts_update_queues(bts); + if (err) + return err; + + err = intel_bts_process_queues(bts, timestamp); + if (err) + return err; + if 
(event->header.type == PERF_RECORD_EXIT) { + err = intel_bts_process_tid_exit(bts, event->comm.tid); + if (err) + return err; + } + + if (event->header.type == PERF_RECORD_AUX && + (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && + bts->synth_opts.errors) + err = intel_bts_lost(bts, sample); + + return err; +} + +static int intel_bts_process_auxtrace_event(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool __maybe_unused) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + + if (bts->sampling_mode) + return 0; + + if (!bts->data_queued) { + struct auxtrace_buffer *buffer; + off_t data_offset; + int fd = perf_data_file__fd(session->file); + int err; + + if (perf_data_file__is_pipe(session->file)) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = auxtrace_queues__add_event(&bts->queues, session, event, + data_offset, &buffer); + if (err) + return err; + + /* Dump here now we have copied a piped trace out of the pipe */ + if (dump_trace) { + if (auxtrace_buffer__get_data(buffer, fd)) { + intel_bts_dump_event(bts, buffer->data, + buffer->size); + auxtrace_buffer__put_data(buffer); + } + } + } + + return 0; +} + +static int intel_bts_flush(struct perf_session *session __maybe_unused, + struct perf_tool *tool __maybe_unused) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + int ret; + + if (dump_trace || bts->sampling_mode) + return 0; + + if (!tool->ordered_events) + return -EINVAL; + + ret = intel_bts_update_queues(bts); + if (ret < 0) + return ret; + + return intel_bts_process_queues(bts, MAX_TIMESTAMP); +} + +static void intel_bts_free_queue(void *priv) +{ + struct intel_bts_queue *btsq = priv; + + if (!btsq) + return; + free(btsq); +} + +static void intel_bts_free_events(struct perf_session *session) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct 
intel_bts, + auxtrace); + struct auxtrace_queues *queues = &bts->queues; + unsigned int i; + + for (i = 0; i < queues->nr_queues; i++) { + intel_bts_free_queue(queues->queue_array[i].priv); + queues->queue_array[i].priv = NULL; + } + auxtrace_queues__free(queues); +} + +static void intel_bts_free(struct perf_session *session) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + + auxtrace_heap__free(&bts->heap); + intel_bts_free_events(session); + session->auxtrace = NULL; + free(bts); +} + +struct intel_bts_synth { + struct perf_tool dummy_tool; + struct perf_session *session; +}; + +static int intel_bts_event_synth(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct intel_bts_synth *intel_bts_synth = + container_of(tool, struct intel_bts_synth, dummy_tool); + + return perf_session__deliver_synth_event(intel_bts_synth->session, + event, NULL); +} + +static int intel_bts_synth_event(struct perf_session *session, + struct perf_event_attr *attr, u64 id) +{ + struct intel_bts_synth intel_bts_synth; + + memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth)); + intel_bts_synth.session = session; + + return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1, + &id, intel_bts_event_synth); +} + +static int intel_bts_synth_events(struct intel_bts *bts, + struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + struct perf_event_attr attr; + bool found = false; + u64 id; + int err; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == bts->pmu_type && evsel->ids) { + found = true; + break; + } + } + + if (!found) { + pr_debug("There are no selected events with Intel BTS data\n"); + return 0; + } + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = PERF_TYPE_HARDWARE; + attr.sample_type = 
evsel->attr.sample_type & PERF_SAMPLE_MASK; + attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | + PERF_SAMPLE_PERIOD; + attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; + attr.sample_type &= ~(u64)PERF_SAMPLE_CPU; + attr.exclude_user = evsel->attr.exclude_user; + attr.exclude_kernel = evsel->attr.exclude_kernel; + attr.exclude_hv = evsel->attr.exclude_hv; + attr.exclude_host = evsel->attr.exclude_host; + attr.exclude_guest = evsel->attr.exclude_guest; + attr.sample_id_all = evsel->attr.sample_id_all; + attr.read_format = evsel->attr.read_format; + + id = evsel->id[0] + 1000000000; + if (!id) + id = 1; + + if (bts->synth_opts.branches) { + attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; + attr.sample_period = 1; + attr.sample_type |= PERF_SAMPLE_ADDR; + pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", + id, (u64)attr.sample_type); + err = intel_bts_synth_event(session, &attr, id); + if (err) { + pr_err("%s: failed to synthesize 'branches' event type\n", + __func__); + return err; + } + bts->sample_branches = true; + bts->branches_sample_type = attr.sample_type; + bts->branches_id = id; + /* + * We only use sample types from PERF_SAMPLE_MASK so we can use + * __perf_evsel__sample_size() here. 
+ */ + bts->branches_event_size = sizeof(struct sample_event) + + __perf_evsel__sample_size(attr.sample_type); + } + + bts->synth_needs_swap = evsel->needs_swap; + + return 0; +} + +static const char * const intel_bts_info_fmts[] = { + [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n", + [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n", + [INTEL_BTS_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n", + [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", + [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", +}; + +static void intel_bts_print_info(u64 *arr, int start, int finish) +{ + int i; + + if (!dump_trace) + return; + + for (i = start; i <= finish; i++) + fprintf(stdout, intel_bts_info_fmts[i], arr[i]); +} + +u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE]; + +int intel_bts_process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; + /* priv[] is read up to and including INTEL_BTS_SNAPSHOT_MODE below */ + size_t min_sz = sizeof(u64) * (INTEL_BTS_SNAPSHOT_MODE + 1); + struct intel_bts *bts; + int err; + + if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + + min_sz) + return -EINVAL; + + bts = zalloc(sizeof(struct intel_bts)); + if (!bts) + return -ENOMEM; + + err = auxtrace_queues__init(&bts->queues); + if (err) + goto err_free; + + bts->session = session; + bts->machine = &session->machines.host; /* No kvm support */ + bts->auxtrace_type = auxtrace_info->type; + bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE]; + bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT]; + bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT]; + bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO]; + bts->cap_user_time_zero = + auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO]; + bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE]; + + bts->sampling_mode = false; + + bts->auxtrace.process_event = intel_bts_process_event; + 
+ bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event; + bts->auxtrace.flush_events = intel_bts_flush; + bts->auxtrace.free_events = intel_bts_free_events; + bts->auxtrace.free = intel_bts_free; + session->auxtrace = &bts->auxtrace; + + intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE, + INTEL_BTS_SNAPSHOT_MODE); + + if (dump_trace) + return 0; + + if (session->itrace_synth_opts && session->itrace_synth_opts->set) + bts->synth_opts = *session->itrace_synth_opts; + else + itrace_synth_opts__set_default(&bts->synth_opts); + + if (bts->synth_opts.calls) + bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_TRACE_END; + if (bts->synth_opts.returns) + bts->branches_filter |= PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_TRACE_BEGIN; + + err = intel_bts_synth_events(bts, session); + if (err) + goto err_free_queues; + + err = auxtrace_queues__process_index(&bts->queues, session); + if (err) + goto err_free_queues; + + if (bts->queues.populated) + bts->data_queued = true; + + return 0; + +err_free_queues: + auxtrace_queues__free(&bts->queues); + session->auxtrace = NULL; +err_free: + free(bts); + return err; +} diff --git a/tools/perf/util/intel-bts.h b/tools/perf/util/intel-bts.h new file mode 100644 index 000000000000..ca65e21b3e83 --- /dev/null +++ b/tools/perf/util/intel-bts.h @@ -0,0 +1,43 @@ +/* + * intel-bts.h: Intel Branch Trace Store (BTS) support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#ifndef INCLUDE__PERF_INTEL_BTS_H__ +#define INCLUDE__PERF_INTEL_BTS_H__ + +#define INTEL_BTS_PMU_NAME "intel_bts" + +enum { + INTEL_BTS_PMU_TYPE, + INTEL_BTS_TIME_SHIFT, + INTEL_BTS_TIME_MULT, + INTEL_BTS_TIME_ZERO, + INTEL_BTS_CAP_USER_TIME_ZERO, + INTEL_BTS_SNAPSHOT_MODE, + INTEL_BTS_AUXTRACE_PRIV_MAX, +}; + +#define INTEL_BTS_AUXTRACE_PRIV_SIZE (INTEL_BTS_AUXTRACE_PRIV_MAX * sizeof(u64)) + +struct auxtrace_record; +struct perf_tool; +union perf_event; +struct perf_session; + +struct auxtrace_record *intel_bts_recording_init(int *err); + +int intel_bts_process_auxtrace_info(union perf_event *event, + struct perf_session *session); + +#endif diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build new file mode 100644 index 000000000000..240730d682c1 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/Build @@ -0,0 +1,11 @@ +libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o + +inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk +inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt + +$(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) + @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ + +$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c + +CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder -Wno-override-init diff --git a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk new file mode 100644 index 000000000000..517567347aac --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk @@ -0,0 +1,386 @@ +#!/bin/awk -f +# gen-insn-attr-x86.awk: Instruction attribute table generator +# Written by Masami Hiramatsu <mhiramat@redhat.com> +# +# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > 
inat-tables.c + +# Awk implementation sanity check +function check_awk_implement() { + if (sprintf("%x", 0) != "0") + return "Your awk has a printf-format problem." + return "" +} + +# Clear working vars +function clear_vars() { + delete table + delete lptable2 + delete lptable1 + delete lptable3 + eid = -1 # escape id + gid = -1 # group id + aid = -1 # AVX id + tname = "" +} + +BEGIN { + # Implementation error checking + awkchecked = check_awk_implement() + if (awkchecked != "") { + print "Error: " awkchecked > "/dev/stderr" + print "Please try to use gawk." > "/dev/stderr" + exit 1 + } + + # Setup generating tables + print "/* x86 opcode map generated from x86-opcode-map.txt */" + print "/* Do not change this code. */\n" + ggid = 1 + geid = 1 + gaid = 0 + delete etable + delete gtable + delete atable + + opnd_expr = "^[A-Za-z/]" + ext_expr = "^\\(" + sep_expr = "^\\|$" + group_expr = "^Grp[0-9A-Za-z]+" + + imm_expr = "^[IJAOL][a-z]" + imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" + imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" + imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" + imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" + imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" + imm_flag["Ob"] = "INAT_MOFFSET" + imm_flag["Ov"] = "INAT_MOFFSET" + imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + + modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" + force64_expr = "\\([df]64\\)" + rex_expr = "^REX(\\.[XRWB]+)*" + fpu_expr = "^ESC" # TODO + + lprefix1_expr = "\\((66|!F3)\\)" + lprefix2_expr = "\\(F3\\)" + lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" + lprefix_expr = "\\((66|F2|F3)\\)" + max_lprefix = 4 + + # All opcodes starting with lower-case 'v' or with (v1) superscript + # accepts VEX prefix + vexok_opcode_expr = "^v.*" + vexok_expr = "\\(v1\\)" + # All 
opcodes with (v) superscript supports *only* VEX prefix + vexonly_expr = "\\(v\\)" + + prefix_expr = "\\(Prefix\\)" + prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" + prefix_num["REPNE"] = "INAT_PFX_REPNE" + prefix_num["REP/REPE"] = "INAT_PFX_REPE" + prefix_num["XACQUIRE"] = "INAT_PFX_REPNE" + prefix_num["XRELEASE"] = "INAT_PFX_REPE" + prefix_num["LOCK"] = "INAT_PFX_LOCK" + prefix_num["SEG=CS"] = "INAT_PFX_CS" + prefix_num["SEG=DS"] = "INAT_PFX_DS" + prefix_num["SEG=ES"] = "INAT_PFX_ES" + prefix_num["SEG=FS"] = "INAT_PFX_FS" + prefix_num["SEG=GS"] = "INAT_PFX_GS" + prefix_num["SEG=SS"] = "INAT_PFX_SS" + prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" + prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" + prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" + + clear_vars() +} + +function semantic_error(msg) { + print "Semantic error at " NR ": " msg > "/dev/stderr" + exit 1 +} + +function debug(msg) { + print "DEBUG: " msg +} + +function array_size(arr, i,c) { + c = 0 + for (i in arr) + c++ + return c +} + +/^Table:/ { + print "/* " $0 " */" + if (tname != "") + semantic_error("Hit Table: before EndTable:."); +} + +/^Referrer:/ { + if (NF != 1) { + # escape opcode table + ref = "" + for (i = 2; i <= NF; i++) + ref = ref $i + eid = escape[ref] + tname = sprintf("inat_escape_table_%d", eid) + } +} + +/^AVXcode:/ { + if (NF != 1) { + # AVX/escape opcode table + aid = $2 + if (gaid <= aid) + gaid = aid + 1 + if (tname == "") # AVX only opcode table + tname = sprintf("inat_avx_table_%d", $2) + } + if (aid == -1 && eid == -1) # primary opcode table + tname = "inat_primary_table" +} + +/^GrpTable:/ { + print "/* " $0 " */" + if (!($2 in group)) + semantic_error("No group: " $2 ) + gid = group[$2] + tname = "inat_group_table_" gid +} + +function print_table(tbl,name,fmt,n) +{ + print "const insn_attr_t " name " = {" + for (i = 0; i < n; i++) { + id = sprintf(fmt, i) + if (tbl[id]) + print " [" id "] = " tbl[id] "," + } + print "};" +} + +/^EndTable/ { + if (gid != -1) { + # print group tables 
+ if (array_size(table) != 0) { + print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,3] = tname "_3" + } + } else { + # print primary/escaped tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,0] = tname + if (aid >= 0) + atable[aid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,1] = tname "_1" + if (aid >= 0) + atable[aid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,2] = tname "_2" + if (aid >= 0) + atable[aid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,3] = tname "_3" + if (aid >= 0) + atable[aid,3] = tname "_3" + } + } + print "" + clear_vars() +} + +function add_flags(old,new) { + if (old && new) + return old " | " new + else if (old) + return old + else + return new +} + +# convert operands to flags. 
+function convert_operands(count,opnd, i,j,imm,mod) +{ + imm = null + mod = null + for (j = 1; j <= count; j++) { + i = opnd[j] + if (match(i, imm_expr) == 1) { + if (!imm_flag[i]) + semantic_error("Unknown imm opnd: " i) + if (imm) { + if (i != "Ib") + semantic_error("Second IMM error") + imm = add_flags(imm, "INAT_SCNDIMM") + } else + imm = imm_flag[i] + } else if (match(i, modrm_expr)) + mod = "INAT_MODRM" + } + return add_flags(imm, mod) +} + +/^[0-9a-f]+\:/ { + if (NR == 1) + next + # get index + idx = "0x" substr($1, 1, index($1,":") - 1) + if (idx in table) + semantic_error("Redefine " idx " in " tname) + + # check if escaped opcode + if ("escape" == $2) { + if ($3 != "#") + semantic_error("No escaped name") + ref = "" + for (i = 4; i <= NF; i++) + ref = ref $i + if (ref in escape) + semantic_error("Redefine escape (" ref ")") + escape[ref] = geid + geid++ + table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" + next + } + + variant = null + # converts + i = 2 + while (i <= NF) { + opcode = $(i++) + delete opnds + ext = null + flags = null + opnd = null + # parse one opcode + if (match($i, opnd_expr)) { + opnd = $i + count = split($(i++), opnds, ",") + flags = convert_operands(count, opnds) + } + if (match($i, ext_expr)) + ext = $(i++) + if (match($i, sep_expr)) + i++ + else if (i < NF) + semantic_error($i " is not a separator") + + # check if group opcode + if (match(opcode, group_expr)) { + if (!(opcode in group)) { + group[opcode] = ggid + ggid++ + } + flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") + } + # check force(or default) 64bit + if (match(ext, force64_expr)) + flags = add_flags(flags, "INAT_FORCE64") + + # check REX prefix + if (match(opcode, rex_expr)) + flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") + + # check coprocessor escape : TODO + if (match(opcode, fpu_expr)) + flags = add_flags(flags, "INAT_MODRM") + + # check VEX codes + if (match(ext, vexonly_expr)) + flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") + 
else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) + flags = add_flags(flags, "INAT_VEXOK") + + # check prefixes + if (match(ext, prefix_expr)) { + if (!prefix_num[opcode]) + semantic_error("Unknown prefix: " opcode) + flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") + } + if (length(flags) == 0) + continue + # check if last prefix + if (match(ext, lprefix1_expr)) { + lptable1[idx] = add_flags(lptable1[idx],flags) + variant = "INAT_VARIANT" + } + if (match(ext, lprefix2_expr)) { + lptable2[idx] = add_flags(lptable2[idx],flags) + variant = "INAT_VARIANT" + } + if (match(ext, lprefix3_expr)) { + lptable3[idx] = add_flags(lptable3[idx],flags) + variant = "INAT_VARIANT" + } + if (!match(ext, lprefix_expr)){ + table[idx] = add_flags(table[idx],flags) + } + } + if (variant) + table[idx] = add_flags(table[idx],variant) +} + +END { + if (awkchecked != "") + exit 1 + # print escape opcode map's array + print "/* Escape opcode map array */" + print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < geid; i++) + for (j = 0; j < max_lprefix; j++) + if (etable[i,j]) + print " ["i"]["j"] = "etable[i,j]"," + print "};\n" + # print group opcode map's array + print "/* Group opcode map array */" + print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < ggid; i++) + for (j = 0; j < max_lprefix; j++) + if (gtable[i,j]) + print " ["i"]["j"] = "gtable[i,j]"," + print "};\n" + # print AVX opcode map's array + print "/* AVX opcode map array */" + print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < gaid; i++) + for (j = 0; j < max_lprefix; j++) + if (atable[i,j]) + print " ["i"]["j"] = "atable[i,j]"," + print "};" +} diff --git a/tools/perf/util/intel-pt-decoder/inat.c b/tools/perf/util/intel-pt-decoder/inat.c new file mode 100644 index 
000000000000..906d94aa0a24 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/inat.c @@ -0,0 +1,96 @@ +/* + * x86 instruction attribute tables + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include "insn.h" + +/* Attribute tables are generated from opcode map */ +#include "inat-tables.c" + +/* Attribute search APIs */ +insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) +{ + return inat_primary_table[opcode]; +} + +int inat_get_last_prefix_id(insn_byte_t last_pfx) +{ + insn_attr_t lpfx_attr; + + lpfx_attr = inat_get_opcode_attribute(last_pfx); + return inat_last_prefix_id(lpfx_attr); +} + +insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, + insn_attr_t esc_attr) +{ + const insn_attr_t *table; + int n; + + n = inat_escape_id(esc_attr); + + table = inat_escape_tables[n][0]; + if (!table) + return 0; + if (inat_has_variant(table[opcode]) && lpfx_id) { + table = inat_escape_tables[n][lpfx_id]; + if (!table) + return 0; + } + return table[opcode]; +} + +insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, + insn_attr_t grp_attr) +{ + const insn_attr_t *table; + int n; + + n = inat_group_id(grp_attr); + + table = inat_group_tables[n][0]; + if (!table) + return 
inat_group_common_attribute(grp_attr); + if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { + table = inat_group_tables[n][lpfx_id]; + if (!table) + return inat_group_common_attribute(grp_attr); + } + return table[X86_MODRM_REG(modrm)] | + inat_group_common_attribute(grp_attr); +} + +insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, + insn_byte_t vex_p) +{ + const insn_attr_t *table; + if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) + return 0; + /* At first, this checks the master table */ + table = inat_avx_tables[vex_m][0]; + if (!table) + return 0; + if (!inat_is_group(table[opcode]) && vex_p) { + /* If this is not a group, get attribute directly */ + table = inat_avx_tables[vex_m][vex_p]; + if (!table) + return 0; + } + return table[opcode]; +} diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h new file mode 100644 index 000000000000..611645e903a8 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/inat.h @@ -0,0 +1,221 @@ +#ifndef _ASM_X86_INAT_H +#define _ASM_X86_INAT_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include "inat_types.h" + +/* + * Internal bits. 
Don't use bitmasks directly, because these bits are + * unstable. You should use checking functions. + */ + +#define INAT_OPCODE_TABLE_SIZE 256 +#define INAT_GROUP_TABLE_SIZE 8 + +/* Legacy last prefixes */ +#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ +#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ +#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ +/* Other Legacy prefixes */ +#define INAT_PFX_LOCK 4 /* 0xF0 */ +#define INAT_PFX_CS 5 /* 0x2E */ +#define INAT_PFX_DS 6 /* 0x3E */ +#define INAT_PFX_ES 7 /* 0x26 */ +#define INAT_PFX_FS 8 /* 0x64 */ +#define INAT_PFX_GS 9 /* 0x65 */ +#define INAT_PFX_SS 10 /* 0x36 */ +#define INAT_PFX_ADDRSZ 11 /* 0x67 */ +/* x86-64 REX prefix */ +#define INAT_PFX_REX 12 /* 0x4X */ +/* AVX VEX prefixes */ +#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ +#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ + +#define INAT_LSTPFX_MAX 3 +#define INAT_LGCPFX_MAX 11 + +/* Immediate size */ +#define INAT_IMM_BYTE 1 +#define INAT_IMM_WORD 2 +#define INAT_IMM_DWORD 3 +#define INAT_IMM_QWORD 4 +#define INAT_IMM_PTR 5 +#define INAT_IMM_VWORD32 6 +#define INAT_IMM_VWORD 7 + +/* Legacy prefix */ +#define INAT_PFX_OFFS 0 +#define INAT_PFX_BITS 4 +#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) +#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) +/* Escape opcodes */ +#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) +#define INAT_ESC_BITS 2 +#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) +#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) +/* Group opcodes (1-16) */ +#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) +#define INAT_GRP_BITS 5 +#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) +#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) +/* Immediates */ +#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) +#define INAT_IMM_BITS 3 +#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) +/* Flags */ +#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) +#define 
INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) +#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +/* Attribute making macros for attribute tables */ +#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) +#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) +#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) +#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) + +/* Attribute search APIs */ +extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern int inat_get_last_prefix_id(insn_byte_t last_pfx); +extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, + int lpfx_id, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, + int lpfx_id, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, + insn_byte_t vex_m, + insn_byte_t vex_pp); + +/* Attribute checking functions */ +static inline int inat_is_legacy_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr && attr <= INAT_LGCPFX_MAX; +} + +static inline int inat_is_address_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; +} + +static inline int inat_is_operand_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; +} + +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX; +} + +static inline int inat_last_prefix_id(insn_attr_t attr) +{ + if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) + return 0; + else + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_vex_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; +} + +static inline int inat_is_vex3_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; +} 
+ +static inline int inat_is_escape(insn_attr_t attr) +{ + return attr & INAT_ESC_MASK; +} + +static inline int inat_escape_id(insn_attr_t attr) +{ + return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; +} + +static inline int inat_is_group(insn_attr_t attr) +{ + return attr & INAT_GRP_MASK; +} + +static inline int inat_group_id(insn_attr_t attr) +{ + return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; +} + +static inline int inat_group_common_attribute(insn_attr_t attr) +{ + return attr & ~INAT_GRP_MASK; +} + +static inline int inat_has_immediate(insn_attr_t attr) +{ + return attr & INAT_IMM_MASK; +} + +static inline int inat_immediate_size(insn_attr_t attr) +{ + return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; +} + +static inline int inat_has_modrm(insn_attr_t attr) +{ + return attr & INAT_MODRM; +} + +static inline int inat_is_force64(insn_attr_t attr) +{ + return attr & INAT_FORCE64; +} + +static inline int inat_has_second_immediate(insn_attr_t attr) +{ + return attr & INAT_SCNDIMM; +} + +static inline int inat_has_moffset(insn_attr_t attr) +{ + return attr & INAT_MOFFSET; +} + +static inline int inat_has_variant(insn_attr_t attr) +{ + return attr & INAT_VARIANT; +} + +static inline int inat_accept_vex(insn_attr_t attr) +{ + return attr & INAT_VEXOK; +} + +static inline int inat_must_vex(insn_attr_t attr) +{ + return attr & INAT_VEXONLY; +} +#endif diff --git a/tools/perf/util/intel-pt-decoder/inat_types.h b/tools/perf/util/intel-pt-decoder/inat_types.h new file mode 100644 index 000000000000..cb3c20ce39cf --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/inat_types.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_INAT_TYPES_H +#define _ASM_X86_INAT_TYPES_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your 
option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +/* Instruction attributes */ +typedef unsigned int insn_attr_t; +typedef unsigned char insn_byte_t; +typedef signed int insn_value_t; + +#endif diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c new file mode 100644 index 000000000000..47314a64399c --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/insn.c @@ -0,0 +1,594 @@ +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2002, 2004, 2009 + */ + +#ifdef __KERNEL__ +#include <linux/string.h> +#else +#include <string.h> +#endif +#include "inat.h" +#include "insn.h" + +/* Verify next sizeof(t) bytes can be on the same instruction */ +#define validate_next(t, insn, n) \ + ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) + +#define __get_next(t, insn) \ + ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + +#define __peek_nbyte_next(t, insn, n) \ + ({ t r = *(t*)((insn)->next_byte + n); r; }) + +#define get_next(t, insn) \ + ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) + +#define peek_nbyte_next(t, insn, n) \ + ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) + +#define peek_next(t, insn) peek_nbyte_next(t, insn, 0) + +/** + * insn_init() - initialize struct insn + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @x86_64: !0 for 64-bit kernel or 64-bit app + */ +void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) +{ + /* + * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid + * even if the input buffer is long enough to hold them. + */ + if (buf_len > MAX_INSN_SIZE) + buf_len = MAX_INSN_SIZE; + + memset(insn, 0, sizeof(*insn)); + insn->kaddr = kaddr; + insn->end_kaddr = kaddr + buf_len; + insn->next_byte = kaddr; + insn->x86_64 = x86_64 ? 1 : 0; + insn->opnd_bytes = 4; + if (x86_64) + insn->addr_bytes = 8; + else + insn->addr_bytes = 4; +} + +/** + * insn_get_prefixes - scan x86 instruction prefix bytes + * @insn: &struct insn containing instruction + * + * Populates the @insn->prefixes bitmap, and updates @insn->next_byte + * to point to the (first) opcode. No effect if @insn->prefixes.got + * is already set. 
/**
 * insn_get_prefixes - scan x86 instruction prefix bytes
 * @insn:	&struct insn containing instruction
 *
 * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
 * to point to the (first) opcode.  No effect if @insn->prefixes.got
 * is already set.
 *
 * Also decodes an optional REX prefix (64-bit mode only) and an optional
 * VEX prefix, setting @insn->rex_prefix.got and @insn->vex_prefix.got.
 * If the buffer runs out while peeking at a byte, the function returns
 * early through err_out and the remaining "got" flags stay unset.
 */
void insn_get_prefixes(struct insn *insn)
{
	struct insn_field *prefixes = &insn->prefixes;
	insn_attr_t attr;
	insn_byte_t b, lb;
	int i, nb;

	if (prefixes->got)
		return;

	/* nb: distinct prefixes stored so far; lb: last prefix byte seen */
	nb = 0;
	lb = 0;
	b = peek_next(insn_byte_t, insn);
	attr = inat_get_opcode_attribute(b);
	while (inat_is_legacy_prefix(attr)) {
		/* Skip if same prefix */
		for (i = 0; i < nb; i++)
			if (prefixes->bytes[i] == b)
				goto found;
		if (nb == 4)
			/* Invalid instruction */
			break;
		prefixes->bytes[nb++] = b;
		if (inat_is_address_size_prefix(attr)) {
			/* address size switches 2/4 or 4/8 */
			if (insn->x86_64)
				insn->addr_bytes ^= 12;
			else
				insn->addr_bytes ^= 6;
		} else if (inat_is_operand_size_prefix(attr)) {
			/* operand size switches 2/4 */
			insn->opnd_bytes ^= 6;
		}
found:
		/* Repeated prefixes are counted in nbytes but stored only once */
		prefixes->nbytes++;
		insn->next_byte++;
		lb = b;
		b = peek_next(insn_byte_t, insn);
		attr = inat_get_opcode_attribute(b);
	}
	/* Set the last prefix */
	if (lb && lb != insn->prefixes.bytes[3]) {
		if (unlikely(insn->prefixes.bytes[3])) {
			/* Swap the last prefix */
			b = insn->prefixes.bytes[3];
			for (i = 0; i < nb; i++)
				if (prefixes->bytes[i] == lb)
					prefixes->bytes[i] = b;
		}
		insn->prefixes.bytes[3] = lb;
	}

	/* Decode REX prefix */
	if (insn->x86_64) {
		b = peek_next(insn_byte_t, insn);
		attr = inat_get_opcode_attribute(b);
		if (inat_is_rex_prefix(attr)) {
			insn->rex_prefix.value = b;
			insn->rex_prefix.nbytes = 1;
			insn->next_byte++;
			if (X86_REX_W(b))
				/* REX.W overrides opnd_size */
				insn->opnd_bytes = 8;
		}
	}
	insn->rex_prefix.got = 1;

	/* Decode VEX prefix */
	b = peek_next(insn_byte_t, insn);
	attr = inat_get_opcode_attribute(b);
	if (inat_is_vex_prefix(attr)) {
		insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
		if (!insn->x86_64) {
			/*
			 * In 32-bits mode, if the [7:6] bits (mod bits of
			 * ModRM) on the second byte are not 11b, it is
			 * LDS or LES.
			 */
			if (X86_MODRM_MOD(b2) != 3)
				goto vex_end;
		}
		insn->vex_prefix.bytes[0] = b;
		insn->vex_prefix.bytes[1] = b2;
		if (inat_is_vex3_prefix(attr)) {
			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
			insn->vex_prefix.bytes[2] = b2;
			insn->vex_prefix.nbytes = 3;
			insn->next_byte += 3;
			if (insn->x86_64 && X86_VEX_W(b2))
				/* VEX.W overrides opnd_size */
				insn->opnd_bytes = 8;
		} else {
			/*
			 * For VEX2, fake VEX3-like byte#2.
			 * Makes it easier to decode vex.W, vex.vvvv,
			 * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
			 */
			insn->vex_prefix.bytes[2] = b2 & 0x7f;
			insn->vex_prefix.nbytes = 2;
			insn->next_byte += 2;
		}
	}
vex_end:
	insn->vex_prefix.got = 1;

	prefixes->got = 1;

err_out:
	return;
}
+ */ +void insn_get_opcode(struct insn *insn) +{ + struct insn_field *opcode = &insn->opcode; + insn_byte_t op; + int pfx_id; + if (opcode->got) + return; + if (!insn->prefixes.got) + insn_get_prefixes(insn); + + /* Get first opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[0] = op; + opcode->nbytes = 1; + + /* Check if there is VEX prefix or not */ + if (insn_is_avx(insn)) { + insn_byte_t m, p; + m = insn_vex_m_bits(insn); + p = insn_vex_p_bits(insn); + insn->attr = inat_get_avx_attribute(op, m, p); + if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) + insn->attr = 0; /* This instruction is bad */ + goto end; /* VEX has only 1 byte for opcode */ + } + + insn->attr = inat_get_opcode_attribute(op); + while (inat_is_escape(insn->attr)) { + /* Get escaped opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[opcode->nbytes++] = op; + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); + } + if (inat_must_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ +end: + opcode->got = 1; + +err_out: + return; +} + +/** + * insn_get_modrm - collect ModRM byte, if any + * @insn: &struct insn containing instruction + * + * Populates @insn->modrm and updates @insn->next_byte to point past the + * ModRM byte, if any. If necessary, first collects the preceding bytes + * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. 
+ */ +void insn_get_modrm(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + insn_byte_t pfx_id, mod; + if (modrm->got) + return; + if (!insn->opcode.got) + insn_get_opcode(insn); + + if (inat_has_modrm(insn->attr)) { + mod = get_next(insn_byte_t, insn); + modrm->value = mod; + modrm->nbytes = 1; + if (inat_is_group(insn->attr)) { + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_group_attribute(mod, pfx_id, + insn->attr); + if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) + insn->attr = 0; /* This is bad */ + } + } + + if (insn->x86_64 && inat_is_force64(insn->attr)) + insn->opnd_bytes = 8; + modrm->got = 1; + +err_out: + return; +} + + +/** + * insn_rip_relative() - Does instruction use RIP-relative addressing mode? + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. No effect if @insn->x86_64 is 0. + */ +int insn_rip_relative(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + + if (!insn->x86_64) + return 0; + if (!modrm->got) + insn_get_modrm(insn); + /* + * For rip-relative instructions, the mod field (top 2 bits) + * is zero and the r/m field (bottom 3 bits) is 0x5. + */ + return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); +} + +/** + * insn_get_sib() - Get the SIB byte of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. 
+ */ +void insn_get_sib(struct insn *insn) +{ + insn_byte_t modrm; + + if (insn->sib.got) + return; + if (!insn->modrm.got) + insn_get_modrm(insn); + if (insn->modrm.nbytes) { + modrm = (insn_byte_t)insn->modrm.value; + if (insn->addr_bytes != 2 && + X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { + insn->sib.value = get_next(insn_byte_t, insn); + insn->sib.nbytes = 1; + } + } + insn->sib.got = 1; + +err_out: + return; +} + + +/** + * insn_get_displacement() - Get the displacement of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * SIB byte. + * Displacement value is sign-expanded. + */ +void insn_get_displacement(struct insn *insn) +{ + insn_byte_t mod, rm, base; + + if (insn->displacement.got) + return; + if (!insn->sib.got) + insn_get_sib(insn); + if (insn->modrm.nbytes) { + /* + * Interpreting the modrm byte: + * mod = 00 - no displacement fields (exceptions below) + * mod = 01 - 1-byte displacement field + * mod = 10 - displacement field is 4 bytes, or 2 bytes if + * address size = 2 (0x67 prefix in 32-bit mode) + * mod = 11 - no memory operand + * + * If address size = 2... + * mod = 00, r/m = 110 - displacement field is 2 bytes + * + * If address size != 2... 
+ * mod != 11, r/m = 100 - SIB byte exists + * mod = 00, SIB base = 101 - displacement field is 4 bytes + * mod = 00, r/m = 101 - rip-relative addressing, displacement + * field is 4 bytes + */ + mod = X86_MODRM_MOD(insn->modrm.value); + rm = X86_MODRM_RM(insn->modrm.value); + base = X86_SIB_BASE(insn->sib.value); + if (mod == 3) + goto out; + if (mod == 1) { + insn->displacement.value = get_next(char, insn); + insn->displacement.nbytes = 1; + } else if (insn->addr_bytes == 2) { + if ((mod == 0 && rm == 6) || mod == 2) { + insn->displacement.value = + get_next(short, insn); + insn->displacement.nbytes = 2; + } + } else { + if ((mod == 0 && rm == 5) || mod == 2 || + (mod == 0 && base == 5)) { + insn->displacement.value = get_next(int, insn); + insn->displacement.nbytes = 4; + } + } + } +out: + insn->displacement.got = 1; + +err_out: + return; +} + +/* Decode moffset16/32/64. Return 0 if failed */ +static int __get_moffset(struct insn *insn) +{ + switch (insn->addr_bytes) { + case 2: + insn->moffset1.value = get_next(short, insn); + insn->moffset1.nbytes = 2; + break; + case 4: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + break; + case 8: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + insn->moffset2.value = get_next(int, insn); + insn->moffset2.nbytes = 4; + break; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + insn->moffset1.got = insn->moffset2.got = 1; + + return 1; + +err_out: + return 0; +} + +/* Decode imm v32(Iz). 
Return 0 if failed */ +static int __get_immv32(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case 4: + case 8: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + + return 1; + +err_out: + return 0; +} + +/* Decode imm v64(Iv/Ov), Return 0 if failed */ +static int __get_immv(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + insn->immediate1.got = insn->immediate2.got = 1; + + return 1; +err_out: + return 0; +} + +/* Decode ptr16:16/32(Ap) */ +static int __get_immptr(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + /* ptr16:64 is not exist (no segment) */ + return 0; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + insn->immediate2.value = get_next(unsigned short, insn); + insn->immediate2.nbytes = 2; + insn->immediate1.got = insn->immediate2.got = 1; + + return 1; +err_out: + return 0; +} + +/** + * insn_get_immediate() - Get the immediates of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * displacement bytes. + * Basically, most of immediates are sign-expanded. 
Unsigned-value can be + * get by bit masking with ((1 << (nbytes * 8)) - 1) + */ +void insn_get_immediate(struct insn *insn) +{ + if (insn->immediate.got) + return; + if (!insn->displacement.got) + insn_get_displacement(insn); + + if (inat_has_moffset(insn->attr)) { + if (!__get_moffset(insn)) + goto err_out; + goto done; + } + + if (!inat_has_immediate(insn->attr)) + /* no immediates */ + goto done; + + switch (inat_immediate_size(insn->attr)) { + case INAT_IMM_BYTE: + insn->immediate.value = get_next(char, insn); + insn->immediate.nbytes = 1; + break; + case INAT_IMM_WORD: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case INAT_IMM_DWORD: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + case INAT_IMM_QWORD: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + case INAT_IMM_PTR: + if (!__get_immptr(insn)) + goto err_out; + break; + case INAT_IMM_VWORD32: + if (!__get_immv32(insn)) + goto err_out; + break; + case INAT_IMM_VWORD: + if (!__get_immv(insn)) + goto err_out; + break; + default: + /* Here, insn must have an immediate, but failed */ + goto err_out; + } + if (inat_has_second_immediate(insn->attr)) { + insn->immediate2.value = get_next(char, insn); + insn->immediate2.nbytes = 1; + } +done: + insn->immediate.got = 1; + +err_out: + return; +} + +/** + * insn_get_length() - Get the length of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * immediates bytes. 
+ */ +void insn_get_length(struct insn *insn) +{ + if (insn->length) + return; + if (!insn->immediate.got) + insn_get_immediate(insn); + insn->length = (unsigned char)((unsigned long)insn->next_byte + - (unsigned long)insn->kaddr); +} diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h new file mode 100644 index 000000000000..dd12da0f4593 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/insn.h @@ -0,0 +1,201 @@ +#ifndef _ASM_X86_INSN_H +#define _ASM_X86_INSN_H +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2009 + */ + +/* insn_attr_t is defined in inat.h */ +#include "inat.h" + +struct insn_field { + union { + insn_value_t value; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +struct insn { + struct insn_field prefixes; /* + * Prefixes + * prefixes.bytes[3]: last prefix + */ + struct insn_field rex_prefix; /* REX prefix */ + struct insn_field vex_prefix; /* VEX prefix */ + struct insn_field opcode; /* + * opcode.bytes[0]: opcode1 + * opcode.bytes[1]: opcode2 + * opcode.bytes[2]: opcode3 + */ + struct insn_field modrm; + struct insn_field sib; + struct insn_field displacement; + union { + struct insn_field immediate; + struct insn_field moffset1; /* for 64bit MOV */ + struct insn_field immediate1; /* for 64bit imm or off16/32 */ + }; + union { + struct insn_field moffset2; /* for 64bit MOV */ + struct insn_field immediate2; /* for 64bit imm or seg16 */ + }; + + insn_attr_t attr; + unsigned char opnd_bytes; + unsigned char addr_bytes; + unsigned char length; + unsigned char x86_64; + + const insn_byte_t *kaddr; /* kernel address of insn to analyze */ + const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */ + const insn_byte_t *next_byte; +}; + +#define MAX_INSN_SIZE 15 + +#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) +#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +#define X86_MODRM_RM(modrm) ((modrm) & 0x07) + +#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) +#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) +#define X86_SIB_BASE(sib) ((sib) & 0x07) + +#define X86_REX_W(rex) ((rex) & 8) +#define X86_REX_R(rex) ((rex) & 4) +#define X86_REX_X(rex) ((rex) & 2) +#define X86_REX_B(rex) ((rex) & 1) + +/* VEX bit flags */ +#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ +#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ +#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ +#define X86_VEX_B(vex) ((vex) 
& 0x20) /* VEX3 Byte1 */ +#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ +/* VEX bit fields */ +#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ +#define X86_VEX2_M 1 /* VEX2.M always 1 */ +#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ + +extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); +extern void insn_get_prefixes(struct insn *insn); +extern void insn_get_opcode(struct insn *insn); +extern void insn_get_modrm(struct insn *insn); +extern void insn_get_sib(struct insn *insn); +extern void insn_get_displacement(struct insn *insn); +extern void insn_get_immediate(struct insn *insn); +extern void insn_get_length(struct insn *insn); + +/* Attribute will be determined after getting ModRM (for opcode groups) */ +static inline void insn_get_attribute(struct insn *insn) +{ + insn_get_modrm(insn); +} + +/* Instruction uses RIP-relative addressing */ +extern int insn_rip_relative(struct insn *insn); + +/* Init insn for kernel text */ +static inline void kernel_insn_init(struct insn *insn, + const void *kaddr, int buf_len) +{ +#ifdef CONFIG_X86_64 + insn_init(insn, kaddr, buf_len, 1); +#else /* CONFIG_X86_32 */ + insn_init(insn, kaddr, buf_len, 0); +#endif +} + +static inline int insn_is_avx(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.value != 0); +} + +/* Ensure this instruction is decoded completely */ +static inline int insn_complete(struct insn *insn) +{ + return insn->opcode.got && insn->modrm.got && insn->sib.got && + insn->displacement.got && insn->immediate.got; +} + +static inline insn_byte_t insn_vex_m_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX2_M; + else + return X86_VEX3_M(insn->vex_prefix.bytes[1]); +} + +static inline insn_byte_t 
insn_vex_p_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX_P(insn->vex_prefix.bytes[1]); + else + return X86_VEX_P(insn->vex_prefix.bytes[2]); +} + +/* Get the last prefix id from last prefix or VEX prefix */ +static inline int insn_last_prefix_id(struct insn *insn) +{ + if (insn_is_avx(insn)) + return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ + + if (insn->prefixes.bytes[3]) + return inat_get_last_prefix_id(insn->prefixes.bytes[3]); + + return 0; +} + +/* Offset of each field from kaddr */ +static inline int insn_offset_rex_prefix(struct insn *insn) +{ + return insn->prefixes.nbytes; +} +static inline int insn_offset_vex_prefix(struct insn *insn) +{ + return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; +} +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; +} +static inline int insn_offset_modrm(struct insn *insn) +{ + return insn_offset_opcode(insn) + insn->opcode.nbytes; +} +static inline int insn_offset_sib(struct insn *insn) +{ + return insn_offset_modrm(insn) + insn->modrm.nbytes; +} +static inline int insn_offset_displacement(struct insn *insn) +{ + return insn_offset_sib(insn) + insn->sib.nbytes; +} +static inline int insn_offset_immediate(struct insn *insn) +{ + return insn_offset_displacement(insn) + insn->displacement.nbytes; +} + +#endif /* _ASM_X86_INSN_H */ diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c new file mode 100644 index 000000000000..22ba50224319 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -0,0 +1,2345 @@ +/* + * intel_pt_decoder.c: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#include <stdint.h> +#include <inttypes.h> + +#include "../cache.h" +#include "../util.h" + +#include "intel-pt-insn-decoder.h" +#include "intel-pt-pkt-decoder.h" +#include "intel-pt-decoder.h" +#include "intel-pt-log.h" + +#define INTEL_PT_BLK_SIZE 1024 + +#define BIT63 (((uint64_t)1 << 63)) + +#define INTEL_PT_RETURN 1 + +/* Maximum number of loops with no packets consumed i.e. stuck in a loop */ +#define INTEL_PT_MAX_LOOPS 10000 + +struct intel_pt_blk { + struct intel_pt_blk *prev; + uint64_t ip[INTEL_PT_BLK_SIZE]; +}; + +struct intel_pt_stack { + struct intel_pt_blk *blk; + struct intel_pt_blk *spare; + int pos; +}; + +enum intel_pt_pkt_state { + INTEL_PT_STATE_NO_PSB, + INTEL_PT_STATE_NO_IP, + INTEL_PT_STATE_ERR_RESYNC, + INTEL_PT_STATE_IN_SYNC, + INTEL_PT_STATE_TNT, + INTEL_PT_STATE_TIP, + INTEL_PT_STATE_TIP_PGD, + INTEL_PT_STATE_FUP, + INTEL_PT_STATE_FUP_NO_TIP, +}; + +#ifdef INTEL_PT_STRICT +#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB +#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB +#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_NO_PSB +#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_NO_PSB +#else +#define INTEL_PT_STATE_ERR1 (decoder->pkt_state) +#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_IP +#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_ERR_RESYNC +#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_IN_SYNC +#endif + +struct intel_pt_decoder { + int (*get_trace)(struct 
/*
 * Round x down to a power of 2, i.e. return the value of its highest set
 * bit.  Returns 0 for x == 0: no bit is set, and without this guard the
 * loop below would never see x == 1 and would not terminate.
 */
static uint64_t intel_pt_lower_power_of_2(uint64_t x)
{
	int i;

	if (!x)
		return 0;

	/* Shift right until only the top bit remains, counting the shifts */
	for (i = 0; x != 1; i++)
		x >>= 1;

	return x << i;
}
/*
 * Compute t * n / d without forming the full product t * n: the quotient
 * and the remainder of t / d are scaled by n separately.  Returns 0 when
 * d is 0.
 */
static uint64_t multdiv(uint64_t t, uint32_t n, uint32_t d)
{
	uint64_t whole, frac;

	if (d == 0)
		return 0;

	whole = t / d;
	frac = t % d;

	return whole * n + (frac * n) / d;
}
or 0x100 paranoia */ + if (decoder->tsc_slip < 0x100) + decoder->tsc_slip = 0x100; + + intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift); + intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n); + intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d); + intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult); + intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip); + + return decoder; +} + +static void intel_pt_pop_blk(struct intel_pt_stack *stack) +{ + struct intel_pt_blk *blk = stack->blk; + + stack->blk = blk->prev; + if (!stack->spare) + stack->spare = blk; + else + free(blk); +} + +static uint64_t intel_pt_pop(struct intel_pt_stack *stack) +{ + if (!stack->pos) { + if (!stack->blk) + return 0; + intel_pt_pop_blk(stack); + if (!stack->blk) + return 0; + stack->pos = INTEL_PT_BLK_SIZE; + } + return stack->blk->ip[--stack->pos]; +} + +static int intel_pt_alloc_blk(struct intel_pt_stack *stack) +{ + struct intel_pt_blk *blk; + + if (stack->spare) { + blk = stack->spare; + stack->spare = NULL; + } else { + blk = malloc(sizeof(struct intel_pt_blk)); + if (!blk) + return -ENOMEM; + } + + blk->prev = stack->blk; + stack->blk = blk; + stack->pos = 0; + return 0; +} + +static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip) +{ + int err; + + if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) { + err = intel_pt_alloc_blk(stack); + if (err) + return err; + } + + stack->blk->ip[stack->pos++] = ip; + return 0; +} + +static void intel_pt_clear_stack(struct intel_pt_stack *stack) +{ + while (stack->blk) + intel_pt_pop_blk(stack); + stack->pos = 0; +} + +static void intel_pt_free_stack(struct intel_pt_stack *stack) +{ + intel_pt_clear_stack(stack); + zfree(&stack->blk); + zfree(&stack->spare); +} + +void intel_pt_decoder_free(struct intel_pt_decoder *decoder) +{ + intel_pt_free_stack(&decoder->stack); + free(decoder); +} + +static int intel_pt_ext_err(int code) +{ + switch (code) { + case 
-ENOMEM: + return INTEL_PT_ERR_NOMEM; + case -ENOSYS: + return INTEL_PT_ERR_INTERN; + case -EBADMSG: + return INTEL_PT_ERR_BADPKT; + case -ENODATA: + return INTEL_PT_ERR_NODATA; + case -EILSEQ: + return INTEL_PT_ERR_NOINSN; + case -ENOENT: + return INTEL_PT_ERR_MISMAT; + case -EOVERFLOW: + return INTEL_PT_ERR_OVR; + case -ENOSPC: + return INTEL_PT_ERR_LOST; + case -ELOOP: + return INTEL_PT_ERR_NELOOP; + default: + return INTEL_PT_ERR_UNK; + } +} + +static const char *intel_pt_err_msgs[] = { + [INTEL_PT_ERR_NOMEM] = "Memory allocation failed", + [INTEL_PT_ERR_INTERN] = "Internal error", + [INTEL_PT_ERR_BADPKT] = "Bad packet", + [INTEL_PT_ERR_NODATA] = "No more data", + [INTEL_PT_ERR_NOINSN] = "Failed to get instruction", + [INTEL_PT_ERR_MISMAT] = "Trace doesn't match instruction", + [INTEL_PT_ERR_OVR] = "Overflow packet", + [INTEL_PT_ERR_LOST] = "Lost trace data", + [INTEL_PT_ERR_UNK] = "Unknown error!", + [INTEL_PT_ERR_NELOOP] = "Never-ending loop", +}; + +int intel_pt__strerror(int code, char *buf, size_t buflen) +{ + if (code < 1 || code > INTEL_PT_ERR_MAX) + code = INTEL_PT_ERR_UNK; + strlcpy(buf, intel_pt_err_msgs[code], buflen); + return 0; +} + +static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder, + const struct intel_pt_pkt *packet, + uint64_t last_ip) +{ + uint64_t ip; + + switch (packet->count) { + case 2: + ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) | + packet->payload; + break; + case 4: + ip = (last_ip & (uint64_t)0xffffffff00000000ULL) | + packet->payload; + break; + case 6: + ip = packet->payload; + break; + default: + return 0; + } + + if (ip & decoder->sign_bit) + return ip | decoder->sign_bits; + + return ip; +} + +static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder) +{ + decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet, + decoder->last_ip); +} + +static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder) +{ + intel_pt_set_last_ip(decoder); + decoder->ip = decoder->last_ip; +} + 
+static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder) +{ + intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos, + decoder->buf); +} + +static int intel_pt_bug(struct intel_pt_decoder *decoder) +{ + intel_pt_log("ERROR: Internal error\n"); + decoder->pkt_state = INTEL_PT_STATE_NO_PSB; + return -ENOSYS; +} + +static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder) +{ + decoder->tx_flags = 0; +} + +static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder) +{ + decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX; +} + +static int intel_pt_bad_packet(struct intel_pt_decoder *decoder) +{ + intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; + decoder->pkt_len = 1; + decoder->pkt_step = 1; + intel_pt_decoder_log_packet(decoder); + if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) { + intel_pt_log("ERROR: Bad packet\n"); + decoder->pkt_state = INTEL_PT_STATE_ERR1; + } + return -EBADMSG; +} + +static int intel_pt_get_data(struct intel_pt_decoder *decoder) +{ + struct intel_pt_buffer buffer = { .buf = 0, }; + int ret; + + decoder->pkt_step = 0; + + intel_pt_log("Getting more data\n"); + ret = decoder->get_trace(&buffer, decoder->data); + if (ret) + return ret; + decoder->buf = buffer.buf; + decoder->len = buffer.len; + if (!decoder->len) { + intel_pt_log("No more data\n"); + return -ENODATA; + } + if (!buffer.consecutive) { + decoder->ip = 0; + decoder->pkt_state = INTEL_PT_STATE_NO_PSB; + decoder->ref_timestamp = buffer.ref_timestamp; + decoder->timestamp = 0; + decoder->have_tma = false; + decoder->state.trace_nr = buffer.trace_nr; + intel_pt_log("Reference timestamp 0x%" PRIx64 "\n", + decoder->ref_timestamp); + return -ENOLINK; + } + + return 0; +} + +static int intel_pt_get_next_data(struct intel_pt_decoder *decoder) +{ + if (!decoder->next_buf) + return intel_pt_get_data(decoder); + + decoder->buf = decoder->next_buf; + decoder->len = decoder->next_len; + decoder->next_buf = 
0; + decoder->next_len = 0; + return 0; +} + +static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder) +{ + unsigned char *buf = decoder->temp_buf; + size_t old_len, len, n; + int ret; + + old_len = decoder->len; + len = decoder->len; + memcpy(buf, decoder->buf, len); + + ret = intel_pt_get_data(decoder); + if (ret) { + decoder->pos += old_len; + return ret < 0 ? ret : -EINVAL; + } + + n = INTEL_PT_PKT_MAX_SZ - len; + if (n > decoder->len) + n = decoder->len; + memcpy(buf + len, decoder->buf, n); + len += n; + + ret = intel_pt_get_packet(buf, len, &decoder->packet); + if (ret < (int)old_len) { + decoder->next_buf = decoder->buf; + decoder->next_len = decoder->len; + decoder->buf = buf; + decoder->len = old_len; + return intel_pt_bad_packet(decoder); + } + + decoder->next_buf = decoder->buf + (ret - old_len); + decoder->next_len = decoder->len - (ret - old_len); + + decoder->buf = buf; + decoder->len = ret; + + return ret; +} + +struct intel_pt_pkt_info { + struct intel_pt_decoder *decoder; + struct intel_pt_pkt packet; + uint64_t pos; + int pkt_len; + int last_packet_type; + void *data; +}; + +typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info); + +/* Lookahead packets in current buffer */ +static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder, + intel_pt_pkt_cb_t cb, void *data) +{ + struct intel_pt_pkt_info pkt_info; + const unsigned char *buf = decoder->buf; + size_t len = decoder->len; + int ret; + + pkt_info.decoder = decoder; + pkt_info.pos = decoder->pos; + pkt_info.pkt_len = decoder->pkt_step; + pkt_info.last_packet_type = decoder->last_packet_type; + pkt_info.data = data; + + while (1) { + do { + pkt_info.pos += pkt_info.pkt_len; + buf += pkt_info.pkt_len; + len -= pkt_info.pkt_len; + + if (!len) + return INTEL_PT_NEED_MORE_BYTES; + + ret = intel_pt_get_packet(buf, len, &pkt_info.packet); + if (!ret) + return INTEL_PT_NEED_MORE_BYTES; + if (ret < 0) + return ret; + + pkt_info.pkt_len = ret; + } while 
(pkt_info.packet.type == INTEL_PT_PAD); + + ret = cb(&pkt_info); + if (ret) + return 0; + + pkt_info.last_packet_type = pkt_info.packet.type; + } +} + +struct intel_pt_calc_cyc_to_tsc_info { + uint64_t cycle_cnt; + unsigned int cbr; + uint32_t last_mtc; + uint64_t ctc_timestamp; + uint64_t ctc_delta; + uint64_t tsc_timestamp; + uint64_t timestamp; + bool have_tma; + bool from_mtc; + double cbr_cyc_to_tsc; +}; + +static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) +{ + struct intel_pt_decoder *decoder = pkt_info->decoder; + struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data; + uint64_t timestamp; + double cyc_to_tsc; + unsigned int cbr; + uint32_t mtc, mtc_delta, ctc, fc, ctc_rem; + + switch (pkt_info->packet.type) { + case INTEL_PT_TNT: + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + case INTEL_PT_FUP: + case INTEL_PT_PSB: + case INTEL_PT_PIP: + case INTEL_PT_MODE_EXEC: + case INTEL_PT_MODE_TSX: + case INTEL_PT_PSBEND: + case INTEL_PT_PAD: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + return 0; + + case INTEL_PT_MTC: + if (!data->have_tma) + return 0; + + mtc = pkt_info->packet.payload; + if (mtc > data->last_mtc) + mtc_delta = mtc - data->last_mtc; + else + mtc_delta = mtc + 256 - data->last_mtc; + data->ctc_delta += mtc_delta << decoder->mtc_shift; + data->last_mtc = mtc; + + if (decoder->tsc_ctc_mult) { + timestamp = data->ctc_timestamp + + data->ctc_delta * decoder->tsc_ctc_mult; + } else { + timestamp = data->ctc_timestamp + + multdiv(data->ctc_delta, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + + if (timestamp < data->timestamp) + return 1; + + if (pkt_info->last_packet_type != INTEL_PT_CYC) { + data->timestamp = timestamp; + return 0; + } + + break; + + case INTEL_PT_TSC: + timestamp = pkt_info->packet.payload | + (data->timestamp & (0xffULL << 56)); + if (data->from_mtc && timestamp < data->timestamp && + data->timestamp - timestamp < decoder->tsc_slip) + return 1; + while (timestamp < data->timestamp) + timestamp 
+= (1ULL << 56); + if (pkt_info->last_packet_type != INTEL_PT_CYC) { + if (data->from_mtc) + return 1; + data->tsc_timestamp = timestamp; + data->timestamp = timestamp; + return 0; + } + break; + + case INTEL_PT_TMA: + if (data->from_mtc) + return 1; + + if (!decoder->tsc_ctc_ratio_d) + return 0; + + ctc = pkt_info->packet.payload; + fc = pkt_info->packet.count; + ctc_rem = ctc & decoder->ctc_rem_mask; + + data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; + + data->ctc_timestamp = data->tsc_timestamp - fc; + if (decoder->tsc_ctc_mult) { + data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult; + } else { + data->ctc_timestamp -= + multdiv(ctc_rem, decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + + data->ctc_delta = 0; + data->have_tma = true; + + return 0; + + case INTEL_PT_CYC: + data->cycle_cnt += pkt_info->packet.payload; + return 0; + + case INTEL_PT_CBR: + cbr = pkt_info->packet.payload; + if (data->cbr && data->cbr != cbr) + return 1; + data->cbr = cbr; + data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; + return 0; + + case INTEL_PT_TIP_PGD: + case INTEL_PT_TRACESTOP: + case INTEL_PT_OVF: + case INTEL_PT_BAD: /* Does not happen */ + default: + return 1; + } + + if (!data->cbr && decoder->cbr) { + data->cbr = decoder->cbr; + data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc; + } + + if (!data->cycle_cnt) + return 1; + + cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt; + + if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc && + cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) { + intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n", + cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos); + return 1; + } + + decoder->calc_cyc_to_tsc = cyc_to_tsc; + decoder->have_calc_cyc_to_tsc = true; + + if (data->cbr) { + intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. 
CBR-based value %g, pos " x64_fmt "\n", + cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos); + } else { + intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n", + cyc_to_tsc, pkt_info->pos); + } + + return 1; +} + +static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder, + bool from_mtc) +{ + struct intel_pt_calc_cyc_to_tsc_info data = { + .cycle_cnt = 0, + .cbr = 0, + .last_mtc = decoder->last_mtc, + .ctc_timestamp = decoder->ctc_timestamp, + .ctc_delta = decoder->ctc_delta, + .tsc_timestamp = decoder->tsc_timestamp, + .timestamp = decoder->timestamp, + .have_tma = decoder->have_tma, + .from_mtc = from_mtc, + .cbr_cyc_to_tsc = 0, + }; + + intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data); +} + +static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder) +{ + int ret; + + decoder->last_packet_type = decoder->packet.type; + + do { + decoder->pos += decoder->pkt_step; + decoder->buf += decoder->pkt_step; + decoder->len -= decoder->pkt_step; + + if (!decoder->len) { + ret = intel_pt_get_next_data(decoder); + if (ret) + return ret; + } + + ret = intel_pt_get_packet(decoder->buf, decoder->len, + &decoder->packet); + if (ret == INTEL_PT_NEED_MORE_BYTES && + decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) { + ret = intel_pt_get_split_packet(decoder); + if (ret < 0) + return ret; + } + if (ret <= 0) + return intel_pt_bad_packet(decoder); + + decoder->pkt_len = ret; + decoder->pkt_step = ret; + intel_pt_decoder_log_packet(decoder); + } while (decoder->packet.type == INTEL_PT_PAD); + + return 0; +} + +static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp, masked_timestamp; + + timestamp = decoder->timestamp + decoder->timestamp_insn_cnt; + masked_timestamp = timestamp & decoder->period_mask; + if (decoder->continuous_period) { + if (masked_timestamp != decoder->last_masked_timestamp) + return 1; + } else { + timestamp += 1; + 
masked_timestamp = timestamp & decoder->period_mask; + if (masked_timestamp != decoder->last_masked_timestamp) { + decoder->last_masked_timestamp = masked_timestamp; + decoder->continuous_period = true; + } + } + return decoder->period_ticks - (timestamp - masked_timestamp); +} + +static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder) +{ + switch (decoder->period_type) { + case INTEL_PT_PERIOD_INSTRUCTIONS: + return decoder->period - decoder->period_insn_cnt; + case INTEL_PT_PERIOD_TICKS: + return intel_pt_next_period(decoder); + case INTEL_PT_PERIOD_NONE: + case INTEL_PT_PERIOD_MTC: + default: + return 0; + } +} + +static void intel_pt_sample_insn(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp, masked_timestamp; + + switch (decoder->period_type) { + case INTEL_PT_PERIOD_INSTRUCTIONS: + decoder->period_insn_cnt = 0; + break; + case INTEL_PT_PERIOD_TICKS: + timestamp = decoder->timestamp + decoder->timestamp_insn_cnt; + masked_timestamp = timestamp & decoder->period_mask; + decoder->last_masked_timestamp = masked_timestamp; + break; + case INTEL_PT_PERIOD_NONE: + case INTEL_PT_PERIOD_MTC: + default: + break; + } + + decoder->state.type |= INTEL_PT_INSTRUCTION; +} + +static int intel_pt_walk_insn(struct intel_pt_decoder *decoder, + struct intel_pt_insn *intel_pt_insn, uint64_t ip) +{ + uint64_t max_insn_cnt, insn_cnt = 0; + int err; + + if (!decoder->mtc_insn) + decoder->mtc_insn = true; + + max_insn_cnt = intel_pt_next_sample(decoder); + + err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip, + max_insn_cnt, decoder->data); + + decoder->tot_insn_cnt += insn_cnt; + decoder->timestamp_insn_cnt += insn_cnt; + decoder->period_insn_cnt += insn_cnt; + + if (err) { + decoder->no_progress = 0; + decoder->pkt_state = INTEL_PT_STATE_ERR2; + intel_pt_log_at("ERROR: Failed to get instruction", + decoder->ip); + if (err == -ENOENT) + return -ENOLINK; + return -EILSEQ; + } + + if (ip && decoder->ip == ip) { + err = -EAGAIN; + goto out; + 
} + + if (max_insn_cnt && insn_cnt >= max_insn_cnt) + intel_pt_sample_insn(decoder); + + if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) { + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->ip += intel_pt_insn->length; + err = INTEL_PT_RETURN; + goto out; + } + + if (intel_pt_insn->op == INTEL_PT_OP_CALL) { + /* Zero-length calls are excluded */ + if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL || + intel_pt_insn->rel) { + err = intel_pt_push(&decoder->stack, decoder->ip + + intel_pt_insn->length); + if (err) + goto out; + } + } else if (intel_pt_insn->op == INTEL_PT_OP_RET) { + decoder->ret_addr = intel_pt_pop(&decoder->stack); + } + + if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) { + int cnt = decoder->no_progress++; + + decoder->state.from_ip = decoder->ip; + decoder->ip += intel_pt_insn->length + + intel_pt_insn->rel; + decoder->state.to_ip = decoder->ip; + err = INTEL_PT_RETURN; + + /* + * Check for being stuck in a loop. This can happen if a + * decoder error results in the decoder erroneously setting the + * ip to an address that is itself in an infinite loop that + * consumes no packets. When that happens, there must be an + * unconditional branch. 
+ */ + if (cnt) { + if (cnt == 1) { + decoder->stuck_ip = decoder->state.to_ip; + decoder->stuck_ip_prd = 1; + decoder->stuck_ip_cnt = 1; + } else if (cnt > INTEL_PT_MAX_LOOPS || + decoder->state.to_ip == decoder->stuck_ip) { + intel_pt_log_at("ERROR: Never-ending loop", + decoder->state.to_ip); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + err = -ELOOP; + goto out; + } else if (!--decoder->stuck_ip_cnt) { + decoder->stuck_ip_prd += 1; + decoder->stuck_ip_cnt = decoder->stuck_ip_prd; + decoder->stuck_ip = decoder->state.to_ip; + } + } + goto out_no_progress; + } +out: + decoder->no_progress = 0; +out_no_progress: + decoder->state.insn_op = intel_pt_insn->op; + decoder->state.insn_len = intel_pt_insn->length; + + if (decoder->tx_flags & INTEL_PT_IN_TX) + decoder->state.flags |= INTEL_PT_IN_TX; + + return err; +} + +static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) +{ + struct intel_pt_insn intel_pt_insn; + uint64_t ip; + int err; + + ip = decoder->last_ip; + + while (1) { + err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip); + if (err == INTEL_PT_RETURN) + return 0; + if (err == -EAGAIN) { + if (decoder->set_fup_tx_flags) { + decoder->set_fup_tx_flags = false; + decoder->tx_flags = decoder->fup_tx_flags; + decoder->state.type = INTEL_PT_TRANSACTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.flags = decoder->fup_tx_flags; + return 0; + } + return err; + } + decoder->set_fup_tx_flags = false; + if (err) + return err; + + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) { + intel_pt_log_at("ERROR: Unexpected indirect branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + return -ENOENT; + } + + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { + intel_pt_log_at("ERROR: Unexpected conditional branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + return -ENOENT; + } + + intel_pt_bug(decoder); + } +} + +static int intel_pt_walk_tip(struct intel_pt_decoder 
*decoder) +{ + struct intel_pt_insn intel_pt_insn; + int err; + + err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0); + if (err == INTEL_PT_RETURN) + return 0; + if (err) + return err; + + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) { + if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) { + decoder->pge = false; + decoder->continuous_period = false; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + if (decoder->packet.count != 0) + decoder->ip = decoder->last_ip; + } else { + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.from_ip = decoder->ip; + if (decoder->packet.count == 0) { + decoder->state.to_ip = 0; + } else { + decoder->state.to_ip = decoder->last_ip; + decoder->ip = decoder->last_ip; + } + } + return 0; + } + + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { + intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + return -ENOENT; + } + + return intel_pt_bug(decoder); +} + +static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) +{ + struct intel_pt_insn intel_pt_insn; + int err; + + while (1) { + err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0); + if (err == INTEL_PT_RETURN) + return 0; + if (err) + return err; + + if (intel_pt_insn.op == INTEL_PT_OP_RET) { + if (!decoder->return_compression) { + intel_pt_log_at("ERROR: RET when expecting conditional branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + } + if (!decoder->ret_addr) { + intel_pt_log_at("ERROR: Bad RET compression (stack empty)", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + } + if (!(decoder->tnt.payload & BIT63)) { + intel_pt_log_at("ERROR: Bad RET compression (TNT=N)", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + } + decoder->tnt.count -= 1; + if (!decoder->tnt.count) + decoder->pkt_state = 
INTEL_PT_STATE_IN_SYNC; + decoder->tnt.payload <<= 1; + decoder->state.from_ip = decoder->ip; + decoder->ip = decoder->ret_addr; + decoder->state.to_ip = decoder->ip; + return 0; + } + + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) { + /* Handle deferred TIPs */ + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + if (decoder->packet.type != INTEL_PT_TIP || + decoder->packet.count == 0) { + intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + decoder->pkt_step = 0; + return -ENOENT; + } + intel_pt_set_last_ip(decoder); + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = decoder->last_ip; + decoder->ip = decoder->last_ip; + return 0; + } + + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { + decoder->tnt.count -= 1; + if (!decoder->tnt.count) + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + if (decoder->tnt.payload & BIT63) { + decoder->tnt.payload <<= 1; + decoder->state.from_ip = decoder->ip; + decoder->ip += intel_pt_insn.length + + intel_pt_insn.rel; + decoder->state.to_ip = decoder->ip; + return 0; + } + /* Instruction sample for a non-taken branch */ + if (decoder->state.type & INTEL_PT_INSTRUCTION) { + decoder->tnt.payload <<= 1; + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->ip += intel_pt_insn.length; + return 0; + } + decoder->ip += intel_pt_insn.length; + if (!decoder->tnt.count) + return -EAGAIN; + decoder->tnt.payload <<= 1; + continue; + } + + return intel_pt_bug(decoder); + } +} + +static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip) +{ + unsigned int fup_tx_flags; + int err; + + fup_tx_flags = decoder->packet.payload & + (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX); + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + if (decoder->packet.type == INTEL_PT_FUP) { + decoder->fup_tx_flags = fup_tx_flags; + 
decoder->set_fup_tx_flags = true; + if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX)) + *no_tip = true; + } else { + intel_pt_log_at("ERROR: Missing FUP after MODE.TSX", + decoder->pos); + intel_pt_update_in_tx(decoder); + } + return 0; +} + +static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp; + + decoder->have_tma = false; + + if (decoder->ref_timestamp) { + timestamp = decoder->packet.payload | + (decoder->ref_timestamp & (0xffULL << 56)); + if (timestamp < decoder->ref_timestamp) { + if (decoder->ref_timestamp - timestamp > (1ULL << 55)) + timestamp += (1ULL << 56); + } else { + if (timestamp - decoder->ref_timestamp > (1ULL << 55)) + timestamp -= (1ULL << 56); + } + decoder->tsc_timestamp = timestamp; + decoder->timestamp = timestamp; + decoder->ref_timestamp = 0; + decoder->timestamp_insn_cnt = 0; + } else if (decoder->timestamp) { + timestamp = decoder->packet.payload | + (decoder->timestamp & (0xffULL << 56)); + decoder->tsc_timestamp = timestamp; + if (timestamp < decoder->timestamp && + decoder->timestamp - timestamp < decoder->tsc_slip) { + intel_pt_log_to("Suppressing backwards timestamp", + timestamp); + timestamp = decoder->timestamp; + } + while (timestamp < decoder->timestamp) { + intel_pt_log_to("Wraparound timestamp", timestamp); + timestamp += (1ULL << 56); + decoder->tsc_timestamp = timestamp; + } + decoder->timestamp = timestamp; + decoder->timestamp_insn_cnt = 0; + } + + if (decoder->last_packet_type == INTEL_PT_CYC) { + decoder->cyc_ref_timestamp = decoder->timestamp; + decoder->cycle_cnt = 0; + decoder->have_calc_cyc_to_tsc = false; + intel_pt_calc_cyc_to_tsc(decoder, false); + } + + intel_pt_log_to("Setting timestamp", decoder->timestamp); +} + +static int intel_pt_overflow(struct intel_pt_decoder *decoder) +{ + intel_pt_log("ERROR: Buffer overflow\n"); + intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; + decoder->cbr = 0; + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + 
decoder->overflow = true; + return -EOVERFLOW; +} + +static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) +{ + uint32_t ctc = decoder->packet.payload; + uint32_t fc = decoder->packet.count; + uint32_t ctc_rem = ctc & decoder->ctc_rem_mask; + + if (!decoder->tsc_ctc_ratio_d) + return; + + decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; + decoder->ctc_timestamp = decoder->tsc_timestamp - fc; + if (decoder->tsc_ctc_mult) { + decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult; + } else { + decoder->ctc_timestamp -= multdiv(ctc_rem, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + decoder->ctc_delta = 0; + decoder->have_tma = true; + intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n", + decoder->ctc_timestamp, decoder->last_mtc, ctc_rem); +} + +static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp; + uint32_t mtc, mtc_delta; + + if (!decoder->have_tma) + return; + + mtc = decoder->packet.payload; + + if (mtc > decoder->last_mtc) + mtc_delta = mtc - decoder->last_mtc; + else + mtc_delta = mtc + 256 - decoder->last_mtc; + + decoder->ctc_delta += mtc_delta << decoder->mtc_shift; + + if (decoder->tsc_ctc_mult) { + timestamp = decoder->ctc_timestamp + + decoder->ctc_delta * decoder->tsc_ctc_mult; + } else { + timestamp = decoder->ctc_timestamp + + multdiv(decoder->ctc_delta, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + + if (timestamp < decoder->timestamp) + intel_pt_log("Suppressing MTC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n", + timestamp, decoder->timestamp); + else + decoder->timestamp = timestamp; + + decoder->timestamp_insn_cnt = 0; + decoder->last_mtc = mtc; + + if (decoder->last_packet_type == INTEL_PT_CYC) { + decoder->cyc_ref_timestamp = decoder->timestamp; + decoder->cycle_cnt = 0; + decoder->have_calc_cyc_to_tsc = false; + intel_pt_calc_cyc_to_tsc(decoder, true); + } +} + +static void intel_pt_calc_cbr(struct 
intel_pt_decoder *decoder) +{ + unsigned int cbr = decoder->packet.payload; + + if (decoder->cbr == cbr) + return; + + decoder->cbr = cbr; + decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; +} + +static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp = decoder->cyc_ref_timestamp; + + decoder->have_cyc = true; + + decoder->cycle_cnt += decoder->packet.payload; + + if (!decoder->cyc_ref_timestamp) + return; + + if (decoder->have_calc_cyc_to_tsc) + timestamp += decoder->cycle_cnt * decoder->calc_cyc_to_tsc; + else if (decoder->cbr) + timestamp += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc; + else + return; + + if (timestamp < decoder->timestamp) + intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n", + timestamp, decoder->timestamp); + else + decoder->timestamp = timestamp; +} + +/* Walk PSB+ packets when already in sync. */ +static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) +{ + int err; + + while (1) { + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + + switch (decoder->packet.type) { + case INTEL_PT_PSBEND: + return 0; + + case INTEL_PT_TIP_PGD: + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + case INTEL_PT_TNT: + case INTEL_PT_TRACESTOP: + case INTEL_PT_BAD: + case INTEL_PT_PSB: + decoder->have_tma = false; + intel_pt_log("ERROR: Unexpected packet\n"); + return -EAGAIN; + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_TSC: + intel_pt_calc_tsc_timestamp(decoder); + break; + + case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); + break; + + case INTEL_PT_CBR: + intel_pt_calc_cbr(decoder); + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_PIP: + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + break; + + case INTEL_PT_FUP: + decoder->pge = true; + intel_pt_set_last_ip(decoder); + break; + + case INTEL_PT_MODE_TSX: + 
intel_pt_update_in_tx(decoder); + break; + + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + if (decoder->period_type == INTEL_PT_PERIOD_MTC) + decoder->state.type |= INTEL_PT_INSTRUCTION; + break; + + case INTEL_PT_CYC: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + case INTEL_PT_PAD: + default: + break; + } + } +} + +static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) +{ + int err; + + if (decoder->tx_flags & INTEL_PT_ABORT_TX) { + decoder->tx_flags = 0; + decoder->state.flags &= ~INTEL_PT_IN_TX; + decoder->state.flags |= INTEL_PT_ABORT_TX; + } else { + decoder->state.flags |= INTEL_PT_ASYNC; + } + + while (1) { + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + + switch (decoder->packet.type) { + case INTEL_PT_TNT: + case INTEL_PT_FUP: + case INTEL_PT_TRACESTOP: + case INTEL_PT_PSB: + case INTEL_PT_TSC: + case INTEL_PT_TMA: + case INTEL_PT_CBR: + case INTEL_PT_MODE_TSX: + case INTEL_PT_BAD: + case INTEL_PT_PSBEND: + intel_pt_log("ERROR: Missing TIP after FUP\n"); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_TIP_PGD: + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + if (decoder->packet.count != 0) { + intel_pt_set_ip(decoder); + intel_pt_log("Omitting PGD ip " x64_fmt "\n", + decoder->ip); + } + decoder->pge = false; + decoder->continuous_period = false; + return 0; + + case INTEL_PT_TIP_PGE: + decoder->pge = true; + intel_pt_log("Omitting PGE ip " x64_fmt "\n", + decoder->ip); + decoder->state.from_ip = 0; + if (decoder->packet.count == 0) { + decoder->state.to_ip = 0; + } else { + intel_pt_set_ip(decoder); + decoder->state.to_ip = decoder->ip; + } + return 0; + + case INTEL_PT_TIP: + decoder->state.from_ip = decoder->ip; + if (decoder->packet.count == 0) { + decoder->state.to_ip = 0; + } else { + intel_pt_set_ip(decoder); + decoder->state.to_ip = decoder->ip; + } + return 0; + + case INTEL_PT_PIP: + 
decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + break; + + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + if (decoder->period_type == INTEL_PT_PERIOD_MTC) + decoder->state.type |= INTEL_PT_INSTRUCTION; + break; + + case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + case INTEL_PT_PAD: + break; + + default: + return intel_pt_bug(decoder); + } + } +} + +static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) +{ + bool no_tip = false; + int err; + + while (1) { + err = intel_pt_get_next_packet(decoder); + if (err) + return err; +next: + switch (decoder->packet.type) { + case INTEL_PT_TNT: + if (!decoder->packet.count) + break; + decoder->tnt = decoder->packet; + decoder->pkt_state = INTEL_PT_STATE_TNT; + err = intel_pt_walk_tnt(decoder); + if (err == -EAGAIN) + break; + return err; + + case INTEL_PT_TIP_PGD: + if (decoder->packet.count != 0) + intel_pt_set_last_ip(decoder); + decoder->pkt_state = INTEL_PT_STATE_TIP_PGD; + return intel_pt_walk_tip(decoder); + + case INTEL_PT_TIP_PGE: { + decoder->pge = true; + if (decoder->packet.count == 0) { + intel_pt_log_at("Skipping zero TIP.PGE", + decoder->pos); + break; + } + intel_pt_set_ip(decoder); + decoder->state.from_ip = 0; + decoder->state.to_ip = decoder->ip; + return 0; + } + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_TIP: + if (decoder->packet.count != 0) + intel_pt_set_last_ip(decoder); + decoder->pkt_state = INTEL_PT_STATE_TIP; + return intel_pt_walk_tip(decoder); + + case INTEL_PT_FUP: + if (decoder->packet.count == 0) { + intel_pt_log_at("Skipping zero FUP", + decoder->pos); + no_tip = false; + break; + } + intel_pt_set_last_ip(decoder); + err = intel_pt_walk_fup(decoder); + if (err != -EAGAIN) { + if (err) + return err; + if (no_tip) + decoder->pkt_state = + INTEL_PT_STATE_FUP_NO_TIP; + else + 
decoder->pkt_state = INTEL_PT_STATE_FUP; + return 0; + } + if (no_tip) { + no_tip = false; + break; + } + return intel_pt_walk_fup_tip(decoder); + + case INTEL_PT_TRACESTOP: + decoder->pge = false; + decoder->continuous_period = false; + intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; + break; + + case INTEL_PT_PSB: + intel_pt_clear_stack(&decoder->stack); + err = intel_pt_walk_psbend(decoder); + if (err == -EAGAIN) + goto next; + if (err) + return err; + break; + + case INTEL_PT_PIP: + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + break; + + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + if (decoder->period_type != INTEL_PT_PERIOD_MTC) + break; + /* + * Ensure that there has been an instruction since the + * last MTC. + */ + if (!decoder->mtc_insn) + break; + decoder->mtc_insn = false; + /* Ensure that there is a timestamp */ + if (!decoder->timestamp) + break; + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->mtc_insn = false; + return 0; + + case INTEL_PT_TSC: + intel_pt_calc_tsc_timestamp(decoder); + break; + + case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); + break; + + case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + + case INTEL_PT_CBR: + intel_pt_calc_cbr(decoder); + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_MODE_TSX: + /* MODE_TSX need not be followed by FUP */ + if (!decoder->pge) { + intel_pt_update_in_tx(decoder); + break; + } + err = intel_pt_mode_tsx(decoder, &no_tip); + if (err) + return err; + goto next; + + case INTEL_PT_BAD: /* Does not happen */ + return intel_pt_bug(decoder); + + case INTEL_PT_PSBEND: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + case INTEL_PT_PAD: + break; + + default: + return intel_pt_bug(decoder); + } + } +} + +/* Walk PSB+ packets to get in sync. 
*/ +static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) +{ + int err; + + while (1) { + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + + switch (decoder->packet.type) { + case INTEL_PT_TIP_PGD: + decoder->continuous_period = false; + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + intel_pt_log("ERROR: Unexpected packet\n"); + return -ENOENT; + + case INTEL_PT_FUP: + decoder->pge = true; + if (decoder->last_ip || decoder->packet.count == 6 || + decoder->packet.count == 0) { + uint64_t current_ip = decoder->ip; + + intel_pt_set_ip(decoder); + if (current_ip) + intel_pt_log_to("Setting IP", + decoder->ip); + } + break; + + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + break; + + case INTEL_PT_TSC: + intel_pt_calc_tsc_timestamp(decoder); + break; + + case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); + break; + + case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + + case INTEL_PT_CBR: + intel_pt_calc_cbr(decoder); + break; + + case INTEL_PT_PIP: + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_MODE_TSX: + intel_pt_update_in_tx(decoder); + break; + + case INTEL_PT_TRACESTOP: + decoder->pge = false; + decoder->continuous_period = false; + intel_pt_clear_tx_flags(decoder); + case INTEL_PT_TNT: + decoder->have_tma = false; + intel_pt_log("ERROR: Unexpected packet\n"); + if (decoder->ip) + decoder->pkt_state = INTEL_PT_STATE_ERR4; + else + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + + case INTEL_PT_BAD: /* Does not happen */ + return intel_pt_bug(decoder); + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_PSBEND: + return 0; + + case INTEL_PT_PSB: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + case INTEL_PT_PAD: + default: + break; + } + } +} + +static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) +{ + int err; + + while (1) { + err = 
intel_pt_get_next_packet(decoder); + if (err) + return err; + + switch (decoder->packet.type) { + case INTEL_PT_TIP_PGD: + decoder->continuous_period = false; + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; + if (decoder->last_ip || decoder->packet.count == 6 || + decoder->packet.count == 0) + intel_pt_set_ip(decoder); + if (decoder->ip) + return 0; + break; + + case INTEL_PT_FUP: + if (decoder->overflow) { + if (decoder->last_ip || + decoder->packet.count == 6 || + decoder->packet.count == 0) + intel_pt_set_ip(decoder); + if (decoder->ip) + return 0; + } + if (decoder->packet.count) + intel_pt_set_last_ip(decoder); + break; + + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + break; + + case INTEL_PT_TSC: + intel_pt_calc_tsc_timestamp(decoder); + break; + + case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); + break; + + case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + + case INTEL_PT_CBR: + intel_pt_calc_cbr(decoder); + break; + + case INTEL_PT_PIP: + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_MODE_TSX: + intel_pt_update_in_tx(decoder); + break; + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_BAD: /* Does not happen */ + return intel_pt_bug(decoder); + + case INTEL_PT_TRACESTOP: + decoder->pge = false; + decoder->continuous_period = false; + intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; + break; + + case INTEL_PT_PSB: + err = intel_pt_walk_psb(decoder); + if (err) + return err; + if (decoder->ip) { + /* Do not have a sample */ + decoder->state.type = 0; + return 0; + } + break; + + case INTEL_PT_TNT: + case INTEL_PT_PSBEND: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + case INTEL_PT_PAD: + default: + break; + } + } +} + +static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) +{ + int err; + + 
intel_pt_log("Scanning for full IP\n"); + err = intel_pt_walk_to_ip(decoder); + if (err) + return err; + + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->overflow = false; + + decoder->state.from_ip = 0; + decoder->state.to_ip = decoder->ip; + intel_pt_log_to("Setting IP", decoder->ip); + + return 0; +} + +static int intel_pt_part_psb(struct intel_pt_decoder *decoder) +{ + const unsigned char *end = decoder->buf + decoder->len; + size_t i; + + for (i = INTEL_PT_PSB_LEN - 1; i; i--) { + if (i > decoder->len) + continue; + if (!memcmp(end - i, INTEL_PT_PSB_STR, i)) + return i; + } + return 0; +} + +static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb) +{ + size_t rest_psb = INTEL_PT_PSB_LEN - part_psb; + const char *psb = INTEL_PT_PSB_STR; + + if (rest_psb > decoder->len || + memcmp(decoder->buf, psb + part_psb, rest_psb)) + return 0; + + return rest_psb; +} + +static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder, + int part_psb) +{ + int rest_psb, ret; + + decoder->pos += decoder->len; + decoder->len = 0; + + ret = intel_pt_get_next_data(decoder); + if (ret) + return ret; + + rest_psb = intel_pt_rest_psb(decoder, part_psb); + if (!rest_psb) + return 0; + + decoder->pos -= part_psb; + decoder->next_buf = decoder->buf + rest_psb; + decoder->next_len = decoder->len - rest_psb; + memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN); + decoder->buf = decoder->temp_buf; + decoder->len = INTEL_PT_PSB_LEN; + + return 0; +} + +static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder) +{ + unsigned char *next; + int ret; + + intel_pt_log("Scanning for PSB\n"); + while (1) { + if (!decoder->len) { + ret = intel_pt_get_next_data(decoder); + if (ret) + return ret; + } + + next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR, + INTEL_PT_PSB_LEN); + if (!next) { + int part_psb; + + part_psb = intel_pt_part_psb(decoder); + if (part_psb) { + ret = intel_pt_get_split_psb(decoder, part_psb); + if (ret) + return ret; 
+ } else { + decoder->pos += decoder->len; + decoder->len = 0; + } + continue; + } + + decoder->pkt_step = next - decoder->buf; + return intel_pt_get_next_packet(decoder); + } +} + +static int intel_pt_sync(struct intel_pt_decoder *decoder) +{ + int err; + + decoder->pge = false; + decoder->continuous_period = false; + decoder->last_ip = 0; + decoder->ip = 0; + intel_pt_clear_stack(&decoder->stack); + + err = intel_pt_scan_for_psb(decoder); + if (err) + return err; + + decoder->pkt_state = INTEL_PT_STATE_NO_IP; + + err = intel_pt_walk_psb(decoder); + if (err) + return err; + + if (decoder->ip) { + decoder->state.type = 0; /* Do not have a sample */ + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + } else { + return intel_pt_sync_ip(decoder); + } + + return 0; +} + +static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t est = decoder->timestamp_insn_cnt << 1; + + if (!decoder->cbr || !decoder->max_non_turbo_ratio) + goto out; + + est *= decoder->max_non_turbo_ratio; + est /= decoder->cbr; +out: + return decoder->timestamp + est; +} + +const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) +{ + int err; + + do { + decoder->state.type = INTEL_PT_BRANCH; + decoder->state.flags = 0; + + switch (decoder->pkt_state) { + case INTEL_PT_STATE_NO_PSB: + err = intel_pt_sync(decoder); + break; + case INTEL_PT_STATE_NO_IP: + decoder->last_ip = 0; + /* Fall through */ + case INTEL_PT_STATE_ERR_RESYNC: + err = intel_pt_sync_ip(decoder); + break; + case INTEL_PT_STATE_IN_SYNC: + err = intel_pt_walk_trace(decoder); + break; + case INTEL_PT_STATE_TNT: + err = intel_pt_walk_tnt(decoder); + if (err == -EAGAIN) + err = intel_pt_walk_trace(decoder); + break; + case INTEL_PT_STATE_TIP: + case INTEL_PT_STATE_TIP_PGD: + err = intel_pt_walk_tip(decoder); + break; + case INTEL_PT_STATE_FUP: + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + err = intel_pt_walk_fup(decoder); + if (err == -EAGAIN) + err = intel_pt_walk_fup_tip(decoder); + 
else if (!err) + decoder->pkt_state = INTEL_PT_STATE_FUP; + break; + case INTEL_PT_STATE_FUP_NO_TIP: + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + err = intel_pt_walk_fup(decoder); + if (err == -EAGAIN) + err = intel_pt_walk_trace(decoder); + break; + default: + err = intel_pt_bug(decoder); + break; + } + } while (err == -ENOLINK); + + decoder->state.err = err ? intel_pt_ext_err(err) : 0; + decoder->state.timestamp = decoder->timestamp; + decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); + decoder->state.cr3 = decoder->cr3; + decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; + + if (err) + decoder->state.from_ip = decoder->ip; + + return &decoder->state; +} + +static bool intel_pt_at_psb(unsigned char *buf, size_t len) +{ + if (len < INTEL_PT_PSB_LEN) + return false; + return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR, + INTEL_PT_PSB_LEN); +} + +/** + * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet. + * @buf: pointer to buffer pointer + * @len: size of buffer + * + * Updates the buffer pointer to point to the start of the next PSB packet if + * there is one, otherwise the buffer pointer is unchanged. If @buf is updated, + * @len is adjusted accordingly. + * + * Return: %true if a PSB packet is found, %false otherwise. + */ +static bool intel_pt_next_psb(unsigned char **buf, size_t *len) +{ + unsigned char *next; + + next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN); + if (next) { + *len -= next - *buf; + *buf = next; + return true; + } + return false; +} + +/** + * intel_pt_step_psb - move buffer pointer to the start of the following PSB + * packet. + * @buf: pointer to buffer pointer + * @len: size of buffer + * + * Updates the buffer pointer to point to the start of the following PSB packet + * (skipping the PSB at @buf itself) if there is one, otherwise the buffer + * pointer is unchanged. If @buf is updated, @len is adjusted accordingly. 
+ * + * Return: %true if a PSB packet is found, %false otherwise. + */ +static bool intel_pt_step_psb(unsigned char **buf, size_t *len) +{ + unsigned char *next; + + if (!*len) + return false; + + next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN); + if (next) { + *len -= next - *buf; + *buf = next; + return true; + } + return false; +} + +/** + * intel_pt_last_psb - find the last PSB packet in a buffer. + * @buf: buffer + * @len: size of buffer + * + * This function finds the last PSB in a buffer. + * + * Return: A pointer to the last PSB in @buf if found, %NULL otherwise. + */ +static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len) +{ + const char *n = INTEL_PT_PSB_STR; + unsigned char *p; + size_t k; + + if (len < INTEL_PT_PSB_LEN) + return NULL; + + k = len - INTEL_PT_PSB_LEN + 1; + while (1) { + p = memrchr(buf, n[0], k); + if (!p) + return NULL; + if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1)) + return p; + k = p - buf; + if (!k) + return NULL; + } +} + +/** + * intel_pt_next_tsc - find and return next TSC. + * @buf: buffer + * @len: size of buffer + * @tsc: TSC value returned + * + * Find a TSC packet in @buf and return the TSC value. This function assumes + * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a + * PSBEND packet is found. + * + * Return: %true if TSC is found, false otherwise. + */ +static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc) +{ + struct intel_pt_pkt packet; + int ret; + + while (len) { + ret = intel_pt_get_packet(buf, len, &packet); + if (ret <= 0) + return false; + if (packet.type == INTEL_PT_TSC) { + *tsc = packet.payload; + return true; + } + if (packet.type == INTEL_PT_PSBEND) + return false; + buf += ret; + len -= ret; + } + return false; +} + +/** + * intel_pt_tsc_cmp - compare 7-byte TSCs. 
+ * @tsc1: first TSC to compare + * @tsc2: second TSC to compare + * + * This function compares 7-byte TSC values allowing for the possibility that + * TSC wrapped around. Generally it is not possible to know if TSC has wrapped + * around so for that purpose this function assumes the absolute difference is + * less than half the maximum difference. + * + * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is + * after @tsc2. + */ +static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) +{ + const uint64_t halfway = (1ULL << 55); + + if (tsc1 == tsc2) + return 0; + + if (tsc1 < tsc2) { + if (tsc2 - tsc1 < halfway) + return -1; + else + return 1; + } else { + if (tsc1 - tsc2 < halfway) + return 1; + else + return -1; + } +} + +/** + * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data + * using TSC. + * @buf_a: first buffer + * @len_a: size of first buffer + * @buf_b: second buffer + * @len_b: size of second buffer + * + * If the trace contains TSC we can look at the last TSC of @buf_a and the + * first TSC of @buf_b in order to determine if the buffers overlap, and then + * walk forward in @buf_b until a later TSC is found. A precondition is that + * @buf_a and @buf_b are positioned at a PSB. + * + * Return: A pointer into @buf_b from where non-overlapped data starts, or + * @buf_b + @len_b if there is no non-overlapped data. 
+ */ +static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, + size_t len_a, + unsigned char *buf_b, + size_t len_b) +{ + uint64_t tsc_a, tsc_b; + unsigned char *p; + size_t len; + + p = intel_pt_last_psb(buf_a, len_a); + if (!p) + return buf_b; /* No PSB in buf_a => no overlap */ + + len = len_a - (p - buf_a); + if (!intel_pt_next_tsc(p, len, &tsc_a)) { + /* The last PSB+ in buf_a is incomplete, so go back one more */ + len_a -= len; + p = intel_pt_last_psb(buf_a, len_a); + if (!p) + return buf_b; /* No full PSB+ => assume no overlap */ + len = len_a - (p - buf_a); + if (!intel_pt_next_tsc(p, len, &tsc_a)) + return buf_b; /* No TSC in buf_a => assume no overlap */ + } + + while (1) { + /* Ignore PSB+ with no TSC */ + if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) && + intel_pt_tsc_cmp(tsc_a, tsc_b) < 0) + return buf_b; /* tsc_a < tsc_b => no overlap */ + + if (!intel_pt_step_psb(&buf_b, &len_b)) + return buf_b + len_b; /* No PSB in buf_b => no data */ + } +} + +/** + * intel_pt_find_overlap - determine start of non-overlapped trace data. + * @buf_a: first buffer + * @len_a: size of first buffer + * @buf_b: second buffer + * @len_b: size of second buffer + * @have_tsc: can use TSC packets to detect overlap + * + * When trace samples or snapshots are recorded there is the possibility that + * the data overlaps. Note that, for the purposes of decoding, data is only + * useful if it begins with a PSB packet. + * + * Return: A pointer into @buf_b from where non-overlapped data starts, or + * @buf_b + @len_b if there is no non-overlapped data. 
+ */ +unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, + unsigned char *buf_b, size_t len_b, + bool have_tsc) +{ + unsigned char *found; + + /* Buffer 'b' must start at PSB so throw away everything before that */ + if (!intel_pt_next_psb(&buf_b, &len_b)) + return buf_b + len_b; /* No PSB */ + + if (!intel_pt_next_psb(&buf_a, &len_a)) + return buf_b; /* No overlap */ + + if (have_tsc) { + found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b); + if (found) + return found; + } + + /* + * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes, + * we can ignore the first part of buffer 'a'. + */ + while (len_b < len_a) { + if (!intel_pt_step_psb(&buf_a, &len_a)) + return buf_b; /* No overlap */ + } + + /* Now len_b >= len_a */ + if (len_b > len_a) { + /* The leftover buffer 'b' must start at a PSB */ + while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) { + if (!intel_pt_step_psb(&buf_a, &len_a)) + return buf_b; /* No overlap */ + } + } + + while (1) { + /* Potential overlap so check the bytes */ + found = memmem(buf_a, len_a, buf_b, len_a); + if (found) + return buf_b + len_a; + + /* Try again at next PSB in buffer 'a' */ + if (!intel_pt_step_psb(&buf_a, &len_a)) + return buf_b; /* No overlap */ + + /* The leftover buffer 'b' must start at a PSB */ + while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) { + if (!intel_pt_step_psb(&buf_a, &len_a)) + return buf_b; /* No overlap */ + } + } +} diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h new file mode 100644 index 000000000000..02c38fec1c37 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -0,0 +1,109 @@ +/* + * intel_pt_decoder.h: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef INCLUDE__INTEL_PT_DECODER_H__ +#define INCLUDE__INTEL_PT_DECODER_H__ + +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> + +#include "intel-pt-insn-decoder.h" + +#define INTEL_PT_IN_TX (1 << 0) +#define INTEL_PT_ABORT_TX (1 << 1) +#define INTEL_PT_ASYNC (1 << 2) + +enum intel_pt_sample_type { + INTEL_PT_BRANCH = 1 << 0, + INTEL_PT_INSTRUCTION = 1 << 1, + INTEL_PT_TRANSACTION = 1 << 2, +}; + +enum intel_pt_period_type { + INTEL_PT_PERIOD_NONE, + INTEL_PT_PERIOD_INSTRUCTIONS, + INTEL_PT_PERIOD_TICKS, + INTEL_PT_PERIOD_MTC, +}; + +enum { + INTEL_PT_ERR_NOMEM = 1, + INTEL_PT_ERR_INTERN, + INTEL_PT_ERR_BADPKT, + INTEL_PT_ERR_NODATA, + INTEL_PT_ERR_NOINSN, + INTEL_PT_ERR_MISMAT, + INTEL_PT_ERR_OVR, + INTEL_PT_ERR_LOST, + INTEL_PT_ERR_UNK, + INTEL_PT_ERR_NELOOP, + INTEL_PT_ERR_MAX, +}; + +struct intel_pt_state { + enum intel_pt_sample_type type; + int err; + uint64_t from_ip; + uint64_t to_ip; + uint64_t cr3; + uint64_t tot_insn_cnt; + uint64_t timestamp; + uint64_t est_timestamp; + uint64_t trace_nr; + uint32_t flags; + enum intel_pt_insn_op insn_op; + int insn_len; +}; + +struct intel_pt_insn; + +struct intel_pt_buffer { + const unsigned char *buf; + size_t len; + bool consecutive; + uint64_t ref_timestamp; + uint64_t trace_nr; +}; + +struct intel_pt_params { + int (*get_trace)(struct intel_pt_buffer *buffer, void *data); + int (*walk_insn)(struct intel_pt_insn *intel_pt_insn, + uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, + uint64_t max_insn_cnt, void *data); + void *data; + 
bool return_compression; + uint64_t period; + enum intel_pt_period_type period_type; + unsigned max_non_turbo_ratio; + unsigned int mtc_period; + uint32_t tsc_ctc_ratio_n; + uint32_t tsc_ctc_ratio_d; +}; + +struct intel_pt_decoder; + +struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params); +void intel_pt_decoder_free(struct intel_pt_decoder *decoder); + +const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder); + +unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, + unsigned char *buf_b, size_t len_b, + bool have_tsc); + +int intel_pt__strerror(int code, char *buf, size_t buflen); + +#endif diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c new file mode 100644 index 000000000000..9e4eb8fcd559 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -0,0 +1,246 @@ +/* + * intel_pt_insn_decoder.c: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#include <stdio.h> +#include <string.h> +#include <endian.h> +#include <byteswap.h> + +#include "event.h" + +#include "insn.h" + +#include "inat.c" +#include "insn.c" + +#include "intel-pt-insn-decoder.h" + +/* Based on branch_type() from perf_event_intel_lbr.c */ +static void intel_pt_insn_decoder(struct insn *insn, + struct intel_pt_insn *intel_pt_insn) +{ + enum intel_pt_insn_op op = INTEL_PT_OP_OTHER; + enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH; + int ext; + + if (insn_is_avx(insn)) { + intel_pt_insn->op = INTEL_PT_OP_OTHER; + intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH; + intel_pt_insn->length = insn->length; + return; + } + + switch (insn->opcode.bytes[0]) { + case 0xf: + switch (insn->opcode.bytes[1]) { + case 0x05: /* syscall */ + case 0x34: /* sysenter */ + op = INTEL_PT_OP_SYSCALL; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0x07: /* sysret */ + case 0x35: /* sysexit */ + op = INTEL_PT_OP_SYSRET; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0x80 ... 0x8f: /* jcc */ + op = INTEL_PT_OP_JCC; + branch = INTEL_PT_BR_CONDITIONAL; + break; + default: + break; + } + break; + case 0x70 ... 0x7f: /* jcc */ + op = INTEL_PT_OP_JCC; + branch = INTEL_PT_BR_CONDITIONAL; + break; + case 0xc2: /* near ret */ + case 0xc3: /* near ret */ + case 0xca: /* far ret */ + case 0xcb: /* far ret */ + op = INTEL_PT_OP_RET; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xcf: /* iret */ + op = INTEL_PT_OP_IRET; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xcc ... 0xce: /* int */ + op = INTEL_PT_OP_INT; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xe8: /* call near rel */ + op = INTEL_PT_OP_CALL; + branch = INTEL_PT_BR_UNCONDITIONAL; + break; + case 0x9a: /* call far absolute */ + op = INTEL_PT_OP_CALL; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xe0 ... 
0xe2: /* loop */ + op = INTEL_PT_OP_LOOP; + branch = INTEL_PT_BR_CONDITIONAL; + break; + case 0xe3: /* jcc */ + op = INTEL_PT_OP_JCC; + branch = INTEL_PT_BR_CONDITIONAL; + break; + case 0xe9: /* jmp */ + case 0xeb: /* jmp */ + op = INTEL_PT_OP_JMP; + branch = INTEL_PT_BR_UNCONDITIONAL; + break; + case 0xea: /* far jmp */ + op = INTEL_PT_OP_JMP; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xff: /* call near absolute, call far absolute ind */ + ext = (insn->modrm.bytes[0] >> 3) & 0x7; + switch (ext) { + case 2: /* near ind call */ + case 3: /* far ind call */ + op = INTEL_PT_OP_CALL; + branch = INTEL_PT_BR_INDIRECT; + break; + case 4: + case 5: + op = INTEL_PT_OP_JMP; + branch = INTEL_PT_BR_INDIRECT; + break; + default: + break; + } + break; + default: + break; + } + + intel_pt_insn->op = op; + intel_pt_insn->branch = branch; + intel_pt_insn->length = insn->length; + + if (branch == INTEL_PT_BR_CONDITIONAL || + branch == INTEL_PT_BR_UNCONDITIONAL) { +#if __BYTE_ORDER == __BIG_ENDIAN + switch (insn->immediate.nbytes) { + case 1: + intel_pt_insn->rel = insn->immediate.value; + break; + case 2: + intel_pt_insn->rel = + bswap_16((short)insn->immediate.value); + break; + case 4: + intel_pt_insn->rel = bswap_32(insn->immediate.value); + break; + } +#else + intel_pt_insn->rel = insn->immediate.value; +#endif + } +} + +int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, + struct intel_pt_insn *intel_pt_insn) +{ + struct insn insn; + + insn_init(&insn, buf, len, x86_64); + insn_get_length(&insn); + if (!insn_complete(&insn) || insn.length > len) + return -1; + intel_pt_insn_decoder(&insn, intel_pt_insn); + if (insn.length < INTEL_PT_INSN_DBG_BUF_SZ) + memcpy(intel_pt_insn->buf, buf, insn.length); + else + memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_DBG_BUF_SZ); + return 0; +} + +const char *branch_name[] = { + [INTEL_PT_OP_OTHER] = "Other", + [INTEL_PT_OP_CALL] = "Call", + [INTEL_PT_OP_RET] = "Ret", + [INTEL_PT_OP_JCC] = "Jcc", + [INTEL_PT_OP_JMP] = 
"Jmp", + [INTEL_PT_OP_LOOP] = "Loop", + [INTEL_PT_OP_IRET] = "IRet", + [INTEL_PT_OP_INT] = "Int", + [INTEL_PT_OP_SYSCALL] = "Syscall", + [INTEL_PT_OP_SYSRET] = "Sysret", +}; + +const char *intel_pt_insn_name(enum intel_pt_insn_op op) +{ + return branch_name[op]; +} + +int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf, + size_t buf_len) +{ + switch (intel_pt_insn->branch) { + case INTEL_PT_BR_CONDITIONAL: + case INTEL_PT_BR_UNCONDITIONAL: + return snprintf(buf, buf_len, "%s %s%d", + intel_pt_insn_name(intel_pt_insn->op), + intel_pt_insn->rel > 0 ? "+" : "", + intel_pt_insn->rel); + case INTEL_PT_BR_NO_BRANCH: + case INTEL_PT_BR_INDIRECT: + return snprintf(buf, buf_len, "%s", + intel_pt_insn_name(intel_pt_insn->op)); + default: + break; + } + return 0; +} + +size_t intel_pt_insn_max_size(void) +{ + return MAX_INSN_SIZE; +} + +int intel_pt_insn_type(enum intel_pt_insn_op op) +{ + switch (op) { + case INTEL_PT_OP_OTHER: + return 0; + case INTEL_PT_OP_CALL: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL; + case INTEL_PT_OP_RET: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN; + case INTEL_PT_OP_JCC: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL; + case INTEL_PT_OP_JMP: + return PERF_IP_FLAG_BRANCH; + case INTEL_PT_OP_LOOP: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL; + case INTEL_PT_OP_IRET: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_INTERRUPT; + case INTEL_PT_OP_INT: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_INTERRUPT; + case INTEL_PT_OP_SYSCALL: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_SYSCALLRET; + case INTEL_PT_OP_SYSRET: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_SYSCALLRET; + default: + return 0; + } +} diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h new file mode 100644 index 000000000000..b0adbf37323e --- /dev/null +++ 
b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h @@ -0,0 +1,65 @@ +/* + * intel_pt_insn_decoder.h: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef INCLUDE__INTEL_PT_INSN_DECODER_H__ +#define INCLUDE__INTEL_PT_INSN_DECODER_H__ + +#include <stddef.h> +#include <stdint.h> + +#define INTEL_PT_INSN_DESC_MAX 32 /* size of the description buffer that intel-pt-log.c passes to intel_pt_insn_desc() */ +#define INTEL_PT_INSN_DBG_BUF_SZ 16 /* capacity of intel_pt_insn.buf; intel_pt_get_insn() copies at most this many instruction bytes */ + +enum intel_pt_insn_op { /* instruction class chosen from the opcode by intel_pt_get_insn() */ + INTEL_PT_OP_OTHER, /* default: no branch opcode matched; also used for every AVX instruction */ + INTEL_PT_OP_CALL, /* opcodes 0xe8, 0x9a and 0xff /2, /3 */ + INTEL_PT_OP_RET, /* opcodes 0xc2, 0xc3, 0xca, 0xcb */ + INTEL_PT_OP_JCC, /* opcodes 0x70-0x7f, 0x0f 0x80-0x8f and 0xe3 */ + INTEL_PT_OP_JMP, /* opcodes 0xe9, 0xeb, 0xea and 0xff /4, /5 */ + INTEL_PT_OP_LOOP, /* opcodes 0xe0-0xe2 */ + INTEL_PT_OP_IRET, /* opcode 0xcf */ + INTEL_PT_OP_INT, /* opcodes 0xcc-0xce */ + INTEL_PT_OP_SYSCALL, /* 0x0f 0x05 (syscall) and 0x0f 0x34 (sysenter) */ + INTEL_PT_OP_SYSRET, /* 0x0f 0x07 (sysret) and 0x0f 0x35 (sysexit) */ +}; + +enum intel_pt_insn_branch { /* how the branch target is obtained, as classified by intel_pt_get_insn() */ + INTEL_PT_BR_NO_BRANCH, /* default when the opcode is not a recognised branch */ + INTEL_PT_BR_INDIRECT, /* ret, iret, int, syscall and sysret classes, far call and far jmp, and the 0xff call and jmp forms */ + INTEL_PT_BR_CONDITIONAL, /* jcc and loop; intel_pt_insn.rel is filled in */ + INTEL_PT_BR_UNCONDITIONAL, /* 0xe8 call and 0xe9, 0xeb jmp; intel_pt_insn.rel is filled in */ +}; + +struct intel_pt_insn { /* result of decoding one instruction */ + enum intel_pt_insn_op op; /* instruction class */ + enum intel_pt_insn_branch branch; /* branch kind */ + int length; /* instruction length, copied from the x86 instruction decoder's struct insn */ + int32_t rel; /* displacement taken from the instruction immediate; written only for CONDITIONAL and UNCONDITIONAL branches */ + unsigned char buf[INTEL_PT_INSN_DBG_BUF_SZ]; /* leading bytes of the instruction, at most INTEL_PT_INSN_DBG_BUF_SZ of them */ +}; + +int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, + struct intel_pt_insn *intel_pt_insn); /* decodes one instruction from buf; returns 0 on success, -1 if the instruction is incomplete or longer than len */ + +const char *intel_pt_insn_name(enum intel_pt_insn_op op); /* returns the printable name for op, e.g. "Call"; op is used as an unchecked table index */ + +int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf, + size_t buf_len); /* formats the op name, followed by the signed displacement for conditional and unconditional branches, into buf; returns the snprintf() result, or 0 for an unknown branch kind */ + +size_t intel_pt_insn_max_size(void); /* returns MAX_INSN_SIZE */ + +int intel_pt_insn_type(enum intel_pt_insn_op op); /* maps op to a mask of PERF_IP_FLAG_xxx bits; 0 for INTEL_PT_OP_OTHER or an unknown op */ + +#endif diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c new file mode 100644 index 000000000000..d09c7d9f9050 --- /dev/null +++
b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -0,0 +1,155 @@ +/* + * intel_pt_log.c: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <stdio.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdarg.h> +#include <stdbool.h> +#include <string.h> + +#include "intel-pt-log.h" +#include "intel-pt-insn-decoder.h" + +#include "intel-pt-pkt-decoder.h" + +#define MAX_LOG_NAME 256 + +static FILE *f; +static char log_name[MAX_LOG_NAME]; +static bool enable_logging; + +void intel_pt_log_enable(void) +{ /* only sets the flag; the file itself is opened lazily by intel_pt_log_open() */ + enable_logging = true; +} + +void intel_pt_log_disable(void) +{ /* flushes the log if it is open and stops further logging; the file stays open */ + if (f) + fflush(f); + enable_logging = false; +} + +void intel_pt_log_set_name(const char *name) +{ /* sets the log file name to name with ".log" appended */ + /* Keep at most MAX_LOG_NAME - 5 bytes of name so that ".log" and the NUL always fit. The former strncpy()/strcat() pair left the buffer unterminated for names that long, so a repeated call could append past the end of log_name. */ + snprintf(log_name, sizeof(log_name), "%.*s.log", MAX_LOG_NAME - 5, name); +} + +static void intel_pt_print_data(const unsigned char *buf, int len, uint64_t pos, + int indent) +{ /* prints the indent, pos in hex, then len bytes of buf in hex, padded out to 16 byte columns */ + int i; + + for (i = 0; i < indent; i++) + fprintf(f, " "); + + fprintf(f, " %08" PRIx64 ": ", pos); + for (i = 0; i < len; i++) + fprintf(f, " %02x", buf[i]); + for (; i < 16; i++) + fprintf(f, " "); + fprintf(f, " "); +} + +static void intel_pt_print_no_data(uint64_t pos, int indent) +{ /* same layout as intel_pt_print_data() but with all 16 byte columns left blank */ + int i; + + for (i = 0; i < indent; i++) + fprintf(f, " "); + + fprintf(f, " %08" PRIx64 ": ", pos); + for (i = 0; i < 16; i++) + fprintf(f, " "); + fprintf(f, " "); +} + +static int intel_pt_log_open(void) +{ + if (!enable_logging) + return -1; + + if (f) + return 0; + + if (!log_name[0]) + return -1; + + f = fopen(log_name, "w+"); + if
(!f) { + enable_logging = false; + return -1; + } + + return 0; +} + +void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, + uint64_t pos, const unsigned char *buf) +{ + char desc[INTEL_PT_PKT_DESC_MAX]; + + if (intel_pt_log_open()) + return; + + intel_pt_print_data(buf, pkt_len, pos, 0); + intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX); + fprintf(f, "%s\n", desc); +} + +void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip) +{ + char desc[INTEL_PT_INSN_DESC_MAX]; + size_t len = intel_pt_insn->length; + + if (intel_pt_log_open()) + return; + + if (len > INTEL_PT_INSN_DBG_BUF_SZ) + len = INTEL_PT_INSN_DBG_BUF_SZ; + intel_pt_print_data(intel_pt_insn->buf, len, ip, 8); + if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0) + fprintf(f, "%s\n", desc); + else + fprintf(f, "Bad instruction!\n"); +} + +void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip) +{ + char desc[INTEL_PT_INSN_DESC_MAX]; + + if (intel_pt_log_open()) + return; + + intel_pt_print_no_data(ip, 8); + if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0) + fprintf(f, "%s\n", desc); + else + fprintf(f, "Bad instruction!\n"); +} + +void intel_pt_log(const char *fmt, ...) +{ + va_list args; + + if (intel_pt_log_open()) + return; + + va_start(args, fmt); + vfprintf(f, fmt, args); + va_end(args); +} diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h new file mode 100644 index 000000000000..db3942f83677 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -0,0 +1,52 @@ +/* + * intel_pt_log.h: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef INCLUDE__INTEL_PT_LOG_H__ +#define INCLUDE__INTEL_PT_LOG_H__ + +#include <stdint.h> +#include <inttypes.h> + +struct intel_pt_pkt; + +void intel_pt_log_enable(void); +void intel_pt_log_disable(void); +void intel_pt_log_set_name(const char *name); + +void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, + uint64_t pos, const unsigned char *buf); + +struct intel_pt_insn; + +void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip); +void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, + uint64_t ip); + +__attribute__((format(printf, 1, 2))) +void intel_pt_log(const char *fmt, ...); + +#define x64_fmt "0x%" PRIx64 + +static inline void intel_pt_log_at(const char *msg, uint64_t u) +{ + intel_pt_log("%s at " x64_fmt "\n", msg, u); +} + +static inline void intel_pt_log_to(const char *msg, uint64_t u) +{ + intel_pt_log("%s to " x64_fmt "\n", msg, u); +} + +#endif diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c new file mode 100644 index 000000000000..b1257c816310 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -0,0 +1,518 @@ +/* + * intel_pt_pkt_decoder.c: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+
+#include "intel-pt-pkt-decoder.h"
+
+#define BIT(n)		(1 << (n))
+
+#define BIT63		((uint64_t)1 << 63)
+
+/* PIP packets report their NR bit in the top bit of the decoded payload. */
+#define NR_FLAG		BIT63
+
+/*
+ * Trace data is little-endian.  On little-endian hosts the conversion
+ * macros expand to nothing, so le16_to_cpu(x) is just (x).
+ */
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define le16_to_cpu bswap_16
+#define le32_to_cpu bswap_32
+#define le64_to_cpu bswap_64
+#define memcpy_le64(d, s, n) do { \
+	memcpy((d), (s), (n));    \
+	*(d) = le64_to_cpu(*(d)); \
+} while (0)
+#else
+#define le16_to_cpu
+#define le32_to_cpu
+#define le64_to_cpu
+#define memcpy_le64 memcpy
+#endif
+
+static const char * const packet_name[] = {
+	[INTEL_PT_BAD]		= "Bad Packet!",
+	[INTEL_PT_PAD]		= "PAD",
+	[INTEL_PT_TNT]		= "TNT",
+	[INTEL_PT_TIP_PGD]	= "TIP.PGD",
+	[INTEL_PT_TIP_PGE]	= "TIP.PGE",
+	[INTEL_PT_TSC]		= "TSC",
+	[INTEL_PT_TMA]		= "TMA",
+	[INTEL_PT_MODE_EXEC]	= "MODE.Exec",
+	[INTEL_PT_MODE_TSX]	= "MODE.TSX",
+	[INTEL_PT_MTC]		= "MTC",
+	[INTEL_PT_TIP]		= "TIP",
+	[INTEL_PT_FUP]		= "FUP",
+	[INTEL_PT_CYC]		= "CYC",
+	[INTEL_PT_VMCS]		= "VMCS",
+	[INTEL_PT_PSB]		= "PSB",
+	[INTEL_PT_PSBEND]	= "PSBEND",
+	[INTEL_PT_CBR]		= "CBR",
+	[INTEL_PT_TRACESTOP]	= "TraceSTOP",
+	[INTEL_PT_PIP]		= "PIP",
+	[INTEL_PT_OVF]		= "OVF",
+	[INTEL_PT_MNT]		= "MNT",
+};
+
+/*
+ * Return the printable name of a packet type.  A value outside the name
+ * table yields the INTEL_PT_BAD name instead of reading past the array.
+ */
+const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
+{
+	if ((size_t)type >= sizeof(packet_name) / sizeof(packet_name[0]))
+		return packet_name[INTEL_PT_BAD];
+
+	return packet_name[type];
+}
+
+/*
+ * Long TNT: 8 bytes.  The payload is shifted left until the stop bit reaches
+ * bit 63 (at most 47 shifts); count is what remains of 47 and the stored
+ * payload has the stop bit shifted out.  Returns 8 or
+ * INTEL_PT_NEED_MORE_BYTES.
+ */
+static int intel_pt_get_long_tnt(const unsigned char *buf, size_t len,
+				 struct intel_pt_pkt *packet)
+{
+	uint64_t payload;
+	int count;
+
+	if (len < 8)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	/* buf has no alignment guarantee: copy instead of casting the pointer */
+	memcpy_le64(&payload, buf, 8);
+
+	for (count = 47; count; count--) {
+		if (payload & BIT63)
+			break;
+		payload <<= 1;
+	}
+
+	packet->type = INTEL_PT_TNT;
+	packet->count = count;
+	packet->payload = payload << 1;
+	return 8;
+}
+
+/*
+ * PIP: 8 bytes.  Six payload bytes follow the 2-byte header; bit 0 becomes
+ * NR_FLAG and the remaining bits, shifted right by one, form the payload.
+ */
+static int intel_pt_get_pip(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	uint64_t payload = 0;
+
+	if (len < 8)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	packet->type = INTEL_PT_PIP;
+	memcpy_le64(&payload, buf + 2, 6);
+	packet->payload = payload >> 1;
+	if (payload & 1)
+		packet->payload |= NR_FLAG;
+
+	return 8;
+}
+
+/* TraceStop: 2-byte packet without payload. */
+static int intel_pt_get_tracestop(struct intel_pt_pkt *packet)
+{
+	packet->type = INTEL_PT_TRACESTOP;
+	return 2;
+}
+
+/* CBR: 4 bytes, the payload is the byte that follows the 2-byte header. */
+static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len < 4)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_CBR;
+	packet->payload = buf[2];
+	return 4;
+}
+
+/* VMCS: 2-byte header followed by (52 - 5) >> 3 = 5 payload bytes. */
+static int intel_pt_get_vmcs(const unsigned char *buf, size_t len,
+			     struct intel_pt_pkt *packet)
+{
+	unsigned int count = (52 - 5) >> 3;
+
+	/* Cannot trigger with the constant above; guards memcpy_le64() below. */
+	if (count < 1 || count > 7)
+		return INTEL_PT_BAD_PACKET;
+
+	if (len < count + 2)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	packet->type = INTEL_PT_VMCS;
+	packet->count = count;
+	memcpy_le64(&packet->payload, buf + 2, count);
+
+	return count + 2;
+}
+
+/* OVF: 2-byte packet without payload. */
+static int intel_pt_get_ovf(struct intel_pt_pkt *packet)
+{
+	packet->type = INTEL_PT_OVF;
+	return 2;
+}
+
+/*
+ * PSB: 16 bytes made of the pair 0x02 0x82 repeated.  The caller has already
+ * matched the first pair, so only bytes 2..15 are verified here.
+ */
+static int intel_pt_get_psb(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	int i;
+
+	if (len < 16)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	for (i = 2; i < 16; i += 2) {
+		if (buf[i] != 2 || buf[i + 1] != 0x82)
+			return INTEL_PT_BAD_PACKET;
+	}
+
+	packet->type = INTEL_PT_PSB;
+	return 16;
+}
+
+/* PSBEND: 2-byte packet without payload. */
+static int intel_pt_get_psbend(struct intel_pt_pkt *packet)
+{
+	packet->type = INTEL_PT_PSBEND;
+	return 2;
+}
+
+/*
+ * TMA: 7 bytes.  payload is the 16-bit value in bytes 2-3, count is the
+ * 9-bit value from byte 5 and bit 0 of byte 6.
+ */
+static int intel_pt_get_tma(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len < 7)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	packet->type = INTEL_PT_TMA;
+	packet->payload = buf[2] | (buf[3] << 8);
+	packet->count = buf[5] | ((buf[6] & BIT(0)) << 8);
+	return 7;
+}
+
+/* PAD: a single zero byte. */
+static int intel_pt_get_pad(struct intel_pt_pkt *packet)
+{
+	packet->type = INTEL_PT_PAD;
+	return 1;
+}
+
+/* MNT: 3-byte header followed by an 8-byte payload, 11 bytes in total. */
+static int intel_pt_get_mnt(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len < 11)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_MNT;
+	memcpy_le64(&packet->payload, buf + 3, 8);
+	return 11;
+}
+
+/* Packets with a 3-byte header (0x02 0xC3 xx); only MNT (0x88) is known. */
+static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
+			      struct intel_pt_pkt *packet)
+{
+	if (len < 3)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	switch (buf[2]) {
+	case 0x88: /* MNT */
+		return intel_pt_get_mnt(buf, len, packet);
+	default:
+		return INTEL_PT_BAD_PACKET;
+	}
+}
+
+/* Extended packets: first byte 0x02, the second byte selects the type. */
+static int intel_pt_get_ext(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len < 2)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	switch (buf[1]) {
+	case 0xa3: /* Long TNT */
+		return intel_pt_get_long_tnt(buf, len, packet);
+	case 0x43: /* PIP */
+		return intel_pt_get_pip(buf, len, packet);
+	case 0x83: /* TraceStop */
+		return intel_pt_get_tracestop(packet);
+	case 0x03: /* CBR */
+		return intel_pt_get_cbr(buf, len, packet);
+	case 0xc8: /* VMCS */
+		return intel_pt_get_vmcs(buf, len, packet);
+	case 0xf3: /* OVF */
+		return intel_pt_get_ovf(packet);
+	case 0x82: /* PSB */
+		return intel_pt_get_psb(buf, len, packet);
+	case 0x23: /* PSBEND */
+		return intel_pt_get_psbend(packet);
+	case 0x73: /* TMA */
+		return intel_pt_get_tma(buf, len, packet);
+	case 0xC3: /* 3-byte header */
+		return intel_pt_get_3byte(buf, len, packet);
+	default:
+		return INTEL_PT_BAD_PACKET;
+	}
+}
+
+/*
+ * Short TNT: 1 byte.  The byte is shifted left until the stop bit reaches
+ * bit 7 (at most 6 shifts); shifting by 57 then places the first TNT bit at
+ * bit 63 of the payload and drops the stop bit.
+ */
+static int intel_pt_get_short_tnt(unsigned int byte,
+				  struct intel_pt_pkt *packet)
+{
+	int count;
+
+	for (count = 6; count; count--) {
+		if (byte & BIT(7))
+			break;
+		byte <<= 1;
+	}
+
+	packet->type = INTEL_PT_TNT;
+	packet->count = count;
+	packet->payload = (uint64_t)byte << 57;
+
+	return 1;
+}
+
+/*
+ * CYC: variable length.  The first byte carries 5 payload bits (bits 3-7)
+ * and an extension flag in bit 2; each further byte carries 7 payload bits
+ * (bits 1-7) and an extension flag in bit 0.  Returns the number of bytes
+ * consumed, INTEL_PT_NEED_MORE_BYTES or INTEL_PT_BAD_PACKET.
+ */
+static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf,
+			    size_t len, struct intel_pt_pkt *packet)
+{
+	unsigned int offs = 1, shift;
+	uint64_t payload = byte >> 3;
+
+	byte >>= 2;
+	len -= 1;
+	for (shift = 5; byte & 1; shift += 7) {
+		if (offs > 9)
+			return INTEL_PT_BAD_PACKET;
+		if (len < offs)
+			return INTEL_PT_NEED_MORE_BYTES;
+		byte = buf[offs++];
+		/*
+		 * shift reaches 61, so widen before shifting: a shift of an
+		 * unsigned int by 32 or more is undefined and loses bits.
+		 */
+		payload |= ((uint64_t)byte >> 1) << shift;
+	}
+
+	packet->type = INTEL_PT_CYC;
+	packet->payload = payload;
+	return offs;
+}
+
+/*
+ * TIP, TIP.PGE, TIP.PGD and FUP share one layout: bits 5-7 of the header
+ * select how many IP bytes follow (0, 2, 4 or 6).  count is set to that
+ * number; the return value is count + 1.
+ */
+static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte,
+			   const unsigned char *buf, size_t len,
+			   struct intel_pt_pkt *packet)
+{
+	uint16_t ip16;
+	uint32_t ip32;
+
+	switch (byte >> 5) {
+	case 0:
+		packet->count = 0;
+		break;
+	case 1:
+		if (len < 3)
+			return INTEL_PT_NEED_MORE_BYTES;
+		packet->count = 2;
+		/* buf + 1 is not aligned: copy instead of casting the pointer */
+		memcpy(&ip16, buf + 1, sizeof(ip16));
+		packet->payload = le16_to_cpu(ip16);
+		break;
+	case 2:
+		if (len < 5)
+			return INTEL_PT_NEED_MORE_BYTES;
+		packet->count = 4;
+		memcpy(&ip32, buf + 1, sizeof(ip32));
+		packet->payload = le32_to_cpu(ip32);
+		break;
+	case 3:
+	case 6:
+		if (len < 7)
+			return INTEL_PT_NEED_MORE_BYTES;
+		packet->count = 6;
+		memcpy_le64(&packet->payload, buf + 1, 6);
+		break;
+	default:
+		return INTEL_PT_BAD_PACKET;
+	}
+
+	packet->type = type;
+
+	return packet->count + 1;
+}
+
+/*
+ * MODE: 2 bytes.  Bits 5-7 of the second byte select the leaf: 0 is
+ * MODE.Exec (payload 16, 64 or 32), 1 is MODE.TSX (payload is the low two
+ * bits, the value 3 being invalid).
+ */
+static int intel_pt_get_mode(const unsigned char *buf, size_t len,
+			     struct intel_pt_pkt *packet)
+{
+	if (len < 2)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	switch (buf[1] >> 5) {
+	case 0:
+		packet->type = INTEL_PT_MODE_EXEC;
+		switch (buf[1] & 3) {
+		case 0:
+			packet->payload = 16;
+			break;
+		case 1:
+			packet->payload = 64;
+			break;
+		case 2:
+			packet->payload = 32;
+			break;
+		default:
+			return INTEL_PT_BAD_PACKET;
+		}
+		break;
+	case 1:
+		packet->type = INTEL_PT_MODE_TSX;
+		if ((buf[1] & 3) == 3)
+			return INTEL_PT_BAD_PACKET;
+		packet->payload = buf[1] & 3;
+		break;
+	default:
+		return INTEL_PT_BAD_PACKET;
+	}
+
+	return 2;
+}
+
+/* TSC: 1-byte header followed by a 7-byte payload. */
+static int intel_pt_get_tsc(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len < 8)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_TSC;
+	memcpy_le64(&packet->payload, buf + 1, 7);
+	return 8;
+}
+
+/* MTC: 1-byte header followed by a 1-byte payload. */
+static int intel_pt_get_mtc(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len < 2)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_MTC;
+	packet->payload = buf[1];
+	return 2;
+}
+
+/*
+ * Decode the packet at the start of buf into *packet, which is zeroed
+ * first.  Returns the packet length in bytes, INTEL_PT_NEED_MORE_BYTES when
+ * buf ends before the packet does, or INTEL_PT_BAD_PACKET.
+ */
+static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
+				  struct intel_pt_pkt *packet)
+{
+	unsigned int byte;
+
+	memset(packet, 0, sizeof(struct intel_pt_pkt));
+
+	if (!len)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	byte = buf[0];
+	/* Bit 0 clear: PAD (0x00), an extended packet (0x02) or a short TNT. */
+	if (!(byte & BIT(0))) {
+		if (byte == 0)
+			return intel_pt_get_pad(packet);
+		if (byte == 2)
+			return intel_pt_get_ext(buf, len, packet);
+		return intel_pt_get_short_tnt(byte, packet);
+	}
+
+	/* Bits 0 and 1 both set: CYC. */
+	if ((byte & 2))
+		return intel_pt_get_cyc(byte, buf, len, packet);
+
+	/* The low five bits select the packet; bits 5-7 belong to it. */
+	switch (byte & 0x1f) {
+	case 0x0D:
+		return intel_pt_get_ip(INTEL_PT_TIP, byte, buf, len, packet);
+	case 0x11:
+		return intel_pt_get_ip(INTEL_PT_TIP_PGE, byte, buf, len,
+				       packet);
+	case 0x01:
+		return intel_pt_get_ip(INTEL_PT_TIP_PGD, byte, buf, len,
+				       packet);
+	case 0x1D:
+		return intel_pt_get_ip(INTEL_PT_FUP, byte, buf, len, packet);
+	case 0x19:
+		switch (byte) {
+		case 0x99:
+			return intel_pt_get_mode(buf, len, packet);
+		case 0x19:
+			return intel_pt_get_tsc(buf, len, packet);
+		case 0x59:
+			return intel_pt_get_mtc(buf, len, packet);
+		default:
+			return INTEL_PT_BAD_PACKET;
+		}
+	default:
+		return INTEL_PT_BAD_PACKET;
+	}
+}
+
+/*
+ * Decode one packet.  After a successful decode, zero bytes that directly
+ * follow the packet are counted as part of it until the returned length
+ * reaches 8 or the data runs out.  Errors are returned unchanged.
+ */
+int intel_pt_get_packet(const unsigned char *buf, size_t len,
+			struct intel_pt_pkt *packet)
+{
+	int ret;
+
+	ret = intel_pt_do_get_packet(buf, len, packet);
+	if (ret > 0) {
+		while (ret < 8 && len > (size_t)ret && !buf[ret])
+			ret += 1;
+	}
+	return ret;
+}
+
+/*
+ * Format a human-readable description of packet into buf (buf_len bytes).
+ * Returns what snprintf() returns: the length the full text needs, which
+ * is >= buf_len when the output was truncated, or a negative value on
+ * error.
+ */
+int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
+		      size_t buf_len)
+{
+	enum { TNT_MAX_BITS = 64 };	/* the payload holds 64 bits */
+	unsigned long long payload = packet->payload;
+	const char *name = intel_pt_pkt_name(packet->type);
+	int i, nr;
+
+	switch (packet->type) {
+	case INTEL_PT_BAD:
+	case INTEL_PT_PAD:
+	case INTEL_PT_PSB:
+	case INTEL_PT_PSBEND:
+	case INTEL_PT_TRACESTOP:
+	case INTEL_PT_OVF:
+		return snprintf(buf, buf_len, "%s", name);
+	case INTEL_PT_TNT: {
+		char tnt[TNT_MAX_BITS + 1];
+		int nbits = packet->count;
+
+		/*
+		 * Build the T/N string locally and emit it with a single
+		 * snprintf().  Appending piecewise let a truncated write
+		 * move buf past its end and wrap the remaining length.
+		 */
+		if (nbits < 0)
+			nbits = 0;
+		else if (nbits > TNT_MAX_BITS)
+			nbits = TNT_MAX_BITS;
+		for (i = 0; i < nbits; i++) {
+			tnt[i] = (payload & BIT63) ? 'T' : 'N';
+			payload <<= 1;
+		}
+		tnt[nbits] = '\0';
+		return snprintf(buf, buf_len, "%s %s (%d)", name, tnt,
+				packet->count);
+	}
+	case INTEL_PT_TIP_PGD:
+	case INTEL_PT_TIP_PGE:
+	case INTEL_PT_TIP:
+	case INTEL_PT_FUP:
+		if (!(packet->count))
+			return snprintf(buf, buf_len, "%s no ip", name);
+		/* fall through - with an ip, print it like any other payload */
+	case INTEL_PT_CYC:
+	case INTEL_PT_VMCS:
+	case INTEL_PT_MTC:
+	case INTEL_PT_MNT:
+	case INTEL_PT_CBR:
+	case INTEL_PT_TSC:
+		return snprintf(buf, buf_len, "%s 0x%llx", name, payload);
+	case INTEL_PT_TMA:
+		return snprintf(buf, buf_len, "%s CTC 0x%x FC 0x%x", name,
+				(unsigned)payload, packet->count);
+	case INTEL_PT_MODE_EXEC:
+		return snprintf(buf, buf_len, "%s %lld", name, payload);
+	case INTEL_PT_MODE_TSX:
+		return snprintf(buf, buf_len, "%s TXAbort:%u InTX:%u",
+				name, (unsigned)(payload >> 1) & 1,
+				(unsigned)payload & 1);
+	case INTEL_PT_PIP:
+		/* NR is carried in the top payload bit; print it separately. */
+		nr = packet->payload & NR_FLAG ? 1 : 0;
+		payload &= ~NR_FLAG;
+		return snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
+				name, payload, nr);
+	default:
+		break;
+	}
+	return snprintf(buf, buf_len, "%s 0x%llx (%d)",
+			name, payload, packet->count);
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
new file mode 100644
index 000000000000..781bb79883bd
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -0,0 +1,70 @@
+/*
+ * intel_pt_pkt_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+/* Public interface of the Intel PT packet decoder. */
+#ifndef INCLUDE__INTEL_PT_PKT_DECODER_H__
+#define INCLUDE__INTEL_PT_PKT_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+/* Size of the text buffer callers pass to intel_pt_pkt_desc(). */
+#define INTEL_PT_PKT_DESC_MAX	256
+/* Negative results of intel_pt_get_packet(); parenthesised so they expand safely inside expressions. */
+#define INTEL_PT_NEED_MORE_BYTES	(-1)	/* data ends before the packet does */
+#define INTEL_PT_BAD_PACKET		(-2)	/* bytes do not form a known packet */
+/* The PSB packet: the byte pair 0x02 0x82 repeated eight times. */
+#define INTEL_PT_PSB_STR		"\002\202\002\202\002\202\002\202" \
+					"\002\202\002\202\002\202\002\202"
+#define INTEL_PT_PSB_LEN		16
+/* NOTE(review): presumably the largest packet size; PSB is 16 bytes - confirm. */
+#define INTEL_PT_PKT_MAX_SZ		16
+/* Packet types. INTEL_PT_BAD is 0, so a packet zeroed by the decoder reads as bad until a type is set. */
+enum intel_pt_pkt_type {
+	INTEL_PT_BAD,
+	INTEL_PT_PAD,
+	INTEL_PT_TNT,
+	INTEL_PT_TIP_PGD,
+	INTEL_PT_TIP_PGE,
+	INTEL_PT_TSC,
+	INTEL_PT_TMA,
+	INTEL_PT_MODE_EXEC,
+	INTEL_PT_MODE_TSX,
+	INTEL_PT_MTC,
+	INTEL_PT_TIP,
+	INTEL_PT_FUP,
+	INTEL_PT_CYC,
+	INTEL_PT_VMCS,
+	INTEL_PT_PSB,
+	INTEL_PT_PSBEND,
+	INTEL_PT_CBR,
+	INTEL_PT_TRACESTOP,
+	INTEL_PT_PIP,
+	INTEL_PT_OVF,
+	INTEL_PT_MNT,
+};
+/* One decoded packet. */
+struct intel_pt_pkt {
+	enum intel_pt_pkt_type	type;
+	int			count;		/* type-specific: TNT bit count, IP/VMCS payload byte count, second TMA field */
+	uint64_t		payload;	/* type-specific value */
+};
+/* Printable name of a packet type. */
+const char *intel_pt_pkt_name(enum intel_pt_pkt_type type);
+/* Decode the packet at buf; returns the bytes consumed (> 0) or one of the negative codes above. */
+int intel_pt_get_packet(const unsigned char *buf, size_t len,
+			struct intel_pt_pkt *packet);
+/* Describe packet as text in buf; the return value is that of snprintf(). */
+int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
new file mode 100644
index 000000000000..816488c0b97e
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -0,0 +1,970 @@
+# x86 Opcode Maps
+#
+# This is (mostly) based on following documentations.
+# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C +# (#326018-047US, June 2013) +# +#<Opcode maps> +# Table: table-name +# Referrer: escaped-name +# AVXcode: avx-code +# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# (or) +# opcode: escape # escaped-name +# EndTable +# +#<group maps> +# GrpTable: GrpXXX +# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# EndTable +# +# AVX Superscripts +# (v): this opcode requires VEX prefix. +# (v1): this opcode only supports 128bit VEX. +# +# Last Prefix Superscripts +# - (66): the last prefix is 0x66 +# - (F3): the last prefix is 0xF3 +# - (F2): the last prefix is 0xF2 +# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) +# - (66&F2): Both 0x66 and 0xF2 prefixes are specified. + +Table: one byte opcode +Referrer: +AVXcode: +# 0x00 - 0x0f +00: ADD Eb,Gb +01: ADD Ev,Gv +02: ADD Gb,Eb +03: ADD Gv,Ev +04: ADD AL,Ib +05: ADD rAX,Iz +06: PUSH ES (i64) +07: POP ES (i64) +08: OR Eb,Gb +09: OR Ev,Gv +0a: OR Gb,Eb +0b: OR Gv,Ev +0c: OR AL,Ib +0d: OR rAX,Iz +0e: PUSH CS (i64) +0f: escape # 2-byte escape +# 0x10 - 0x1f +10: ADC Eb,Gb +11: ADC Ev,Gv +12: ADC Gb,Eb +13: ADC Gv,Ev +14: ADC AL,Ib +15: ADC rAX,Iz +16: PUSH SS (i64) +17: POP SS (i64) +18: SBB Eb,Gb +19: SBB Ev,Gv +1a: SBB Gb,Eb +1b: SBB Gv,Ev +1c: SBB AL,Ib +1d: SBB rAX,Iz +1e: PUSH DS (i64) +1f: POP DS (i64) +# 0x20 - 0x2f +20: AND Eb,Gb +21: AND Ev,Gv +22: AND Gb,Eb +23: AND Gv,Ev +24: AND AL,Ib +25: AND rAx,Iz +26: SEG=ES (Prefix) +27: DAA (i64) +28: SUB Eb,Gb +29: SUB Ev,Gv +2a: SUB Gb,Eb +2b: SUB Gv,Ev +2c: SUB AL,Ib +2d: SUB rAX,Iz +2e: SEG=CS (Prefix) +2f: DAS (i64) +# 0x30 - 0x3f +30: XOR Eb,Gb +31: XOR Ev,Gv +32: XOR Gb,Eb +33: XOR Gv,Ev +34: XOR AL,Ib +35: XOR rAX,Iz +36: SEG=SS (Prefix) +37: AAA (i64) +38: CMP Eb,Gb +39: CMP Ev,Gv +3a: CMP Gb,Eb +3b: CMP Gv,Ev +3c: CMP AL,Ib +3d: CMP rAX,Iz +3e: SEG=DS (Prefix) +3f: AAS (i64) +# 0x40 - 
0x4f +40: INC eAX (i64) | REX (o64) +41: INC eCX (i64) | REX.B (o64) +42: INC eDX (i64) | REX.X (o64) +43: INC eBX (i64) | REX.XB (o64) +44: INC eSP (i64) | REX.R (o64) +45: INC eBP (i64) | REX.RB (o64) +46: INC eSI (i64) | REX.RX (o64) +47: INC eDI (i64) | REX.RXB (o64) +48: DEC eAX (i64) | REX.W (o64) +49: DEC eCX (i64) | REX.WB (o64) +4a: DEC eDX (i64) | REX.WX (o64) +4b: DEC eBX (i64) | REX.WXB (o64) +4c: DEC eSP (i64) | REX.WR (o64) +4d: DEC eBP (i64) | REX.WRB (o64) +4e: DEC eSI (i64) | REX.WRX (o64) +4f: DEC eDI (i64) | REX.WRXB (o64) +# 0x50 - 0x5f +50: PUSH rAX/r8 (d64) +51: PUSH rCX/r9 (d64) +52: PUSH rDX/r10 (d64) +53: PUSH rBX/r11 (d64) +54: PUSH rSP/r12 (d64) +55: PUSH rBP/r13 (d64) +56: PUSH rSI/r14 (d64) +57: PUSH rDI/r15 (d64) +58: POP rAX/r8 (d64) +59: POP rCX/r9 (d64) +5a: POP rDX/r10 (d64) +5b: POP rBX/r11 (d64) +5c: POP rSP/r12 (d64) +5d: POP rBP/r13 (d64) +5e: POP rSI/r14 (d64) +5f: POP rDI/r15 (d64) +# 0x60 - 0x6f +60: PUSHA/PUSHAD (i64) +61: POPA/POPAD (i64) +62: BOUND Gv,Ma (i64) +63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) +64: SEG=FS (Prefix) +65: SEG=GS (Prefix) +66: Operand-Size (Prefix) +67: Address-Size (Prefix) +68: PUSH Iz (d64) +69: IMUL Gv,Ev,Iz +6a: PUSH Ib (d64) +6b: IMUL Gv,Ev,Ib +6c: INS/INSB Yb,DX +6d: INS/INSW/INSD Yz,DX +6e: OUTS/OUTSB DX,Xb +6f: OUTS/OUTSW/OUTSD DX,Xz +# 0x70 - 0x7f +70: JO Jb +71: JNO Jb +72: JB/JNAE/JC Jb +73: JNB/JAE/JNC Jb +74: JZ/JE Jb +75: JNZ/JNE Jb +76: JBE/JNA Jb +77: JNBE/JA Jb +78: JS Jb +79: JNS Jb +7a: JP/JPE Jb +7b: JNP/JPO Jb +7c: JL/JNGE Jb +7d: JNL/JGE Jb +7e: JLE/JNG Jb +7f: JNLE/JG Jb +# 0x80 - 0x8f +80: Grp1 Eb,Ib (1A) +81: Grp1 Ev,Iz (1A) +82: Grp1 Eb,Ib (1A),(i64) +83: Grp1 Ev,Ib (1A) +84: TEST Eb,Gb +85: TEST Ev,Gv +86: XCHG Eb,Gb +87: XCHG Ev,Gv +88: MOV Eb,Gb +89: MOV Ev,Gv +8a: MOV Gb,Eb +8b: MOV Gv,Ev +8c: MOV Ev,Sw +8d: LEA Gv,M +8e: MOV Sw,Ew +8f: Grp1A (1A) | POP Ev (d64) +# 0x90 - 0x9f +90: NOP | PAUSE (F3) | XCHG r8,rAX +91: XCHG rCX/r9,rAX +92: XCHG rDX/r10,rAX +93: XCHG 
rBX/r11,rAX +94: XCHG rSP/r12,rAX +95: XCHG rBP/r13,rAX +96: XCHG rSI/r14,rAX +97: XCHG rDI/r15,rAX +98: CBW/CWDE/CDQE +99: CWD/CDQ/CQO +9a: CALLF Ap (i64) +9b: FWAIT/WAIT +9c: PUSHF/D/Q Fv (d64) +9d: POPF/D/Q Fv (d64) +9e: SAHF +9f: LAHF +# 0xa0 - 0xaf +a0: MOV AL,Ob +a1: MOV rAX,Ov +a2: MOV Ob,AL +a3: MOV Ov,rAX +a4: MOVS/B Yb,Xb +a5: MOVS/W/D/Q Yv,Xv +a6: CMPS/B Xb,Yb +a7: CMPS/W/D Xv,Yv +a8: TEST AL,Ib +a9: TEST rAX,Iz +aa: STOS/B Yb,AL +ab: STOS/W/D/Q Yv,rAX +ac: LODS/B AL,Xb +ad: LODS/W/D/Q rAX,Xv +ae: SCAS/B AL,Yb +# Note: The May 2011 Intel manual shows Xv for the second parameter of the +# next instruction but Yv is correct +af: SCAS/W/D/Q rAX,Yv +# 0xb0 - 0xbf +b0: MOV AL/R8L,Ib +b1: MOV CL/R9L,Ib +b2: MOV DL/R10L,Ib +b3: MOV BL/R11L,Ib +b4: MOV AH/R12L,Ib +b5: MOV CH/R13L,Ib +b6: MOV DH/R14L,Ib +b7: MOV BH/R15L,Ib +b8: MOV rAX/r8,Iv +b9: MOV rCX/r9,Iv +ba: MOV rDX/r10,Iv +bb: MOV rBX/r11,Iv +bc: MOV rSP/r12,Iv +bd: MOV rBP/r13,Iv +be: MOV rSI/r14,Iv +bf: MOV rDI/r15,Iv +# 0xc0 - 0xcf +c0: Grp2 Eb,Ib (1A) +c1: Grp2 Ev,Ib (1A) +c2: RETN Iw (f64) +c3: RETN +c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) +c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) +c6: Grp11A Eb,Ib (1A) +c7: Grp11B Ev,Iz (1A) +c8: ENTER Iw,Ib +c9: LEAVE (d64) +ca: RETF Iw +cb: RETF +cc: INT3 +cd: INT Ib +ce: INTO (i64) +cf: IRET/D/Q +# 0xd0 - 0xdf +d0: Grp2 Eb,1 (1A) +d1: Grp2 Ev,1 (1A) +d2: Grp2 Eb,CL (1A) +d3: Grp2 Ev,CL (1A) +d4: AAM Ib (i64) +d5: AAD Ib (i64) +d6: +d7: XLAT/XLATB +d8: ESC +d9: ESC +da: ESC +db: ESC +dc: ESC +dd: ESC +de: ESC +df: ESC +# 0xe0 - 0xef +# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix +# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation +# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. 
+e0: LOOPNE/LOOPNZ Jb (f64) +e1: LOOPE/LOOPZ Jb (f64) +e2: LOOP Jb (f64) +e3: JrCXZ Jb (f64) +e4: IN AL,Ib +e5: IN eAX,Ib +e6: OUT Ib,AL +e7: OUT Ib,eAX +# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset +# in "near" jumps and calls is 16-bit. For CALL, +# push of return address is 16-bit wide, RSP is decremented by 2 +# but is not truncated to 16 bits, unlike RIP. +e8: CALL Jz (f64) +e9: JMP-near Jz (f64) +ea: JMP-far Ap (i64) +eb: JMP-short Jb (f64) +ec: IN AL,DX +ed: IN eAX,DX +ee: OUT DX,AL +ef: OUT DX,eAX +# 0xf0 - 0xff +f0: LOCK (Prefix) +f1: +f2: REPNE (Prefix) | XACQUIRE (Prefix) +f3: REP/REPE (Prefix) | XRELEASE (Prefix) +f4: HLT +f5: CMC +f6: Grp3_1 Eb (1A) +f7: Grp3_2 Ev (1A) +f8: CLC +f9: STC +fa: CLI +fb: STI +fc: CLD +fd: STD +fe: Grp4 (1A) +ff: Grp5 (1A) +EndTable + +Table: 2-byte opcode (0x0f) +Referrer: 2-byte escape +AVXcode: 1 +# 0x0f 0x00-0x0f +00: Grp6 (1A) +01: Grp7 (1A) +02: LAR Gv,Ew +03: LSL Gv,Ew +04: +05: SYSCALL (o64) +06: CLTS +07: SYSRET (o64) +08: INVD +09: WBINVD +0a: +0b: UD2 (1B) +0c: +# AMD's prefetch group. Intel supports prefetchw(/1) only. +0d: GrpP +0e: FEMMS +# 3DNow! uses the last imm byte as opcode extension. +0f: 3DNow! Pq,Qq,Ib +# 0x0f 0x10-0x1f +# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands +# but it actually has operands. And also, vmovss and vmovsd only accept 128bit. +# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form. 
+# Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming +# Reference A.1 +10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1) +11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1) +12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2) +13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1) +14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66) +15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66) +16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3) +17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) +18: Grp16 (1A) +19: +1a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv +1b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,GV,Gv +1c: +1d: +1e: +1f: NOP Ev +# 0x0f 0x20-0x2f +20: MOV Rd,Cd +21: MOV Rd,Dd +22: MOV Cd,Rd +23: MOV Dd,Rd +24: +25: +26: +27: +28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66) +29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66) +2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1) +2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66) +2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1) +2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1) +2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1) +2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1) +# 0x0f 0x30-0x3f +30: WRMSR +31: RDTSC +32: RDMSR +33: RDPMC +34: SYSENTER +35: SYSEXIT +36: +37: GETSEC +38: escape # 3-byte escape 1 +39: +3a: escape # 3-byte escape 2 +3b: +3c: +3d: +3e: +3f: +# 0x0f 0x40-0x4f +40: CMOVO Gv,Ev +41: CMOVNO Gv,Ev +42: CMOVB/C/NAE Gv,Ev +43: CMOVAE/NB/NC Gv,Ev +44: CMOVE/Z Gv,Ev +45: CMOVNE/NZ Gv,Ev +46: CMOVBE/NA Gv,Ev +47: CMOVA/NBE Gv,Ev +48: CMOVS Gv,Ev +49: CMOVNS Gv,Ev +4a: 
CMOVP/PE Gv,Ev +4b: CMOVNP/PO Gv,Ev +4c: CMOVL/NGE Gv,Ev +4d: CMOVNL/GE Gv,Ev +4e: CMOVLE/NG Gv,Ev +4f: CMOVNLE/G Gv,Ev +# 0x0f 0x50-0x5f +50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66) +51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1) +52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1) +53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1) +54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66) +55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66) +56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66) +57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66) +58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) +59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) +5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) +5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) +5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) +5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) +5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) +5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1) +# 0x0f 0x60-0x6f +60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1) +61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1) +62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1) +63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1) +64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1) +65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1) +66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1) +67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1) +68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1) +69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1) +6a: 
punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1) +6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1) +6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) +6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) +6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) +6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) +# 0x0f 0x70-0x7f +70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) +71: Grp12 (1A) +72: Grp13 (1A) +73: Grp14 (1A) +74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1) +75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1) +76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) +# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. +77: emms | vzeroupper | vzeroall +78: VMREAD Ey,Gy +79: VMWRITE Gy,Ey +7a: +7b: +7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) +7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) +7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) +7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) +# 0x0f 0x80-0x8f +# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 
+80: JO Jz (f64) +81: JNO Jz (f64) +82: JB/JC/JNAE Jz (f64) +83: JAE/JNB/JNC Jz (f64) +84: JE/JZ Jz (f64) +85: JNE/JNZ Jz (f64) +86: JBE/JNA Jz (f64) +87: JA/JNBE Jz (f64) +88: JS Jz (f64) +89: JNS Jz (f64) +8a: JP/JPE Jz (f64) +8b: JNP/JPO Jz (f64) +8c: JL/JNGE Jz (f64) +8d: JNL/JGE Jz (f64) +8e: JLE/JNG Jz (f64) +8f: JNLE/JG Jz (f64) +# 0x0f 0x90-0x9f +90: SETO Eb +91: SETNO Eb +92: SETB/C/NAE Eb +93: SETAE/NB/NC Eb +94: SETE/Z Eb +95: SETNE/NZ Eb +96: SETBE/NA Eb +97: SETA/NBE Eb +98: SETS Eb +99: SETNS Eb +9a: SETP/PE Eb +9b: SETNP/PO Eb +9c: SETL/NGE Eb +9d: SETNL/GE Eb +9e: SETLE/NG Eb +9f: SETNLE/G Eb +# 0x0f 0xa0-0xaf +a0: PUSH FS (d64) +a1: POP FS (d64) +a2: CPUID +a3: BT Ev,Gv +a4: SHLD Ev,Gv,Ib +a5: SHLD Ev,Gv,CL +a6: GrpPDLK +a7: GrpRNG +a8: PUSH GS (d64) +a9: POP GS (d64) +aa: RSM +ab: BTS Ev,Gv +ac: SHRD Ev,Gv,Ib +ad: SHRD Ev,Gv,CL +ae: Grp15 (1A),(1C) +af: IMUL Gv,Ev +# 0x0f 0xb0-0xbf +b0: CMPXCHG Eb,Gb +b1: CMPXCHG Ev,Gv +b2: LSS Gv,Mp +b3: BTR Ev,Gv +b4: LFS Gv,Mp +b5: LGS Gv,Mp +b6: MOVZX Gv,Eb +b7: MOVZX Gv,Ew +b8: JMPE (!F3) | POPCNT Gv,Ev (F3) +b9: Grp10 (1A) +ba: Grp8 Ev,Ib (1A) +bb: BTC Ev,Gv +bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3) +bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3) +be: MOVSX Gv,Eb +bf: MOVSX Gv,Ew +# 0x0f 0xc0-0xcf +c0: XADD Eb,Gb +c1: XADD Ev,Gv +c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1) +c3: movnti My,Gy +c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1) +c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1) +c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66) +c7: Grp9 (1A) +c8: BSWAP RAX/EAX/R8/R8D +c9: BSWAP RCX/ECX/R9/R9D +ca: BSWAP RDX/EDX/R10/R10D +cb: BSWAP RBX/EBX/R11/R11D +cc: BSWAP RSP/ESP/R12/R12D +cd: BSWAP RBP/EBP/R13/R13D +ce: BSWAP RSI/ESI/R14/R14D +cf: BSWAP RDI/EDI/R15/R15D +# 0x0f 0xd0-0xdf +d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2) +d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1) +d2: psrld Pq,Qq | vpsrld 
Vx,Hx,Wx (66),(v1) +d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1) +d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1) +d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1) +d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) +d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) +d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) +da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) +db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) +dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) +dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) +de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) +df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) +# 0x0f 0xe0-0xef +e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) +e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) +e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) +e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) +e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) +e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) +e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) +e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) +e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) +e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) +ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) +eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) +ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) +ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) +ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) +ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) +# 0x0f 0xf0-0xff +f0: vlddqu Vx,Mx (F2) +f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) +f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1) +f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1) +f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1) +f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1) +f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1) +f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1) +f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1) +f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1) +fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1) +fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) +fc: paddb Pq,Qq | 
vpaddb Vx,Hx,Wx (66),(v1) +fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) +fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) +ff: +EndTable + +Table: 3-byte opcode 1 (0x0f 0x38) +Referrer: 3-byte escape 1 +AVXcode: 2 +# 0x0f 0x38 0x00-0x0f +00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1) +01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1) +02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1) +03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1) +04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1) +05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1) +06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1) +07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1) +08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1) +09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1) +0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1) +0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1) +0c: vpermilps Vx,Hx,Wx (66),(v) +0d: vpermilpd Vx,Hx,Wx (66),(v) +0e: vtestps Vx,Wx (66),(v) +0f: vtestpd Vx,Wx (66),(v) +# 0x0f 0x38 0x10-0x1f +10: pblendvb Vdq,Wdq (66) +11: +12: +13: vcvtph2ps Vx,Wx,Ib (66),(v) +14: blendvps Vdq,Wdq (66) +15: blendvpd Vdq,Wdq (66) +16: vpermps Vqq,Hqq,Wqq (66),(v) +17: vptest Vx,Wx (66) +18: vbroadcastss Vx,Wd (66),(v) +19: vbroadcastsd Vqq,Wq (66),(v) +1a: vbroadcastf128 Vqq,Mdq (66),(v) +1b: +1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) +1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) +1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) +1f: +# 0x0f 0x38 0x20-0x2f +20: vpmovsxbw Vx,Ux/Mq (66),(v1) +21: vpmovsxbd Vx,Ux/Md (66),(v1) +22: vpmovsxbq Vx,Ux/Mw (66),(v1) +23: vpmovsxwd Vx,Ux/Mq (66),(v1) +24: vpmovsxwq Vx,Ux/Md (66),(v1) +25: vpmovsxdq Vx,Ux/Mq (66),(v1) +26: +27: +28: vpmuldq Vx,Hx,Wx (66),(v1) +29: vpcmpeqq Vx,Hx,Wx (66),(v1) +2a: vmovntdqa Vx,Mx (66),(v1) +2b: vpackusdw Vx,Hx,Wx (66),(v1) +2c: vmaskmovps Vx,Hx,Mx (66),(v) +2d: vmaskmovpd Vx,Hx,Mx (66),(v) +2e: vmaskmovps Mx,Hx,Vx (66),(v) +2f: vmaskmovpd Mx,Hx,Vx (66),(v) +# 0x0f 0x38 0x30-0x3f +30: vpmovzxbw Vx,Ux/Mq (66),(v1) +31: vpmovzxbd Vx,Ux/Md (66),(v1) +32: vpmovzxbq Vx,Ux/Mw 
(66),(v1) +33: vpmovzxwd Vx,Ux/Mq (66),(v1) +34: vpmovzxwq Vx,Ux/Md (66),(v1) +35: vpmovzxdq Vx,Ux/Mq (66),(v1) +36: vpermd Vqq,Hqq,Wqq (66),(v) +37: vpcmpgtq Vx,Hx,Wx (66),(v1) +38: vpminsb Vx,Hx,Wx (66),(v1) +39: vpminsd Vx,Hx,Wx (66),(v1) +3a: vpminuw Vx,Hx,Wx (66),(v1) +3b: vpminud Vx,Hx,Wx (66),(v1) +3c: vpmaxsb Vx,Hx,Wx (66),(v1) +3d: vpmaxsd Vx,Hx,Wx (66),(v1) +3e: vpmaxuw Vx,Hx,Wx (66),(v1) +3f: vpmaxud Vx,Hx,Wx (66),(v1) +# 0x0f 0x38 0x40-0x8f +40: vpmulld Vx,Hx,Wx (66),(v1) +41: vphminposuw Vdq,Wdq (66),(v1) +42: +43: +44: +45: vpsrlvd/q Vx,Hx,Wx (66),(v) +46: vpsravd Vx,Hx,Wx (66),(v) +47: vpsllvd/q Vx,Hx,Wx (66),(v) +# Skip 0x48-0x57 +58: vpbroadcastd Vx,Wx (66),(v) +59: vpbroadcastq Vx,Wx (66),(v) +5a: vbroadcasti128 Vqq,Mdq (66),(v) +# Skip 0x5b-0x77 +78: vpbroadcastb Vx,Wx (66),(v) +79: vpbroadcastw Vx,Wx (66),(v) +# Skip 0x7a-0x7f +80: INVEPT Gy,Mdq (66) +81: INVPID Gy,Mdq (66) +82: INVPCID Gy,Mdq (66) +8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) +8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) +# 0x0f 0x38 0x90-0xbf (FMA) +90: vgatherdd/q Vx,Hx,Wx (66),(v) +91: vgatherqd/q Vx,Hx,Wx (66),(v) +92: vgatherdps/d Vx,Hx,Wx (66),(v) +93: vgatherqps/d Vx,Hx,Wx (66),(v) +94: +95: +96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v) +97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) +98: vfmadd132ps/d Vx,Hx,Wx (66),(v) +99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) +9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) +9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) +9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) +a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) +a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) +a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) +ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) +ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) +af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +b6: 
vfmaddsub231ps/d Vx,Hx,Wx (66),(v) +b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) +b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) +b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +ba: vfmsub231ps/d Vx,Hx,Wx (66),(v) +bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1) +bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v) +bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) +bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) +# 0x0f 0x38 0xc0-0xff +db: VAESIMC Vdq,Wdq (66),(v1) +dc: VAESENC Vdq,Hdq,Wdq (66),(v1) +dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) +de: VAESDEC Vdq,Hdq,Wdq (66),(v1) +df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) +f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) +f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) +f2: ANDN Gy,By,Ey (v) +f3: Grp17 (1A) +f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) +f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) +f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) +EndTable + +Table: 3-byte opcode 2 (0x0f 0x3a) +Referrer: 3-byte escape 2 +AVXcode: 3 +# 0x0f 0x3a 0x00-0xff +00: vpermq Vqq,Wqq,Ib (66),(v) +01: vpermpd Vqq,Wqq,Ib (66),(v) +02: vpblendd Vx,Hx,Wx,Ib (66),(v) +03: +04: vpermilps Vx,Wx,Ib (66),(v) +05: vpermilpd Vx,Wx,Ib (66),(v) +06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) +07: +08: vroundps Vx,Wx,Ib (66) +09: vroundpd Vx,Wx,Ib (66) +0a: vroundss Vss,Wss,Ib (66),(v1) +0b: vroundsd Vsd,Wsd,Ib (66),(v1) +0c: vblendps Vx,Hx,Wx,Ib (66) +0d: vblendpd Vx,Hx,Wx,Ib (66) +0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) +0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1) +14: vpextrb Rd/Mb,Vdq,Ib (66),(v1) +15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) +16: vpextrd/q Ey,Vdq,Ib (66),(v1) +17: vextractps Ed,Vdq,Ib (66),(v1) +18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) +19: vextractf128 Wdq,Vqq,Ib (66),(v) +1d: vcvtps2ph Wx,Vx,Ib (66),(v) +20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) +21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) +22: vpinsrd/q 
Vdq,Hdq,Ey,Ib (66),(v1) +38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) +39: vextracti128 Wdq,Vqq,Ib (66),(v) +40: vdpps Vx,Hx,Wx,Ib (66) +41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) +42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) +44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) +46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) +4a: vblendvps Vx,Hx,Wx,Lx (66),(v) +4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) +4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) +60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) +61: vpcmpestri Vdq,Wdq,Ib (66),(v1) +62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) +63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) +f0: RORX Gy,Ey,Ib (F2),(v) +EndTable + +GrpTable: Grp1 +0: ADD +1: OR +2: ADC +3: SBB +4: AND +5: SUB +6: XOR +7: CMP +EndTable + +GrpTable: Grp1A +0: POP +EndTable + +GrpTable: Grp2 +0: ROL +1: ROR +2: RCL +3: RCR +4: SHL/SAL +5: SHR +6: +7: SAR +EndTable + +GrpTable: Grp3_1 +0: TEST Eb,Ib +1: +2: NOT Eb +3: NEG Eb +4: MUL AL,Eb +5: IMUL AL,Eb +6: DIV AL,Eb +7: IDIV AL,Eb +EndTable + +GrpTable: Grp3_2 +0: TEST Ev,Iz +1: +2: NOT Ev +3: NEG Ev +4: MUL rAX,Ev +5: IMUL rAX,Ev +6: DIV rAX,Ev +7: IDIV rAX,Ev +EndTable + +GrpTable: Grp4 +0: INC Eb +1: DEC Eb +EndTable + +GrpTable: Grp5 +0: INC Ev +1: DEC Ev +# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 
+2: CALLN Ev (f64) +3: CALLF Ep +4: JMPN Ev (f64) +5: JMPF Mp +6: PUSH Ev (d64) +7: +EndTable + +GrpTable: Grp6 +0: SLDT Rv/Mw +1: STR Rv/Mw +2: LLDT Ew +3: LTR Ew +4: VERR Ew +5: VERW Ew +EndTable + +GrpTable: Grp7 +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) +3: LIDT Ms +4: SMSW Mw/Rv +5: +6: LMSW Ew +7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) +EndTable + +GrpTable: Grp8 +4: BT +5: BTS +6: BTR +7: BTC +EndTable + +GrpTable: Grp9 +1: CMPXCHG8B/16B Mq/Mdq +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) +7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) +EndTable + +GrpTable: Grp10 +EndTable + +# Grp11A and Grp11B are expressed as Grp11 in Intel SDM +GrpTable: Grp11A +0: MOV Eb,Ib +7: XABORT Ib (000),(11B) +EndTable + +GrpTable: Grp11B +0: MOV Eb,Iz +7: XBEGIN Jz (000),(11B) +EndTable + +GrpTable: Grp12 +2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1) +4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1) +6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1) +EndTable + +GrpTable: Grp13 +2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) +4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) +6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) +EndTable + +GrpTable: Grp14 +2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1) +3: vpsrldq Hx,Ux,Ib (66),(11B),(v1) +6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1) +7: vpslldq Hx,Ux,Ib (66),(11B),(v1) +EndTable + +GrpTable: Grp15 +0: fxsave | RDFSBASE Ry (F3),(11B) +1: fxstor | RDGSBASE Ry (F3),(11B) +2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) +3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) +4: XSAVE +5: XRSTOR | lfence (11B) +6: XSAVEOPT | mfence (11B) +7: clflush | sfence (11B) +EndTable + +GrpTable: 
Grp16 +0: prefetch NTA +1: prefetch T0 +2: prefetch T1 +3: prefetch T2 +EndTable + +GrpTable: Grp17 +1: BLSR By,Ey (v) +2: BLSMSK By,Ey (v) +3: BLSI By,Ey (v) +EndTable + +# AMD's Prefetch Group +GrpTable: GrpP +0: PREFETCH +1: PREFETCHW +EndTable + +GrpTable: GrpPDLK +0: MONTMUL +1: XSHA1 +2: XSHA2 +EndTable + +GrpTable: GrpRNG +0: xstore-rng +1: xcrypt-ecb +2: xcrypt-cbc +4: xcrypt-cfb +5: xcrypt-ofb +EndTable diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c new file mode 100644 index 000000000000..bb41c20e6005 --- /dev/null +++ b/tools/perf/util/intel-pt.c @@ -0,0 +1,1956 @@ +/* + * intel_pt.c: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#include <stdio.h> +#include <stdbool.h> +#include <errno.h> +#include <linux/kernel.h> +#include <linux/types.h> + +#include "../perf.h" +#include "session.h" +#include "machine.h" +#include "tool.h" +#include "event.h" +#include "evlist.h" +#include "evsel.h" +#include "map.h" +#include "color.h" +#include "util.h" +#include "thread.h" +#include "thread-stack.h" +#include "symbol.h" +#include "callchain.h" +#include "dso.h" +#include "debug.h" +#include "auxtrace.h" +#include "tsc.h" +#include "intel-pt.h" + +#include "intel-pt-decoder/intel-pt-log.h" +#include "intel-pt-decoder/intel-pt-decoder.h" +#include "intel-pt-decoder/intel-pt-insn-decoder.h" +#include "intel-pt-decoder/intel-pt-pkt-decoder.h" + +#define MAX_TIMESTAMP (~0ULL) + +struct intel_pt { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + u32 auxtrace_type; + struct perf_session *session; + struct machine *machine; + struct perf_evsel *switch_evsel; + struct thread *unknown_thread; + bool timeless_decoding; + bool sampling_mode; + bool snapshot_mode; + bool per_cpu_mmaps; + bool have_tsc; + bool data_queued; + bool est_tsc; + bool sync_switch; + int have_sched_switch; + u32 pmu_type; + u64 kernel_start; + u64 switch_ip; + u64 ptss_ip; + + struct perf_tsc_conversion tc; + bool cap_user_time_zero; + + struct itrace_synth_opts synth_opts; + + bool sample_instructions; + u64 instructions_sample_type; + u64 instructions_sample_period; + u64 instructions_id; + + bool sample_branches; + u32 branches_filter; + u64 branches_sample_type; + u64 branches_id; + + bool sample_transactions; + u64 transactions_sample_type; + u64 transactions_id; + + bool synth_needs_swap; + + u64 tsc_bit; + u64 mtc_bit; + u64 mtc_freq_bits; + u32 tsc_ctc_ratio_n; + u32 tsc_ctc_ratio_d; + u64 cyc_bit; + u64 noretcomp_bit; + unsigned max_non_turbo_ratio; +}; + +enum switch_state { + INTEL_PT_SS_NOT_TRACING, + INTEL_PT_SS_UNKNOWN, + INTEL_PT_SS_TRACING, + 
INTEL_PT_SS_EXPECTING_SWITCH_EVENT, + INTEL_PT_SS_EXPECTING_SWITCH_IP, +}; + +struct intel_pt_queue { + struct intel_pt *pt; + unsigned int queue_nr; + struct auxtrace_buffer *buffer; + void *decoder; + const struct intel_pt_state *state; + struct ip_callchain *chain; + union perf_event *event_buf; + bool on_heap; + bool stop; + bool step_through_buffers; + bool use_buffer_pid_tid; + pid_t pid, tid; + int cpu; + int switch_state; + pid_t next_tid; + struct thread *thread; + bool exclude_kernel; + bool have_sample; + u64 time; + u64 timestamp; + u32 flags; + u16 insn_len; + u64 last_insn_cnt; +}; + +static void intel_pt_dump(struct intel_pt *pt __maybe_unused, + unsigned char *buf, size_t len) +{ + struct intel_pt_pkt packet; + size_t pos = 0; + int ret, pkt_len, i; + char desc[INTEL_PT_PKT_DESC_MAX]; + const char *color = PERF_COLOR_BLUE; + + color_fprintf(stdout, color, + ". ... Intel Processor Trace data: size %zu bytes\n", + len); + + while (len) { + ret = intel_pt_get_packet(buf, len, &packet); + if (ret > 0) + pkt_len = ret; + else + pkt_len = 1; + printf("."); + color_fprintf(stdout, color, " %08x: ", pos); + for (i = 0; i < pkt_len; i++) + color_fprintf(stdout, color, " %02x", buf[i]); + for (; i < 16; i++) + color_fprintf(stdout, color, " "); + if (ret > 0) { + ret = intel_pt_pkt_desc(&packet, desc, + INTEL_PT_PKT_DESC_MAX); + if (ret > 0) + color_fprintf(stdout, color, " %s\n", desc); + } else { + color_fprintf(stdout, color, " Bad packet!\n"); + } + pos += pkt_len; + buf += pkt_len; + len -= pkt_len; + } +} + +static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf, + size_t len) +{ + printf(".\n"); + intel_pt_dump(pt, buf, len); +} + +static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, + struct auxtrace_buffer *b) +{ + void *start; + + start = intel_pt_find_overlap(a->data, a->size, b->data, b->size, + pt->have_tsc); + if (!start) + return -EINVAL; + b->use_size = b->data + b->size - start; + b->use_data = 
start; + return 0; +} + +static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq, + struct auxtrace_queue *queue, + struct auxtrace_buffer *buffer) +{ + if (queue->cpu == -1 && buffer->cpu != -1) + ptq->cpu = buffer->cpu; + + ptq->pid = buffer->pid; + ptq->tid = buffer->tid; + + intel_pt_log("queue %u cpu %d pid %d tid %d\n", + ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); + + thread__zput(ptq->thread); + + if (ptq->tid != -1) { + if (ptq->pid != -1) + ptq->thread = machine__findnew_thread(ptq->pt->machine, + ptq->pid, + ptq->tid); + else + ptq->thread = machine__find_thread(ptq->pt->machine, -1, + ptq->tid); + } +} + +/* This function assumes data is processed sequentially only */ +static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) +{ + struct intel_pt_queue *ptq = data; + struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer; + struct auxtrace_queue *queue; + + if (ptq->stop) { + b->len = 0; + return 0; + } + + queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; + + buffer = auxtrace_buffer__next(queue, buffer); + if (!buffer) { + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + b->len = 0; + return 0; + } + + ptq->buffer = buffer; + + if (!buffer->data) { + int fd = perf_data_file__fd(ptq->pt->session->file); + + buffer->data = auxtrace_buffer__get_data(buffer, fd); + if (!buffer->data) + return -ENOMEM; + } + + if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer && + intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer)) + return -ENOMEM; + + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + + if (buffer->use_data) { + b->len = buffer->use_size; + b->buf = buffer->use_data; + } else { + b->len = buffer->size; + b->buf = buffer->data; + } + b->ref_timestamp = buffer->reference; + + if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode && + !buffer->consecutive)) { + b->consecutive = false; + b->trace_nr = buffer->buffer_nr + 1; + } else { + b->consecutive = true; + } + + 
if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid || + ptq->tid != buffer->tid)) + intel_pt_use_buffer_pid_tid(ptq, queue, buffer); + + if (ptq->step_through_buffers) + ptq->stop = true; + + if (!b->len) + return intel_pt_get_trace(b, data); + + return 0; +} + +struct intel_pt_cache_entry { + struct auxtrace_cache_entry entry; + u64 insn_cnt; + u64 byte_cnt; + enum intel_pt_insn_op op; + enum intel_pt_insn_branch branch; + int length; + int32_t rel; +}; + +static int intel_pt_config_div(const char *var, const char *value, void *data) +{ + int *d = data; + long val; + + if (!strcmp(var, "intel-pt.cache-divisor")) { + val = strtol(value, NULL, 0); + if (val > 0 && val <= INT_MAX) + *d = val; + } + + return 0; +} + +static int intel_pt_cache_divisor(void) +{ + static int d; + + if (d) + return d; + + perf_config(intel_pt_config_div, &d); + + if (!d) + d = 64; + + return d; +} + +static unsigned int intel_pt_cache_size(struct dso *dso, + struct machine *machine) +{ + off_t size; + + size = dso__data_size(dso, machine); + size /= intel_pt_cache_divisor(); + if (size < 1000) + return 10; + if (size > (1 << 21)) + return 21; + return 32 - __builtin_clz(size); +} + +static struct auxtrace_cache *intel_pt_cache(struct dso *dso, + struct machine *machine) +{ + struct auxtrace_cache *c; + unsigned int bits; + + if (dso->auxtrace_cache) + return dso->auxtrace_cache; + + bits = intel_pt_cache_size(dso, machine); + + /* Ignoring cache creation failure */ + c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200); + + dso->auxtrace_cache = c; + + return c; +} + +static int intel_pt_cache_add(struct dso *dso, struct machine *machine, + u64 offset, u64 insn_cnt, u64 byte_cnt, + struct intel_pt_insn *intel_pt_insn) +{ + struct auxtrace_cache *c = intel_pt_cache(dso, machine); + struct intel_pt_cache_entry *e; + int err; + + if (!c) + return -ENOMEM; + + e = auxtrace_cache__alloc_entry(c); + if (!e) + return -ENOMEM; + + e->insn_cnt = insn_cnt; + e->byte_cnt = 
byte_cnt; + e->op = intel_pt_insn->op; + e->branch = intel_pt_insn->branch; + e->length = intel_pt_insn->length; + e->rel = intel_pt_insn->rel; + + err = auxtrace_cache__add(c, offset, &e->entry); + if (err) + auxtrace_cache__free_entry(c, e); + + return err; +} + +static struct intel_pt_cache_entry * +intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) +{ + struct auxtrace_cache *c = intel_pt_cache(dso, machine); + + if (!c) + return NULL; + + return auxtrace_cache__lookup(dso->auxtrace_cache, offset); +} + +static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, + uint64_t *insn_cnt_ptr, uint64_t *ip, + uint64_t to_ip, uint64_t max_insn_cnt, + void *data) +{ + struct intel_pt_queue *ptq = data; + struct machine *machine = ptq->pt->machine; + struct thread *thread; + struct addr_location al; + unsigned char buf[1024]; + size_t bufsz; + ssize_t len; + int x86_64; + u8 cpumode; + u64 offset, start_offset, start_ip; + u64 insn_cnt = 0; + bool one_map = true; + + if (to_ip && *ip == to_ip) + goto out_no_cache; + + bufsz = intel_pt_insn_max_size(); + + if (*ip >= ptq->pt->kernel_start) + cpumode = PERF_RECORD_MISC_KERNEL; + else + cpumode = PERF_RECORD_MISC_USER; + + thread = ptq->thread; + if (!thread) { + if (cpumode != PERF_RECORD_MISC_KERNEL) + return -EINVAL; + thread = ptq->pt->unknown_thread; + } + + while (1) { + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al); + if (!al.map || !al.map->dso) + return -EINVAL; + + if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && + dso__data_status_seen(al.map->dso, + DSO_DATA_STATUS_SEEN_ITRACE)) + return -ENOENT; + + offset = al.map->map_ip(al.map, *ip); + + if (!to_ip && one_map) { + struct intel_pt_cache_entry *e; + + e = intel_pt_cache_lookup(al.map->dso, machine, offset); + if (e && + (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) { + *insn_cnt_ptr = e->insn_cnt; + *ip += e->byte_cnt; + intel_pt_insn->op = e->op; + intel_pt_insn->branch = e->branch; + 
intel_pt_insn->length = e->length; + intel_pt_insn->rel = e->rel; + intel_pt_log_insn_no_data(intel_pt_insn, *ip); + return 0; + } + } + + start_offset = offset; + start_ip = *ip; + + /* Load maps to ensure dso->is_64_bit has been updated */ + map__load(al.map, machine->symbol_filter); + + x86_64 = al.map->dso->is_64_bit; + + while (1) { + len = dso__data_read_offset(al.map->dso, machine, + offset, buf, bufsz); + if (len <= 0) + return -EINVAL; + + if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) + return -EINVAL; + + intel_pt_log_insn(intel_pt_insn, *ip); + + insn_cnt += 1; + + if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) + goto out; + + if (max_insn_cnt && insn_cnt >= max_insn_cnt) + goto out_no_cache; + + *ip += intel_pt_insn->length; + + if (to_ip && *ip == to_ip) + goto out_no_cache; + + if (*ip >= al.map->end) + break; + + offset += intel_pt_insn->length; + } + one_map = false; + } +out: + *insn_cnt_ptr = insn_cnt; + + if (!one_map) + goto out_no_cache; + + /* + * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate + * entries. 
+ */ + if (to_ip) { + struct intel_pt_cache_entry *e; + + e = intel_pt_cache_lookup(al.map->dso, machine, start_offset); + if (e) + return 0; + } + + /* Ignore cache errors */ + intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt, + *ip - start_ip, intel_pt_insn); + + return 0; + +out_no_cache: + *insn_cnt_ptr = insn_cnt; + return 0; +} + +static bool intel_pt_get_config(struct intel_pt *pt, + struct perf_event_attr *attr, u64 *config) +{ + if (attr->type == pt->pmu_type) { + if (config) + *config = attr->config; + return true; + } + + return false; +} + +static bool intel_pt_exclude_kernel(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, NULL) && + !evsel->attr.exclude_kernel) + return false; + } + return true; +} + +static bool intel_pt_return_compression(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + u64 config; + + if (!pt->noretcomp_bit) + return true; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, &config) && + (config & pt->noretcomp_bit)) + return false; + } + return true; +} + +static unsigned int intel_pt_mtc_period(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + unsigned int shift; + u64 config; + + if (!pt->mtc_freq_bits) + return 0; + + for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++) + config >>= 1; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, &config)) + return (config & pt->mtc_freq_bits) >> shift; + } + return 0; +} + +static bool intel_pt_timeless_decoding(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + bool timeless_decoding = true; + u64 config; + + if (!pt->tsc_bit || !pt->cap_user_time_zero) + return true; + + evlist__for_each(pt->session->evlist, evsel) { + if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME)) + return true; + if (intel_pt_get_config(pt, &evsel->attr, &config)) { + if (config & 
pt->tsc_bit) + timeless_decoding = false; + else + return true; + } + } + return timeless_decoding; +} + +static bool intel_pt_tracing_kernel(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, NULL) && + !evsel->attr.exclude_kernel) + return true; + } + return false; +} + +static bool intel_pt_have_tsc(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + bool have_tsc = false; + u64 config; + + if (!pt->tsc_bit) + return false; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, &config)) { + if (config & pt->tsc_bit) + have_tsc = true; + else + return false; + } + } + return have_tsc; +} + +static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) +{ + u64 quot, rem; + + quot = ns / pt->tc.time_mult; + rem = ns % pt->tc.time_mult; + return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) / + pt->tc.time_mult; +} + +static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, + unsigned int queue_nr) +{ + struct intel_pt_params params = { .get_trace = 0, }; + struct intel_pt_queue *ptq; + + ptq = zalloc(sizeof(struct intel_pt_queue)); + if (!ptq) + return NULL; + + if (pt->synth_opts.callchain) { + size_t sz = sizeof(struct ip_callchain); + + sz += pt->synth_opts.callchain_sz * sizeof(u64); + ptq->chain = zalloc(sz); + if (!ptq->chain) + goto out_free; + } + + ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); + if (!ptq->event_buf) + goto out_free; + + ptq->pt = pt; + ptq->queue_nr = queue_nr; + ptq->exclude_kernel = intel_pt_exclude_kernel(pt); + ptq->pid = -1; + ptq->tid = -1; + ptq->cpu = -1; + ptq->next_tid = -1; + + params.get_trace = intel_pt_get_trace; + params.walk_insn = intel_pt_walk_next_insn; + params.data = ptq; + params.return_compression = intel_pt_return_compression(pt); + params.max_non_turbo_ratio = pt->max_non_turbo_ratio; + params.mtc_period = intel_pt_mtc_period(pt); + 
params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; + params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; + + if (pt->synth_opts.instructions) { + if (pt->synth_opts.period) { + switch (pt->synth_opts.period_type) { + case PERF_ITRACE_PERIOD_INSTRUCTIONS: + params.period_type = + INTEL_PT_PERIOD_INSTRUCTIONS; + params.period = pt->synth_opts.period; + break; + case PERF_ITRACE_PERIOD_TICKS: + params.period_type = INTEL_PT_PERIOD_TICKS; + params.period = pt->synth_opts.period; + break; + case PERF_ITRACE_PERIOD_NANOSECS: + params.period_type = INTEL_PT_PERIOD_TICKS; + params.period = intel_pt_ns_to_ticks(pt, + pt->synth_opts.period); + break; + default: + break; + } + } + + if (!params.period) { + params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS; + params.period = 1000; + } + } + + ptq->decoder = intel_pt_decoder_new(¶ms); + if (!ptq->decoder) + goto out_free; + + return ptq; + +out_free: + zfree(&ptq->event_buf); + zfree(&ptq->chain); + free(ptq); + return NULL; +} + +static void intel_pt_free_queue(void *priv) +{ + struct intel_pt_queue *ptq = priv; + + if (!ptq) + return; + thread__zput(ptq->thread); + intel_pt_decoder_free(ptq->decoder); + zfree(&ptq->event_buf); + zfree(&ptq->chain); + free(ptq); +} + +static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, + struct auxtrace_queue *queue) +{ + struct intel_pt_queue *ptq = queue->priv; + + if (queue->tid == -1 || pt->have_sched_switch) { + ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu); + thread__zput(ptq->thread); + } + + if (!ptq->thread && ptq->tid != -1) + ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid); + + if (ptq->thread) { + ptq->pid = ptq->thread->pid_; + if (queue->cpu == -1) + ptq->cpu = ptq->thread->cpu; + } +} + +static void intel_pt_sample_flags(struct intel_pt_queue *ptq) +{ + if (ptq->state->flags & INTEL_PT_ABORT_TX) { + ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT; + } else if (ptq->state->flags & INTEL_PT_ASYNC) { + if (ptq->state->to_ip) + ptq->flags = 
PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_INTERRUPT; + else + ptq->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_END; + ptq->insn_len = 0; + } else { + if (ptq->state->from_ip) + ptq->flags = intel_pt_insn_type(ptq->state->insn_op); + else + ptq->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_BEGIN; + if (ptq->state->flags & INTEL_PT_IN_TX) + ptq->flags |= PERF_IP_FLAG_IN_TX; + ptq->insn_len = ptq->state->insn_len; + } +} + +static int intel_pt_setup_queue(struct intel_pt *pt, + struct auxtrace_queue *queue, + unsigned int queue_nr) +{ + struct intel_pt_queue *ptq = queue->priv; + + if (list_empty(&queue->head)) + return 0; + + if (!ptq) { + ptq = intel_pt_alloc_queue(pt, queue_nr); + if (!ptq) + return -ENOMEM; + queue->priv = ptq; + + if (queue->cpu != -1) + ptq->cpu = queue->cpu; + ptq->tid = queue->tid; + + if (pt->sampling_mode) { + if (pt->timeless_decoding) + ptq->step_through_buffers = true; + if (pt->timeless_decoding || !pt->have_sched_switch) + ptq->use_buffer_pid_tid = true; + } + } + + if (!ptq->on_heap && + (!pt->sync_switch || + ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) { + const struct intel_pt_state *state; + int ret; + + if (pt->timeless_decoding) + return 0; + + intel_pt_log("queue %u getting timestamp\n", queue_nr); + intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", + queue_nr, ptq->cpu, ptq->pid, ptq->tid); + while (1) { + state = intel_pt_decode(ptq->decoder); + if (state->err) { + if (state->err == INTEL_PT_ERR_NODATA) { + intel_pt_log("queue %u has no timestamp\n", + queue_nr); + return 0; + } + continue; + } + if (state->timestamp) + break; + } + + ptq->timestamp = state->timestamp; + intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n", + queue_nr, ptq->timestamp); + ptq->state = state; + ptq->have_sample = true; + intel_pt_sample_flags(ptq); + ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp); + if (ret) + return ret; + ptq->on_heap = true; + } + + return 0; 
+} + +static int intel_pt_setup_queues(struct intel_pt *pt) +{ + unsigned int i; + int ret; + + for (i = 0; i < pt->queues.nr_queues; i++) { + ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i); + if (ret) + return ret; + } + return 0; +} + +static int intel_pt_inject_event(union perf_event *event, + struct perf_sample *sample, u64 type, + bool swapped) +{ + event->header.size = perf_event__sample_event_size(sample, type, 0); + return perf_event__synthesize_sample(event, type, 0, sample, swapped); +} + +static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) +{ + int ret; + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + if (!pt->timeless_decoding) + sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + + sample.ip = ptq->state->from_ip; + sample.pid = ptq->pid; + sample.tid = ptq->tid; + sample.addr = ptq->state->to_ip; + sample.id = ptq->pt->branches_id; + sample.stream_id = ptq->pt->branches_id; + sample.period = 1; + sample.cpu = ptq->cpu; + sample.flags = ptq->flags; + sample.insn_len = ptq->insn_len; + + if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) + return 0; + + if (pt->synth_opts.inject) { + ret = intel_pt_inject_event(event, &sample, + pt->branches_sample_type, + pt->synth_needs_swap); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(pt->session, event, &sample); + if (ret) + pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n", + ret); + + return ret; +} + +static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) +{ + int ret; + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + + event->sample.header.type = PERF_RECORD_SAMPLE; + 
event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + if (!pt->timeless_decoding) + sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + + sample.ip = ptq->state->from_ip; + sample.pid = ptq->pid; + sample.tid = ptq->tid; + sample.addr = ptq->state->to_ip; + sample.id = ptq->pt->instructions_id; + sample.stream_id = ptq->pt->instructions_id; + sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; + sample.cpu = ptq->cpu; + sample.flags = ptq->flags; + sample.insn_len = ptq->insn_len; + + ptq->last_insn_cnt = ptq->state->tot_insn_cnt; + + if (pt->synth_opts.callchain) { + thread_stack__sample(ptq->thread, ptq->chain, + pt->synth_opts.callchain_sz, sample.ip); + sample.callchain = ptq->chain; + } + + if (pt->synth_opts.inject) { + ret = intel_pt_inject_event(event, &sample, + pt->instructions_sample_type, + pt->synth_needs_swap); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(pt->session, event, &sample); + if (ret) + pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n", + ret); + + return ret; +} + +static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) +{ + int ret; + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + if (!pt->timeless_decoding) + sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + + sample.ip = ptq->state->from_ip; + sample.pid = ptq->pid; + sample.tid = ptq->tid; + sample.addr = ptq->state->to_ip; + sample.id = ptq->pt->transactions_id; + sample.stream_id = ptq->pt->transactions_id; + sample.period = 1; + sample.cpu = ptq->cpu; + sample.flags = ptq->flags; + sample.insn_len = ptq->insn_len; + + if (pt->synth_opts.callchain) { + 
thread_stack__sample(ptq->thread, ptq->chain, + pt->synth_opts.callchain_sz, sample.ip); + sample.callchain = ptq->chain; + } + + if (pt->synth_opts.inject) { + ret = intel_pt_inject_event(event, &sample, + pt->transactions_sample_type, + pt->synth_needs_swap); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(pt->session, event, &sample); + if (ret) + pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", + ret); + + return ret; +} + +static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, + pid_t pid, pid_t tid, u64 ip) +{ + union perf_event event; + char msg[MAX_AUXTRACE_ERROR_MSG]; + int err; + + intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); + + auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, + code, cpu, pid, tid, ip, msg); + + err = perf_session__deliver_synth_event(pt->session, &event, NULL); + if (err) + pr_err("Intel Processor Trace: failed to deliver error event, error %d\n", + err); + + return err; +} + +static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) +{ + struct auxtrace_queue *queue; + pid_t tid = ptq->next_tid; + int err; + + if (tid == -1) + return 0; + + intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid); + + err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid); + + queue = &pt->queues.queue_array[ptq->queue_nr]; + intel_pt_set_pid_tid_cpu(pt, queue); + + ptq->next_tid = -1; + + return err; +} + +static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip) +{ + struct intel_pt *pt = ptq->pt; + + return ip == pt->switch_ip && + (ptq->flags & PERF_IP_FLAG_BRANCH) && + !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT)); +} + +static int intel_pt_sample(struct intel_pt_queue *ptq) +{ + const struct intel_pt_state *state = ptq->state; + struct intel_pt *pt = ptq->pt; + int err; + + if (!ptq->have_sample) + return 0; + + ptq->have_sample 
= false; + + if (pt->sample_instructions && + (state->type & INTEL_PT_INSTRUCTION)) { + err = intel_pt_synth_instruction_sample(ptq); + if (err) + return err; + } + + if (pt->sample_transactions && + (state->type & INTEL_PT_TRANSACTION)) { + err = intel_pt_synth_transaction_sample(ptq); + if (err) + return err; + } + + if (!(state->type & INTEL_PT_BRANCH)) + return 0; + + if (pt->synth_opts.callchain) + thread_stack__event(ptq->thread, ptq->flags, state->from_ip, + state->to_ip, ptq->insn_len, + state->trace_nr); + else + thread_stack__set_trace_nr(ptq->thread, state->trace_nr); + + if (pt->sample_branches) { + err = intel_pt_synth_branch_sample(ptq); + if (err) + return err; + } + + if (!pt->sync_switch) + return 0; + + if (intel_pt_is_switch_ip(ptq, state->to_ip)) { + switch (ptq->switch_state) { + case INTEL_PT_SS_UNKNOWN: + case INTEL_PT_SS_EXPECTING_SWITCH_IP: + err = intel_pt_next_tid(pt, ptq); + if (err) + return err; + ptq->switch_state = INTEL_PT_SS_TRACING; + break; + default: + ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT; + return 1; + } + } else if (!state->to_ip) { + ptq->switch_state = INTEL_PT_SS_NOT_TRACING; + } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) { + ptq->switch_state = INTEL_PT_SS_UNKNOWN; + } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN && + state->to_ip == pt->ptss_ip && + (ptq->flags & PERF_IP_FLAG_CALL)) { + ptq->switch_state = INTEL_PT_SS_TRACING; + } + + return 0; +} + +static u64 intel_pt_switch_ip(struct machine *machine, u64 *ptss_ip) +{ + struct map *map; + struct symbol *sym, *start; + u64 ip, switch_ip = 0; + + if (ptss_ip) + *ptss_ip = 0; + + map = machine__kernel_map(machine, MAP__FUNCTION); + if (!map) + return 0; + + if (map__load(map, machine->symbol_filter)) + return 0; + + start = dso__first_symbol(map->dso, MAP__FUNCTION); + + for (sym = start; sym; sym = dso__next_symbol(sym)) { + if (sym->binding == STB_GLOBAL && + !strcmp(sym->name, "__switch_to")) { + ip = map->unmap_ip(map, sym->start); + 
if (ip >= map->start && ip < map->end) { + switch_ip = ip; + break; + } + } + } + + if (!switch_ip || !ptss_ip) + return 0; + + for (sym = start; sym; sym = dso__next_symbol(sym)) { + if (!strcmp(sym->name, "perf_trace_sched_switch")) { + ip = map->unmap_ip(map, sym->start); + if (ip >= map->start && ip < map->end) { + *ptss_ip = ip; + break; + } + } + } + + return switch_ip; +} + +static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) +{ + const struct intel_pt_state *state = ptq->state; + struct intel_pt *pt = ptq->pt; + int err; + + if (!pt->kernel_start) { + pt->kernel_start = machine__kernel_start(pt->machine); + if (pt->per_cpu_mmaps && pt->have_sched_switch && + !pt->timeless_decoding && intel_pt_tracing_kernel(pt) && + !pt->sampling_mode) { + pt->switch_ip = intel_pt_switch_ip(pt->machine, + &pt->ptss_ip); + if (pt->switch_ip) { + intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", + pt->switch_ip, pt->ptss_ip); + pt->sync_switch = true; + } + } + } + + intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", + ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); + while (1) { + err = intel_pt_sample(ptq); + if (err) + return err; + + state = intel_pt_decode(ptq->decoder); + if (state->err) { + if (state->err == INTEL_PT_ERR_NODATA) + return 1; + if (pt->sync_switch && + state->from_ip >= pt->kernel_start) { + pt->sync_switch = false; + intel_pt_next_tid(pt, ptq); + } + if (pt->synth_opts.errors) { + err = intel_pt_synth_error(pt, state->err, + ptq->cpu, ptq->pid, + ptq->tid, + state->from_ip); + if (err) + return err; + } + continue; + } + + ptq->state = state; + ptq->have_sample = true; + intel_pt_sample_flags(ptq); + + /* Use estimated TSC upon return to user space */ + if (pt->est_tsc && + (state->from_ip >= pt->kernel_start || !state->from_ip) && + state->to_ip && state->to_ip < pt->kernel_start) { + intel_pt_log("TSC %"PRIx64" est. 
TSC %"PRIx64"\n", + state->timestamp, state->est_timestamp); + ptq->timestamp = state->est_timestamp; + /* Use estimated TSC in unknown switch state */ + } else if (pt->sync_switch && + ptq->switch_state == INTEL_PT_SS_UNKNOWN && + intel_pt_is_switch_ip(ptq, state->to_ip) && + ptq->next_tid == -1) { + intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n", + state->timestamp, state->est_timestamp); + ptq->timestamp = state->est_timestamp; + } else if (state->timestamp > ptq->timestamp) { + ptq->timestamp = state->timestamp; + } + + if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) { + *timestamp = ptq->timestamp; + return 0; + } + } + return 0; +} + +static inline int intel_pt_update_queues(struct intel_pt *pt) +{ + if (pt->queues.new_data) { + pt->queues.new_data = false; + return intel_pt_setup_queues(pt); + } + return 0; +} + +static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp) +{ + unsigned int queue_nr; + u64 ts; + int ret; + + while (1) { + struct auxtrace_queue *queue; + struct intel_pt_queue *ptq; + + if (!pt->heap.heap_cnt) + return 0; + + if (pt->heap.heap_array[0].ordinal >= timestamp) + return 0; + + queue_nr = pt->heap.heap_array[0].queue_nr; + queue = &pt->queues.queue_array[queue_nr]; + ptq = queue->priv; + + intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n", + queue_nr, pt->heap.heap_array[0].ordinal, + timestamp); + + auxtrace_heap__pop(&pt->heap); + + if (pt->heap.heap_cnt) { + ts = pt->heap.heap_array[0].ordinal + 1; + if (ts > timestamp) + ts = timestamp; + } else { + ts = timestamp; + } + + intel_pt_set_pid_tid_cpu(pt, queue); + + ret = intel_pt_run_decoder(ptq, &ts); + + if (ret < 0) { + auxtrace_heap__add(&pt->heap, queue_nr, ts); + return ret; + } + + if (!ret) { + ret = auxtrace_heap__add(&pt->heap, queue_nr, ts); + if (ret < 0) + return ret; + } else { + ptq->on_heap = false; + } + } + + return 0; +} + +static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid, + u64 time_) +{ + 
struct auxtrace_queues *queues = &pt->queues; + unsigned int i; + u64 ts = 0; + + for (i = 0; i < queues->nr_queues; i++) { + struct auxtrace_queue *queue = &pt->queues.queue_array[i]; + struct intel_pt_queue *ptq = queue->priv; + + if (ptq && (tid == -1 || ptq->tid == tid)) { + ptq->time = time_; + intel_pt_set_pid_tid_cpu(pt, queue); + intel_pt_run_decoder(ptq, &ts); + } + } + return 0; +} + +static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) +{ + return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, + sample->pid, sample->tid, 0); +} + +static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) +{ + unsigned i, j; + + if (cpu < 0 || !pt->queues.nr_queues) + return NULL; + + if ((unsigned)cpu >= pt->queues.nr_queues) + i = pt->queues.nr_queues - 1; + else + i = cpu; + + if (pt->queues.queue_array[i].cpu == cpu) + return pt->queues.queue_array[i].priv; + + for (j = 0; i > 0; j++) { + if (pt->queues.queue_array[--i].cpu == cpu) + return pt->queues.queue_array[i].priv; + } + + for (; j < pt->queues.nr_queues; j++) { + if (pt->queues.queue_array[j].cpu == cpu) + return pt->queues.queue_array[j].priv; + } + + return NULL; +} + +static int intel_pt_process_switch(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct intel_pt_queue *ptq; + struct perf_evsel *evsel; + pid_t tid; + int cpu, err; + + evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id); + if (evsel != pt->switch_evsel) + return 0; + + tid = perf_evsel__intval(evsel, sample, "next_pid"); + cpu = sample->cpu; + + intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", + cpu, tid, sample->time, perf_time_to_tsc(sample->time, + &pt->tc)); + + if (!pt->sync_switch) + goto out; + + ptq = intel_pt_cpu_to_ptq(pt, cpu); + if (!ptq) + goto out; + + switch (ptq->switch_state) { + case INTEL_PT_SS_NOT_TRACING: + ptq->next_tid = -1; + break; + case INTEL_PT_SS_UNKNOWN: + case INTEL_PT_SS_TRACING: + ptq->next_tid = tid; + 
ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP; + return 0; + case INTEL_PT_SS_EXPECTING_SWITCH_EVENT: + if (!ptq->on_heap) { + ptq->timestamp = perf_time_to_tsc(sample->time, + &pt->tc); + err = auxtrace_heap__add(&pt->heap, ptq->queue_nr, + ptq->timestamp); + if (err) + return err; + ptq->on_heap = true; + } + ptq->switch_state = INTEL_PT_SS_TRACING; + break; + case INTEL_PT_SS_EXPECTING_SWITCH_IP: + ptq->next_tid = tid; + intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu); + break; + default: + break; + } +out: + return machine__set_current_tid(pt->machine, cpu, -1, tid); +} + +static int intel_pt_process_itrace_start(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample) +{ + if (!pt->per_cpu_mmaps) + return 0; + + intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", + sample->cpu, event->itrace_start.pid, + event->itrace_start.tid, sample->time, + perf_time_to_tsc(sample->time, &pt->tc)); + + return machine__set_current_tid(pt->machine, sample->cpu, + event->itrace_start.pid, + event->itrace_start.tid); +} + +static int intel_pt_process_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + u64 timestamp; + int err = 0; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) { + pr_err("Intel Processor Trace requires ordered events\n"); + return -EINVAL; + } + + if (sample->time && sample->time != (u64)-1) + timestamp = perf_time_to_tsc(sample->time, &pt->tc); + else + timestamp = 0; + + if (timestamp || pt->timeless_decoding) { + err = intel_pt_update_queues(pt); + if (err) + return err; + } + + if (pt->timeless_decoding) { + if (event->header.type == PERF_RECORD_EXIT) { + err = intel_pt_process_timeless_queues(pt, + event->comm.tid, + sample->time); + } + } else if (timestamp) { + err = intel_pt_process_queues(pt, timestamp); + } + if 
(err) + return err; + + if (event->header.type == PERF_RECORD_AUX && + (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && + pt->synth_opts.errors) { + err = intel_pt_lost(pt, sample); + if (err) + return err; + } + + if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE) + err = intel_pt_process_switch(pt, sample); + else if (event->header.type == PERF_RECORD_ITRACE_START) + err = intel_pt_process_itrace_start(pt, event, sample); + + intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n", + perf_event__name(event->header.type), event->header.type, + sample->cpu, sample->time, timestamp); + + return err; +} + +static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + int ret; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) + return -EINVAL; + + ret = intel_pt_update_queues(pt); + if (ret < 0) + return ret; + + if (pt->timeless_decoding) + return intel_pt_process_timeless_queues(pt, -1, + MAX_TIMESTAMP - 1); + + return intel_pt_process_queues(pt, MAX_TIMESTAMP); +} + +static void intel_pt_free_events(struct perf_session *session) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + struct auxtrace_queues *queues = &pt->queues; + unsigned int i; + + for (i = 0; i < queues->nr_queues; i++) { + intel_pt_free_queue(queues->queue_array[i].priv); + queues->queue_array[i].priv = NULL; + } + intel_pt_log_disable(); + auxtrace_queues__free(queues); +} + +static void intel_pt_free(struct perf_session *session) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + + auxtrace_heap__free(&pt->heap); + intel_pt_free_events(session); + session->auxtrace = NULL; + thread__delete(pt->unknown_thread); + free(pt); +} + +static int intel_pt_process_auxtrace_event(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool __maybe_unused) +{ + 
struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + + if (pt->sampling_mode) + return 0; + + if (!pt->data_queued) { + struct auxtrace_buffer *buffer; + off_t data_offset; + int fd = perf_data_file__fd(session->file); + int err; + + if (perf_data_file__is_pipe(session->file)) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = auxtrace_queues__add_event(&pt->queues, session, event, + data_offset, &buffer); + if (err) + return err; + + /* Dump here now we have copied a piped trace out of the pipe */ + if (dump_trace) { + if (auxtrace_buffer__get_data(buffer, fd)) { + intel_pt_dump_event(pt, buffer->data, + buffer->size); + auxtrace_buffer__put_data(buffer); + } + } + } + + return 0; +} + +struct intel_pt_synth { + struct perf_tool dummy_tool; + struct perf_session *session; +}; + +static int intel_pt_event_synth(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct intel_pt_synth *intel_pt_synth = + container_of(tool, struct intel_pt_synth, dummy_tool); + + return perf_session__deliver_synth_event(intel_pt_synth->session, event, + NULL); +} + +static int intel_pt_synth_event(struct perf_session *session, + struct perf_event_attr *attr, u64 id) +{ + struct intel_pt_synth intel_pt_synth; + + memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth)); + intel_pt_synth.session = session; + + return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, + &id, intel_pt_event_synth); +} + +static int intel_pt_synth_events(struct intel_pt *pt, + struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + struct perf_event_attr attr; + bool found = false; + u64 id; + int err; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == pt->pmu_type && evsel->ids) { + found = true; + break; + } + } + + if 
(!found) { + pr_debug("There are no selected events with Intel Processor Trace data\n"); + return 0; + } + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = PERF_TYPE_HARDWARE; + attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; + attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | + PERF_SAMPLE_PERIOD; + if (pt->timeless_decoding) + attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; + else + attr.sample_type |= PERF_SAMPLE_TIME; + if (!pt->per_cpu_mmaps) + attr.sample_type &= ~(u64)PERF_SAMPLE_CPU; + attr.exclude_user = evsel->attr.exclude_user; + attr.exclude_kernel = evsel->attr.exclude_kernel; + attr.exclude_hv = evsel->attr.exclude_hv; + attr.exclude_host = evsel->attr.exclude_host; + attr.exclude_guest = evsel->attr.exclude_guest; + attr.sample_id_all = evsel->attr.sample_id_all; + attr.read_format = evsel->attr.read_format; + + id = evsel->id[0] + 1000000000; + if (!id) + id = 1; + + if (pt->synth_opts.instructions) { + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS) + attr.sample_period = + intel_pt_ns_to_ticks(pt, pt->synth_opts.period); + else + attr.sample_period = pt->synth_opts.period; + pt->instructions_sample_period = attr.sample_period; + if (pt->synth_opts.callchain) + attr.sample_type |= PERF_SAMPLE_CALLCHAIN; + pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", + id, (u64)attr.sample_type); + err = intel_pt_synth_event(session, &attr, id); + if (err) { + pr_err("%s: failed to synthesize 'instructions' event type\n", + __func__); + return err; + } + pt->sample_instructions = true; + pt->instructions_sample_type = attr.sample_type; + pt->instructions_id = id; + id += 1; + } + + if (pt->synth_opts.transactions) { + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + attr.sample_period = 1; + if (pt->synth_opts.callchain) + attr.sample_type |= PERF_SAMPLE_CALLCHAIN; + 
pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", + id, (u64)attr.sample_type); + err = intel_pt_synth_event(session, &attr, id); + if (err) { + pr_err("%s: failed to synthesize 'transactions' event type\n", + __func__); + return err; + } + pt->sample_transactions = true; + pt->transactions_id = id; + id += 1; + evlist__for_each(evlist, evsel) { + if (evsel->id && evsel->id[0] == pt->transactions_id) { + if (evsel->name) + zfree(&evsel->name); + evsel->name = strdup("transactions"); + break; + } + } + } + + if (pt->synth_opts.branches) { + attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; + attr.sample_period = 1; + attr.sample_type |= PERF_SAMPLE_ADDR; + attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN; + pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", + id, (u64)attr.sample_type); + err = intel_pt_synth_event(session, &attr, id); + if (err) { + pr_err("%s: failed to synthesize 'branches' event type\n", + __func__); + return err; + } + pt->sample_branches = true; + pt->branches_sample_type = attr.sample_type; + pt->branches_id = id; + } + + pt->synth_needs_swap = evsel->needs_swap; + + return 0; +} + +static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each_reverse(evlist, evsel) { + const char *name = perf_evsel__name(evsel); + + if (!strcmp(name, "sched:sched_switch")) + return evsel; + } + + return NULL; +} + +static const char * const intel_pt_info_fmts[] = { + [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", + [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", + [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", + [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", + [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", + [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n", + [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n", + 
[INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", + [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", + [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n", + [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n", + [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", + [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n", +}; + +static void intel_pt_print_info(u64 *arr, int start, int finish) +{ + int i; + + if (!dump_trace) + return; + + for (i = start; i <= finish; i++) + fprintf(stdout, intel_pt_info_fmts[i], arr[i]); +} + +int intel_pt_process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; + size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS; + struct intel_pt *pt; + int err; + + if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + + min_sz) + return -EINVAL; + + pt = zalloc(sizeof(struct intel_pt)); + if (!pt) + return -ENOMEM; + + err = auxtrace_queues__init(&pt->queues); + if (err) + goto err_free; + + intel_pt_log_set_name(INTEL_PT_PMU_NAME); + + pt->session = session; + pt->machine = &session->machines.host; /* No kvm support */ + pt->auxtrace_type = auxtrace_info->type; + pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE]; + pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT]; + pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT]; + pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO]; + pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO]; + pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT]; + pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT]; + pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH]; + pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE]; + pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS]; + intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE, + INTEL_PT_PER_CPU_MMAPS); + + if (auxtrace_info->header.size 
>= sizeof(struct auxtrace_info_event) + + (sizeof(u64) * INTEL_PT_CYC_BIT)) { + pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT]; + pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS]; + pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N]; + pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D]; + pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT]; + intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT, + INTEL_PT_CYC_BIT); + } + + pt->timeless_decoding = intel_pt_timeless_decoding(pt); + pt->have_tsc = intel_pt_have_tsc(pt); + pt->sampling_mode = false; + pt->est_tsc = !pt->timeless_decoding; + + pt->unknown_thread = thread__new(999999999, 999999999); + if (!pt->unknown_thread) { + err = -ENOMEM; + goto err_free_queues; + } + err = thread__set_comm(pt->unknown_thread, "unknown", 0); + if (err) + goto err_delete_thread; + if (thread__init_map_groups(pt->unknown_thread, pt->machine)) { + err = -ENOMEM; + goto err_delete_thread; + } + + pt->auxtrace.process_event = intel_pt_process_event; + pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event; + pt->auxtrace.flush_events = intel_pt_flush; + pt->auxtrace.free_events = intel_pt_free_events; + pt->auxtrace.free = intel_pt_free; + session->auxtrace = &pt->auxtrace; + + if (dump_trace) + return 0; + + if (pt->have_sched_switch == 1) { + pt->switch_evsel = intel_pt_find_sched_switch(session->evlist); + if (!pt->switch_evsel) { + pr_err("%s: missing sched_switch event\n", __func__); + goto err_delete_thread; + } + } + + if (session->itrace_synth_opts && session->itrace_synth_opts->set) { + pt->synth_opts = *session->itrace_synth_opts; + } else { + itrace_synth_opts__set_default(&pt->synth_opts); + if (use_browser != -1) { + pt->synth_opts.branches = false; + pt->synth_opts.callchain = true; + } + } + + if (pt->synth_opts.log) + intel_pt_log_enable(); + + /* Maximum non-turbo ratio is TSC freq / 100 MHz */ + if (pt->tc.time_mult) { + u64 tsc_freq = 
intel_pt_ns_to_ticks(pt, 1000000000); + + pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000; + intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq); + intel_pt_log("Maximum non-turbo ratio %u\n", + pt->max_non_turbo_ratio); + } + + if (pt->synth_opts.calls) + pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_TRACE_END; + if (pt->synth_opts.returns) + pt->branches_filter |= PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_TRACE_BEGIN; + + if (pt->synth_opts.callchain && !symbol_conf.use_callchain) { + symbol_conf.use_callchain = true; + if (callchain_register_param(&callchain_param) < 0) { + symbol_conf.use_callchain = false; + pt->synth_opts.callchain = false; + } + } + + err = intel_pt_synth_events(pt, session); + if (err) + goto err_delete_thread; + + err = auxtrace_queues__process_index(&pt->queues, session); + if (err) + goto err_delete_thread; + + if (pt->queues.populated) + pt->data_queued = true; + + if (pt->timeless_decoding) + pr_debug2("Intel PT decoding without timestamps\n"); + + return 0; + +err_delete_thread: + thread__delete(pt->unknown_thread); +err_free_queues: + intel_pt_log_disable(); + auxtrace_queues__free(&pt->queues); + session->auxtrace = NULL; +err_free: + free(pt); + return err; +} diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h new file mode 100644 index 000000000000..0065949df693 --- /dev/null +++ b/tools/perf/util/intel-pt.h @@ -0,0 +1,56 @@ +/* + * intel_pt.h: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
#ifndef INCLUDE__PERF_INTEL_PT_H__
#define INCLUDE__PERF_INTEL_PT_H__

/* Name passed to intel_pt_log_set_name() when decoding is set up. */
#define INTEL_PT_PMU_NAME "intel_pt"

/*
 * Indices into the u64 priv[] array of the auxtrace info event.
 * intel_pt_process_auxtrace_info() reads the array at these positions.
 * The entries from INTEL_PT_MTC_BIT onwards are only read when the event
 * is large enough to contain them.
 */
enum {
	INTEL_PT_PMU_TYPE,
	INTEL_PT_TIME_SHIFT,
	INTEL_PT_TIME_MULT,
	INTEL_PT_TIME_ZERO,
	INTEL_PT_CAP_USER_TIME_ZERO,
	INTEL_PT_TSC_BIT,
	INTEL_PT_NORETCOMP_BIT,
	INTEL_PT_HAVE_SCHED_SWITCH,
	INTEL_PT_SNAPSHOT_MODE,
	INTEL_PT_PER_CPU_MMAPS,
	INTEL_PT_MTC_BIT,
	INTEL_PT_MTC_FREQ_BITS,
	INTEL_PT_TSC_CTC_N,
	INTEL_PT_TSC_CTC_D,
	INTEL_PT_CYC_BIT,
	/* Number of priv[] entries; keep last */
	INTEL_PT_AUXTRACE_PRIV_MAX,
};

/* Size in bytes of a priv[] array holding every entry above. */
#define INTEL_PT_AUXTRACE_PRIV_SIZE (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64))

struct auxtrace_record;
struct perf_tool;
union perf_event;
struct perf_session;
struct perf_event_attr;
struct perf_pmu;

/* NOTE(review): defined outside this header; presumably the record-side setup - confirm. */
struct auxtrace_record *intel_pt_recording_init(int *err);

/* Set up Intel PT decoding for a session from its auxtrace info event. */
int intel_pt_process_auxtrace_info(union perf_event *event,
				   struct perf_session *session);

/* NOTE(review): defined outside this header; semantics not visible here. */
struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu);

#endif
+ */ + +#include <stdio.h> +#include <sys/utsname.h> +#include "util.h" +#include "debug.h" +#include "llvm-utils.h" +#include "cache.h" + +#define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ + "$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS " \ + "$KERNEL_INC_OPTIONS -Wno-unused-value " \ + "-Wno-pointer-sign -working-directory " \ + "$WORKING_DIR -c \"$CLANG_SOURCE\" -target bpf -O2 -o -" + +struct llvm_param llvm_param = { + .clang_path = "clang", + .clang_bpf_cmd_template = CLANG_BPF_CMD_DEFAULT_TEMPLATE, + .clang_opt = NULL, + .kbuild_dir = NULL, + .kbuild_opts = NULL, + .user_set_param = false, +}; + +int perf_llvm_config(const char *var, const char *value) +{ + if (prefixcmp(var, "llvm.")) + return 0; + var += sizeof("llvm.") - 1; + + if (!strcmp(var, "clang-path")) + llvm_param.clang_path = strdup(value); + else if (!strcmp(var, "clang-bpf-cmd-template")) + llvm_param.clang_bpf_cmd_template = strdup(value); + else if (!strcmp(var, "clang-opt")) + llvm_param.clang_opt = strdup(value); + else if (!strcmp(var, "kbuild-dir")) + llvm_param.kbuild_dir = strdup(value); + else if (!strcmp(var, "kbuild-opts")) + llvm_param.kbuild_opts = strdup(value); + else + return -1; + llvm_param.user_set_param = true; + return 0; +} + +static int +search_program(const char *def, const char *name, + char *output) +{ + char *env, *path, *tmp = NULL; + char buf[PATH_MAX]; + int ret; + + output[0] = '\0'; + if (def && def[0] != '\0') { + if (def[0] == '/') { + if (access(def, F_OK) == 0) { + strlcpy(output, def, PATH_MAX); + return 0; + } + } else if (def[0] != '\0') + name = def; + } + + env = getenv("PATH"); + if (!env) + return -1; + env = strdup(env); + if (!env) + return -1; + + ret = -ENOENT; + path = strtok_r(env, ":", &tmp); + while (path) { + scnprintf(buf, sizeof(buf), "%s/%s", path, name); + if (access(buf, F_OK) == 0) { + strlcpy(output, buf, PATH_MAX); + ret = 0; + break; + } + path = strtok_r(NULL, ":", &tmp); + } + + free(env); + return ret; +} + +#define READ_SIZE 4096 +static int 
+read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz) +{ + int err = 0; + void *buf = NULL; + FILE *file = NULL; + size_t read_sz = 0, buf_sz = 0; + + file = popen(cmd, "r"); + if (!file) { + pr_err("ERROR: unable to popen cmd: %s\n", + strerror(errno)); + return -EINVAL; + } + + while (!feof(file) && !ferror(file)) { + /* + * Make buf_sz always have obe byte extra space so we + * can put '\0' there. + */ + if (buf_sz - read_sz < READ_SIZE + 1) { + void *new_buf; + + buf_sz = read_sz + READ_SIZE + 1; + new_buf = realloc(buf, buf_sz); + + if (!new_buf) { + pr_err("ERROR: failed to realloc memory\n"); + err = -ENOMEM; + goto errout; + } + + buf = new_buf; + } + read_sz += fread(buf + read_sz, 1, READ_SIZE, file); + } + + if (buf_sz - read_sz < 1) { + pr_err("ERROR: internal error\n"); + err = -EINVAL; + goto errout; + } + + if (ferror(file)) { + pr_err("ERROR: error occurred when reading from pipe: %s\n", + strerror(errno)); + err = -EIO; + goto errout; + } + + err = WEXITSTATUS(pclose(file)); + file = NULL; + if (err) { + err = -EINVAL; + goto errout; + } + + /* + * If buf is string, give it terminal '\0' to make our life + * easier. If buf is not string, that '\0' is out of space + * indicated by read_sz so caller won't even notice it. + */ + ((char *)buf)[read_sz] = '\0'; + + if (!p_buf) + free(buf); + else + *p_buf = buf; + + if (p_read_sz) + *p_read_sz = read_sz; + return 0; + +errout: + if (file) + pclose(file); + free(buf); + if (p_buf) + *p_buf = NULL; + if (p_read_sz) + *p_read_sz = 0; + return err; +} + +static inline void +force_set_env(const char *var, const char *value) +{ + if (value) { + setenv(var, value, 1); + pr_debug("set env: %s=%s\n", var, value); + } else { + unsetenv(var); + pr_debug("unset env: %s\n", var); + } +} + +static void +version_notice(void) +{ + pr_err( +" \tLLVM 3.7 or newer is required. 
Which can be found from http://llvm.org\n" +" \tYou may want to try git trunk:\n" +" \t\tgit clone http://llvm.org/git/llvm.git\n" +" \t\t and\n" +" \t\tgit clone http://llvm.org/git/clang.git\n\n" +" \tOr fetch the latest clang/llvm 3.7 from pre-built llvm packages for\n" +" \tdebian/ubuntu:\n" +" \t\thttp://llvm.org/apt\n\n" +" \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n" +" \toption in [llvm] section of ~/.perfconfig to:\n\n" +" \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS \\\n" +" \t -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n" +" \t -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n" +" \t(Replace /path/to/llc with path to your llc)\n\n" +); +} + +static int detect_kbuild_dir(char **kbuild_dir) +{ + const char *test_dir = llvm_param.kbuild_dir; + const char *prefix_dir = ""; + const char *suffix_dir = ""; + + char *autoconf_path; + struct utsname utsname; + + int err; + + if (!test_dir) { + err = uname(&utsname); + if (err) { + pr_warning("uname failed: %s\n", strerror(errno)); + return -EINVAL; + } + + test_dir = utsname.release; + prefix_dir = "/lib/modules/"; + suffix_dir = "/build"; + } + + err = asprintf(&autoconf_path, "%s%s%s/include/generated/autoconf.h", + prefix_dir, test_dir, suffix_dir); + if (err < 0) + return -ENOMEM; + + if (access(autoconf_path, R_OK) == 0) { + free(autoconf_path); + + err = asprintf(kbuild_dir, "%s%s%s", prefix_dir, test_dir, + suffix_dir); + if (err < 0) + return -ENOMEM; + return 0; + } + free(autoconf_path); + return -ENOENT; +} + +static const char *kinc_fetch_script = +"#!/usr/bin/env sh\n" +"if ! test -d \"$KBUILD_DIR\"\n" +"then\n" +" exit -1\n" +"fi\n" +"if ! 
test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n" +"then\n" +" exit -1\n" +"fi\n" +"TMPDIR=`mktemp -d`\n" +"if test -z \"$TMPDIR\"\n" +"then\n" +" exit -1\n" +"fi\n" +"cat << EOF > $TMPDIR/Makefile\n" +"obj-y := dummy.o\n" +"\\$(obj)/%.o: \\$(src)/%.c\n" +"\t@echo -n \"\\$(NOSTDINC_FLAGS) \\$(LINUXINCLUDE) \\$(EXTRA_CFLAGS)\"\n" +"EOF\n" +"touch $TMPDIR/dummy.c\n" +"make -s -C $KBUILD_DIR M=$TMPDIR $KBUILD_OPTS dummy.o 2>/dev/null\n" +"RET=$?\n" +"rm -rf $TMPDIR\n" +"exit $RET\n"; + +static inline void +get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts) +{ + int err; + + if (!kbuild_dir || !kbuild_include_opts) + return; + + *kbuild_dir = NULL; + *kbuild_include_opts = NULL; + + if (llvm_param.kbuild_dir && !llvm_param.kbuild_dir[0]) { + pr_debug("[llvm.kbuild-dir] is set to \"\" deliberately.\n"); + pr_debug("Skip kbuild options detection.\n"); + return; + } + + err = detect_kbuild_dir(kbuild_dir); + if (err) { + pr_warning( +"WARNING:\tunable to get correct kernel building directory.\n" +"Hint:\tSet correct kbuild directory using 'kbuild-dir' option in [llvm]\n" +" \tsection of ~/.perfconfig or set it to \"\" to suppress kbuild\n" +" \tdetection.\n\n"); + return; + } + + pr_debug("Kernel build dir is set to %s\n", *kbuild_dir); + force_set_env("KBUILD_DIR", *kbuild_dir); + force_set_env("KBUILD_OPTS", llvm_param.kbuild_opts); + err = read_from_pipe(kinc_fetch_script, + (void **)kbuild_include_opts, + NULL); + if (err) { + pr_warning( +"WARNING:\tunable to get kernel include directories from '%s'\n" +"Hint:\tTry set clang include options using 'clang-bpf-cmd-template'\n" +" \toption in [llvm] section of ~/.perfconfig and set 'kbuild-dir'\n" +" \toption in [llvm] to \"\" to suppress this detection.\n\n", + *kbuild_dir); + + free(*kbuild_dir); + *kbuild_dir = NULL; + return; + } + + pr_debug("include option is set to %s\n", *kbuild_include_opts); +} + +int llvm__compile_bpf(const char *path, void **p_obj_buf, + size_t *p_obj_buf_sz) +{ + int err; + 
char clang_path[PATH_MAX]; + const char *clang_opt = llvm_param.clang_opt; + const char *template = llvm_param.clang_bpf_cmd_template; + char *kbuild_dir = NULL, *kbuild_include_opts = NULL; + void *obj_buf = NULL; + size_t obj_buf_sz; + + if (!template) + template = CLANG_BPF_CMD_DEFAULT_TEMPLATE; + + err = search_program(llvm_param.clang_path, + "clang", clang_path); + if (err) { + pr_err( +"ERROR:\tunable to find clang.\n" +"Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n" +" \tand 'clang-path' option in [llvm] section of ~/.perfconfig.\n"); + version_notice(); + return -ENOENT; + } + + /* + * This is an optional work. Even it fail we can continue our + * work. Needn't to check error return. + */ + get_kbuild_opts(&kbuild_dir, &kbuild_include_opts); + + force_set_env("CLANG_EXEC", clang_path); + force_set_env("CLANG_OPTIONS", clang_opt); + force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts); + force_set_env("WORKING_DIR", kbuild_dir ? : "."); + + /* + * Since we may reset clang's working dir, path of source file + * should be transferred into absolute path, except we want + * stdin to be source file (testing). + */ + force_set_env("CLANG_SOURCE", + (path[0] == '-') ? 
path : + make_nonrelative_path(path)); + + pr_debug("llvm compiling command template: %s\n", template); + err = read_from_pipe(template, &obj_buf, &obj_buf_sz); + if (err) { + pr_err("ERROR:\tunable to compile %s\n", path); + pr_err("Hint:\tCheck error message shown above.\n"); + pr_err("Hint:\tYou can also pre-compile it into .o using:\n"); + pr_err(" \t\tclang -target bpf -O2 -c %s\n", path); + pr_err(" \twith proper -I and -D options.\n"); + goto errout; + } + + free(kbuild_dir); + free(kbuild_include_opts); + if (!p_obj_buf) + free(obj_buf); + else + *p_obj_buf = obj_buf; + + if (p_obj_buf_sz) + *p_obj_buf_sz = obj_buf_sz; + return 0; +errout: + free(kbuild_dir); + free(kbuild_include_opts); + free(obj_buf); + if (p_obj_buf) + *p_obj_buf = NULL; + if (p_obj_buf_sz) + *p_obj_buf_sz = 0; + return err; +} + +int llvm__search_clang(void) +{ + char clang_path[PATH_MAX]; + + return search_program(llvm_param.clang_path, "clang", clang_path); +} diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h new file mode 100644 index 000000000000..5b3cf1c229e2 --- /dev/null +++ b/tools/perf/util/llvm-utils.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015, Huawei Inc. + */ +#ifndef __LLVM_UTILS_H +#define __LLVM_UTILS_H + +#include "debug.h" + +struct llvm_param { + /* Path of clang executable */ + const char *clang_path; + /* + * Template of clang bpf compiling. 5 env variables + * can be used: + * $CLANG_EXEC: Path to clang. + * $CLANG_OPTIONS: Extra options to clang. + * $KERNEL_INC_OPTIONS: Kernel include directories. + * $WORKING_DIR: Kernel source directory. + * $CLANG_SOURCE: Source file to be compiled. + */ + const char *clang_bpf_cmd_template; + /* Will be filled in $CLANG_OPTIONS */ + const char *clang_opt; + /* Where to find kbuild system */ + const char *kbuild_dir; + /* + * Arguments passed to make, like 'ARCH=arm' if doing cross + * compiling. Should not be used for dynamic compiling. 
+ */ + const char *kbuild_opts; + /* + * Default is false. If one of the above fields is set by user + * explicitly then user_set_llvm is set to true. This is used + * for perf test. If user doesn't set anything in .perfconfig + * and clang is not found, don't trigger llvm test. + */ + bool user_set_param; +}; + +extern struct llvm_param llvm_param; +extern int perf_llvm_config(const char *var, const char *value); + +extern int llvm__compile_bpf(const char *path, void **p_obj_buf, + size_t *p_obj_buf_sz); + +/* This function is for test__llvm() use only */ +extern int llvm__search_clang(void); +#endif diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 7ff682770fdb..6309f7ceb08f 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -250,7 +250,7 @@ struct machine *machines__findnew(struct machines *machines, pid_t pid) static struct strlist *seen; if (!seen) - seen = strlist__new(true, NULL); + seen = strlist__new(NULL, NULL); if (!strlist__has_entry(seen, path)) { pr_err("Can't access file %s\n", path); @@ -550,6 +550,14 @@ int machine__process_itrace_start_event(struct machine *machine __maybe_unused, return 0; } +int machine__process_switch_event(struct machine *machine __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_switch(event, stdout); + return 0; +} + struct map *machine__findnew_module_map(struct machine *machine, u64 start, const char *filename) { @@ -1387,6 +1395,24 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event event->fork.ptid); int err = 0; + if (dump_trace) + perf_event__fprintf_task(event, stdout); + + /* + * There may be an existing thread that is not actually the parent, + * either because we are processing events out of order, or because the + * (fork) event that would have removed the thread was lost. Assume the + * latter case and continue on as best we can. 
+ */ + if (parent->pid_ != (pid_t)event->fork.ppid) { + dump_printf("removing erroneous parent thread %d/%d\n", + parent->pid_, parent->tid); + machine__remove_thread(machine, parent); + thread__put(parent); + parent = machine__findnew_thread(machine, event->fork.ppid, + event->fork.ptid); + } + /* if a thread currently exists for the thread id remove it */ if (thread != NULL) { machine__remove_thread(machine, thread); @@ -1395,8 +1421,6 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); - if (dump_trace) - perf_event__fprintf_task(event, stdout); if (thread == NULL || parent == NULL || thread__fork(thread, parent, sample->time) < 0) { @@ -1451,6 +1475,9 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_itrace_start_event(machine, event); break; case PERF_RECORD_LOST_SAMPLES: ret = machine__process_lost_samples_event(machine, event, sample); break; + case PERF_RECORD_SWITCH: + case PERF_RECORD_SWITCH_CPU_WIDE: + ret = machine__process_switch_event(machine, event); break; default: ret = -1; break; @@ -1993,3 +2020,17 @@ struct dso *machine__findnew_dso(struct machine *machine, const char *filename) { return dsos__findnew(&machine->dsos, filename); } + +char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp) +{ + struct machine *machine = vmachine; + struct map *map; + struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map, NULL); + + if (sym == NULL) + return NULL; + + *modp = __map__is_kmodule(map) ? 
(char *)map->dso->short_name : NULL; + *addrp = map->unmap_ip(map, sym->start); + return sym->name; +} diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 887798e511e9..ea5cb4a621db 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -87,6 +87,8 @@ int machine__process_aux_event(struct machine *machine, union perf_event *event); int machine__process_itrace_start_event(struct machine *machine, union perf_event *event); +int machine__process_switch_event(struct machine *machine __maybe_unused, + union perf_event *event); int machine__process_mmap_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); int machine__process_mmap2_event(struct machine *machine, union perf_event *event, @@ -237,5 +239,9 @@ int machine__synthesize_threads(struct machine *machine, struct target *target, pid_t machine__get_current_tid(struct machine *machine, int cpu); int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, pid_t tid); +/* + * For use with libtraceevent's pevent_set_function_resolver() + */ +char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp); #endif /* __PERF_MACHINE_H */ diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index b5a5e9c02437..b1c475d9b240 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -224,6 +224,20 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type) return map; } +/* + * Use this and __map__is_kmodule() for map instances that are in + * machine->kmaps, and thus have map->groups->machine all properly set, to + * disambiguate between the kernel and modules. 
+ * + * When the need arises, introduce map__is_{kernel,kmodule)() that + * checks (map->groups != NULL && map->groups->machine != NULL && + * map->dso->kernel) before calling __map__is_{kernel,kmodule}()) + */ +bool __map__is_kernel(const struct map *map) +{ + return map->groups->machine->vmlinux_maps[map->type] == map; +} + static void map__exit(struct map *map) { BUG_ON(!RB_EMPTY_NODE(&map->rb_node)); @@ -334,9 +348,18 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name, return dso__find_symbol_by_name(map->dso, map->type, name); } -struct map *map__clone(struct map *map) +struct map *map__clone(struct map *from) { - return memdup(map, sizeof(*map)); + struct map *map = memdup(from, sizeof(*map)); + + if (map != NULL) { + atomic_set(&map->refcnt, 1); + RB_CLEAR_NODE(&map->rb_node); + dso__get(map->dso); + map->groups = NULL; + } + + return map; } int map__overlap(struct map *l, struct map *r) diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index d73e687b224e..57829e89b78b 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -256,4 +256,11 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, struct map *map_groups__find_by_name(struct map_groups *mg, enum map_type type, const char *name); +bool __map__is_kernel(const struct map *map); + +static inline bool __map__is_kmodule(const struct map *map) +{ + return !__map__is_kernel(map); +} + #endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 52be201b9b25..b1b9e2385f4b 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -220,6 +220,9 @@ static int __ordered_events__flush(struct ordered_events *oe) else if (last_ts <= limit) oe->last = list_entry(head->prev, struct ordered_event, list); + if (show_progress) + ui_progress__finish(); + return 0; } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 
/*
 * Wrapper around __add_event() for events that have no cpu map: returns
 * 0 when an evsel was created, -ENOMEM otherwise.
 */
static int add_event(struct list_head *list, int *idx,
		     struct perf_event_attr *attr, char *name,
		     struct list_head *config_terms)
{
	struct perf_evsel *evsel;

	evsel = __add_event(list, idx, attr, name, NULL, config_terms);
	if (evsel == NULL)
		return -ENOMEM;

	return 0;
}
+606,20 @@ do { \ * attr->branch_sample_type = term->val.num; */ break; + case PARSE_EVENTS__TERM_TYPE_TIME: + CHECK_TYPE_VAL(NUM); + if (term->val.num > 1) { + err->str = strdup("expected 0 or 1"); + err->idx = term->err_val; + return -EINVAL; + } + break; + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: + CHECK_TYPE_VAL(STR); + break; + case PARSE_EVENTS__TERM_TYPE_STACKSIZE: + CHECK_TYPE_VAL(NUM); + break; case PARSE_EVENTS__TERM_TYPE_NAME: CHECK_TYPE_VAL(STR); break; @@ -622,22 +644,71 @@ static int config_attr(struct perf_event_attr *attr, return 0; } +static int get_config_terms(struct list_head *head_config, + struct list_head *head_terms __maybe_unused) +{ +#define ADD_CONFIG_TERM(__type, __name, __val) \ +do { \ + struct perf_evsel_config_term *__t; \ + \ + __t = zalloc(sizeof(*__t)); \ + if (!__t) \ + return -ENOMEM; \ + \ + INIT_LIST_HEAD(&__t->list); \ + __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \ + __t->val.__name = __val; \ + list_add_tail(&__t->list, head_terms); \ +} while (0) + + struct parse_events_term *term; + + list_for_each_entry(term, head_config, list) { + switch (term->type_term) { + case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: + ADD_CONFIG_TERM(PERIOD, period, term->val.num); + break; + case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: + ADD_CONFIG_TERM(FREQ, freq, term->val.num); + break; + case PARSE_EVENTS__TERM_TYPE_TIME: + ADD_CONFIG_TERM(TIME, time, term->val.num); + break; + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: + ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str); + break; + case PARSE_EVENTS__TERM_TYPE_STACKSIZE: + ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num); + break; + default: + break; + } + } +#undef ADD_EVSEL_CONFIG + return 0; +} + int parse_events_add_numeric(struct parse_events_evlist *data, struct list_head *list, u32 type, u64 config, struct list_head *head_config) { struct perf_event_attr attr; + LIST_HEAD(config_terms); memset(&attr, 0, sizeof(attr)); attr.type = type; attr.config = config; - if (head_config && - 
config_attr(&attr, head_config, data->error)) - return -EINVAL; + if (head_config) { + if (config_attr(&attr, head_config, data->error)) + return -EINVAL; + + if (get_config_terms(head_config, &config_terms)) + return -ENOMEM; + } - return add_event(list, &data->idx, &attr, NULL); + return add_event(list, &data->idx, &attr, NULL, &config_terms); } static int parse_events__is_name_term(struct parse_events_term *term) @@ -664,6 +735,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, struct perf_pmu_info info; struct perf_pmu *pmu; struct perf_evsel *evsel; + LIST_HEAD(config_terms); pmu = perf_pmu__find(name); if (!pmu) @@ -678,7 +750,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, if (!head_config) { attr.type = pmu->type; - evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus); + evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus, NULL); return evsel ? 0 : -ENOMEM; } @@ -692,11 +764,15 @@ int parse_events_add_pmu(struct parse_events_evlist *data, if (config_attr(&attr, head_config, data->error)) return -EINVAL; + if (get_config_terms(head_config, &config_terms)) + return -ENOMEM; + if (perf_pmu__config(pmu, &attr, head_config, data->error)) return -EINVAL; evsel = __add_event(list, &data->idx, &attr, - pmu_event_name(head_config), pmu->cpus); + pmu_event_name(head_config), pmu->cpus, + &config_terms); if (evsel) { evsel->unit = info.unit; evsel->scale = info.scale; @@ -1065,8 +1141,13 @@ int parse_events(struct perf_evlist *evlist, const char *str, perf_pmu__parse_cleanup(); if (!ret) { int entries = data.idx - evlist->nr_entries; + struct perf_evsel *last; + perf_evlist__splice_list_tail(evlist, &data.list, entries); evlist->nr_groups += data.nr_groups; + last = perf_evlist__last(evlist); + last->cmdline_group_boundary = true; + return 0; } @@ -1105,7 +1186,7 @@ static void parse_events_print_error(struct parse_events_error *err, * Maximum error index indent, we will cut * the event string if it's bigger. 
*/ - int max_err_idx = 10; + int max_err_idx = 13; /* * Let's be specific with the message when @@ -1162,30 +1243,93 @@ int parse_events_option(const struct option *opt, const char *str, return ret; } -int parse_filter(const struct option *opt, const char *str, - int unset __maybe_unused) +static int +foreach_evsel_in_last_glob(struct perf_evlist *evlist, + int (*func)(struct perf_evsel *evsel, + const void *arg), + const void *arg) { - struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; struct perf_evsel *last = NULL; + int err; if (evlist->nr_entries > 0) last = perf_evlist__last(evlist); - if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) { + do { + err = (*func)(last, arg); + if (err) + return -1; + if (!last) + return 0; + + if (last->node.prev == &evlist->entries) + return 0; + last = list_entry(last->node.prev, struct perf_evsel, node); + } while (!last->cmdline_group_boundary); + + return 0; +} + +static int set_filter(struct perf_evsel *evsel, const void *arg) +{ + const char *str = arg; + + if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) { fprintf(stderr, "--filter option should follow a -e tracepoint option\n"); return -1; } - last->filter = strdup(str); - if (last->filter == NULL) { - fprintf(stderr, "not enough memory to hold filter string\n"); + if (perf_evsel__append_filter(evsel, "&&", str) < 0) { + fprintf(stderr, + "not enough memory to hold filter string\n"); return -1; } return 0; } +int parse_filter(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; + + return foreach_evsel_in_last_glob(evlist, set_filter, + (const void *)str); +} + +static int add_exclude_perf_filter(struct perf_evsel *evsel, + const void *arg __maybe_unused) +{ + char new_filter[64]; + + if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) { + fprintf(stderr, + "--exclude-perf option should follow a -e tracepoint option\n"); + return -1; + 
} + + snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid()); + + if (perf_evsel__append_filter(evsel, "&&", new_filter) < 0) { + fprintf(stderr, + "not enough memory to hold filter string\n"); + return -1; + } + + return 0; +} + +int exclude_perf(const struct option *opt, + const char *arg __maybe_unused, + int unset __maybe_unused) +{ + struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; + + return foreach_evsel_in_last_glob(evlist, add_exclude_perf_filter, + NULL); +} + static const char * const event_type_descriptors[] = { "Hardware event", "Software event", diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 131f29b2f132..a09b0e210997 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -34,6 +34,7 @@ extern int parse_events(struct perf_evlist *evlist, const char *str, struct parse_events_error *error); extern int parse_events_terms(struct list_head *terms, const char *str); extern int parse_filter(const struct option *opt, const char *str, int unset); +extern int exclude_perf(const struct option *opt, const char *arg, int unset); #define EVENTS_HELP_MAX (128*1024) @@ -61,7 +62,11 @@ enum { PARSE_EVENTS__TERM_TYPE_CONFIG2, PARSE_EVENTS__TERM_TYPE_NAME, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD, + PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE, + PARSE_EVENTS__TERM_TYPE_TIME, + PARSE_EVENTS__TERM_TYPE_CALLGRAPH, + PARSE_EVENTS__TERM_TYPE_STACKSIZE, }; struct parse_events_term { diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 13cef3c65565..936d566f48d8 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -182,7 +182,11 @@ config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); } period { return term(yyscanner, 
PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } +freq { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); } branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); } +time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); } +call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); } +stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 7bcb8c315615..89c91a1a67e7 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -462,10 +462,6 @@ static struct perf_pmu *pmu_lookup(const char *name) LIST_HEAD(aliases); __u32 type; - /* No support for intel_bts or intel_pt so disallow them */ - if (!strcmp(name, "intel_bts") || !strcmp(name, "intel_pt")) - return NULL; - /* * The pmu data we store & need consists of the pmu * type value and format definitions. Load both right @@ -542,7 +538,7 @@ struct perf_pmu *perf_pmu__find(const char *name) } static struct perf_pmu_format * -pmu_find_format(struct list_head *formats, char *name) +pmu_find_format(struct list_head *formats, const char *name) { struct perf_pmu_format *format; @@ -553,6 +549,21 @@ pmu_find_format(struct list_head *formats, char *name) return NULL; } +__u64 perf_pmu__format_bits(struct list_head *formats, const char *name) +{ + struct perf_pmu_format *format = pmu_find_format(formats, name); + __u64 bits = 0; + int fbit; + + if (!format) + return 0; + + for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS) + bits |= 1ULL << fbit; + + return bits; +} + /* * Sets value based on the format definition (format parameter) * and unformated value (value parameter). 
@@ -574,6 +585,18 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, } } +static __u64 pmu_format_max_value(const unsigned long *format) +{ + int w; + + w = bitmap_weight(format, PERF_PMU_FORMAT_BITS); + if (!w) + return 0; + if (w < 64) + return (1ULL << w) - 1; + return -1; +} + /* * Term is a string term, and might be a param-term. Try to look up it's value * in the remaining terms. @@ -607,7 +630,9 @@ static char *formats_error_string(struct list_head *formats) { struct perf_pmu_format *format; char *err, *str; - static const char *static_terms = "config,config1,config2,name,period,branch_type\n"; + static const char *static_terms = "config,config1,config2,name," + "period,freq,branch_type,time," + "call-graph,stack-size\n"; unsigned i = 0; if (!asprintf(&str, "valid terms:")) @@ -647,7 +672,7 @@ static int pmu_config_term(struct list_head *formats, { struct perf_pmu_format *format; __u64 *vp; - __u64 val; + __u64 val, max_val; /* * If this is a parameter we've already used for parameterized-eval, @@ -713,6 +738,22 @@ static int pmu_config_term(struct list_head *formats, } else return -EINVAL; + max_val = pmu_format_max_value(format->bits); + if (val > max_val) { + if (err) { + err->idx = term->err_val; + if (asprintf(&err->str, + "value too big for format, maximum is %llu", + (unsigned long long)max_val) < 0) + err->str = strdup("value too big for format"); + return -EINVAL; + } + /* + * Assume we don't care if !err, in which case the value will be + * silently truncated. 
+ */ + } + pmu_format_value(format->bits, val, vp, zero); return 0; } diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 7b9c8cf8ae3e..5d7e84466bee 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -54,6 +54,7 @@ int perf_pmu__config_terms(struct list_head *formats, struct perf_event_attr *attr, struct list_head *head_terms, bool zero, struct parse_events_error *error); +__u64 perf_pmu__format_bits(struct list_head *formats, const char *name); int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, struct perf_pmu_info *info); struct list_head *perf_pmu__alias(struct perf_pmu *pmu, diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 381f23a443c7..eb5f18b75402 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -45,6 +45,7 @@ #include "trace-event.h" /* For __maybe_unused */ #include "probe-event.h" #include "probe-finder.h" +#include "probe-file.h" #include "session.h" #define MAX_CMDLEN 256 @@ -55,11 +56,7 @@ struct probe_conf probe_conf; #define semantic_error(msg ...) pr_err("Semantic error :" msg) -/* If there is no space to write, returns -E2BIG. */ -static int e_snprintf(char *str, size_t size, const char *format, ...) - __attribute__((format(printf, 3, 4))); - -static int e_snprintf(char *str, size_t size, const char *format, ...) +int e_snprintf(char *str, size_t size, const char *format, ...) { int ret; va_list ap; @@ -72,7 +69,6 @@ static int e_snprintf(char *str, size_t size, const char *format, ...) 
} static char *synthesize_perf_probe_point(struct perf_probe_point *pp); -static void clear_probe_trace_event(struct probe_trace_event *tev); static struct machine *host_machine; /* Initialize symbol maps and path of vmlinux/modules */ @@ -519,7 +515,7 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp, if (ret < 0) goto error; addr += stext; - } else { + } else if (tp->symbol) { addr = kernel_get_symbol_address_by_name(tp->symbol, false); if (addr == 0) goto error; @@ -709,9 +705,10 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, } /* Error path : ntevs < 0 */ pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); - if (ntevs == -EBADF) { - pr_warning("Warning: No dwarf info found in the vmlinux - " - "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n"); + if (ntevs < 0) { + if (ntevs == -EBADF) + pr_warning("Warning: No dwarf info found in the vmlinux - " + "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n"); if (!need_dwarf) { pr_debug("Trying to use symbols.\n"); return 0; @@ -1197,15 +1194,37 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) *ptr++ = '\0'; } - tmp = strdup(arg); - if (tmp == NULL) - return -ENOMEM; + if (arg[0] == '\0') + tmp = NULL; + else { + tmp = strdup(arg); + if (tmp == NULL) + return -ENOMEM; + } if (file_spec) pp->file = tmp; - else + else { pp->function = tmp; + /* + * Keep pp->function even if this is absolute address, + * so it can mark whether abs_address is valid. + * Which make 'perf probe lib.bin 0x0' possible. + * + * Note that checking length of tmp is not needed + * because when we access tmp[1] we know tmp[0] is '0', + * so tmp[1] should always valid (but could be '\0'). 
+ */ + if (tmp && !strncmp(tmp, "0x", 2)) { + pp->abs_address = strtoul(pp->function, &tmp, 0); + if (*tmp != '\0') { + semantic_error("Invalid absolute address.\n"); + return -EINVAL; + } + } + } + /* Parse other options */ while (ptr) { arg = ptr; @@ -1467,8 +1486,7 @@ bool perf_probe_event_need_dwarf(struct perf_probe_event *pev) } /* Parse probe_events event into struct probe_point */ -static int parse_probe_trace_command(const char *cmd, - struct probe_trace_event *tev) +int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev) { struct probe_trace_point *tp = &tev->point; char pr; @@ -1523,9 +1541,31 @@ static int parse_probe_trace_command(const char *cmd, } else p = argv[1]; fmt1_str = strtok_r(p, "+", &fmt); - if (fmt1_str[0] == '0') /* only the address started with 0x */ - tp->address = strtoul(fmt1_str, NULL, 0); - else { + /* only the address started with 0x */ + if (fmt1_str[0] == '0') { + /* + * Fix a special case: + * if address == 0, kernel reports something like: + * p:probe_libc/abs_0 /lib/libc-2.18.so:0x (null) arg1=%ax + * Newer kernel may fix that, but we want to + * support old kernel also. + */ + if (strcmp(fmt1_str, "0x") == 0) { + if (!argv[2] || strcmp(argv[2], "(null)")) { + ret = -EINVAL; + goto out; + } + tp->address = 0; + + free(argv[2]); + for (i = 2; argv[i + 1] != NULL; i++) + argv[i] = argv[i + 1]; + + argv[i] = NULL; + argc -= 1; + } else + tp->address = strtoul(fmt1_str, NULL, 0); + } else { /* Only the symbol-based probe has offset */ tp->symbol = strdup(fmt1_str); if (tp->symbol == NULL) { @@ -1782,14 +1822,29 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev) if (len <= 0) goto error; - /* Uprobes must have tp->address and tp->module */ - if (tev->uprobes && (!tp->address || !tp->module)) + /* Uprobes must have tp->module */ + if (tev->uprobes && !tp->module) goto error; + /* + * If tp->address == 0, then this point must be a + * absolute address uprobe. 
+ * try_to_find_absolute_address() should have made + * tp->symbol to "0x0". + */ + if (tev->uprobes && !tp->address) { + if (!tp->symbol || strcmp(tp->symbol, "0x0")) + goto error; + } /* Use the tp->address for uprobes */ if (tev->uprobes) ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s:0x%lx", tp->module, tp->address); + else if (!strncmp(tp->symbol, "0x", 2)) + /* Absolute address. See try_to_find_absolute_address() */ + ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s0x%lx", + tp->module ?: "", tp->module ? ":" : "", + tp->address); else ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s%s+%lu", tp->module ?: "", tp->module ? ":" : "", @@ -1819,17 +1874,17 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp, { struct symbol *sym = NULL; struct map *map; - u64 addr; + u64 addr = tp->address; int ret = -ENOENT; if (!is_kprobe) { map = dso__new_map(tp->module); if (!map) goto out; - addr = tp->address; sym = map__find_symbol(map, addr, NULL); } else { - addr = kernel_get_symbol_address_by_name(tp->symbol, true); + if (tp->symbol) + addr = kernel_get_symbol_address_by_name(tp->symbol, true); if (addr) { addr += tp->offset; sym = __find_kernel_function(addr, &map); @@ -1852,8 +1907,8 @@ out: } static int convert_to_perf_probe_point(struct probe_trace_point *tp, - struct perf_probe_point *pp, - bool is_kprobe) + struct perf_probe_point *pp, + bool is_kprobe) { char buf[128]; int ret; @@ -1870,7 +1925,7 @@ static int convert_to_perf_probe_point(struct probe_trace_point *tp, if (tp->symbol) { pp->function = strdup(tp->symbol); pp->offset = tp->offset; - } else if (!tp->module && !is_kprobe) { + } else { ret = e_snprintf(buf, 128, "0x%" PRIx64, (u64)tp->address); if (ret < 0) return ret; @@ -1951,7 +2006,7 @@ void clear_perf_probe_event(struct perf_probe_event *pev) memset(pev, 0, sizeof(*pev)); } -static void clear_probe_trace_event(struct probe_trace_event *tev) +void clear_probe_trace_event(struct probe_trace_event *tev) { struct 
probe_trace_arg_ref *ref, *next; int i; @@ -1976,119 +2031,6 @@ static void clear_probe_trace_event(struct probe_trace_event *tev) memset(tev, 0, sizeof(*tev)); } -static void print_open_warning(int err, bool is_kprobe) -{ - char sbuf[STRERR_BUFSIZE]; - - if (err == -ENOENT) { - const char *config; - - if (!is_kprobe) - config = "CONFIG_UPROBE_EVENTS"; - else - config = "CONFIG_KPROBE_EVENTS"; - - pr_warning("%cprobe_events file does not exist" - " - please rebuild kernel with %s.\n", - is_kprobe ? 'k' : 'u', config); - } else if (err == -ENOTSUP) - pr_warning("Tracefs or debugfs is not mounted.\n"); - else - pr_warning("Failed to open %cprobe_events: %s\n", - is_kprobe ? 'k' : 'u', - strerror_r(-err, sbuf, sizeof(sbuf))); -} - -static void print_both_open_warning(int kerr, int uerr) -{ - /* Both kprobes and uprobes are disabled, warn it. */ - if (kerr == -ENOTSUP && uerr == -ENOTSUP) - pr_warning("Tracefs or debugfs is not mounted.\n"); - else if (kerr == -ENOENT && uerr == -ENOENT) - pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS " - "or/and CONFIG_UPROBE_EVENTS.\n"); - else { - char sbuf[STRERR_BUFSIZE]; - pr_warning("Failed to open kprobe events: %s.\n", - strerror_r(-kerr, sbuf, sizeof(sbuf))); - pr_warning("Failed to open uprobe events: %s.\n", - strerror_r(-uerr, sbuf, sizeof(sbuf))); - } -} - -static int open_probe_events(const char *trace_file, bool readwrite) -{ - char buf[PATH_MAX]; - const char *__debugfs; - const char *tracing_dir = ""; - int ret; - - __debugfs = tracefs_find_mountpoint(); - if (__debugfs == NULL) { - tracing_dir = "tracing/"; - - __debugfs = debugfs_find_mountpoint(); - if (__debugfs == NULL) - return -ENOTSUP; - } - - ret = e_snprintf(buf, PATH_MAX, "%s/%s%s", - __debugfs, tracing_dir, trace_file); - if (ret >= 0) { - pr_debug("Opening %s write=%d\n", buf, readwrite); - if (readwrite && !probe_event_dry_run) - ret = open(buf, O_RDWR | O_APPEND, 0); - else - ret = open(buf, O_RDONLY, 0); - - if (ret < 0) - ret = -errno; - 
} - return ret; -} - -static int open_kprobe_events(bool readwrite) -{ - return open_probe_events("kprobe_events", readwrite); -} - -static int open_uprobe_events(bool readwrite) -{ - return open_probe_events("uprobe_events", readwrite); -} - -/* Get raw string list of current kprobe_events or uprobe_events */ -static struct strlist *get_probe_trace_command_rawlist(int fd) -{ - int ret, idx; - FILE *fp; - char buf[MAX_CMDLEN]; - char *p; - struct strlist *sl; - - sl = strlist__new(true, NULL); - - fp = fdopen(dup(fd), "r"); - while (!feof(fp)) { - p = fgets(buf, MAX_CMDLEN, fp); - if (!p) - break; - - idx = strlen(p) - 1; - if (p[idx] == '\n') - p[idx] = '\0'; - ret = strlist__add(sl, buf); - if (ret < 0) { - pr_debug("strlist__add failed (%d)\n", ret); - strlist__delete(sl); - return NULL; - } - } - fclose(fp); - - return sl; -} - struct kprobe_blacklist_node { struct list_head list; unsigned long start; @@ -2284,7 +2226,7 @@ static int __show_perf_probe_events(int fd, bool is_kprobe, memset(&tev, 0, sizeof(tev)); memset(&pev, 0, sizeof(pev)); - rawlist = get_probe_trace_command_rawlist(fd); + rawlist = probe_file__get_rawlist(fd); if (!rawlist) return -ENOMEM; @@ -2325,89 +2267,20 @@ int show_perf_probe_events(struct strfilter *filter) if (ret < 0) return ret; - kp_fd = open_kprobe_events(false); - if (kp_fd >= 0) { - ret = __show_perf_probe_events(kp_fd, true, filter); - close(kp_fd); - if (ret < 0) - goto out; - } - - up_fd = open_uprobe_events(false); - if (kp_fd < 0 && up_fd < 0) { - print_both_open_warning(kp_fd, up_fd); - ret = kp_fd; - goto out; - } + ret = probe_file__open_both(&kp_fd, &up_fd, 0); + if (ret < 0) + return ret; - if (up_fd >= 0) { + if (kp_fd >= 0) + ret = __show_perf_probe_events(kp_fd, true, filter); + if (up_fd >= 0 && ret >= 0) ret = __show_perf_probe_events(up_fd, false, filter); + if (kp_fd > 0) + close(kp_fd); + if (up_fd > 0) close(up_fd); - } -out: exit_symbol_maps(); - return ret; -} -/* Get current perf-probe event names */ 
-static struct strlist *get_probe_trace_event_names(int fd, bool include_group) -{ - char buf[128]; - struct strlist *sl, *rawlist; - struct str_node *ent; - struct probe_trace_event tev; - int ret = 0; - - memset(&tev, 0, sizeof(tev)); - rawlist = get_probe_trace_command_rawlist(fd); - if (!rawlist) - return NULL; - sl = strlist__new(true, NULL); - strlist__for_each(ent, rawlist) { - ret = parse_probe_trace_command(ent->s, &tev); - if (ret < 0) - break; - if (include_group) { - ret = e_snprintf(buf, 128, "%s:%s", tev.group, - tev.event); - if (ret >= 0) - ret = strlist__add(sl, buf); - } else - ret = strlist__add(sl, tev.event); - clear_probe_trace_event(&tev); - if (ret < 0) - break; - } - strlist__delete(rawlist); - - if (ret < 0) { - strlist__delete(sl); - return NULL; - } - return sl; -} - -static int write_probe_trace_event(int fd, struct probe_trace_event *tev) -{ - int ret = 0; - char *buf = synthesize_probe_trace_command(tev); - char sbuf[STRERR_BUFSIZE]; - - if (!buf) { - pr_debug("Failed to synthesize probe trace event.\n"); - return -EINVAL; - } - - pr_debug("Writing event: %s\n", buf); - if (!probe_event_dry_run) { - ret = write(fd, buf, strlen(buf)); - if (ret <= 0) { - ret = -errno; - pr_warning("Failed to write event: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); - } - } - free(buf); return ret; } @@ -2478,36 +2351,69 @@ out: free(buf); } +/* Set new name from original perf_probe_event and namelist */ +static int probe_trace_event__set_name(struct probe_trace_event *tev, + struct perf_probe_event *pev, + struct strlist *namelist, + bool allow_suffix) +{ + const char *event, *group; + char buf[64]; + int ret; + + if (pev->event) + event = pev->event; + else + if (pev->point.function && + (strncmp(pev->point.function, "0x", 2) != 0) && + !strisglob(pev->point.function)) + event = pev->point.function; + else + event = tev->point.realname; + if (pev->group) + group = pev->group; + else + group = PERFPROBE_GROUP; + + /* Get an unused new event name */ 
+ ret = get_new_event_name(buf, 64, event, + namelist, allow_suffix); + if (ret < 0) + return ret; + + event = buf; + + tev->event = strdup(event); + tev->group = strdup(group); + if (tev->event == NULL || tev->group == NULL) + return -ENOMEM; + + /* Add added event name to namelist */ + strlist__add(namelist, event); + return 0; +} + static int __add_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event *tevs, int ntevs, bool allow_suffix) { int i, fd, ret; struct probe_trace_event *tev = NULL; - char buf[64]; const char *event = NULL, *group = NULL; struct strlist *namelist; - bool safename; - - if (pev->uprobes) - fd = open_uprobe_events(true); - else - fd = open_kprobe_events(true); - if (fd < 0) { - print_open_warning(fd, !pev->uprobes); + fd = probe_file__open(PF_FL_RW | (pev->uprobes ? PF_FL_UPROBE : 0)); + if (fd < 0) return fd; - } /* Get current event names */ - namelist = get_probe_trace_event_names(fd, false); + namelist = probe_file__get_namelist(fd); if (!namelist) { pr_debug("Failed to get current event list.\n"); ret = -ENOMEM; goto close_out; } - safename = (pev->point.function && !strisglob(pev->point.function)); ret = 0; pr_info("Added new event%s\n", (ntevs > 1) ? 
"s:" : ":"); for (i = 0; i < ntevs; i++) { @@ -2516,36 +2422,15 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, if (!tev->point.symbol) continue; - if (pev->event) - event = pev->event; - else - if (safename) - event = pev->point.function; - else - event = tev->point.realname; - if (pev->group) - group = pev->group; - else - group = PERFPROBE_GROUP; - - /* Get an unused new event name */ - ret = get_new_event_name(buf, 64, event, - namelist, allow_suffix); + /* Set new name for tev (and update namelist) */ + ret = probe_trace_event__set_name(tev, pev, namelist, + allow_suffix); if (ret < 0) break; - event = buf; - tev->event = strdup(event); - tev->group = strdup(group); - if (tev->event == NULL || tev->group == NULL) { - ret = -ENOMEM; - break; - } - ret = write_probe_trace_event(fd, tev); + ret = probe_file__add_event(fd, tev); if (ret < 0) break; - /* Add added event name to namelist */ - strlist__add(namelist, event); /* We use tev's name for showing new events */ show_perf_probe_event(tev->group, tev->event, pev, @@ -2748,6 +2633,98 @@ err_out: goto out; } +static int try_to_find_absolute_address(struct perf_probe_event *pev, + struct probe_trace_event **tevs) +{ + struct perf_probe_point *pp = &pev->point; + struct probe_trace_event *tev; + struct probe_trace_point *tp; + int i, err; + + if (!(pev->point.function && !strncmp(pev->point.function, "0x", 2))) + return -EINVAL; + if (perf_probe_event_need_dwarf(pev)) + return -EINVAL; + + /* + * This is 'perf probe /lib/libc.so 0xabcd'. Try to probe at + * absolute address. + * + * Only one tev can be generated by this. + */ + *tevs = zalloc(sizeof(*tev)); + if (!*tevs) + return -ENOMEM; + + tev = *tevs; + tp = &tev->point; + + /* + * Don't use tp->offset, use address directly, because + * in synthesize_probe_trace_command() address cannot be + * zero. 
+ */ + tp->address = pev->point.abs_address; + tp->retprobe = pp->retprobe; + tev->uprobes = pev->uprobes; + + err = -ENOMEM; + /* + * Give it a '0x' leading symbol name. + * In __add_probe_trace_events, a NULL symbol is interpreted as + * invalid. + */ + if (asprintf(&tp->symbol, "0x%lx", tp->address) < 0) + goto errout; + + /* For kprobe, check range */ + if ((!tev->uprobes) && + (kprobe_warn_out_range(tev->point.symbol, + tev->point.address))) { + err = -EACCES; + goto errout; + } + + if (asprintf(&tp->realname, "abs_%lx", tp->address) < 0) + goto errout; + + if (pev->target) { + tp->module = strdup(pev->target); + if (!tp->module) + goto errout; + } + + if (tev->group) { + tev->group = strdup(pev->group); + if (!tev->group) + goto errout; + } + + if (pev->event) { + tev->event = strdup(pev->event); + if (!tev->event) + goto errout; + } + + tev->nargs = pev->nargs; + tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); + if (!tev->args) { + err = -ENOMEM; + goto errout; + } + for (i = 0; i < tev->nargs; i++) + copy_to_probe_trace_arg(&tev->args[i], &pev->args[i]); + + return 1; + +errout: + if (*tevs) { + clear_probe_trace_events(*tevs, 1); + *tevs = NULL; + } + return err; +} + bool __weak arch__prefers_symtab(void) { return false; } static int convert_to_probe_trace_events(struct perf_probe_event *pev, @@ -2764,6 +2741,10 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, } } + ret = try_to_find_absolute_address(pev, tevs); + if (ret > 0) + return ret; + if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) { ret = find_probe_trace_events_from_map(pev, tevs); if (ret > 0) @@ -2838,68 +2819,9 @@ end: return ret; } -static int __del_trace_probe_event(int fd, struct str_node *ent) -{ - char *p; - char buf[128]; - int ret; - - /* Convert from perf-probe event to trace-probe event */ - ret = e_snprintf(buf, 128, "-:%s", ent->s); - if (ret < 0) - goto error; - - p = strchr(buf + 2, ':'); - if (!p) { - pr_debug("Internal 
error: %s should have ':' but not.\n", - ent->s); - ret = -ENOTSUP; - goto error; - } - *p = '/'; - - pr_debug("Writing event: %s\n", buf); - ret = write(fd, buf, strlen(buf)); - if (ret < 0) { - ret = -errno; - goto error; - } - - pr_info("Removed event: %s\n", ent->s); - return 0; -error: - pr_warning("Failed to delete event: %s\n", - strerror_r(-ret, buf, sizeof(buf))); - return ret; -} - -static int del_trace_probe_events(int fd, struct strfilter *filter, - struct strlist *namelist) -{ - struct str_node *ent; - const char *p; - int ret = -ENOENT; - - if (!namelist) - return -ENOENT; - - strlist__for_each(ent, namelist) { - p = strchr(ent->s, ':'); - if ((p && strfilter__compare(filter, p + 1)) || - strfilter__compare(filter, ent->s)) { - ret = __del_trace_probe_event(fd, ent); - if (ret < 0) - break; - } - } - - return ret; -} - int del_perf_probe_events(struct strfilter *filter) { int ret, ret2, ufd = -1, kfd = -1; - struct strlist *namelist = NULL, *unamelist = NULL; char *str = strfilter__string(filter); if (!str) @@ -2908,25 +2830,15 @@ int del_perf_probe_events(struct strfilter *filter) pr_debug("Delete filter: \'%s\'\n", str); /* Get current event names */ - kfd = open_kprobe_events(true); - if (kfd >= 0) - namelist = get_probe_trace_event_names(kfd, true); - - ufd = open_uprobe_events(true); - if (ufd >= 0) - unamelist = get_probe_trace_event_names(ufd, true); - - if (kfd < 0 && ufd < 0) { - print_both_open_warning(kfd, ufd); - ret = kfd; - goto error; - } + ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW); + if (ret < 0) + goto out; - ret = del_trace_probe_events(kfd, filter, namelist); + ret = probe_file__del_events(kfd, filter); if (ret < 0 && ret != -ENOENT) goto error; - ret2 = del_trace_probe_events(ufd, filter, unamelist); + ret2 = probe_file__del_events(ufd, filter); if (ret2 < 0 && ret2 != -ENOENT) { ret = ret2; goto error; @@ -2937,15 +2849,11 @@ int del_perf_probe_events(struct strfilter *filter) ret = 0; error: - if (kfd >= 0) { - 
strlist__delete(namelist); + if (kfd >= 0) close(kfd); - } - - if (ufd >= 0) { - strlist__delete(unamelist); + if (ufd >= 0) close(ufd); - } +out: free(str); return ret; @@ -3007,3 +2915,22 @@ end: return ret; } +int copy_to_probe_trace_arg(struct probe_trace_arg *tvar, + struct perf_probe_arg *pvar) +{ + tvar->value = strdup(pvar->var); + if (tvar->value == NULL) + return -ENOMEM; + if (pvar->type) { + tvar->type = strdup(pvar->type); + if (tvar->type == NULL) + return -ENOMEM; + } + if (pvar->name) { + tvar->name = strdup(pvar->name); + if (tvar->name == NULL) + return -ENOMEM; + } else + tvar->name = NULL; + return 0; +} diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 31db6ee7db54..6e7ec68a4aa8 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -59,6 +59,7 @@ struct perf_probe_point { bool retprobe; /* Return probe flag */ char *lazy_line; /* Lazy matching pattern */ unsigned long offset; /* Offset from function entry */ + unsigned long abs_address; /* Absolute address of the point */ }; /* Perf probe probing argument field chain */ @@ -106,9 +107,13 @@ struct variable_list { struct strlist *vars; /* Available variables */ }; +struct map; + /* Command string to events */ extern int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev); +extern int parse_probe_trace_command(const char *cmd, + struct probe_trace_event *tev); /* Events to command string */ extern char *synthesize_perf_probe_command(struct perf_probe_event *pev); @@ -121,6 +126,7 @@ extern bool perf_probe_event_need_dwarf(struct perf_probe_event *pev); /* Release event contents */ extern void clear_perf_probe_event(struct perf_probe_event *pev); +extern void clear_probe_trace_event(struct probe_trace_event *tev); /* Command string to line-range */ extern int parse_line_range_desc(const char *cmd, struct line_range *lr); @@ -144,7 +150,14 @@ bool arch__prefers_symtab(void); void arch__fix_tev_from_maps(struct 
perf_probe_event *pev, struct probe_trace_event *tev, struct map *map); +/* If there is no space to write, returns -E2BIG. */ +int e_snprintf(char *str, size_t size, const char *format, ...) + __attribute__((format(printf, 3, 4))); + /* Maximum index number of event-name postfix */ #define MAX_EVENT_INDEX 1024 +int copy_to_probe_trace_arg(struct probe_trace_arg *tvar, + struct perf_probe_arg *pvar); + #endif /*_PROBE_EVENT_H */ diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c new file mode 100644 index 000000000000..bbb243717ec8 --- /dev/null +++ b/tools/perf/util/probe-file.c @@ -0,0 +1,301 @@ +/* + * probe-file.c : operate ftrace k/uprobe events files + * + * Written by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include "util.h" +#include "event.h" +#include "strlist.h" +#include "debug.h" +#include "cache.h" +#include "color.h" +#include "symbol.h" +#include "thread.h" +#include <api/fs/debugfs.h> +#include <api/fs/tracefs.h> +#include "probe-event.h" +#include "probe-file.h" +#include "session.h" + +#define MAX_CMDLEN 256 + +static void print_open_warning(int err, bool uprobe) +{ + char sbuf[STRERR_BUFSIZE]; + + if (err == -ENOENT) { + const char *config; + + if (uprobe) + config = "CONFIG_UPROBE_EVENTS"; + else + config = "CONFIG_KPROBE_EVENTS"; + + pr_warning("%cprobe_events file does not exist" + " - please rebuild kernel with %s.\n", + uprobe ? 
'u' : 'k', config); + } else if (err == -ENOTSUP) + pr_warning("Tracefs or debugfs is not mounted.\n"); + else + pr_warning("Failed to open %cprobe_events: %s\n", + uprobe ? 'u' : 'k', + strerror_r(-err, sbuf, sizeof(sbuf))); +} + +static void print_both_open_warning(int kerr, int uerr) +{ + /* Both kprobes and uprobes are disabled, warn it. */ + if (kerr == -ENOTSUP && uerr == -ENOTSUP) + pr_warning("Tracefs or debugfs is not mounted.\n"); + else if (kerr == -ENOENT && uerr == -ENOENT) + pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS " + "or/and CONFIG_UPROBE_EVENTS.\n"); + else { + char sbuf[STRERR_BUFSIZE]; + pr_warning("Failed to open kprobe events: %s.\n", + strerror_r(-kerr, sbuf, sizeof(sbuf))); + pr_warning("Failed to open uprobe events: %s.\n", + strerror_r(-uerr, sbuf, sizeof(sbuf))); + } +} + +static int open_probe_events(const char *trace_file, bool readwrite) +{ + char buf[PATH_MAX]; + const char *__debugfs; + const char *tracing_dir = ""; + int ret; + + __debugfs = tracefs_find_mountpoint(); + if (__debugfs == NULL) { + tracing_dir = "tracing/"; + + __debugfs = debugfs_find_mountpoint(); + if (__debugfs == NULL) + return -ENOTSUP; + } + + ret = e_snprintf(buf, PATH_MAX, "%s/%s%s", + __debugfs, tracing_dir, trace_file); + if (ret >= 0) { + pr_debug("Opening %s write=%d\n", buf, readwrite); + if (readwrite && !probe_event_dry_run) + ret = open(buf, O_RDWR | O_APPEND, 0); + else + ret = open(buf, O_RDONLY, 0); + + if (ret < 0) + ret = -errno; + } + return ret; +} + +static int open_kprobe_events(bool readwrite) +{ + return open_probe_events("kprobe_events", readwrite); +} + +static int open_uprobe_events(bool readwrite) +{ + return open_probe_events("uprobe_events", readwrite); +} + +int probe_file__open(int flag) +{ + int fd; + + if (flag & PF_FL_UPROBE) + fd = open_uprobe_events(flag & PF_FL_RW); + else + fd = open_kprobe_events(flag & PF_FL_RW); + if (fd < 0) + print_open_warning(fd, flag & PF_FL_UPROBE); + + return fd; +} + +int 
probe_file__open_both(int *kfd, int *ufd, int flag) +{ + if (!kfd || !ufd) + return -EINVAL; + + *kfd = open_kprobe_events(flag & PF_FL_RW); + *ufd = open_uprobe_events(flag & PF_FL_RW); + if (*kfd < 0 && *ufd < 0) { + print_both_open_warning(*kfd, *ufd); + return *kfd; + } + + return 0; +} + +/* Get raw string list of current kprobe_events or uprobe_events */ +struct strlist *probe_file__get_rawlist(int fd) +{ + int ret, idx; + FILE *fp; + char buf[MAX_CMDLEN]; + char *p; + struct strlist *sl; + + sl = strlist__new(NULL, NULL); + + fp = fdopen(dup(fd), "r"); + while (!feof(fp)) { + p = fgets(buf, MAX_CMDLEN, fp); + if (!p) + break; + + idx = strlen(p) - 1; + if (p[idx] == '\n') + p[idx] = '\0'; + ret = strlist__add(sl, buf); + if (ret < 0) { + pr_debug("strlist__add failed (%d)\n", ret); + strlist__delete(sl); + return NULL; + } + } + fclose(fp); + + return sl; +} + +static struct strlist *__probe_file__get_namelist(int fd, bool include_group) +{ + char buf[128]; + struct strlist *sl, *rawlist; + struct str_node *ent; + struct probe_trace_event tev; + int ret = 0; + + memset(&tev, 0, sizeof(tev)); + rawlist = probe_file__get_rawlist(fd); + if (!rawlist) + return NULL; + sl = strlist__new(NULL, NULL); + strlist__for_each(ent, rawlist) { + ret = parse_probe_trace_command(ent->s, &tev); + if (ret < 0) + break; + if (include_group) { + ret = e_snprintf(buf, 128, "%s:%s", tev.group, + tev.event); + if (ret >= 0) + ret = strlist__add(sl, buf); + } else + ret = strlist__add(sl, tev.event); + clear_probe_trace_event(&tev); + if (ret < 0) + break; + } + strlist__delete(rawlist); + + if (ret < 0) { + strlist__delete(sl); + return NULL; + } + return sl; +} + +/* Get current perf-probe event names */ +struct strlist *probe_file__get_namelist(int fd) +{ + return __probe_file__get_namelist(fd, false); +} + +int probe_file__add_event(int fd, struct probe_trace_event *tev) +{ + int ret = 0; + char *buf = synthesize_probe_trace_command(tev); + char sbuf[STRERR_BUFSIZE]; + + if 
(!buf) { + pr_debug("Failed to synthesize probe trace event.\n"); + return -EINVAL; + } + + pr_debug("Writing event: %s\n", buf); + if (!probe_event_dry_run) { + ret = write(fd, buf, strlen(buf)); + if (ret <= 0) { + ret = -errno; + pr_warning("Failed to write event: %s\n", + strerror_r(errno, sbuf, sizeof(sbuf))); + } + } + free(buf); + + return ret; +} + +static int __del_trace_probe_event(int fd, struct str_node *ent) +{ + char *p; + char buf[128]; + int ret; + + /* Convert from perf-probe event to trace-probe event */ + ret = e_snprintf(buf, 128, "-:%s", ent->s); + if (ret < 0) + goto error; + + p = strchr(buf + 2, ':'); + if (!p) { + pr_debug("Internal error: %s should have ':' but not.\n", + ent->s); + ret = -ENOTSUP; + goto error; + } + *p = '/'; + + pr_debug("Writing event: %s\n", buf); + ret = write(fd, buf, strlen(buf)); + if (ret < 0) { + ret = -errno; + goto error; + } + + pr_info("Removed event: %s\n", ent->s); + return 0; +error: + pr_warning("Failed to delete event: %s\n", + strerror_r(-ret, buf, sizeof(buf))); + return ret; +} + +int probe_file__del_events(int fd, struct strfilter *filter) +{ + struct strlist *namelist; + struct str_node *ent; + const char *p; + int ret = -ENOENT; + + namelist = __probe_file__get_namelist(fd, true); + if (!namelist) + return -ENOENT; + + strlist__for_each(ent, namelist) { + p = strchr(ent->s, ':'); + if ((p && strfilter__compare(filter, p + 1)) || + strfilter__compare(filter, ent->s)) { + ret = __del_trace_probe_event(fd, ent); + if (ret < 0) + break; + } + } + strlist__delete(namelist); + + return ret; +} diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h new file mode 100644 index 000000000000..ada94a242a17 --- /dev/null +++ b/tools/perf/util/probe-file.h @@ -0,0 +1,18 @@ +#ifndef __PROBE_FILE_H +#define __PROBE_FILE_H + +#include "strlist.h" +#include "strfilter.h" +#include "probe-event.h" + +#define PF_FL_UPROBE 1 +#define PF_FL_RW 2 + +int probe_file__open(int flag); +int 
probe_file__open_both(int *kfd, int *ufd, int flag); +struct strlist *probe_file__get_namelist(int fd); +struct strlist *probe_file__get_rawlist(int fd); +int probe_file__add_event(int fd, struct probe_trace_event *tev); +int probe_file__del_events(int fd, struct strfilter *filter); + +#endif diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 2da65a710893..29c43c0680a8 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -553,24 +553,9 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) char buf[32], *ptr; int ret = 0; - if (!is_c_varname(pf->pvar->var)) { - /* Copy raw parameters */ - pf->tvar->value = strdup(pf->pvar->var); - if (pf->tvar->value == NULL) - return -ENOMEM; - if (pf->pvar->type) { - pf->tvar->type = strdup(pf->pvar->type); - if (pf->tvar->type == NULL) - return -ENOMEM; - } - if (pf->pvar->name) { - pf->tvar->name = strdup(pf->pvar->name); - if (pf->tvar->name == NULL) - return -ENOMEM; - } else - pf->tvar->name = NULL; - return 0; - } + /* Copy raw parameters */ + if (!is_c_varname(pf->pvar->var)) + return copy_to_probe_trace_arg(pf->tvar, pf->pvar); if (pf->pvar->name) pf->tvar->name = strdup(pf->pvar->name); @@ -1355,7 +1340,7 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) vl->point.offset); /* Find local variables */ - vl->vars = strlist__new(true, NULL); + vl->vars = strlist__new(NULL, NULL); if (vl->vars == NULL) return -ENOMEM; af->child = true; diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index e23ded40c79e..51be28b1bca2 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,14 +10,14 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c -../../lib/hweight.c +../lib/hweight.c util/thread_map.c util/util.c util/xyarray.c util/cgroup.c util/rblist.c -util/stat.c +util/counts.c util/strlist.c util/trace-event.c -../../lib/rbtree.c +../lib/rbtree.c 
util/string.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 626422eda727..6324fe6b161e 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -941,76 +941,84 @@ static int pyrf_evlist__setup_types(void) return PyType_Ready(&pyrf_evlist__type); } +#define PERF_CONST(name) { #name, PERF_##name } + static struct { const char *name; int value; } perf__constants[] = { - { "TYPE_HARDWARE", PERF_TYPE_HARDWARE }, - { "TYPE_SOFTWARE", PERF_TYPE_SOFTWARE }, - { "TYPE_TRACEPOINT", PERF_TYPE_TRACEPOINT }, - { "TYPE_HW_CACHE", PERF_TYPE_HW_CACHE }, - { "TYPE_RAW", PERF_TYPE_RAW }, - { "TYPE_BREAKPOINT", PERF_TYPE_BREAKPOINT }, - - { "COUNT_HW_CPU_CYCLES", PERF_COUNT_HW_CPU_CYCLES }, - { "COUNT_HW_INSTRUCTIONS", PERF_COUNT_HW_INSTRUCTIONS }, - { "COUNT_HW_CACHE_REFERENCES", PERF_COUNT_HW_CACHE_REFERENCES }, - { "COUNT_HW_CACHE_MISSES", PERF_COUNT_HW_CACHE_MISSES }, - { "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, - { "COUNT_HW_BRANCH_MISSES", PERF_COUNT_HW_BRANCH_MISSES }, - { "COUNT_HW_BUS_CYCLES", PERF_COUNT_HW_BUS_CYCLES }, - { "COUNT_HW_CACHE_L1D", PERF_COUNT_HW_CACHE_L1D }, - { "COUNT_HW_CACHE_L1I", PERF_COUNT_HW_CACHE_L1I }, - { "COUNT_HW_CACHE_LL", PERF_COUNT_HW_CACHE_LL }, - { "COUNT_HW_CACHE_DTLB", PERF_COUNT_HW_CACHE_DTLB }, - { "COUNT_HW_CACHE_ITLB", PERF_COUNT_HW_CACHE_ITLB }, - { "COUNT_HW_CACHE_BPU", PERF_COUNT_HW_CACHE_BPU }, - { "COUNT_HW_CACHE_OP_READ", PERF_COUNT_HW_CACHE_OP_READ }, - { "COUNT_HW_CACHE_OP_WRITE", PERF_COUNT_HW_CACHE_OP_WRITE }, - { "COUNT_HW_CACHE_OP_PREFETCH", PERF_COUNT_HW_CACHE_OP_PREFETCH }, - { "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS }, - { "COUNT_HW_CACHE_RESULT_MISS", PERF_COUNT_HW_CACHE_RESULT_MISS }, - - { "COUNT_HW_STALLED_CYCLES_FRONTEND", PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, - { "COUNT_HW_STALLED_CYCLES_BACKEND", PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, - - { "COUNT_SW_CPU_CLOCK", PERF_COUNT_SW_CPU_CLOCK }, - { "COUNT_SW_TASK_CLOCK", 
PERF_COUNT_SW_TASK_CLOCK }, - { "COUNT_SW_PAGE_FAULTS", PERF_COUNT_SW_PAGE_FAULTS }, - { "COUNT_SW_CONTEXT_SWITCHES", PERF_COUNT_SW_CONTEXT_SWITCHES }, - { "COUNT_SW_CPU_MIGRATIONS", PERF_COUNT_SW_CPU_MIGRATIONS }, - { "COUNT_SW_PAGE_FAULTS_MIN", PERF_COUNT_SW_PAGE_FAULTS_MIN }, - { "COUNT_SW_PAGE_FAULTS_MAJ", PERF_COUNT_SW_PAGE_FAULTS_MAJ }, - { "COUNT_SW_ALIGNMENT_FAULTS", PERF_COUNT_SW_ALIGNMENT_FAULTS }, - { "COUNT_SW_EMULATION_FAULTS", PERF_COUNT_SW_EMULATION_FAULTS }, - { "COUNT_SW_DUMMY", PERF_COUNT_SW_DUMMY }, - - { "SAMPLE_IP", PERF_SAMPLE_IP }, - { "SAMPLE_TID", PERF_SAMPLE_TID }, - { "SAMPLE_TIME", PERF_SAMPLE_TIME }, - { "SAMPLE_ADDR", PERF_SAMPLE_ADDR }, - { "SAMPLE_READ", PERF_SAMPLE_READ }, - { "SAMPLE_CALLCHAIN", PERF_SAMPLE_CALLCHAIN }, - { "SAMPLE_ID", PERF_SAMPLE_ID }, - { "SAMPLE_CPU", PERF_SAMPLE_CPU }, - { "SAMPLE_PERIOD", PERF_SAMPLE_PERIOD }, - { "SAMPLE_STREAM_ID", PERF_SAMPLE_STREAM_ID }, - { "SAMPLE_RAW", PERF_SAMPLE_RAW }, - - { "FORMAT_TOTAL_TIME_ENABLED", PERF_FORMAT_TOTAL_TIME_ENABLED }, - { "FORMAT_TOTAL_TIME_RUNNING", PERF_FORMAT_TOTAL_TIME_RUNNING }, - { "FORMAT_ID", PERF_FORMAT_ID }, - { "FORMAT_GROUP", PERF_FORMAT_GROUP }, - - { "RECORD_MMAP", PERF_RECORD_MMAP }, - { "RECORD_LOST", PERF_RECORD_LOST }, - { "RECORD_COMM", PERF_RECORD_COMM }, - { "RECORD_EXIT", PERF_RECORD_EXIT }, - { "RECORD_THROTTLE", PERF_RECORD_THROTTLE }, - { "RECORD_UNTHROTTLE", PERF_RECORD_UNTHROTTLE }, - { "RECORD_FORK", PERF_RECORD_FORK }, - { "RECORD_READ", PERF_RECORD_READ }, - { "RECORD_SAMPLE", PERF_RECORD_SAMPLE }, + PERF_CONST(TYPE_HARDWARE), + PERF_CONST(TYPE_SOFTWARE), + PERF_CONST(TYPE_TRACEPOINT), + PERF_CONST(TYPE_HW_CACHE), + PERF_CONST(TYPE_RAW), + PERF_CONST(TYPE_BREAKPOINT), + + PERF_CONST(COUNT_HW_CPU_CYCLES), + PERF_CONST(COUNT_HW_INSTRUCTIONS), + PERF_CONST(COUNT_HW_CACHE_REFERENCES), + PERF_CONST(COUNT_HW_CACHE_MISSES), + PERF_CONST(COUNT_HW_BRANCH_INSTRUCTIONS), + PERF_CONST(COUNT_HW_BRANCH_MISSES), + PERF_CONST(COUNT_HW_BUS_CYCLES), + 
PERF_CONST(COUNT_HW_CACHE_L1D), + PERF_CONST(COUNT_HW_CACHE_L1I), + PERF_CONST(COUNT_HW_CACHE_LL), + PERF_CONST(COUNT_HW_CACHE_DTLB), + PERF_CONST(COUNT_HW_CACHE_ITLB), + PERF_CONST(COUNT_HW_CACHE_BPU), + PERF_CONST(COUNT_HW_CACHE_OP_READ), + PERF_CONST(COUNT_HW_CACHE_OP_WRITE), + PERF_CONST(COUNT_HW_CACHE_OP_PREFETCH), + PERF_CONST(COUNT_HW_CACHE_RESULT_ACCESS), + PERF_CONST(COUNT_HW_CACHE_RESULT_MISS), + + PERF_CONST(COUNT_HW_STALLED_CYCLES_FRONTEND), + PERF_CONST(COUNT_HW_STALLED_CYCLES_BACKEND), + + PERF_CONST(COUNT_SW_CPU_CLOCK), + PERF_CONST(COUNT_SW_TASK_CLOCK), + PERF_CONST(COUNT_SW_PAGE_FAULTS), + PERF_CONST(COUNT_SW_CONTEXT_SWITCHES), + PERF_CONST(COUNT_SW_CPU_MIGRATIONS), + PERF_CONST(COUNT_SW_PAGE_FAULTS_MIN), + PERF_CONST(COUNT_SW_PAGE_FAULTS_MAJ), + PERF_CONST(COUNT_SW_ALIGNMENT_FAULTS), + PERF_CONST(COUNT_SW_EMULATION_FAULTS), + PERF_CONST(COUNT_SW_DUMMY), + + PERF_CONST(SAMPLE_IP), + PERF_CONST(SAMPLE_TID), + PERF_CONST(SAMPLE_TIME), + PERF_CONST(SAMPLE_ADDR), + PERF_CONST(SAMPLE_READ), + PERF_CONST(SAMPLE_CALLCHAIN), + PERF_CONST(SAMPLE_ID), + PERF_CONST(SAMPLE_CPU), + PERF_CONST(SAMPLE_PERIOD), + PERF_CONST(SAMPLE_STREAM_ID), + PERF_CONST(SAMPLE_RAW), + + PERF_CONST(FORMAT_TOTAL_TIME_ENABLED), + PERF_CONST(FORMAT_TOTAL_TIME_RUNNING), + PERF_CONST(FORMAT_ID), + PERF_CONST(FORMAT_GROUP), + + PERF_CONST(RECORD_MMAP), + PERF_CONST(RECORD_LOST), + PERF_CONST(RECORD_COMM), + PERF_CONST(RECORD_EXIT), + PERF_CONST(RECORD_THROTTLE), + PERF_CONST(RECORD_UNTHROTTLE), + PERF_CONST(RECORD_FORK), + PERF_CONST(RECORD_READ), + PERF_CONST(RECORD_SAMPLE), + PERF_CONST(RECORD_MMAP2), + PERF_CONST(RECORD_AUX), + PERF_CONST(RECORD_ITRACE_START), + PERF_CONST(RECORD_LOST_SAMPLES), + PERF_CONST(RECORD_SWITCH), + PERF_CONST(RECORD_SWITCH_CPU_WIDE), { .name = NULL, }, }; diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 1f7becbe5e18..0467367dc315 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -85,6 +85,11 @@ static void 
perf_probe_comm_exec(struct perf_evsel *evsel) evsel->attr.comm_exec = 1; } +static void perf_probe_context_switch(struct perf_evsel *evsel) +{ + evsel->attr.context_switch = 1; +} + bool perf_can_sample_identifier(void) { return perf_probe_api(perf_probe_sample_identifier); @@ -95,6 +100,35 @@ static bool perf_can_comm_exec(void) return perf_probe_api(perf_probe_comm_exec); } +bool perf_can_record_switch_events(void) +{ + return perf_probe_api(perf_probe_context_switch); +} + +bool perf_can_record_cpu_wide(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + .exclude_kernel = 1, + }; + struct cpu_map *cpus; + int cpu, fd; + + cpus = cpu_map__new(NULL); + if (!cpus) + return false; + cpu = cpus->map[0]; + cpu_map__put(cpus); + + fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); + if (fd < 0) + return false; + close(fd); + + return true; +} + void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts) { struct perf_evsel *evsel; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index ed9dc2555ec7..8a4537ee9bc3 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -170,7 +170,7 @@ static void perf_session__delete_threads(struct perf_session *session) machine__delete_threads(&session->machines.host); } -static void perf_session_env__delete(struct perf_session_env *env) +static void perf_session_env__exit(struct perf_env *env) { zfree(&env->hostname); zfree(&env->os_release); @@ -180,6 +180,7 @@ static void perf_session_env__delete(struct perf_session_env *env) zfree(&env->cpuid); zfree(&env->cmdline); + zfree(&env->cmdline_argv); zfree(&env->sibling_cores); zfree(&env->sibling_threads); zfree(&env->numa_nodes); @@ -192,7 +193,7 @@ void perf_session__delete(struct perf_session *session) auxtrace_index__free(&session->auxtrace_index); perf_session__destroy_kernel_maps(session); perf_session__delete_threads(session); - 
perf_session_env__delete(&session->header.env); + perf_session_env__exit(&session->header.env); machines__exit(&session->machines); if (session->file) perf_data_file__close(session->file); @@ -332,6 +333,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->aux = perf_event__process_aux; if (tool->itrace_start == NULL) tool->itrace_start = perf_event__process_itrace_start; + if (tool->context_switch == NULL) + tool->context_switch = perf_event__process_switch; if (tool->read == NULL) tool->read = process_event_sample_stub; if (tool->throttle == NULL) @@ -470,6 +473,19 @@ static void perf_event__itrace_start_swap(union perf_event *event, swap_sample_id_all(event, &event->itrace_start + 1); } +static void perf_event__switch_swap(union perf_event *event, bool sample_id_all) +{ + if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) { + event->context_switch.next_prev_pid = + bswap_32(event->context_switch.next_prev_pid); + event->context_switch.next_prev_tid = + bswap_32(event->context_switch.next_prev_tid); + } + + if (sample_id_all) + swap_sample_id_all(event, &event->context_switch + 1); +} + static void perf_event__throttle_swap(union perf_event *event, bool sample_id_all) { @@ -632,6 +648,8 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_AUX] = perf_event__aux_swap, [PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap, [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap, + [PERF_RECORD_SWITCH] = perf_event__switch_swap, + [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, @@ -766,10 +784,18 @@ static void branch_stack__printf(struct perf_sample *sample) printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); - for (i = 0; i < sample->branch_stack->nr; i++) - printf("..... 
%2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", - i, sample->branch_stack->entries[i].from, - sample->branch_stack->entries[i].to); + for (i = 0; i < sample->branch_stack->nr; i++) { + struct branch_entry *e = &sample->branch_stack->entries[i]; + + printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n", + i, e->from, e->to, + e->flags.cycles, + e->flags.mispred ? "M" : " ", + e->flags.predicted ? "P" : " ", + e->flags.abort ? "A" : " ", + e->flags.in_tx ? "T" : " ", + (unsigned)e->flags.reserved); + } } static void regs_dump__printf(u64 mask, u64 *regs) @@ -1093,6 +1119,9 @@ static int machines__deliver_event(struct machines *machines, return tool->aux(tool, event, sample, machine); case PERF_RECORD_ITRACE_START: return tool->itrace_start(tool, event, sample, machine); + case PERF_RECORD_SWITCH: + case PERF_RECORD_SWITCH_CPU_WIDE: + return tool->context_switch(tool, event, sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 4c65a143a34c..7e3871606df3 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -9,7 +9,7 @@ regex_t parent_regex; const char default_parent_pattern[] = "^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; const char default_sort_order[] = "comm,dso,symbol"; -const char default_branch_sort_order[] = "comm,dso_from,symbol_from,dso_to,symbol_to"; +const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked"; const char default_top_sort_order[] = "dso,symbol"; const char default_diff_sort_order[] = "dso,symbol"; @@ -319,6 +319,59 @@ struct sort_entry sort_srcline = { .se_width_idx = HISTC_SRCLINE, }; +/* --sort srcfile */ + +static char no_srcfile[1]; + +static char *get_srcfile(struct hist_entry *e) +{ + char *sf, *p; + struct map *map = e->ms.map; + + sf = 
get_srcline(map->dso, map__rip_2objdump(map, e->ip), + e->ms.sym, true); + if (!strcmp(sf, SRCLINE_UNKNOWN)) + return no_srcfile; + p = strchr(sf, ':'); + if (p && *sf) { + *p = 0; + return sf; + } + free(sf); + return no_srcfile; +} + +static int64_t +sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right) +{ + if (!left->srcfile) { + if (!left->ms.map) + left->srcfile = no_srcfile; + else + left->srcfile = get_srcfile(left); + } + if (!right->srcfile) { + if (!right->ms.map) + right->srcfile = no_srcfile; + else + right->srcfile = get_srcfile(right); + } + return strcmp(right->srcfile, left->srcfile); +} + +static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcfile); +} + +struct sort_entry sort_srcfile = { + .se_header = "Source File", + .se_cmp = sort__srcfile_cmp, + .se_snprintf = hist_entry__srcfile_snprintf, + .se_width_idx = HISTC_SRCFILE, +}; + /* --sort parent */ static int64_t @@ -526,6 +579,29 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*.*s", width, width, out); } +static int64_t +sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return left->branch_info->flags.cycles - + right->branch_info->flags.cycles; +} + +static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + if (he->branch_info->flags.cycles == 0) + return repsep_snprintf(bf, size, "%-*s", width, "-"); + return repsep_snprintf(bf, size, "%-*hd", width, + he->branch_info->flags.cycles); +} + +struct sort_entry sort_cycles = { + .se_header = "Basic Block Cycles", + .se_cmp = sort__cycles_cmp, + .se_snprintf = hist_entry__cycles_snprintf, + .se_width_idx = HISTC_CYCLES, +}; + /* --sort daddr_sym */ static int64_t sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right) @@ -1173,6 +1249,7 @@ static struct 
sort_dimension common_sort_dimensions[] = { DIM(SORT_PARENT, "parent", sort_parent), DIM(SORT_CPU, "cpu", sort_cpu), DIM(SORT_SRCLINE, "srcline", sort_srcline), + DIM(SORT_SRCFILE, "srcfile", sort_srcfile), DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight), DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight), DIM(SORT_TRANSACTION, "transaction", sort_transaction), @@ -1190,6 +1267,7 @@ static struct sort_dimension bstack_sort_dimensions[] = { DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), DIM(SORT_IN_TX, "in_tx", sort_in_tx), DIM(SORT_ABORT, "abort", sort_abort), + DIM(SORT_CYCLES, "cycles", sort_cycles), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index e97cd476d336..3c2a399f8f5b 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -114,6 +114,7 @@ struct hist_entry { }; }; char *srcline; + char *srcfile; struct symbol *parent; struct rb_root sorted_chain; struct branch_info *branch_info; @@ -172,6 +173,7 @@ enum sort_type { SORT_PARENT, SORT_CPU, SORT_SRCLINE, + SORT_SRCFILE, SORT_LOCAL_WEIGHT, SORT_GLOBAL_WEIGHT, SORT_TRANSACTION, @@ -185,6 +187,7 @@ enum sort_type { SORT_MISPREDICT, SORT_ABORT, SORT_IN_TX, + SORT_CYCLES, /* memory mode specific sort keys */ __SORT_MEMORY_MODE, diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index c93fb0c5bd0b..fc08248f08ca 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -10,6 +10,8 @@ #include "symbol.h" +bool srcline_full_filename; + #ifdef HAVE_LIBBFD_SUPPORT /* @@ -277,7 +279,9 @@ char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, if (!addr2line(dso_name, addr, &file, &line, dso)) goto out; - if (asprintf(&srcline, "%s:%u", basename(file), line) < 0) { + if (asprintf(&srcline, "%s:%u", + srcline_full_filename ? 
file : basename(file), + line) < 0) { free(file); goto out; } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 53e8bb7bc852..2a5d8d7698ae 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -85,7 +85,7 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); + update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); else if (perf_stat_evsel__is(counter, ELISION_START)) @@ -398,20 +398,18 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, " # %5.2f%% aborted cycles ", 100.0 * ((total2-avg) / total)); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && - avg > 0 && runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); - if (total) + if (avg) ratio = total / avg; fprintf(out, " # %8.0f cycles / transaction ", ratio); } else if (perf_stat_evsel__is(evsel, ELISION_START) && - avg > 0 && runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); - if (total) + if (avg) ratio = total / avg; fprintf(out, " # %8.0f cycles / elision ", ratio); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index f2a0d1521e26..415c359de465 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -97,55 +97,6 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel) } } -struct perf_counts *perf_counts__new(int ncpus, int nthreads) -{ - struct perf_counts *counts = zalloc(sizeof(*counts)); - - if (counts) { - struct xyarray *values; - - values = xyarray__new(ncpus, nthreads, sizeof(struct 
perf_counts_values)); - if (!values) { - free(counts); - return NULL; - } - - counts->values = values; - } - - return counts; -} - -void perf_counts__delete(struct perf_counts *counts) -{ - if (counts) { - xyarray__delete(counts->values); - free(counts); - } -} - -static void perf_counts__reset(struct perf_counts *counts) -{ - xyarray__reset(counts->values); -} - -void perf_evsel__reset_counts(struct perf_evsel *evsel) -{ - perf_counts__reset(evsel->counts); -} - -int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads) -{ - evsel->counts = perf_counts__new(ncpus, nthreads); - return evsel->counts != NULL ? 0 : -ENOMEM; -} - -void perf_evsel__free_counts(struct perf_evsel *evsel) -{ - perf_counts__delete(evsel->counts); - evsel->counts = NULL; -} - void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) { int i; @@ -238,3 +189,142 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist) perf_evsel__reset_counts(evsel); } } + +static void zero_per_pkg(struct perf_evsel *counter) +{ + if (counter->per_pkg_mask) + memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); +} + +static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) +{ + unsigned long *mask = counter->per_pkg_mask; + struct cpu_map *cpus = perf_evsel__cpus(counter); + int s; + + *skip = false; + + if (!counter->per_pkg) + return 0; + + if (cpu_map__empty(cpus)) + return 0; + + if (!mask) { + mask = zalloc(MAX_NR_CPUS); + if (!mask) + return -ENOMEM; + + counter->per_pkg_mask = mask; + } + + s = cpu_map__get_socket(cpus, cpu); + if (s < 0) + return -1; + + *skip = test_and_set_bit(s, mask) == 1; + return 0; +} + +static int +process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel, + int cpu, int thread, + struct perf_counts_values *count) +{ + struct perf_counts_values *aggr = &evsel->counts->aggr; + static struct perf_counts_values zero; + bool skip = false; + + if (check_per_pkg(evsel, cpu, &skip)) { + pr_err("failed to read per-pkg 
counter\n"); + return -1; + } + + if (skip) + count = &zero; + + switch (config->aggr_mode) { + case AGGR_THREAD: + case AGGR_CORE: + case AGGR_SOCKET: + case AGGR_NONE: + if (!evsel->snapshot) + perf_evsel__compute_deltas(evsel, cpu, thread, count); + perf_counts_values__scale(count, config->scale, NULL); + if (config->aggr_mode == AGGR_NONE) + perf_stat__update_shadow_stats(evsel, count->values, cpu); + break; + case AGGR_GLOBAL: + aggr->val += count->val; + if (config->scale) { + aggr->ena += count->ena; + aggr->run += count->run; + } + default: + break; + } + + return 0; +} + +static int process_counter_maps(struct perf_stat_config *config, + struct perf_evsel *counter) +{ + int nthreads = thread_map__nr(counter->threads); + int ncpus = perf_evsel__nr_cpus(counter); + int cpu, thread; + + if (counter->system_wide) + nthreads = 1; + + for (thread = 0; thread < nthreads; thread++) { + for (cpu = 0; cpu < ncpus; cpu++) { + if (process_counter_values(config, counter, cpu, thread, + perf_counts(counter->counts, cpu, thread))) + return -1; + } + } + + return 0; +} + +int perf_stat_process_counter(struct perf_stat_config *config, + struct perf_evsel *counter) +{ + struct perf_counts_values *aggr = &counter->counts->aggr; + struct perf_stat *ps = counter->priv; + u64 *count = counter->counts->aggr.values; + int i, ret; + + aggr->val = aggr->ena = aggr->run = 0; + init_stats(ps->res_stats); + + if (counter->per_pkg) + zero_per_pkg(counter); + + ret = process_counter_maps(config, counter); + if (ret) + return ret; + + if (config->aggr_mode != AGGR_GLOBAL) + return 0; + + if (!counter->snapshot) + perf_evsel__compute_deltas(counter, -1, -1, aggr); + perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); + + for (i = 0; i < 3; i++) + update_stats(&ps->res_stats[i], count[i]); + + if (verbose) { + fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + perf_evsel__name(counter), count[0], count[1], count[2]); + } + + /* + * Save the full 
runtime - to allow normalization during printout: + */ + perf_stat__update_shadow_stats(counter, count, 0); + + return 0; +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 1cfbe0a980ac..62448c8175d3 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -33,29 +33,13 @@ enum aggr_mode { AGGR_THREAD, }; -struct perf_counts_values { - union { - struct { - u64 val; - u64 ena; - u64 run; - }; - u64 values[3]; - }; +struct perf_stat_config { + enum aggr_mode aggr_mode; + bool scale; + FILE *output; + unsigned int interval; }; -struct perf_counts { - s8 scaled; - struct perf_counts_values aggr; - struct xyarray *values; -}; - -static inline struct perf_counts_values* -perf_counts(struct perf_counts *counts, int cpu, int thread) -{ - return xyarray__entry(counts->values, cpu, thread); -} - void update_stats(struct stats *stats, u64 val); double avg_stats(struct stats *stats); double stddev_stats(struct stats *stats); @@ -89,13 +73,6 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, double avg, int cpu, enum aggr_mode aggr); -struct perf_counts *perf_counts__new(int ncpus, int nthreads); -void perf_counts__delete(struct perf_counts *counts); - -void perf_evsel__reset_counts(struct perf_evsel *evsel); -int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads); -void perf_evsel__free_counts(struct perf_evsel *evsel); - void perf_evsel__reset_stat_priv(struct perf_evsel *evsel); int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel); void perf_evsel__free_stat_priv(struct perf_evsel *evsel); @@ -109,4 +86,7 @@ int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct perf_evlist *evlist); void perf_evlist__reset_stats(struct perf_evlist *evlist); + +int perf_stat_process_counter(struct 
perf_stat_config *config, + struct perf_evsel *counter); #endif diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 6afd6106ceb5..fc8781de62db 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -357,3 +357,42 @@ void *memdup(const void *src, size_t len) return p; } + +char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints) +{ + /* + * FIXME: replace this with an expression using log10() when we + * find a suitable implementation, maybe the one in the dvb drivers... + * + * "%s == %d || " = log10(MAXINT) * 2 + 8 chars for the operators + */ + size_t size = nints * 28 + 1; /* \0 */ + size_t i, printed = 0; + char *expr = malloc(size); + + if (expr) { + const char *or_and = "||", *eq_neq = "=="; + char *e = expr; + + if (!in) { + or_and = "&&"; + eq_neq = "!="; + } + + for (i = 0; i < nints; ++i) { + if (printed == size) + goto out_err_overflow; + + if (i > 0) + printed += snprintf(e + printed, size - printed, " %s ", or_and); + printed += scnprintf(e + printed, size - printed, + "%s %s %d", var, eq_neq, ints[i]); + } + } + + return expr; + +out_err_overflow: + free(expr); + return NULL; +} diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c index 71f9d102b96f..bdf98f6f27bb 100644 --- a/tools/perf/util/strlist.c +++ b/tools/perf/util/strlist.c @@ -72,7 +72,7 @@ int strlist__load(struct strlist *slist, const char *filename) FILE *fp = fopen(filename, "r"); if (fp == NULL) - return errno; + return -errno; while (fgets(entry, sizeof(entry), fp) != NULL) { const size_t len = strlen(entry); @@ -108,43 +108,70 @@ struct str_node *strlist__find(struct strlist *slist, const char *entry) return snode; } -static int strlist__parse_list_entry(struct strlist *slist, const char *s) +static int strlist__parse_list_entry(struct strlist *slist, const char *s, + const char *subst_dir) { + int err; + char *subst = NULL; + if (strncmp(s, "file://", 7) == 0) return strlist__load(slist, s + 7); - return 
strlist__add(slist, s); + if (subst_dir) { + err = -ENOMEM; + if (asprintf(&subst, "%s/%s", subst_dir, s) < 0) + goto out; + + if (access(subst, F_OK) == 0) { + err = strlist__load(slist, subst); + goto out; + } + } + + err = strlist__add(slist, s); +out: + free(subst); + return err; } -int strlist__parse_list(struct strlist *slist, const char *s) +static int strlist__parse_list(struct strlist *slist, const char *s, const char *subst_dir) { char *sep; int err; while ((sep = strchr(s, ',')) != NULL) { *sep = '\0'; - err = strlist__parse_list_entry(slist, s); + err = strlist__parse_list_entry(slist, s, subst_dir); *sep = ','; if (err != 0) return err; s = sep + 1; } - return *s ? strlist__parse_list_entry(slist, s) : 0; + return *s ? strlist__parse_list_entry(slist, s, subst_dir) : 0; } -struct strlist *strlist__new(bool dupstr, const char *list) +struct strlist *strlist__new(const char *list, const struct strlist_config *config) { struct strlist *slist = malloc(sizeof(*slist)); if (slist != NULL) { + bool dupstr = true; + const char *dirname = NULL; + + if (config) { + dupstr = !config->dont_dupstr; + dirname = config->dirname; + } + rblist__init(&slist->rblist); slist->rblist.node_cmp = strlist__node_cmp; slist->rblist.node_new = strlist__node_new; slist->rblist.node_delete = strlist__node_delete; slist->dupstr = dupstr; - if (list && strlist__parse_list(slist, list) != 0) + + if (list && strlist__parse_list(slist, list, dirname) != 0) goto out_error; } diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index 5c7f87069d9c..297565aa7535 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h @@ -16,7 +16,12 @@ struct strlist { bool dupstr; }; -struct strlist *strlist__new(bool dupstr, const char *slist); +struct strlist_config { + bool dont_dupstr; + const char *dirname; +}; + +struct strlist *strlist__new(const char *slist, const struct strlist_config *config); void strlist__delete(struct strlist *slist); void strlist__remove(struct 
strlist *slist, struct str_node *sn); @@ -74,6 +79,4 @@ static inline struct str_node *strlist__next(struct str_node *sn) #define strlist__for_each_safe(pos, n, slist) \ for (pos = strlist__first(slist), n = strlist__next(pos); pos;\ pos = n, n = strlist__next(n)) - -int strlist__parse_list(struct strlist *slist, const char *s); #endif /* __PERF_STRLIST_H */ diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 65f7e389ae09..53bb5f59ec58 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -38,7 +38,7 @@ static inline char *bfd_demangle(void __maybe_unused *v, #endif #ifndef HAVE_ELF_GETPHDRNUM_SUPPORT -static int elf_getphdrnum(Elf *elf, size_t *dst) +int elf_getphdrnum(Elf *elf, size_t *dst) { GElf_Ehdr gehdr; GElf_Ehdr *ehdr; @@ -875,6 +875,17 @@ int dso__load_sym(struct dso *dso, struct map *map, } } + /* + * Handle any relocation of vdso necessary because older kernels + * attempted to prelink vdso to its virtual address. + */ + if (dso__is_vdso(dso)) { + GElf_Shdr tshdr; + + if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL)) + map->reloc = map->start - tshdr.sh_addr + tshdr.sh_offset; + } + dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap); /* * Initial kernel and module mappings do not map to the dso. 
For diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 48b588c6951a..1f97ffb158a6 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -441,10 +441,25 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, return &s->sym; } +void dso__reset_find_symbol_cache(struct dso *dso) +{ + enum map_type type; + + for (type = MAP__FUNCTION; type <= MAP__VARIABLE; ++type) { + dso->last_find_result[type].addr = 0; + dso->last_find_result[type].symbol = NULL; + } +} + struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr) { - return symbols__find(&dso->symbols[type], addr); + if (dso->last_find_result[type].addr != addr) { + dso->last_find_result[type].addr = addr; + dso->last_find_result[type].symbol = symbols__find(&dso->symbols[type], addr); + } + + return dso->last_find_result[type].symbol; } struct symbol *dso__first_symbol(struct dso *dso, enum map_type type) @@ -1133,8 +1148,8 @@ static int dso__load_kcore(struct dso *dso, struct map *map, fd = open(kcore_filename, O_RDONLY); if (fd < 0) { - pr_err("%s requires CAP_SYS_RAWIO capability to access.\n", - kcore_filename); + pr_debug("Failed to open %s. 
Note /proc/kcore requires CAP_SYS_RAWIO capability to access.\n", + kcore_filename); return -EINVAL; } @@ -1838,7 +1853,7 @@ static void vmlinux_path__exit(void) zfree(&vmlinux_path); } -static int vmlinux_path__init(struct perf_session_env *env) +static int vmlinux_path__init(struct perf_env *env) { struct utsname uts; char bf[PATH_MAX]; @@ -1906,11 +1921,13 @@ int setup_list(struct strlist **list, const char *list_str, if (list_str == NULL) return 0; - *list = strlist__new(true, list_str); + *list = strlist__new(list_str, NULL); if (!*list) { pr_err("problems parsing %s list\n", list_name); return -1; } + + symbol_conf.has_filter = true; return 0; } @@ -1947,7 +1964,7 @@ static bool symbol__read_kptr_restrict(void) return value; } -int symbol__init(struct perf_session_env *env) +int symbol__init(struct perf_env *env) { const char *symfs; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index bef47ead1d9b..440ba8ae888f 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -105,7 +105,9 @@ struct symbol_conf { demangle_kernel, filter_relative, show_hist_headers, - branch_callstack; + branch_callstack, + has_filter, + show_ref_callgraph; const char *vmlinux_name, *kallsyms_name, *source_prefix, @@ -250,8 +252,8 @@ int modules__parse(const char *filename, void *arg, int filename__read_debuglink(const char *filename, char *debuglink, size_t size); -struct perf_session_env; -int symbol__init(struct perf_session_env *env); +struct perf_env; +int symbol__init(struct perf_env *env); void symbol__exit(void); void symbol__elf_init(void); struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 28c4b746baa1..0a9ae8014729 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -191,6 +191,12 @@ static int thread__clone_map_groups(struct thread *thread, if (thread->pid_ == parent->pid_) return 0; + if (thread->mg == parent->mg) { + 
pr_debug("broken map groups on thread %d/%d parent %d/%d\n", + thread->pid_, thread->tid, parent->pid_, parent->tid); + return 0; + } + /* But this one is new process, copy maps. */ for (i = 0; i < MAP__NR_TYPES; ++i) if (map_groups__clone(thread->mg, parent->mg, i) < 0) diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index da7646d767fe..6ec3c5ca438f 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -136,8 +136,7 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) if (grow) { struct thread_map *tmp; - tmp = realloc(threads, (sizeof(*threads) + - max_threads * sizeof(pid_t))); + tmp = thread_map__realloc(threads, max_threads); if (tmp == NULL) goto out_free_namelist; @@ -196,7 +195,8 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) pid_t pid, prev_pid = INT_MAX; char *end_ptr; struct str_node *pos; - struct strlist *slist = strlist__new(false, pid_str); + struct strlist_config slist_config = { .dont_dupstr = true, }; + struct strlist *slist = strlist__new(pid_str, &slist_config); if (!slist) return NULL; @@ -266,13 +266,14 @@ static struct thread_map *thread_map__new_by_tid_str(const char *tid_str) pid_t tid, prev_tid = INT_MAX; char *end_ptr; struct str_node *pos; + struct strlist_config slist_config = { .dont_dupstr = true, }; struct strlist *slist; /* perf-stat expects threads to be generated even if tid not given */ if (!tid_str) return thread_map__new_dummy(); - slist = strlist__new(false, tid_str); + slist = strlist__new(tid_str, &slist_config); if (!slist) return NULL; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index c307dd438286..cab8cc24831b 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -46,6 +46,7 @@ struct perf_tool { lost_samples, aux, itrace_start, + context_switch, throttle, unthrottle; event_attr_op attr; diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index eb72716017ac..22245986e59e 
100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -341,20 +341,14 @@ out: static int record_proc_kallsyms(void) { - unsigned int size; - const char *path = "/proc/kallsyms"; - struct stat st; - int ret, err = 0; - - ret = stat(path, &st); - if (ret < 0) { - /* not found */ - size = 0; - if (write(output_fd, &size, 4) != 4) - err = -EIO; - return err; - } - return record_file(path, 4); + unsigned long long size = 0; + /* + * Just to keep older perf.data file parsers happy, record a zero + * sized kallsyms file, i.e. do the same thing that was done when + * /proc/kallsyms (or something specified via --kallsyms, in a + * different path) couldn't be read. + */ + return write(output_fd, &size, 4) != 4 ? -EIO : 0; } static int record_ftrace_printk(void) diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index d4957418657e..8ff7d620d942 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -135,36 +135,6 @@ void event_format__print(struct event_format *event, return event_format__fprintf(event, cpu, data, size, stdout); } -void parse_proc_kallsyms(struct pevent *pevent, - char *file, unsigned int size __maybe_unused) -{ - unsigned long long addr; - char *func; - char *line; - char *next = NULL; - char *addr_str; - char *mod; - char *fmt = NULL; - - line = strtok_r(file, "\n", &next); - while (line) { - mod = NULL; - addr_str = strtok_r(line, " ", &fmt); - addr = strtoull(addr_str, NULL, 16); - /* skip character */ - strtok_r(NULL, " ", &fmt); - func = strtok_r(NULL, "\t", &fmt); - mod = strtok_r(NULL, "]", &fmt); - /* truncate the extra '[' */ - if (mod) - mod = mod + 1; - - pevent_register_function(pevent, func, addr, mod); - - line = strtok_r(NULL, "\n", &next); - } -} - void parse_ftrace_printk(struct pevent *pevent, char *file, unsigned int size __maybe_unused) { diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 
54d9e9b548a8..b67a0ccf5ab9 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -162,25 +162,23 @@ out: static int read_proc_kallsyms(struct pevent *pevent) { unsigned int size; - char *buf; size = read4(pevent); if (!size) return 0; - - buf = malloc(size + 1); - if (buf == NULL) - return -1; - - if (do_read(buf, size) < 0) { - free(buf); - return -1; - } - buf[size] = '\0'; - - parse_proc_kallsyms(pevent, buf, size); - - free(buf); + /* + * Just skip it, now that we configure libtraceevent to use the + * tools/perf/ symbol resolver. + * + * We need to skip it so that we can continue parsing old perf.data + * files, that contains this /proc/kallsyms payload. + * + * Newer perf.data files will have just the 4-bytes zeros "kallsyms + * payload", so that older tools can continue reading it and interpret + * it as "no kallsyms payload is present". + */ + lseek(input_fd, size, SEEK_CUR); + trace_data_size += size; return 0; } diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 6322d37164c5..b90e646c7a91 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <traceevent/event-parse.h> #include "trace-event.h" +#include "machine.h" #include "util.h" /* @@ -19,6 +20,7 @@ * there. */ static struct trace_event tevent; +static bool tevent_initialized; int trace_event__init(struct trace_event *t) { @@ -32,6 +34,31 @@ int trace_event__init(struct trace_event *t) return pevent ? 
0 : -1; } +static int trace_event__init2(void) +{ + int be = traceevent_host_bigendian(); + struct pevent *pevent; + + if (trace_event__init(&tevent)) + return -1; + + pevent = tevent.pevent; + pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); + pevent_set_file_bigendian(pevent, be); + pevent_set_host_bigendian(pevent, be); + tevent_initialized = true; + return 0; +} + +int trace_event__register_resolver(struct machine *machine, + pevent_func_resolver_t *func) +{ + if (!tevent_initialized && trace_event__init2()) + return -1; + + return pevent_set_function_resolver(tevent.pevent, func, machine); +} + void trace_event__cleanup(struct trace_event *t) { traceevent_unload_plugins(t->plugin_list, t->pevent); @@ -62,21 +89,8 @@ tp_format(const char *sys, const char *name) struct event_format* trace_event__tp_format(const char *sys, const char *name) { - static bool initialized; - - if (!initialized) { - int be = traceevent_host_bigendian(); - struct pevent *pevent; - - if (trace_event__init(&tevent)) - return NULL; - - pevent = tevent.pevent; - pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); - pevent_set_file_bigendian(pevent, be); - pevent_set_host_bigendian(pevent, be); - initialized = true; - } + if (!tevent_initialized && trace_event__init2()) + return NULL; return tp_format(sys, name); } diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index d5168f0be4ec..da6cc4cc2a4f 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -18,6 +18,8 @@ struct trace_event { int trace_event__init(struct trace_event *t); void trace_event__cleanup(struct trace_event *t); +int trace_event__register_resolver(struct machine *machine, + pevent_func_resolver_t *func); struct event_format* trace_event__tp_format(const char *sys, const char *name); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index edc2d633b332..7acafb3c5592 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -34,6 +34,7 @@ bool test_attr__enabled; 
bool perf_host = true; bool perf_guest = false; +char tracing_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing"; char tracing_events_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing/events"; void event_attr_init(struct perf_event_attr *attr) @@ -391,6 +392,8 @@ void set_term_quiet_input(struct termios *old) static void set_tracing_events_path(const char *tracing, const char *mountpoint) { + snprintf(tracing_path, sizeof(tracing_path), "%s/%s", + mountpoint, tracing); snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s", mountpoint, tracing, "events"); } @@ -436,66 +439,14 @@ const char *perf_debugfs_mount(const char *mountpoint) void perf_debugfs_set_path(const char *mntpt) { - snprintf(debugfs_mountpoint, strlen(debugfs_mountpoint), "%s", mntpt); set_tracing_events_path("tracing/", mntpt); } -static const char *find_tracefs(void) -{ - const char *path = __perf_tracefs_mount(NULL); - - return path; -} - -static const char *find_debugfs(void) -{ - const char *path = __perf_debugfs_mount(NULL); - - if (!path) - fprintf(stderr, "Your kernel does not support the debugfs filesystem"); - - return path; -} - -/* - * Finds the path to the debugfs/tracing - * Allocates the string and stores it. 
- */ -const char *find_tracing_dir(void) -{ - const char *tracing_dir = ""; - static char *tracing; - static int tracing_found; - const char *debugfs; - - if (tracing_found) - return tracing; - - debugfs = find_tracefs(); - if (!debugfs) { - tracing_dir = "/tracing"; - debugfs = find_debugfs(); - if (!debugfs) - return NULL; - } - - if (asprintf(&tracing, "%s%s", debugfs, tracing_dir) < 0) - return NULL; - - tracing_found = 1; - return tracing; -} - char *get_tracing_file(const char *name) { - const char *tracing; char *file; - tracing = find_tracing_dir(); - if (!tracing) - return NULL; - - if (asprintf(&file, "%s/%s", tracing, name) < 0) + if (asprintf(&file, "%s/%s", tracing_path, name) < 0) return NULL; return file; @@ -566,6 +517,96 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags) return (unsigned long) -1; } +int get_stack_size(const char *str, unsigned long *_size) +{ + char *endptr; + unsigned long size; + unsigned long max_size = round_down(USHRT_MAX, sizeof(u64)); + + size = strtoul(str, &endptr, 0); + + do { + if (*endptr) + break; + + size = round_up(size, sizeof(u64)); + if (!size || size > max_size) + break; + + *_size = size; + return 0; + + } while (0); + + pr_err("callchain: Incorrect stack dump size (max %ld): %s\n", + max_size, str); + return -1; +} + +int parse_callchain_record(const char *arg, struct callchain_param *param) +{ + char *tok, *name, *saveptr = NULL; + char *buf; + int ret = -1; + + /* We need buffer that we know we can write to. */ + buf = malloc(strlen(arg) + 1); + if (!buf) + return -ENOMEM; + + strcpy(buf, arg); + + tok = strtok_r((char *)buf, ",", &saveptr); + name = tok ? 
: (char *)buf; + + do { + /* Framepointer style */ + if (!strncmp(name, "fp", sizeof("fp"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + param->record_mode = CALLCHAIN_FP; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for --call-graph fp\n"); + break; + +#ifdef HAVE_DWARF_UNWIND_SUPPORT + /* Dwarf style */ + } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { + const unsigned long default_stack_dump_size = 8192; + + ret = 0; + param->record_mode = CALLCHAIN_DWARF; + param->dump_size = default_stack_dump_size; + + tok = strtok_r(NULL, ",", &saveptr); + if (tok) { + unsigned long size = 0; + + ret = get_stack_size(tok, &size); + param->dump_size = size; + } +#endif /* HAVE_DWARF_UNWIND_SUPPORT */ + } else if (!strncmp(name, "lbr", sizeof("lbr"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + param->record_mode = CALLCHAIN_LBR; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for --call-graph lbr\n"); + break; + } else { + pr_err("callchain: Unknown --call-graph option " + "value: %s\n", arg); + break; + } + + } while (0); + + free(buf); + return ret; +} + int filename__read_str(const char *filename, char **buf, size_t *sizep) { size_t size = 0, alloc_size = 0; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 8bce58b47a82..291be1d84bc3 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -83,10 +83,10 @@ extern const char *graph_line; extern const char *graph_dotted_line; extern char buildid_dir[]; +extern char tracing_path[]; extern char tracing_events_path[]; extern void perf_debugfs_set_path(const char *mountpoint); const char *perf_debugfs_mount(const char *mountpoint); -const char *find_tracing_dir(void); char *get_tracing_file(const char *name); void put_tracing_file(char *file); @@ -318,6 +318,7 @@ static inline int path__join3(char *bf, size_t size, struct dso; struct symbol; +extern bool srcline_full_filename; char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool 
show_sym); void free_srcline(char *srcline); @@ -339,4 +340,18 @@ int gzip_decompress_to_file(const char *input, int output_fd); int lzma_decompress_to_file(const char *input, int output_fd); #endif +char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints); + +static inline char *asprintf_expr_in_ints(const char *var, size_t nints, int *ints) +{ + return asprintf_expr_inout_ints(var, true, nints, ints); +} + +static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int *ints) +{ + return asprintf_expr_inout_ints(var, false, nints, ints); +} + +int get_stack_size(const char *str, unsigned long *_size); + #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 4b89118f158d..44d440da15dc 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -236,18 +236,16 @@ static struct dso *__machine__findnew_compat(struct machine *machine, const char *file_name; struct dso *dso; - pthread_rwlock_wrlock(&machine->dsos.lock); dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true); if (dso) - goto out_unlock; + goto out; file_name = vdso__get_compat_file(vdso_file); if (!file_name) - goto out_unlock; + goto out; dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name); -out_unlock: - pthread_rwlock_unlock(&machine->dsos.lock); +out: return dso; } |