diff options
Diffstat (limited to 'tools/perf/util')
63 files changed, 2040 insertions, 430 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index eb60e613d795..3840e3a87057 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -87,6 +87,7 @@ libperf-y += help-unknown-cmd.o libperf-y += mem-events.o libperf-y += vsprintf.o libperf-y += drv_configs.o +libperf-y += time-utils.o libperf-$(CONFIG_LIBBPF) += bpf-loader.o libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o @@ -120,9 +121,13 @@ libperf-y += demangle-rust.o ifdef CONFIG_JITDUMP libperf-$(CONFIG_LIBELF) += jitdump.o libperf-$(CONFIG_LIBELF) += genelf.o -libperf-$(CONFIG_LIBELF) += genelf_debug.o +libperf-$(CONFIG_DWARF) += genelf_debug.o endif +libperf-y += perf-hooks.o + +libperf-$(CONFIG_CXX) += c++/ + CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" # avoid compiler warnings in 32-bit mode CFLAGS_genelf_debug.o += -Wno-packed diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index aeb5a441bd74..ea7e0de4b9c1 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -18,16 +18,119 @@ #include "annotate.h" #include "evsel.h" #include "block-range.h" +#include "arch/common.h" #include <regex.h> #include <pthread.h> #include <linux/bitops.h> +#include <sys/utsname.h> const char *disassembler_style; const char *objdump_path; static regex_t file_lineno; -static struct ins *ins__find(const char *name); -static int disasm_line__parse(char *line, char **namep, char **rawp); +static struct ins_ops *ins__find(struct arch *arch, const char *name); +static void ins__sort(struct arch *arch); +static int disasm_line__parse(char *line, const char **namep, char **rawp); + +struct arch { + const char *name; + struct ins *instructions; + size_t nr_instructions; + size_t nr_instructions_allocated; + struct ins_ops *(*associate_instruction_ops)(struct arch *arch, const char *name); + bool sorted_instructions; + bool initialized; + void *priv; + int (*init)(struct arch *arch); + struct { + char comment_char; + char skip_functions_char; + } objdump; +}; + +static struct ins_ops call_ops; +static struct ins_ops dec_ops; +static struct ins_ops jump_ops; +static struct ins_ops mov_ops; +static struct ins_ops nop_ops; +static struct ins_ops lock_ops; +static struct ins_ops ret_ops; + +static int arch__grow_instructions(struct arch *arch) +{ + struct ins *new_instructions; + size_t new_nr_allocated; + + if (arch->nr_instructions_allocated == 0 && arch->instructions) + goto grow_from_non_allocated_table; + + new_nr_allocated = arch->nr_instructions_allocated + 128; + new_instructions = realloc(arch->instructions, new_nr_allocated * sizeof(struct ins)); + if (new_instructions == NULL) + return -1; + +out_update_instructions: + arch->instructions = new_instructions; + arch->nr_instructions_allocated = new_nr_allocated; + return 0; + +grow_from_non_allocated_table: + new_nr_allocated = arch->nr_instructions + 128; + new_instructions = calloc(new_nr_allocated, sizeof(struct ins)); + if (new_instructions == NULL) + return -1; + + memcpy(new_instructions, arch->instructions, arch->nr_instructions); + goto out_update_instructions; +} + +static int arch__associate_ins_ops(struct arch* arch, const char *name, struct ins_ops *ops) +{ + struct ins *ins; + + if (arch->nr_instructions == arch->nr_instructions_allocated && + arch__grow_instructions(arch)) + return -1; + + ins = &arch->instructions[arch->nr_instructions]; + ins->name = strdup(name); + if (!ins->name) + return -1; + + ins->ops = ops; + arch->nr_instructions++; + + ins__sort(arch); + return 0; +} + +#include "arch/arm/annotate/instructions.c" +#include "arch/arm64/annotate/instructions.c" +#include "arch/x86/annotate/instructions.c" +#include "arch/powerpc/annotate/instructions.c" + +static struct arch architectures[] = { + { + .name = "arm", + .init = arm__annotate_init, + }, + { + .name = "arm64", + .init = arm64__annotate_init, + }, + { + .name = "x86", + .instructions = x86__instructions, + .nr_instructions = ARRAY_SIZE(x86__instructions), + .objdump = { + .comment_char = '#', + }, + }, + { + .name = "powerpc", + .init = powerpc__annotate_init, + }, +}; static void ins__delete(struct ins_operands *ops) { @@ -54,7 +157,7 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size, return ins__raw_scnprintf(ins, bf, size, ops); } -static int call__parse(struct ins_operands *ops, struct map *map) +static int call__parse(struct arch *arch, struct ins_operands *ops, struct map *map) { char *endptr, *tok, *name; @@ -66,10 +169,9 @@ static int call__parse(struct ins_operands *ops, struct map *map) name++; -#ifdef __arm__ - if (strchr(name, '+')) + if (arch->objdump.skip_functions_char && + strchr(name, arch->objdump.skip_functions_char)) return -1; -#endif tok = strchr(name, '>'); if (tok == NULL) @@ -118,7 +220,7 @@ bool ins__is_call(const struct ins *ins) return ins->ops == &call_ops; } -static int jump__parse(struct ins_operands *ops, struct map *map __maybe_unused) +static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused) { const char *s = strchr(ops->raw, '+'); @@ -135,6 +237,9 @@ static int jump__parse(struct ins_operands *ops, struct map *map __maybe_unused) static int jump__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { + if (!ops->target.addr) + return ins__raw_scnprintf(ins, bf, size, ops); + return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset); } @@ -173,28 +278,22 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) return 0; } -static int lock__parse(struct ins_operands *ops, struct map *map) +static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map *map) { - char *name; - ops->locked.ops = zalloc(sizeof(*ops->locked.ops)); if (ops->locked.ops == NULL) return 0; - if (disasm_line__parse(ops->raw, &name, &ops->locked.ops->raw) < 0) + if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0) goto out_free_ops; - ops->locked.ins = ins__find(name); - free(name); + ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name); - if (ops->locked.ins == NULL) + if (ops->locked.ins.ops == NULL) goto out_free_ops; - if (!ops->locked.ins->ops) - return 0; - - if (ops->locked.ins->ops->parse && - ops->locked.ins->ops->parse(ops->locked.ops, map) < 0) + if (ops->locked.ins.ops->parse && + ops->locked.ins.ops->parse(arch, ops->locked.ops, map) < 0) goto out_free_ops; return 0; @@ -209,19 +308,19 @@ static int lock__scnprintf(struct ins *ins, char *bf, size_t size, { int printed; - if (ops->locked.ins == NULL) + if (ops->locked.ins.ops == NULL) return ins__raw_scnprintf(ins, bf, size, ops); printed = scnprintf(bf, size, "%-6.6s ", ins->name); - return printed + ins__scnprintf(ops->locked.ins, bf + printed, + return printed + ins__scnprintf(&ops->locked.ins, bf + printed, size - printed, ops->locked.ops); } static void lock__delete(struct ins_operands *ops) { - struct ins *ins = ops->locked.ins; + struct ins *ins = &ops->locked.ins; - if (ins && ins->ops->free) + if (ins->ops && ins->ops->free) ins->ops->free(ops->locked.ops); else ins__delete(ops->locked.ops); @@ -237,7 +336,7 @@ static struct ins_ops lock_ops = { .scnprintf = lock__scnprintf, }; -static int mov__parse(struct ins_operands *ops, struct map *map __maybe_unused) +static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *map __maybe_unused) { char *s = strchr(ops->raw, ','), *target, *comment, prev; @@ -252,11 +351,7 @@ static int mov__parse(struct ins_operands *ops, struct map *map __maybe_unused) return -1; target = ++s; -#ifdef __arm__ - comment = strchr(s, ';'); -#else - comment = strchr(s, '#'); -#endif + comment = strchr(s, arch->objdump.comment_char); if (comment != NULL) s = comment - 1; @@ -304,7 +399,7 @@ static struct ins_ops mov_ops = { .scnprintf = mov__scnprintf, }; -static int dec__parse(struct ins_operands *ops, struct map *map __maybe_unused) +static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused) { char *target, *comment, *s, prev; @@ -321,7 +416,7 @@ static int dec__parse(struct ins_operands *ops, struct map *map __maybe_unused) if (ops->target.raw == NULL) return -1; - comment = strchr(s, '#'); + comment = strchr(s, arch->objdump.comment_char); if (comment == NULL) return 0; @@ -364,99 +459,6 @@ bool ins__is_ret(const struct ins *ins) return ins->ops == &ret_ops; } -static struct ins instructions[] = { - { .name = "add", .ops = &mov_ops, }, - { .name = "addl", .ops = &mov_ops, }, - { .name = "addq", .ops = &mov_ops, }, - { .name = "addw", .ops = &mov_ops, }, - { .name = "and", .ops = &mov_ops, }, -#ifdef __arm__ - { .name = "b", .ops = &jump_ops, }, // might also be a call - { .name = "bcc", .ops = &jump_ops, }, - { .name = "bcs", .ops = &jump_ops, }, - { .name = "beq", .ops = &jump_ops, }, - { .name = "bge", .ops = &jump_ops, }, - { .name = "bgt", .ops = &jump_ops, }, - { .name = "bhi", .ops = &jump_ops, }, - { .name = "bl", .ops = &call_ops, }, - { .name = "bls", .ops = &jump_ops, }, - { .name = "blt", .ops = &jump_ops, }, - { .name = "blx", .ops = &call_ops, }, - { .name = "bne", .ops = &jump_ops, }, -#endif - { .name = "bts", .ops = &mov_ops, }, - { .name = "call", .ops = &call_ops, }, - { .name = "callq", .ops = &call_ops, }, - { .name = "cmp", .ops = &mov_ops, }, - { .name = "cmpb", .ops = &mov_ops, }, - { .name = "cmpl", .ops = &mov_ops, }, - { .name = "cmpq", .ops = &mov_ops, }, - { .name = "cmpw", .ops = &mov_ops, }, - { .name = "cmpxch", .ops = &mov_ops, }, - { .name = "dec", .ops = &dec_ops, }, - { .name = "decl", .ops = &dec_ops, }, - { .name = "imul", .ops = &mov_ops, }, - { .name = "inc", .ops = &dec_ops, }, - { .name = "incl", .ops = &dec_ops, }, - { .name = "ja", .ops = &jump_ops, }, - { .name = "jae", .ops = &jump_ops, }, - { .name = "jb", .ops = &jump_ops, }, - { .name = "jbe", .ops = &jump_ops, }, - { .name = "jc", .ops = &jump_ops, }, - { .name = "jcxz", .ops = &jump_ops, }, - { .name = "je", .ops = &jump_ops, }, - { .name = "jecxz", .ops = &jump_ops, }, - { .name = "jg", .ops = &jump_ops, }, - { .name = "jge", .ops = &jump_ops, }, - { .name = "jl", .ops = &jump_ops, }, - { .name = "jle", .ops = &jump_ops, }, - { .name = "jmp", .ops = &jump_ops, }, - { .name = "jmpq", .ops = &jump_ops, }, - { .name = "jna", .ops = &jump_ops, }, - { .name = "jnae", .ops = &jump_ops, }, - { .name = "jnb", .ops = &jump_ops, }, - { .name = "jnbe", .ops = &jump_ops, }, - { .name = "jnc", .ops = &jump_ops, }, - { .name = "jne", .ops = &jump_ops, }, - { .name = "jng", .ops = &jump_ops, }, - { .name = "jnge", .ops = &jump_ops, }, - { .name = "jnl", .ops = &jump_ops, }, - { .name = "jnle", .ops = &jump_ops, }, - { .name = "jno", .ops = &jump_ops, }, - { .name = "jnp", .ops = &jump_ops, }, - { .name = "jns", .ops = &jump_ops, }, - { .name = "jnz", .ops = &jump_ops, }, - { .name = "jo", .ops = &jump_ops, }, - { .name = "jp", .ops = &jump_ops, }, - { .name = "jpe", .ops = &jump_ops, }, - { .name = "jpo", .ops = &jump_ops, }, - { .name = "jrcxz", .ops = &jump_ops, }, - { .name = "js", .ops = &jump_ops, }, - { .name = "jz", .ops = &jump_ops, }, - { .name = "lea", .ops = &mov_ops, }, - { .name = "lock", .ops = &lock_ops, }, - { .name = "mov", .ops = &mov_ops, }, - { .name = "movb", .ops = &mov_ops, }, - { .name = "movdqa",.ops = &mov_ops, }, - { .name = "movl", .ops = &mov_ops, }, - { .name = "movq", .ops = &mov_ops, }, - { .name = "movslq", .ops = &mov_ops, }, - { .name = "movzbl", .ops = &mov_ops, }, - { .name = "movzwl", .ops = &mov_ops, }, - { .name = "nop", .ops = &nop_ops, }, - { .name = "nopl", .ops = &nop_ops, }, - { .name = "nopw", .ops = &nop_ops, }, - { .name = "or", .ops = &mov_ops, }, - { .name = "orl", .ops = &mov_ops, }, - { .name = "test", .ops = &mov_ops, }, - { .name = "testb", .ops = &mov_ops, }, - { .name = "testl", .ops = &mov_ops, }, - { .name = "xadd", .ops = &mov_ops, }, - { .name = "xbeginl", .ops = &jump_ops, }, - { .name = "xbeginq", .ops = &jump_ops, }, - { .name = "retq", .ops = &ret_ops, }, -}; - static int ins__key_cmp(const void *name, const void *insp) { const struct ins *ins = insp; @@ -472,24 +474,70 @@ static int ins__cmp(const void *a, const void *b) return strcmp(ia->name, ib->name); } -static void ins__sort(void) +static void ins__sort(struct arch *arch) +{ + const int nmemb = arch->nr_instructions; + + qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp); +} + +static struct ins_ops *__ins__find(struct arch *arch, const char *name) +{ + struct ins *ins; + const int nmemb = arch->nr_instructions; + + if (!arch->sorted_instructions) { + ins__sort(arch); + arch->sorted_instructions = true; + } + + ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); + return ins ? ins->ops : NULL; +} + +static struct ins_ops *ins__find(struct arch *arch, const char *name) +{ + struct ins_ops *ops = __ins__find(arch, name); + + if (!ops && arch->associate_instruction_ops) + ops = arch->associate_instruction_ops(arch, name); + + return ops; +} + +static int arch__key_cmp(const void *name, const void *archp) +{ + const struct arch *arch = archp; + + return strcmp(name, arch->name); +} + +static int arch__cmp(const void *a, const void *b) { - const int nmemb = ARRAY_SIZE(instructions); + const struct arch *aa = a; + const struct arch *ab = b; - qsort(instructions, nmemb, sizeof(struct ins), ins__cmp); + return strcmp(aa->name, ab->name); } -static struct ins *ins__find(const char *name) +static void arch__sort(void) { - const int nmemb = ARRAY_SIZE(instructions); + const int nmemb = ARRAY_SIZE(architectures); + + qsort(architectures, nmemb, sizeof(struct arch), arch__cmp); +} + +static struct arch *arch__find(const char *name) +{ + const int nmemb = ARRAY_SIZE(architectures); static bool sorted; if (!sorted) { - ins__sort(); + arch__sort(); sorted = true; } - return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__key_cmp); + return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp); } int symbol__alloc_hist(struct symbol *sym) @@ -709,21 +757,18 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip) return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip); } -static void disasm_line__init_ins(struct disasm_line *dl, struct map *map) +static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map *map) { - dl->ins = ins__find(dl->name); + dl->ins.ops = ins__find(arch, dl->ins.name); - if (dl->ins == NULL) + if (!dl->ins.ops) return; - if (!dl->ins->ops) - return; - - if (dl->ins->ops->parse && dl->ins->ops->parse(&dl->ops, map) < 0) - dl->ins = NULL; + if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, map) < 0) + dl->ins.ops = NULL; } -static int disasm_line__parse(char *line, char **namep, char **rawp) +static int disasm_line__parse(char *line, const char **namep, char **rawp) { char *name = line, tmp; @@ -756,12 +801,14 @@ static int disasm_line__parse(char *line, char **namep, char **rawp) return 0; out_free_name: - zfree(namep); + free((void *)namep); + *namep = NULL; return -1; } static struct disasm_line *disasm_line__new(s64 offset, char *line, size_t privsize, int line_nr, + struct arch *arch, struct map *map) { struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); @@ -774,10 +821,10 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line, goto out_delete; if (offset != -1) { - if (disasm_line__parse(dl->line, &dl->name, &dl->ops.raw) < 0) + if (disasm_line__parse(dl->line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; - disasm_line__init_ins(dl, map); + disasm_line__init_ins(dl, arch, map); } } @@ -793,20 +840,21 @@ out_delete: void disasm_line__free(struct disasm_line *dl) { zfree(&dl->line); - zfree(&dl->name); - if (dl->ins && dl->ins->ops->free) - dl->ins->ops->free(&dl->ops); + if (dl->ins.ops && dl->ins.ops->free) + dl->ins.ops->free(&dl->ops); else ins__delete(&dl->ops); + free((void *)dl->ins.name); + dl->ins.name = NULL; free(dl); } int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw) { - if (raw || !dl->ins) - return scnprintf(bf, size, "%-6.6s %s", dl->name, dl->ops.raw); + if (raw || !dl->ins.ops) + return scnprintf(bf, size, "%-6.6s %s", dl->ins.name, dl->ops.raw); - return ins__scnprintf(dl->ins, bf, size, &dl->ops); + return ins__scnprintf(&dl->ins, bf, size, &dl->ops); } static void disasm__add(struct list_head *head, struct disasm_line *line) @@ -1087,6 +1135,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * The ops.raw part will be parsed further according to type of the instruction. */ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, + struct arch *arch, FILE *file, size_t privsize, int *line_nr) { @@ -1149,7 +1198,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, parsed_line = tmp2 + 1; } - dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, map); + dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, arch, map); free(line); (*line_nr)++; @@ -1161,7 +1210,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, map__rip_2objdump(map, sym->start); /* kcore has no symbols, so add the call target name */ - if (dl->ins && ins__is_call(dl->ins) && !dl->ops.target.name) { + if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.name) { struct addr_map_symbol target = { .map = map, .addr = dl->ops.target.addr, @@ -1191,8 +1240,8 @@ static void delete_last_nop(struct symbol *sym) while (!list_empty(list)) { dl = list_entry(list->prev, struct disasm_line, node); - if (dl->ins && dl->ins->ops) { - if (dl->ins->ops != &nop_ops) + if (dl->ins.ops) { + if (dl->ins.ops != &nop_ops) return; } else { if (!strstr(dl->line, " nop ") && @@ -1280,10 +1329,23 @@ fallback: return 0; } -int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) +static const char *annotate__norm_arch(const char *arch_name) +{ + struct utsname uts; + + if (!arch_name) { /* Assume we are annotating locally. */ + if (uname(&uts) < 0) + return NULL; + arch_name = uts.machine; + } + return normalize_arch((char *)arch_name); +} + +int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize) { struct dso *dso = map->dso; char command[PATH_MAX * 2]; + struct arch *arch = NULL; FILE *file; char symfs_filename[PATH_MAX]; struct kcore_extract kce; @@ -1297,6 +1359,22 @@ int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) if (err) return err; + arch_name = annotate__norm_arch(arch_name); + if (!arch_name) + return -1; + + arch = arch__find(arch_name); + if (arch == NULL) + return -ENOTSUP; + + if (arch->init) { + err = arch->init(arch); + if (err) { + pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); + return err; + } + } + pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, symfs_filename, sym->name, map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end)); @@ -1395,7 +1473,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) nline = 0; while (!feof(file)) { - if (symbol__parse_objdump_line(sym, map, file, privsize, + if (symbol__parse_objdump_line(sym, map, arch, file, privsize, &lineno) < 0) break; nline++; @@ -1764,7 +1842,7 @@ static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp) if (dl->offset == -1) return fprintf(fp, "%s\n", dl->line); - printed = fprintf(fp, "%#" PRIx64 " %s", dl->offset, dl->name); + printed = fprintf(fp, "%#" PRIx64 " %s", dl->offset, dl->ins.name); if (dl->ops.raw[0] != '\0') { printed += fprintf(fp, "%.*s %s\n", 6 - (int)printed, " ", @@ -1793,7 +1871,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct rb_root source_line = RB_ROOT; u64 len; - if (symbol__disassemble(sym, map, 0) < 0) + if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0) < 0) return -1; len = symbol__size(sym); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 5bbcec173b82..87e4cadc5d27 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -11,7 +11,12 @@ #include <linux/rbtree.h> #include <pthread.h> -struct ins; +struct ins_ops; + +struct ins { + const char *name; + struct ins_ops *ops; +}; struct ins_operands { char *raw; @@ -28,24 +33,21 @@ struct ins_operands { u64 addr; } source; struct { - struct ins *ins; + struct ins ins; struct ins_operands *ops; } locked; }; }; +struct arch; + struct ins_ops { void (*free)(struct ins_operands *ops); - int (*parse)(struct ins_operands *ops, struct map *map); + int (*parse)(struct arch *arch, struct ins_operands *ops, struct map *map); int (*scnprintf)(struct ins *ins, char *bf, size_t size, struct ins_operands *ops); }; -struct ins { - const char *name; - struct ins_ops *ops; -}; - bool ins__is_jump(const struct ins *ins); bool ins__is_call(const struct ins *ins); bool ins__is_ret(const struct ins *ins); @@ -57,8 +59,7 @@ struct disasm_line { struct list_head node; s64 offset; char *line; - char *name; - struct ins *ins; + struct ins ins; int line_nr; float ipc; u64 cycles; @@ -156,7 +157,7 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); -int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize); +int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize); enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 2b2c9b82f5ab..36c861103291 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -14,11 +14,11 @@ #include "debug.h" #include "bpf-loader.h" #include "bpf-prologue.h" -#include "llvm-utils.h" #include "probe-event.h" #include "probe-finder.h" // for MAX_PROBES #include "parse-events.h" #include "llvm-utils.h" +#include "c++/clang-c.h" #define DEFINE_PRINT_FN(name, level) \ static int libbpf_##name(const char *fmt, ...) \ @@ -86,10 +86,21 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) void *obj_buf; size_t obj_buf_sz; - err = llvm__compile_bpf(filename, &obj_buf, &obj_buf_sz); - if (err) - return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE); + perf_clang__init(); + err = perf_clang__compile_bpf(filename, &obj_buf, &obj_buf_sz); + perf_clang__cleanup(); + if (err) { + pr_warning("bpf: builtin compilation failed: %d, try external compiler\n", err); + err = llvm__compile_bpf(filename, &obj_buf, &obj_buf_sz); + if (err) + return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE); + } else + pr_debug("bpf: successfull builtin compilation\n"); obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename); + + if (!IS_ERR(obj) && llvm_param.dump_obj) + llvm__dump_obj(filename, obj_buf, obj_buf_sz); + free(obj_buf); } else obj = bpf_object__open(filename); @@ -241,7 +252,7 @@ parse_prog_config_kvpair(const char *config_str, struct perf_probe_event *pev) int err = 0; if (!text) { - pr_debug("No enough memory: dup config_str failed\n"); + pr_debug("Not enough memory: dup config_str failed\n"); return ERR_PTR(-ENOMEM); } @@ -531,7 +542,7 @@ static int map_prologue(struct perf_probe_event *pev, int *mapping, ptevs = malloc(array_sz); if (!ptevs) { - pr_debug("No enough memory: alloc ptevs failed\n"); + pr_debug("Not enough memory: alloc ptevs failed\n"); return -ENOMEM; } @@ -604,13 +615,13 @@ static int hook_load_preprocessor(struct bpf_program *prog) priv->need_prologue = true; priv->insns_buf = malloc(sizeof(struct bpf_insn) * BPF_MAXINSNS); if (!priv->insns_buf) { - pr_debug("No enough memory: alloc insns_buf failed\n"); + pr_debug("Not enough memory: alloc insns_buf failed\n"); return -ENOMEM; } priv->type_mapping = malloc(sizeof(int) * pev->ntevs); if (!priv->type_mapping) { - pr_debug("No enough memory: alloc type_mapping failed\n"); + pr_debug("Not enough memory: alloc type_mapping failed\n"); return -ENOMEM; } memset(priv->type_mapping, -1, @@ -864,7 +875,7 @@ bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term) op->k.array.ranges = memdup(term->array.ranges, memsz); if (!op->k.array.ranges) { - pr_debug("No enough memory to alloc indices for map\n"); + pr_debug("Not enough memory to alloc indices for map\n"); return -ENOMEM; } op->key_type = BPF_MAP_KEY_RANGES; @@ -929,7 +940,7 @@ bpf_map_priv__clone(struct bpf_map_priv *priv) newpriv = zalloc(sizeof(*newpriv)); if (!newpriv) { - pr_debug("No enough memory to alloc map private\n"); + pr_debug("Not enough memory to alloc map private\n"); return NULL; } INIT_LIST_HEAD(&newpriv->ops_list); @@ -960,7 +971,7 @@ bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op) if (!priv) { priv = zalloc(sizeof(*priv)); if (!priv) { - pr_debug("No enough memory to alloc map private\n"); + pr_debug("Not enough memory to alloc map private\n"); return -ENOMEM; } INIT_LIST_HEAD(&priv->ops_list); diff --git a/tools/perf/util/c++/Build b/tools/perf/util/c++/Build new file mode 100644 index 000000000000..988fef1b11d7 --- /dev/null +++ b/tools/perf/util/c++/Build @@ -0,0 +1,2 @@ +libperf-$(CONFIG_CLANGLLVM) += clang.o +libperf-$(CONFIG_CLANGLLVM) += clang-test.o diff --git a/tools/perf/util/c++/clang-c.h b/tools/perf/util/c++/clang-c.h new file mode 100644 index 000000000000..0eadd792ab1f --- /dev/null +++ b/tools/perf/util/c++/clang-c.h @@ -0,0 +1,43 @@ +#ifndef PERF_UTIL_CLANG_C_H +#define PERF_UTIL_CLANG_C_H + +#include <stddef.h> /* for size_t */ +#include <util-cxx.h> /* for __maybe_unused */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef HAVE_LIBCLANGLLVM_SUPPORT +extern void perf_clang__init(void); +extern void perf_clang__cleanup(void); + +extern int test__clang_to_IR(void); +extern int test__clang_to_obj(void); + +extern int perf_clang__compile_bpf(const char *filename, + void **p_obj_buf, + size_t *p_obj_buf_sz); +#else + + +static inline void perf_clang__init(void) { } +static inline void perf_clang__cleanup(void) { } + +static inline int test__clang_to_IR(void) { return -1; } +static inline int test__clang_to_obj(void) { return -1;} + +static inline int +perf_clang__compile_bpf(const char *filename __maybe_unused, + void **p_obj_buf __maybe_unused, + size_t *p_obj_buf_sz __maybe_unused) +{ + return -ENOTSUP; +} + +#endif + +#ifdef __cplusplus +} +#endif +#endif diff --git a/tools/perf/util/c++/clang-test.cpp b/tools/perf/util/c++/clang-test.cpp new file mode 100644 index 000000000000..9b11e8c82798 --- /dev/null +++ b/tools/perf/util/c++/clang-test.cpp @@ -0,0 +1,62 @@ +#include "clang.h" +#include "clang-c.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" + +#include <util-cxx.h> +#include <tests/llvm.h> +#include <string> + +class perf_clang_scope { +public: + explicit perf_clang_scope() {perf_clang__init();} + ~perf_clang_scope() {perf_clang__cleanup();} +}; + +static std::unique_ptr<llvm::Module> +__test__clang_to_IR(void) +{ + unsigned int kernel_version; + + if (fetch_kernel_version(&kernel_version, NULL, 0)) + return std::unique_ptr<llvm::Module>(nullptr); + + std::string cflag_kver("-DLINUX_VERSION_CODE=" + + std::to_string(kernel_version)); + + std::unique_ptr<llvm::Module> M = + perf::getModuleFromSource({cflag_kver.c_str()}, + "perf-test.c", + test_llvm__bpf_base_prog); + return M; +} + +extern "C" { +int test__clang_to_IR(void) +{ + perf_clang_scope _scope; + + auto M = __test__clang_to_IR(); + if (!M) + return -1; + for (llvm::Function& F : *M) + if (F.getName() == "bpf_func__SyS_epoll_wait") + return 0; + return -1; +} + +int test__clang_to_obj(void) +{ + perf_clang_scope _scope; + + auto M = __test__clang_to_IR(); + if (!M) + return -1; + + auto Buffer = perf::getBPFObjectFromModule(&*M); + if (!Buffer) + return -1; + return 0; +} + +} diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp new file mode 100644 index 000000000000..1e974152cac2 --- /dev/null +++ b/tools/perf/util/c++/clang.cpp @@ -0,0 +1,195 @@ +/* + * llvm C frontend for perf. Support dynamically compile C file + * + * Inspired by clang example code: + * http://llvm.org/svn/llvm-project/cfe/trunk/examples/clang-interpreter/main.cpp + * + * Copyright (C) 2016 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2016 Huawei Inc. + */ + +#include "clang/CodeGen/CodeGenAction.h" +#include "clang/Frontend/CompilerInvocation.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/TextDiagnosticPrinter.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <memory> + +#include "clang.h" +#include "clang-c.h" + +namespace perf { + +static std::unique_ptr<llvm::LLVMContext> LLVMCtx; + +using namespace clang; + +static CompilerInvocation * +createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path, + DiagnosticsEngine& Diags) +{ + llvm::opt::ArgStringList CCArgs { + "-cc1", + "-triple", "bpf-pc-linux", + "-fsyntax-only", + "-ferror-limit", "19", + "-fmessage-length", "127", + "-O2", + "-nostdsysteminc", + "-nobuiltininc", + "-vectorize-loops", + "-vectorize-slp", + "-Wno-unused-value", + "-Wno-pointer-sign", + "-x", "c"}; + + CCArgs.append(CFlags.begin(), CFlags.end()); + CompilerInvocation *CI = tooling::newInvocation(&Diags, CCArgs); + + FrontendOptions& Opts = CI->getFrontendOpts(); + Opts.Inputs.clear(); + Opts.Inputs.emplace_back(Path, IK_C); + return CI; +} + +static std::unique_ptr<llvm::Module> +getModuleFromSource(llvm::opt::ArgStringList CFlags, + StringRef Path, IntrusiveRefCntPtr<vfs::FileSystem> VFS) +{ + CompilerInstance Clang; + Clang.createDiagnostics(); + + Clang.setVirtualFileSystem(&*VFS); + + IntrusiveRefCntPtr<CompilerInvocation> CI = + createCompilerInvocation(std::move(CFlags), Path, + Clang.getDiagnostics()); + Clang.setInvocation(&*CI); + + std::unique_ptr<CodeGenAction> Act(new EmitLLVMOnlyAction(&*LLVMCtx)); + if (!Clang.ExecuteAction(*Act)) + return std::unique_ptr<llvm::Module>(nullptr); + + return Act->takeModule(); +} + +std::unique_ptr<llvm::Module> +getModuleFromSource(llvm::opt::ArgStringList CFlags, + StringRef Name, StringRef Content) +{ + using namespace vfs; + + llvm::IntrusiveRefCntPtr<OverlayFileSystem> OverlayFS( + new OverlayFileSystem(getRealFileSystem())); + llvm::IntrusiveRefCntPtr<InMemoryFileSystem> MemFS( + new InMemoryFileSystem(true)); + + /* + * pushOverlay helps setting working dir for MemFS. Must call + * before addFile. + */ + OverlayFS->pushOverlay(MemFS); + MemFS->addFile(Twine(Name), 0, llvm::MemoryBuffer::getMemBuffer(Content)); + + return getModuleFromSource(std::move(CFlags), Name, OverlayFS); +} + +std::unique_ptr<llvm::Module> +getModuleFromSource(llvm::opt::ArgStringList CFlags, StringRef Path) +{ + IntrusiveRefCntPtr<vfs::FileSystem> VFS(vfs::getRealFileSystem()); + return getModuleFromSource(std::move(CFlags), Path, VFS); +} + +std::unique_ptr<llvm::SmallVectorImpl<char>> +getBPFObjectFromModule(llvm::Module *Module) +{ + using namespace llvm; + + std::string TargetTriple("bpf-pc-linux"); + std::string Error; + const Target* Target = TargetRegistry::lookupTarget(TargetTriple, Error); + if (!Target) { + llvm::errs() << Error; + return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr); + } + + llvm::TargetOptions Opt; + TargetMachine *TargetMachine = + Target->createTargetMachine(TargetTriple, + "generic", "", + Opt, Reloc::Static); + + Module->setDataLayout(TargetMachine->createDataLayout()); + Module->setTargetTriple(TargetTriple); + + std::unique_ptr<SmallVectorImpl<char>> Buffer(new SmallVector<char, 0>()); + raw_svector_ostream ostream(*Buffer); + + legacy::PassManager PM; + if (TargetMachine->addPassesToEmitFile(PM, ostream, + TargetMachine::CGFT_ObjectFile)) { + llvm::errs() << "TargetMachine can't emit a file of this type\n"; + return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr);; + } + PM.run(*Module); + + return std::move(Buffer); +} + +} + +extern "C" { +void perf_clang__init(void) +{ + perf::LLVMCtx.reset(new llvm::LLVMContext()); + LLVMInitializeBPFTargetInfo(); + LLVMInitializeBPFTarget(); + LLVMInitializeBPFTargetMC(); + LLVMInitializeBPFAsmPrinter(); +} + +void perf_clang__cleanup(void) +{ + perf::LLVMCtx.reset(nullptr); + llvm::llvm_shutdown(); +} + +int perf_clang__compile_bpf(const char *filename, + void **p_obj_buf, + size_t *p_obj_buf_sz) +{ + using namespace perf; + + if (!p_obj_buf || !p_obj_buf_sz) + return -EINVAL; + + llvm::opt::ArgStringList CFlags; + auto M = getModuleFromSource(std::move(CFlags), filename); + if (!M) + return -EINVAL; + auto O = getBPFObjectFromModule(&*M); + if (!O) + return -EINVAL; + + size_t size = O->size_in_bytes(); + void *buffer; + + buffer = malloc(size); + if (!buffer) + return -ENOMEM; + memcpy(buffer, O->data(), size); + *p_obj_buf = buffer; + *p_obj_buf_sz = size; + return 0; +} +} diff --git a/tools/perf/util/c++/clang.h b/tools/perf/util/c++/clang.h new file mode 100644 index 000000000000..dd8b0427550d --- /dev/null +++ b/tools/perf/util/c++/clang.h @@ -0,0 +1,26 @@ +#ifndef PERF_UTIL_CLANG_H +#define PERF_UTIL_CLANG_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Option/Option.h" +#include <memory> + +namespace perf { + +using namespace llvm; + +std::unique_ptr<Module> +getModuleFromSource(opt::ArgStringList CFlags, + StringRef Name, StringRef Content); + +std::unique_ptr<Module> +getModuleFromSource(opt::ArgStringList CFlags, + StringRef Path); + +std::unique_ptr<llvm::SmallVectorImpl<char>> +getBPFObjectFromModule(llvm::Module *Module); + +} +#endif diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 07fd30bc2f81..42922512c1c6 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -193,7 +193,6 @@ int perf_callchain_config(const char *var, const char *value) if (!strcmp(var, "record-mode")) return parse_callchain_record_opt(value, &callchain_param); -#ifdef HAVE_DWARF_UNWIND_SUPPORT if (!strcmp(var, "dump-size")) { unsigned long size = 0; int ret; @@ -203,7 +202,6 @@ int perf_callchain_config(const char *var, const char *value) return ret; } -#endif if (!strcmp(var, "print-type")) return parse_callchain_mode(value); if (!strcmp(var, "order")) @@ -440,6 +438,21 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) call->ip = cursor_node->ip; call->ms.sym = cursor_node->sym; call->ms.map = cursor_node->map; + + if (cursor_node->branch) { + call->branch_count = 1; + + if (cursor_node->branch_flags.predicted) + call->predicted_count = 1; + + if (cursor_node->branch_flags.abort) + call->abort_count = 1; + + call->cycles_count = cursor_node->branch_flags.cycles; + call->iter_count = cursor_node->nr_loop_iter; + call->samples_count = cursor_node->samples; + } + list_add_tail(&call->list, &node->val); callchain_cursor_advance(cursor); @@ -499,8 +512,23 @@ static enum match_result match_chain(struct callchain_cursor_node *node, right = node->ip; } - if (left == right) + if (left == right) { + if (node->branch) { + cnode->branch_count++; + + if (node->branch_flags.predicted) + cnode->predicted_count++; + + if (node->branch_flags.abort) + cnode->abort_count++; + + cnode->cycles_count += node->branch_flags.cycles; + cnode->iter_count += node->nr_loop_iter; + cnode->samples_count += node->samples; + } + return MATCH_EQ; + } return left > right ? MATCH_GT : MATCH_LT; } @@ -730,7 +758,8 @@ merge_chain_branch(struct callchain_cursor *cursor, list_for_each_entry_safe(list, next_list, &src->val, list) { callchain_cursor_append(cursor, list->ip, - list->ms.map, list->ms.sym); + list->ms.map, list->ms.sym, + false, NULL, 0, 0); list_del(&list->list); free(list); } @@ -767,7 +796,9 @@ int callchain_merge(struct callchain_cursor *cursor, } int callchain_cursor_append(struct callchain_cursor *cursor, - u64 ip, struct map *map, struct symbol *sym) + u64 ip, struct map *map, struct symbol *sym, + bool branch, struct branch_flags *flags, + int nr_loop_iter, int samples) { struct callchain_cursor_node *node = *cursor->last; @@ -782,6 +813,13 @@ int callchain_cursor_append(struct callchain_cursor *cursor, node->ip = ip; node->map = map; node->sym = sym; + node->branch = branch; + node->nr_loop_iter = nr_loop_iter; + node->samples = samples; + + if (flags) + memcpy(&node->branch_flags, flags, + sizeof(struct branch_flags)); cursor->nr++; @@ -939,6 +977,163 @@ int callchain_node__fprintf_value(struct callchain_node *node, return 0; } +static void callchain_counts_value(struct callchain_node *node, + u64 *branch_count, u64 *predicted_count, + u64 *abort_count, u64 *cycles_count) +{ + struct callchain_list *clist; + + list_for_each_entry(clist, &node->val, list) { + if (branch_count) + *branch_count += clist->branch_count; + + if (predicted_count) + *predicted_count += clist->predicted_count; + + if (abort_count) + *abort_count += clist->abort_count; + + if (cycles_count) + *cycles_count += clist->cycles_count; + } +} + +static int callchain_node_branch_counts_cumul(struct callchain_node *node, + u64 *branch_count, + u64 *predicted_count, + u64 *abort_count, + u64 *cycles_count) +{ + struct callchain_node *child; + struct rb_node *n; + + n = rb_first(&node->rb_root_in); + while (n) { + child = rb_entry(n, struct callchain_node, rb_node_in); + n = rb_next(n); + + callchain_node_branch_counts_cumul(child, branch_count, + predicted_count, + abort_count, + cycles_count); + + callchain_counts_value(child, branch_count, + predicted_count, abort_count, + cycles_count); + } + + return 0; +} + +int callchain_branch_counts(struct callchain_root *root, + u64 *branch_count, u64 *predicted_count, + u64 *abort_count, u64 *cycles_count) +{ + if (branch_count) + *branch_count = 0; + + if (predicted_count) + *predicted_count = 0; + + if (abort_count) + *abort_count = 0; + + if (cycles_count) + *cycles_count = 0; + + return callchain_node_branch_counts_cumul(&root->node, + branch_count, + predicted_count, + abort_count, + cycles_count); +} + +static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, + u64 branch_count, u64 predicted_count, + u64 abort_count, u64 cycles_count, + u64 iter_count, u64 samples_count) +{ + double predicted_percent = 0.0; + const char *null_str = ""; + char iter_str[32]; + char *str; + u64 cycles = 0; + + if (branch_count == 0) { + if (fp) + return fprintf(fp, " (calltrace)"); + + return scnprintf(bf, bfsize, " (calltrace)"); + } + + if (iter_count && samples_count) { + scnprintf(iter_str, sizeof(iter_str), + ", iterations:%" PRId64 "", + iter_count / samples_count); + str = iter_str; + } else + str = (char *)null_str; + + predicted_percent = predicted_count * 100.0 / branch_count; + cycles = cycles_count / branch_count; + + if ((predicted_percent >= 100.0) && (abort_count == 0)) { + if (fp) + return fprintf(fp, " (cycles:%" PRId64 "%s)", + cycles, str); + + return scnprintf(bf, bfsize, " (cycles:%" PRId64 "%s)", + cycles, str); + } + + if ((predicted_percent < 100.0) && (abort_count == 0)) { + if (fp) + return fprintf(fp, + " (predicted:%.1f%%, cycles:%" PRId64 "%s)", + predicted_percent, cycles, str); + + return scnprintf(bf, bfsize, + " (predicted:%.1f%%, cycles:%" PRId64 "%s)", + predicted_percent, cycles, str); + } + + if (fp) + return fprintf(fp, + " (predicted:%.1f%%, abort:%" PRId64 ", cycles:%" PRId64 "%s)", + predicted_percent, abort_count, cycles, str); + + return scnprintf(bf, bfsize, + " (predicted:%.1f%%, abort:%" PRId64 ", cycles:%" PRId64 "%s)", + predicted_percent, abort_count, cycles, str); +} + +int callchain_list_counts__printf_value(struct callchain_node *node, + struct callchain_list *clist, + FILE *fp, char *bf, int bfsize) +{ + u64 branch_count, predicted_count; + u64 abort_count, cycles_count; + u64 iter_count = 0, samples_count = 0; + + branch_count = clist->branch_count; + predicted_count = clist->predicted_count; + abort_count = clist->abort_count; + cycles_count = clist->cycles_count; + + if (node) { + struct callchain_list *call; + + list_for_each_entry(call, &node->val, list) { + iter_count += call->iter_count; + samples_count += call->samples_count; + } + } + + return callchain_counts_printf(fp, bf, bfsize, branch_count, + predicted_count, abort_count, + cycles_count, iter_count, samples_count); +} + static void free_callchain_node(struct callchain_node *node) { struct callchain_list *list, *tmp; @@ -1039,3 +1234,30 @@ out: } return -ENOMEM; } + +int callchain_cursor__copy(struct callchain_cursor *dst, + struct callchain_cursor *src) +{ + int rc = 0; + + callchain_cursor_reset(dst); + callchain_cursor_commit(src); + + while (true) { + struct callchain_cursor_node *node; + + node = callchain_cursor_current(src); + if (node == NULL) + break; + + rc = callchain_cursor_append(dst, node->ip, node->map, node->sym, + node->branch, &node->branch_flags, + node->nr_loop_iter, node->samples); + if (rc) + break; + + callchain_cursor_advance(src); + } + + return rc; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 13e75549c440..35c8e379530f 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -11,11 +11,7 @@ #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n" -#ifdef HAVE_DWARF_UNWIND_SUPPORT # define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|dwarf|lbr)\n" -#else -# define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|lbr)\n" -#endif #define RECORD_SIZE_HELP \ HELP_PAD "record_size:\tif record_mode is 'dwarf', max size of stack recording (<bytes>)\n" \ @@ -115,6 +111,12 @@ struct callchain_list { bool unfolded; bool has_children; }; + u64 branch_count; + u64 predicted_count; + u64 abort_count; + u64 cycles_count; + u64 iter_count; + u64 samples_count; char *srcline; struct list_head list; }; @@ -129,6 +131,10 @@ struct callchain_cursor_node { u64 ip; struct map *map; struct symbol *sym; + bool branch; + struct branch_flags branch_flags; + int nr_loop_iter; + int samples; struct callchain_cursor_node *next; }; @@ -183,7 +189,9 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor) } int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, - struct map *map, struct symbol *sym); + struct map *map, struct symbol *sym, + bool branch, struct branch_flags *flags, + int nr_loop_iter, int samples); /* Close a cursor writing session. Initialize for the reader */ static inline void callchain_cursor_commit(struct callchain_cursor *cursor) @@ -208,6 +216,9 @@ static inline void callchain_cursor_advance(struct callchain_cursor *cursor) cursor->pos++; } +int callchain_cursor__copy(struct callchain_cursor *dst, + struct callchain_cursor *src); + struct option; struct hist_entry; @@ -261,8 +272,16 @@ char *callchain_node__scnprintf_value(struct callchain_node *node, int callchain_node__fprintf_value(struct callchain_node *node, FILE *fp, u64 total); +int callchain_list_counts__printf_value(struct callchain_node *node, + struct callchain_list *clist, + FILE *fp, char *bf, int bfsize); + void free_callchain(struct callchain_root *root); void decay_callchain(struct callchain_root *root); int callchain_node__make_parent_list(struct callchain_node *node); +int callchain_branch_counts(struct callchain_root *root, + u64 *branch_count, u64 *predicted_count, + u64 *abort_count, u64 *cycles_count); + #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 18dae745034f..3d906dbbef74 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -594,6 +594,19 @@ static int collect_config(const char *var, const char *value, goto out_free; } + /* perf_config_set can contain both user and system config items. + * So we should know where each value is from. + * The classification would be needed when a particular config file + * is overwrited by setting feature i.e. set_config(). + */ + if (strcmp(config_file_name, perf_etc_perfconfig()) == 0) { + section->from_system_config = true; + item->from_system_config = true; + } else { + section->from_system_config = false; + item->from_system_config = false; + } + ret = set_value(item, value); return ret; @@ -602,6 +615,13 @@ out_free: return -1; } +int perf_config_set__collect(struct perf_config_set *set, const char *file_name, + const char *var, const char *value) +{ + config_file_name = file_name; + return collect_config(var, value, set); +} + static int perf_config_set__init(struct perf_config_set *set) { int ret = -1; diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index 6f813d46045e..1a59a6b43f8b 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -7,12 +7,14 @@ struct perf_config_item { char *name; char *value; + bool from_system_config; struct list_head node; }; struct perf_config_section { char *name; struct list_head items; + bool from_system_config; struct list_head node; }; @@ -33,6 +35,8 @@ const char *perf_etc_perfconfig(void); struct perf_config_set *perf_config_set__new(void); void perf_config_set__delete(struct perf_config_set *set); +int perf_config_set__collect(struct perf_config_set *set, const char *file_name, + const char *var, const char *value); void perf_config__init(void); void perf_config__exit(void); void perf_config__refresh(void); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 8d363d5e65a2..c735c53a26f8 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -177,6 +177,8 @@ enum { PERF_IP_FLAG_TRACE_BEGIN |\ PERF_IP_FLAG_TRACE_END) +#define MAX_INSN 16 + struct perf_sample { u64 ip; u32 pid, tid; @@ -193,6 +195,7 @@ struct perf_sample { u32 flags; u16 insn_len; u8 cpumode; + char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; struct branch_stack *branch_stack; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 8bc271141d9d..b2365a63db45 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -28,6 +28,7 @@ #include "debug.h" #include "trace-event.h" #include "stat.h" +#include "util/parse-branch-options.h" static struct { bool sample_id_all; @@ -708,6 +709,14 @@ static void apply_config_terms(struct perf_evsel *evsel, case PERF_EVSEL__CONFIG_TERM_CALLGRAPH: callgraph_buf = term->val.callgraph; break; + case PERF_EVSEL__CONFIG_TERM_BRANCH: + if (term->val.branch && strcmp(term->val.branch, "no")) { + perf_evsel__set_sample_bit(evsel, BRANCH_STACK); + parse_branch_str(term->val.branch, + &attr->branch_sample_type); + } else + perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); + break; case PERF_EVSEL__CONFIG_TERM_STACK_USER: dump_size = term->val.stack_user; break; @@ -1472,7 +1481,7 @@ retry_sample_id: group_fd = get_group_fd(evsel, cpu, thread); retry_open: - pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx\n", + pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", pid, cpus->map[cpu], group_fd, flags); FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, @@ -1481,11 +1490,13 @@ retry_open: group_fd, flags); if (FD(evsel, cpu, thread) < 0) { err = -errno; - pr_debug2("sys_perf_event_open failed, error %d\n", + pr_debug2("\nsys_perf_event_open failed, error %d\n", err); goto try_fallback; } + pr_debug2(" = %d\n", FD(evsel, cpu, thread)); + if (evsel->bpf_fd >= 0) { int evt_fd = FD(evsel, cpu, thread); int bpf_fd = evsel->bpf_fd; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index b1503b0ecdff..6abb89cd27f9 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -47,6 +47,7 @@ enum { PERF_EVSEL__CONFIG_TERM_MAX_STACK, PERF_EVSEL__CONFIG_TERM_OVERWRITE, PERF_EVSEL__CONFIG_TERM_DRV_CFG, + PERF_EVSEL__CONFIG_TERM_BRANCH, PERF_EVSEL__CONFIG_TERM_MAX, }; @@ -63,6 +64,7 @@ struct perf_evsel_config_term { int max_stack; bool inherit; bool overwrite; + char *branch; } val; }; @@ -389,6 +391,8 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, #define EVSEL__PRINT_ONELINE (1<<4) #define EVSEL__PRINT_SRCLINE (1<<5) #define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6) +#define EVSEL__PRINT_CALLCHAIN_ARROW (1<<7) +#define EVSEL__PRINT_SKIP_IGNORED (1<<8) struct callchain_cursor; diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 662a0a6182e7..6b2925542c0a 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -108,7 +108,10 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, int print_oneline = print_opts & EVSEL__PRINT_ONELINE; int print_srcline = print_opts & EVSEL__PRINT_SRCLINE; int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR; + int print_arrow = print_opts & EVSEL__PRINT_CALLCHAIN_ARROW; + int print_skip_ignored = print_opts & EVSEL__PRINT_SKIP_IGNORED; char s = print_oneline ? ' ' : '\t'; + bool first = true; if (sample->callchain) { struct addr_location node_al; @@ -122,8 +125,14 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (!node) break; + if (node->sym && node->sym->ignore && print_skip_ignored) + goto next; + printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " "); + if (print_arrow && !first) + printed += fprintf(fp, " <-"); + if (print_ip) printed += fprintf(fp, "%c%16" PRIx64, s, node->ip); @@ -137,7 +146,8 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (print_symoffset) { printed += __symbol__fprintf_symname_offs(node->sym, &node_al, - print_unknown_as_addr, fp); + print_unknown_as_addr, + true, fp); } else { printed += __symbol__fprintf_symname(node->sym, &node_al, print_unknown_as_addr, fp); @@ -156,6 +166,16 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (!print_oneline) printed += fprintf(fp, "\n"); + if (symbol_conf.bt_stop_list && + node->sym && + node->sym->name && + strlist__has_entry(symbol_conf.bt_stop_list, + node->sym->name)) { + break; + } + + first = false; +next: callchain_cursor_advance(cursor); } } @@ -188,7 +208,8 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, printed += fprintf(fp, " "); if (print_symoffset) { printed += __symbol__fprintf_symname_offs(al->sym, al, - print_unknown_as_addr, fp); + print_unknown_as_addr, + true, fp); } else { printed += __symbol__fprintf_symname(al->sym, al, print_unknown_as_addr, fp); diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index c1ef805c6a8f..c540d47583e7 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -19,12 +19,18 @@ #include <limits.h> #include <fcntl.h> #include <err.h> +#ifdef HAVE_DWARF_SUPPORT #include <dwarf.h> +#endif #include "perf.h" #include "genelf.h" #include "../util/jitdump.h" +#ifndef NT_GNU_BUILD_ID +#define NT_GNU_BUILD_ID 3 +#endif + #define JVMTI #define BUILD_ID_URANDOM /* different uuid for each run */ @@ -67,6 +73,8 @@ static char shd_string_table[] = { '.', 'd', 'e', 'b', 'u', 'g', '_', 'l', 'i', 'n', 'e', 0, /* 52 */ '.', 'd', 'e', 'b', 'u', 'g', '_', 'i', 'n', 'f', 'o', 0, /* 64 */ '.', 'd', 'e', 'b', 'u', 'g', '_', 'a', 'b', 'b', 'r', 'e', 'v', 0, /* 76 */ + '.', 'e', 'h', '_', 'f', 'r', 'a', 'm', 'e', '_', 'h', 'd', 'r', 0, /* 90 */ + '.', 'e', 'h', '_', 'f', 'r', 'a', 'm', 'e', 0, /* 104 */ }; static struct buildid_note { @@ -147,6 +155,86 @@ gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *cod } #endif +static int +jit_add_eh_frame_info(Elf *e, void* unwinding, uint64_t unwinding_header_size, + uint64_t unwinding_size, uint64_t base_offset) +{ + Elf_Data *d; + Elf_Scn *scn; + Elf_Shdr *shdr; + uint64_t unwinding_table_size = unwinding_size - unwinding_header_size; + + /* + * setup eh_frame section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + return -1; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + return -1; + } + + d->d_align = 8; + d->d_off = 0LL; + d->d_buf = unwinding; + d->d_type = ELF_T_BYTE; + d->d_size = unwinding_table_size; + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + return -1; + } + + shdr->sh_name = 104; + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = base_offset; + shdr->sh_flags = SHF_ALLOC; + shdr->sh_entsize = 0; + + /* + * setup eh_frame_hdr section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + return -1; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + return -1; + } + + d->d_align = 4; + d->d_off = 0LL; + d->d_buf = unwinding + unwinding_table_size; + d->d_type = ELF_T_BYTE; + d->d_size = unwinding_header_size; + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + return -1; + } + + shdr->sh_name = 90; + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = base_offset + unwinding_table_size; + shdr->sh_flags = SHF_ALLOC; + shdr->sh_entsize = 0; + + return 0; +} + /* * fd: file descriptor open for writing for the output file * load_addr: code load address (could be zero, just used for buildid) @@ -157,13 +245,15 @@ gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *cod int jit_write_elf(int fd, uint64_t load_addr, const char *sym, const void *code, int csize, - void *debug, int nr_debug_entries) + void *debug __maybe_unused, int nr_debug_entries __maybe_unused, + void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size) { Elf *e; Elf_Data *d; Elf_Scn *scn; Elf_Ehdr *ehdr; Elf_Shdr *shdr; + uint64_t eh_frame_base_offset; char *strsym = NULL; int symlen; int retval = -1; @@ -194,7 +284,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, ehdr->e_type = ET_DYN; ehdr->e_entry = GEN_ELF_TEXT_OFFSET; ehdr->e_version = EV_CURRENT; - ehdr->e_shstrndx= 2; /* shdr index for section name */ + ehdr->e_shstrndx= unwinding ? 4 : 2; /* shdr index for section name */ /* * setup text section @@ -231,6 +321,18 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_entsize = 0; /* + * Setup .eh_frame_hdr and .eh_frame + */ + if (unwinding) { + eh_frame_base_offset = ALIGN_8(GEN_ELF_TEXT_OFFSET + csize); + retval = jit_add_eh_frame_info(e, unwinding, + unwinding_header_size, unwinding_size, + eh_frame_base_offset); + if (retval) + goto error; + } + + /* * setup section headers string table */ scn = elf_newscn(e); @@ -298,7 +400,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_type = SHT_SYMTAB; shdr->sh_flags = 0; shdr->sh_entsize = sizeof(Elf_Sym); - shdr->sh_link = 4; /* index of .strtab section */ + shdr->sh_link = unwinding ? 6 : 4; /* index of .strtab section */ /* * setup symbols string table @@ -386,11 +488,14 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_size = sizeof(bnote); shdr->sh_entsize = 0; +#ifdef HAVE_DWARF_SUPPORT if (debug && nr_debug_entries) { retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries); if (retval) goto error; - } else { + } else +#endif + { if (elf_update(e, ELF_C_WRITE) < 0) { warnx("elf_update 4 failed"); goto error; diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index 2fbeb59c4bdd..2424bd9862a3 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -3,9 +3,12 @@ /* genelf.c */ int jit_write_elf(int fd, uint64_t code_addr, const char *sym, - const void *code, int csize, void *debug, int nr_debug_entries); + const void *code, int csize, void *debug, int nr_debug_entries, + void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size); +#ifdef HAVE_DWARF_SUPPORT /* genelf_debug.c */ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries); +#endif #if defined(__arm__) #define GEN_ELF_ARCH EM_ARM diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 2f3eded54b0c..d89c9c7ef4e5 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2250,11 +2250,28 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) struct header_print_data hd; struct perf_header *header = &session->header; int fd = perf_data_file__fd(session->file); + struct stat st; + int ret, bit; + hd.fp = fp; hd.full = full; + ret = fstat(fd, &st); + if (ret == -1) + return -1; + + fprintf(fp, "# captured on: %s", ctime(&st.st_ctime)); + perf_header__process_sections(header, fd, &hd, perf_file_section__fprintf_info); + + fprintf(fp, "# missing features: "); + for_each_clear_bit(bit, header->adds_features, HEADER_LAST_FEATURE) { + if (bit) + fprintf(fp, "%s ", feat_ops[bit].name); + } + + fprintf(fp, "\n"); return 0; } @@ -2273,7 +2290,7 @@ static int do_write_feat(int fd, struct perf_header *h, int type, err = feat_ops[type].write(fd, h, evlist); if (err < 0) { - pr_debug("failed to write feature %d\n", type); + pr_debug("failed to write feature %s\n", feat_ops[type].name); /* undo anything written */ lseek(fd, (*p)->offset, SEEK_SET); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b02992efb513..6770a9645609 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1195,6 +1195,7 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he, case HIST_FILTER__GUEST: case HIST_FILTER__HOST: case HIST_FILTER__SOCKET: + case HIST_FILTER__C2C: default: return; } @@ -1600,18 +1601,18 @@ static void hists__hierarchy_output_resort(struct hists *hists, if (prog) ui_progress__update(prog, 1); + hists->nr_entries++; + if (!he->filtered) { + hists->nr_non_filtered_entries++; + hists__calc_col_len(hists, he); + } + if (!he->leaf) { hists__hierarchy_output_resort(hists, prog, &he->hroot_in, &he->hroot_out, min_callchain_hits, use_callchain); - hists->nr_entries++; - if (!he->filtered) { - hists->nr_non_filtered_entries++; - hists__calc_col_len(hists, he); - } - continue; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 9928fed8bc59..d4b6514eeef5 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -22,6 +22,7 @@ enum hist_filter { HIST_FILTER__GUEST, HIST_FILTER__HOST, HIST_FILTER__SOCKET, + HIST_FILTER__C2C, }; enum hist_column { diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index f545ec1e758a..6c2eb5da4afc 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -295,6 +295,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, sample.cpu = btsq->cpu; sample.flags = btsq->sample_flags; sample.insn_len = btsq->intel_pt_insn.length; + memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ); if (bts->synth_opts.inject) { event.sample.header.size = bts->branches_event_size; @@ -319,15 +320,12 @@ static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip) struct machine *machine = btsq->bts->machine; struct thread *thread; struct addr_location al; - unsigned char buf[1024]; - size_t bufsz; + unsigned char buf[INTEL_PT_INSN_BUF_SZ]; ssize_t len; int x86_64; uint8_t cpumode; int err = -1; - bufsz = intel_pt_insn_max_size(); - if (machine__kernel_ip(machine, ip)) cpumode = PERF_RECORD_MISC_KERNEL; else @@ -341,7 +339,8 @@ static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip) if (!al.map || !al.map->dso) goto out_put; - len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, bufsz); + len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, + INTEL_PT_INSN_BUF_SZ); if (len <= 0) goto out_put; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 16c06d3ae577..e4e7dc781d21 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -980,6 +980,8 @@ out: out_no_progress: decoder->state.insn_op = intel_pt_insn->op; decoder->state.insn_len = intel_pt_insn->length; + memcpy(decoder->state.insn, intel_pt_insn->buf, + INTEL_PT_INSN_BUF_SZ); if (decoder->tx_flags & INTEL_PT_IN_TX) decoder->state.flags |= INTEL_PT_IN_TX; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 89399985fa4d..e90619a43c0c 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -66,6 +66,7 @@ struct intel_pt_state { uint32_t flags; enum intel_pt_insn_op insn_op; int insn_len; + char insn[INTEL_PT_INSN_BUF_SZ]; }; struct intel_pt_insn; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index d23138c06665..7913363bde5c 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -27,6 +27,10 @@ #include "intel-pt-insn-decoder.h" +#if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN +#error Instruction buffer size too small +#endif + /* Based on branch_type() from perf_event_intel_lbr.c */ static void intel_pt_insn_decoder(struct insn *insn, struct intel_pt_insn *intel_pt_insn) @@ -166,10 +170,10 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, if (!insn_complete(&insn) || insn.length > len) return -1; intel_pt_insn_decoder(&insn, intel_pt_insn); - if (insn.length < INTEL_PT_INSN_DBG_BUF_SZ) + if (insn.length < INTEL_PT_INSN_BUF_SZ) memcpy(intel_pt_insn->buf, buf, insn.length); else - memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_DBG_BUF_SZ); + memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_BUF_SZ); return 0; } @@ -211,11 +215,6 @@ int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf, return 0; } -size_t intel_pt_insn_max_size(void) -{ - return MAX_INSN_SIZE; -} - int intel_pt_insn_type(enum intel_pt_insn_op op) { switch (op) { diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h index b0adbf37323e..37ec5627ae9b 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h @@ -20,7 +20,7 @@ #include <stdint.h> #define INTEL_PT_INSN_DESC_MAX 32 -#define INTEL_PT_INSN_DBG_BUF_SZ 16 +#define INTEL_PT_INSN_BUF_SZ 16 enum intel_pt_insn_op { INTEL_PT_OP_OTHER, @@ -47,7 +47,7 @@ struct intel_pt_insn { enum intel_pt_insn_branch branch; int length; int32_t rel; - unsigned char buf[INTEL_PT_INSN_DBG_BUF_SZ]; + unsigned char buf[INTEL_PT_INSN_BUF_SZ]; }; int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, @@ -58,8 +58,6 @@ const char *intel_pt_insn_name(enum intel_pt_insn_op op); int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf, size_t buf_len); -size_t intel_pt_insn_max_size(void); - int intel_pt_insn_type(enum intel_pt_insn_op op); #endif diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c index 319bef33a64b..e02bc7b166a0 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -119,8 +119,8 @@ void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip) if (intel_pt_log_open()) return; - if (len > INTEL_PT_INSN_DBG_BUF_SZ) - len = INTEL_PT_INSN_DBG_BUF_SZ; + if (len > INTEL_PT_INSN_BUF_SZ) + len = INTEL_PT_INSN_BUF_SZ; intel_pt_print_data(intel_pt_insn->buf, len, ip, 8); if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0) fprintf(f, "%s\n", desc); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index dc041d4368c8..85d5eeb66c75 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -143,6 +143,7 @@ struct intel_pt_queue { u32 flags; u16 insn_len; u64 last_insn_cnt; + char insn[INTEL_PT_INSN_BUF_SZ]; }; static void intel_pt_dump(struct intel_pt *pt __maybe_unused, @@ -315,6 +316,7 @@ struct intel_pt_cache_entry { enum intel_pt_insn_branch branch; int length; int32_t rel; + char insn[INTEL_PT_INSN_BUF_SZ]; }; static int intel_pt_config_div(const char *var, const char *value, void *data) @@ -400,6 +402,7 @@ static int intel_pt_cache_add(struct dso *dso, struct machine *machine, e->branch = intel_pt_insn->branch; e->length = intel_pt_insn->length; e->rel = intel_pt_insn->rel; + memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ); err = auxtrace_cache__add(c, offset, &e->entry); if (err) @@ -428,8 +431,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, struct machine *machine = ptq->pt->machine; struct thread *thread; struct addr_location al; - unsigned char buf[1024]; - size_t bufsz; + unsigned char buf[INTEL_PT_INSN_BUF_SZ]; ssize_t len; int x86_64; u8 cpumode; @@ -437,11 +439,11 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, u64 insn_cnt = 0; bool one_map = true; + intel_pt_insn->length = 0; + if (to_ip && *ip == to_ip) goto out_no_cache; - bufsz = intel_pt_insn_max_size(); - if (*ip >= ptq->pt->kernel_start) cpumode = PERF_RECORD_MISC_KERNEL; else @@ -478,6 +480,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, intel_pt_insn->branch = e->branch; intel_pt_insn->length = e->length; intel_pt_insn->rel = e->rel; + memcpy(intel_pt_insn->buf, e->insn, + INTEL_PT_INSN_BUF_SZ); intel_pt_log_insn_no_data(intel_pt_insn, *ip); return 0; } @@ -493,7 +497,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, while (1) { len = dso__data_read_offset(al.map->dso, machine, - offset, buf, bufsz); + offset, buf, + INTEL_PT_INSN_BUF_SZ); if (len <= 0) return -EINVAL; @@ -900,6 +905,7 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq) if (ptq->state->flags & INTEL_PT_IN_TX) ptq->flags |= PERF_IP_FLAG_IN_TX; ptq->insn_len = ptq->state->insn_len; + memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ); } } @@ -1080,6 +1086,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.cpu = ptq->cpu; sample.flags = ptq->flags; sample.insn_len = ptq->insn_len; + memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); /* * perf report cannot handle events without a branch stack when using @@ -1141,6 +1148,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) sample.cpu = ptq->cpu; sample.flags = ptq->flags; sample.insn_len = ptq->insn_len; + memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); ptq->last_insn_cnt = ptq->state->tot_insn_cnt; @@ -1203,6 +1211,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) sample.cpu = ptq->cpu; sample.flags = ptq->flags; sample.insn_len = ptq->insn_len; + memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); if (pt->synth_opts.callchain) { thread_stack__sample(ptq->thread, ptq->chain, diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 95f0884aae02..c9a941ef0f6d 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -37,6 +37,10 @@ struct jit_buf_desc { bool needs_bswap; /* handles cross-endianess */ bool use_arch_timestamp; void *debug_data; + void *unwinding_data; + uint64_t unwinding_size; + uint64_t unwinding_mapped_size; + uint64_t eh_frame_hdr_size; size_t nr_debug_entries; uint32_t code_load_count; u64 bytes_written; @@ -68,7 +72,10 @@ jit_emit_elf(char *filename, const void *code, int csize, void *debug, - int nr_debug_entries) + int nr_debug_entries, + void *unwinding, + uint32_t unwinding_header_size, + uint32_t unwinding_size) { int ret, fd; @@ -81,7 +88,8 @@ jit_emit_elf(char *filename, return -1; } - ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries); + ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries, + unwinding, unwinding_header_size, unwinding_size); close(fd); @@ -172,6 +180,12 @@ jit_open(struct jit_buf_desc *jd, const char *name) header.elf_mach, jd->use_arch_timestamp); + if (header.version > JITHEADER_VERSION) { + pr_err("wrong jitdump version %u, expected " STR(JITHEADER_VERSION), + header.version); + goto error; + } + if (header.flags & JITDUMP_FLAGS_RESERVED) { pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n", (unsigned long long)header.flags & JITDUMP_FLAGS_RESERVED); @@ -263,8 +277,7 @@ jit_get_next_entry(struct jit_buf_desc *jd) return NULL; if (id >= JIT_CODE_MAX) { - pr_warning("next_entry: unknown prefix %d, skipping\n", id); - return NULL; + pr_warning("next_entry: unknown record type %d, skipping\n", id); } if (bs > jd->bufsize) { void *n; @@ -296,6 +309,13 @@ jit_get_next_entry(struct jit_buf_desc *jd) } } break; + case JIT_CODE_UNWINDING_INFO: + if (jd->needs_bswap) { + jr->unwinding.unwinding_size = bswap_64(jr->unwinding.unwinding_size); + jr->unwinding.eh_frame_hdr_size = bswap_64(jr->unwinding.eh_frame_hdr_size); + jr->unwinding.mapped_size = bswap_64(jr->unwinding.mapped_size); + } + break; case JIT_CODE_CLOSE: break; case JIT_CODE_LOAD: @@ -322,7 +342,8 @@ jit_get_next_entry(struct jit_buf_desc *jd) break; case JIT_CODE_MAX: default: - return NULL; + /* skip unknown record (we have read them) */ + break; } return jr; } @@ -370,7 +391,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) u16 idr_size; const char *sym; uint32_t count; - int ret, csize; + int ret, csize, usize; pid_t pid, tid; struct { u32 pid, tid; @@ -380,6 +401,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) pid = jr->load.pid; tid = jr->load.tid; csize = jr->load.code_size; + usize = jd->unwinding_mapped_size; addr = jr->load.code_addr; sym = (void *)((unsigned long)jr + sizeof(jr->load)); code = (unsigned long)jr + jr->load.p.total_size - csize; @@ -400,7 +422,8 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) size = PERF_ALIGN(size, sizeof(u64)); uaddr = (uintptr_t)code; - ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries); + ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries, + jd->unwinding_data, jd->eh_frame_hdr_size, jd->unwinding_size); if (jd->debug_data && jd->nr_debug_entries) { free(jd->debug_data); @@ -408,6 +431,14 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) jd->nr_debug_entries = 0; } + if (jd->unwinding_data && jd->eh_frame_hdr_size) { + free(jd->unwinding_data); + jd->unwinding_data = NULL; + jd->eh_frame_hdr_size = 0; + jd->unwinding_mapped_size = 0; + jd->unwinding_size = 0; + } + if (ret) { free(event); return -1; @@ -422,7 +453,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET; event->mmap2.start = addr; - event->mmap2.len = csize; + event->mmap2.len = usize ? ALIGN_8(csize) + usize : csize; event->mmap2.pid = pid; event->mmap2.tid = tid; event->mmap2.ino = st.st_ino; @@ -473,6 +504,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) char *filename; size_t size; struct stat st; + int usize; u16 idr_size; int ret; pid_t pid, tid; @@ -483,6 +515,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) pid = jr->move.pid; tid = jr->move.tid; + usize = jd->unwinding_mapped_size; idr_size = jd->machine->id_hdr_size; /* @@ -511,7 +544,8 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) (sizeof(event->mmap2.filename) - size) + idr_size); event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET; event->mmap2.start = jr->move.new_code_addr; - event->mmap2.len = jr->move.code_size; + event->mmap2.len = usize ? ALIGN_8(jr->move.code_size) + usize + : jr->move.code_size; event->mmap2.pid = pid; event->mmap2.tid = tid; event->mmap2.ino = st.st_ino; @@ -578,10 +612,35 @@ static int jit_repipe_debug_info(struct jit_buf_desc *jd, union jr_entry *jr) } static int +jit_repipe_unwinding_info(struct jit_buf_desc *jd, union jr_entry *jr) +{ + void *unwinding_data; + uint32_t unwinding_data_size; + + if (!(jd && jr)) + return -1; + + unwinding_data_size = jr->prefix.total_size - sizeof(jr->unwinding); + unwinding_data = malloc(unwinding_data_size); + if (!unwinding_data) + return -1; + + memcpy(unwinding_data, &jr->unwinding.unwinding_data, + unwinding_data_size); + + jd->eh_frame_hdr_size = jr->unwinding.eh_frame_hdr_size; + jd->unwinding_size = jr->unwinding.unwinding_size; + jd->unwinding_mapped_size = jr->unwinding.mapped_size; + jd->unwinding_data = unwinding_data; + + return 0; +} + +static int jit_process_dump(struct jit_buf_desc *jd) { union jr_entry *jr; - int ret; + int ret = 0; while ((jr = jit_get_next_entry(jd))) { switch(jr->prefix.id) { @@ -594,6 +653,9 @@ jit_process_dump(struct jit_buf_desc *jd) case JIT_CODE_DEBUG_INFO: ret = jit_repipe_debug_info(jd, jr); break; + case JIT_CODE_UNWINDING_INFO: + ret = jit_repipe_unwinding_info(jd, jr); + break; default: ret = 0; continue; diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h index bcacd20d0c1c..c6b9b67f43bf 100644 --- a/tools/perf/util/jitdump.h +++ b/tools/perf/util/jitdump.h @@ -19,6 +19,7 @@ #define JITHEADER_MAGIC_SW 0x4454694A #define PADDING_8ALIGNED(x) ((((x) + 7) & 7) ^ 7) +#define ALIGN_8(x) (((x) + 7) & (~7)) #define JITHEADER_VERSION 1 @@ -48,6 +49,7 @@ enum jit_record_type { JIT_CODE_MOVE = 1, JIT_CODE_DEBUG_INFO = 2, JIT_CODE_CLOSE = 3, + JIT_CODE_UNWINDING_INFO = 4, JIT_CODE_MAX, }; @@ -101,12 +103,22 @@ struct jr_code_debug_info { struct debug_entry entries[0]; }; +struct jr_code_unwinding_info { + struct jr_prefix p; + + uint64_t unwinding_size; + uint64_t eh_frame_hdr_size; + uint64_t mapped_size; + const char unwinding_data[0]; +}; + union jr_entry { struct jr_code_debug_info info; struct jr_code_close close; struct jr_code_load load; struct jr_code_move move; struct jr_prefix prefix; + struct jr_code_unwinding_info unwinding; }; static inline struct debug_entry * diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index bf7216b8731d..b23ff44cf214 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -7,6 +7,7 @@ #include <limits.h> #include <stdio.h> #include <stdlib.h> +#include <linux/err.h> #include "debug.h" #include "llvm-utils.h" #include "config.h" @@ -282,9 +283,10 @@ static const char *kinc_fetch_script = "rm -rf $TMPDIR\n" "exit $RET\n"; -static inline void -get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts) +void llvm__get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts) { + static char *saved_kbuild_dir; + static char *saved_kbuild_include_opts; int err; if (!kbuild_dir || !kbuild_include_opts) @@ -293,10 +295,28 @@ get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts) *kbuild_dir = NULL; *kbuild_include_opts = NULL; + if (saved_kbuild_dir && saved_kbuild_include_opts && + !IS_ERR(saved_kbuild_dir) && !IS_ERR(saved_kbuild_include_opts)) { + *kbuild_dir = strdup(saved_kbuild_dir); + *kbuild_include_opts = strdup(saved_kbuild_include_opts); + + if (*kbuild_dir && *kbuild_include_opts) + return; + + zfree(kbuild_dir); + zfree(kbuild_include_opts); + /* + * Don't fall through: it may breaks saved_kbuild_dir and + * saved_kbuild_include_opts if detect them again when + * memory is low. + */ + return; + } + if (llvm_param.kbuild_dir && !llvm_param.kbuild_dir[0]) { pr_debug("[llvm.kbuild-dir] is set to \"\" deliberately.\n"); pr_debug("Skip kbuild options detection.\n"); - return; + goto errout; } err = detect_kbuild_dir(kbuild_dir); @@ -306,7 +326,7 @@ get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts) "Hint:\tSet correct kbuild directory using 'kbuild-dir' option in [llvm]\n" " \tsection of ~/.perfconfig or set it to \"\" to suppress kbuild\n" " \tdetection.\n\n"); - return; + goto errout; } pr_debug("Kernel build dir is set to %s\n", *kbuild_dir); @@ -325,21 +345,50 @@ get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts) free(*kbuild_dir); *kbuild_dir = NULL; - return; + goto errout; } pr_debug("include option is set to %s\n", *kbuild_include_opts); + + saved_kbuild_dir = strdup(*kbuild_dir); + saved_kbuild_include_opts = strdup(*kbuild_include_opts); + + if (!saved_kbuild_dir || !saved_kbuild_include_opts) { + zfree(&saved_kbuild_dir); + zfree(&saved_kbuild_include_opts); + } + return; +errout: + saved_kbuild_dir = ERR_PTR(-EINVAL); + saved_kbuild_include_opts = ERR_PTR(-EINVAL); } -static void -dump_obj(const char *path, void *obj_buf, size_t size) +int llvm__get_nr_cpus(void) +{ + static int nr_cpus_avail = 0; + char serr[STRERR_BUFSIZE]; + + if (nr_cpus_avail > 0) + return nr_cpus_avail; + + nr_cpus_avail = sysconf(_SC_NPROCESSORS_CONF); + if (nr_cpus_avail <= 0) { + pr_err( +"WARNING:\tunable to get available CPUs in this system: %s\n" +" \tUse 128 instead.\n", str_error_r(errno, serr, sizeof(serr))); + nr_cpus_avail = 128; + } + return nr_cpus_avail; +} + +void llvm__dump_obj(const char *path, void *obj_buf, size_t size) { char *obj_path = strdup(path); FILE *fp; char *p; if (!obj_path) { - pr_warning("WARNING: No enough memory, skip object dumping\n"); + pr_warning("WARNING: Not enough memory, skip object dumping\n"); return; } @@ -406,15 +455,9 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, * This is an optional work. Even it fail we can continue our * work. Needn't to check error return. */ - get_kbuild_opts(&kbuild_dir, &kbuild_include_opts); + llvm__get_kbuild_opts(&kbuild_dir, &kbuild_include_opts); - nr_cpus_avail = sysconf(_SC_NPROCESSORS_CONF); - if (nr_cpus_avail <= 0) { - pr_err( -"WARNING:\tunable to get available CPUs in this system: %s\n" -" \tUse 128 instead.\n", str_error_r(errno, serr, sizeof(serr))); - nr_cpus_avail = 128; - } + nr_cpus_avail = llvm__get_nr_cpus(); snprintf(nr_cpus_avail_str, sizeof(nr_cpus_avail_str), "%d", nr_cpus_avail); @@ -453,9 +496,6 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, free(kbuild_dir); free(kbuild_include_opts); - if (llvm_param.dump_obj) - dump_obj(path, obj_buf, obj_buf_sz); - if (!p_obj_buf) free(obj_buf); else diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h index 9f501cef06a1..c87a2a92a88f 100644 --- a/tools/perf/util/llvm-utils.h +++ b/tools/perf/util/llvm-utils.h @@ -50,4 +50,10 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, size_t *p_obj_buf_sz); /* This function is for test__llvm() use only */ int llvm__search_clang(void); + +/* Following functions are reused by builtin clang support */ +void llvm__get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts); +int llvm__get_nr_cpus(void); + +void llvm__dump_obj(const char *path, void *obj_buf, size_t size); #endif diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index df85b9efd80f..9b33bef54581 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1616,7 +1616,11 @@ static int add_callchain_ip(struct thread *thread, struct symbol **parent, struct addr_location *root_al, u8 *cpumode, - u64 ip) + u64 ip, + bool branch, + struct branch_flags *flags, + int nr_loop_iter, + int samples) { struct addr_location al; @@ -1668,7 +1672,8 @@ static int add_callchain_ip(struct thread *thread, if (symbol_conf.hide_unresolved && al.sym == NULL) return 0; - return callchain_cursor_append(cursor, al.addr, al.map, al.sym); + return callchain_cursor_append(cursor, al.addr, al.map, al.sym, + branch, flags, nr_loop_iter, samples); } struct branch_info *sample__resolve_bstack(struct perf_sample *sample, @@ -1757,7 +1762,9 @@ static int resolve_lbr_callchain_sample(struct thread *thread, /* LBR only affects the user callchain */ if (i != chain_nr) { struct branch_stack *lbr_stack = sample->branch_stack; - int lbr_nr = lbr_stack->nr, j; + int lbr_nr = lbr_stack->nr, j, k; + bool branch; + struct branch_flags *flags; /* * LBR callstack can only get user call chain. * The mix_chain_nr is kernel call chain @@ -1772,23 +1779,41 @@ static int resolve_lbr_callchain_sample(struct thread *thread, for (j = 0; j < mix_chain_nr; j++) { int err; + branch = false; + flags = NULL; + if (callchain_param.order == ORDER_CALLEE) { if (j < i + 1) ip = chain->ips[j]; - else if (j > i + 1) - ip = lbr_stack->entries[j - i - 2].from; - else + else if (j > i + 1) { + k = j - i - 2; + ip = lbr_stack->entries[k].from; + branch = true; + flags = &lbr_stack->entries[k].flags; + } else { ip = lbr_stack->entries[0].to; + branch = true; + flags = &lbr_stack->entries[0].flags; + } } else { - if (j < lbr_nr) - ip = lbr_stack->entries[lbr_nr - j - 1].from; + if (j < lbr_nr) { + k = lbr_nr - j - 1; + ip = lbr_stack->entries[k].from; + branch = true; + flags = &lbr_stack->entries[k].flags; + } else if (j > lbr_nr) ip = chain->ips[i + 1 - (j - lbr_nr)]; - else + else { ip = lbr_stack->entries[0].to; + branch = true; + flags = &lbr_stack->entries[0].flags; + } } - err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip); + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + branch, flags, 0, 0); if (err) return (err < 0) ? err : 0; } @@ -1813,6 +1838,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, int i, j, err, nr_entries; int skip_idx = -1; int first_call = 0; + int nr_loop_iter; if (perf_evsel__has_branch_callstack(evsel)) { err = resolve_lbr_callchain_sample(thread, cursor, sample, parent, @@ -1868,14 +1894,37 @@ static int thread__resolve_callchain_sample(struct thread *thread, be[i] = branch->entries[branch->nr - i - 1]; } + nr_loop_iter = nr; nr = remove_loops(be, nr); + /* + * Get the number of iterations. + * It's only approximation, but good enough in practice. + */ + if (nr_loop_iter > nr) + nr_loop_iter = nr_loop_iter - nr + 1; + else + nr_loop_iter = 0; + for (i = 0; i < nr; i++) { - err = add_callchain_ip(thread, cursor, parent, root_al, - NULL, be[i].to); + if (i == nr - 1) + err = add_callchain_ip(thread, cursor, parent, + root_al, + NULL, be[i].to, + true, &be[i].flags, + nr_loop_iter, 1); + else + err = add_callchain_ip(thread, cursor, parent, + root_al, + NULL, be[i].to, + true, &be[i].flags, + 0, 0); + if (!err) err = add_callchain_ip(thread, cursor, parent, root_al, - NULL, be[i].from); + NULL, be[i].from, + true, &be[i].flags, + 0, 0); if (err == -EINVAL) break; if (err) @@ -1903,7 +1952,9 @@ check_calls: if (ip < PERF_CONTEXT_MAX) ++nr_entries; - err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip); + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + false, NULL, 0, 0); if (err) return (err < 0) ? err : 0; @@ -1919,7 +1970,8 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) if (symbol_conf.hide_unresolved && entry->sym == NULL) return 0; return callchain_cursor_append(cursor, entry->ip, - entry->map, entry->sym); + entry->map, entry->sym, + false, NULL, 0, 0); } static int thread__resolve_callchain_unwind(struct thread *thread, diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index c662fef95d14..4f9a71c63026 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -682,9 +682,16 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp continue; if (verbose >= 2) { - fputs("overlapping maps:\n", fp); - map__fprintf(map, fp); - map__fprintf(pos, fp); + + if (use_browser) { + pr_warning("overlapping maps in %s " + "(disable tui for more info)\n", + map->dso->name); + } else { + fputs("overlapping maps:\n", fp); + map__fprintf(map, fp); + map__fprintf(pos, fp); + } } rb_erase_init(&pos->rb_node, root); @@ -702,7 +709,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp before->end = map->start; __map_groups__insert(pos->groups, before); - if (verbose >= 2) + if (verbose >= 2 && !use_browser) map__fprintf(before, fp); map__put(before); } @@ -717,7 +724,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp after->start = map->end; __map_groups__insert(pos->groups, after); - if (verbose >= 2) + if (verbose >= 2 && !use_browser) map__fprintf(after, fp); map__put(after); } diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index bbc368e7d1e4..1d4ab53c60ca 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -9,6 +9,7 @@ #include "mem-events.h" #include "debug.h" #include "symbol.h" +#include "sort.h" unsigned int perf_mem_events__loads_ldlat = 30; @@ -268,3 +269,138 @@ int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_in return i; } + +int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) +{ + union perf_mem_data_src *data_src = &mi->data_src; + u64 daddr = mi->daddr.addr; + u64 op = data_src->mem_op; + u64 lvl = data_src->mem_lvl; + u64 snoop = data_src->mem_snoop; + u64 lock = data_src->mem_lock; + int err = 0; + +#define HITM_INC(__f) \ +do { \ + stats->__f++; \ + stats->tot_hitm++; \ +} while (0) + +#define P(a, b) PERF_MEM_##a##_##b + + stats->nr_entries++; + + if (lock & P(LOCK, LOCKED)) stats->locks++; + + if (op & P(OP, LOAD)) { + /* load */ + stats->load++; + + if (!daddr) { + stats->ld_noadrs++; + return -1; + } + + if (lvl & P(LVL, HIT)) { + if (lvl & P(LVL, UNC)) stats->ld_uncache++; + if (lvl & P(LVL, IO)) stats->ld_io++; + if (lvl & P(LVL, LFB)) stats->ld_fbhit++; + if (lvl & P(LVL, L1 )) stats->ld_l1hit++; + if (lvl & P(LVL, L2 )) stats->ld_l2hit++; + if (lvl & P(LVL, L3 )) { + if (snoop & P(SNOOP, HITM)) + HITM_INC(lcl_hitm); + else + stats->ld_llchit++; + } + + if (lvl & P(LVL, LOC_RAM)) { + stats->lcl_dram++; + if (snoop & P(SNOOP, HIT)) + stats->ld_shared++; + else + stats->ld_excl++; + } + + if ((lvl & P(LVL, REM_RAM1)) || + (lvl & P(LVL, REM_RAM2))) { + stats->rmt_dram++; + if (snoop & P(SNOOP, HIT)) + stats->ld_shared++; + else + stats->ld_excl++; + } + } + + if ((lvl & P(LVL, REM_CCE1)) || + (lvl & P(LVL, REM_CCE2))) { + if (snoop & P(SNOOP, HIT)) + stats->rmt_hit++; + else if (snoop & P(SNOOP, HITM)) + HITM_INC(rmt_hitm); + } + + if ((lvl & P(LVL, MISS))) + stats->ld_miss++; + + } else if (op & P(OP, STORE)) { + /* store */ + stats->store++; + + if (!daddr) { + stats->st_noadrs++; + return -1; + } + + if (lvl & P(LVL, HIT)) { + if (lvl & P(LVL, UNC)) stats->st_uncache++; + if (lvl & P(LVL, L1 )) stats->st_l1hit++; + } + if (lvl & P(LVL, MISS)) + if (lvl & P(LVL, L1)) stats->st_l1miss++; + } else { + /* unparsable data_src? */ + stats->noparse++; + return -1; + } + + if (!mi->daddr.map || !mi->iaddr.map) { + stats->nomap++; + return -1; + } + +#undef P +#undef HITM_INC + return err; +} + +void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add) +{ + stats->nr_entries += add->nr_entries; + + stats->locks += add->locks; + stats->store += add->store; + stats->st_uncache += add->st_uncache; + stats->st_noadrs += add->st_noadrs; + stats->st_l1hit += add->st_l1hit; + stats->st_l1miss += add->st_l1miss; + stats->load += add->load; + stats->ld_excl += add->ld_excl; + stats->ld_shared += add->ld_shared; + stats->ld_uncache += add->ld_uncache; + stats->ld_io += add->ld_io; + stats->ld_miss += add->ld_miss; + stats->ld_noadrs += add->ld_noadrs; + stats->ld_fbhit += add->ld_fbhit; + stats->ld_l1hit += add->ld_l1hit; + stats->ld_l2hit += add->ld_l2hit; + stats->ld_llchit += add->ld_llchit; + stats->lcl_hitm += add->lcl_hitm; + stats->rmt_hitm += add->rmt_hitm; + stats->tot_hitm += add->tot_hitm; + stats->rmt_hit += add->rmt_hit; + stats->lcl_dram += add->lcl_dram; + stats->rmt_dram += add->rmt_dram; + stats->nomap += add->nomap; + stats->noparse += add->noparse; +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 7f69bf9d789d..40f72ee4f42a 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -2,6 +2,10 @@ #define __PERF_MEM_EVENTS_H #include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <linux/types.h> +#include "stat.h" struct perf_mem_event { bool record; @@ -33,4 +37,38 @@ int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info); int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info); +struct c2c_stats { + u32 nr_entries; + + u32 locks; /* count of 'lock' transactions */ + u32 store; /* count of all stores in trace */ + u32 st_uncache; /* stores to uncacheable address */ + u32 st_noadrs; /* cacheable store with no address */ + u32 st_l1hit; /* count of stores that hit L1D */ + u32 st_l1miss; /* count of stores that miss L1D */ + u32 load; /* count of all loads in trace */ + u32 ld_excl; /* exclusive loads, rmt/lcl DRAM - snp none/miss */ + u32 ld_shared; /* shared loads, rmt/lcl DRAM - snp hit */ + u32 ld_uncache; /* loads to uncacheable address */ + u32 ld_io; /* loads to io address */ + u32 ld_miss; /* loads miss */ + u32 ld_noadrs; /* cacheable load with no address */ + u32 ld_fbhit; /* count of loads hitting Fill Buffer */ + u32 ld_l1hit; /* count of loads that hit L1D */ + u32 ld_l2hit; /* count of loads that hit L2D */ + u32 ld_llchit; /* count of loads that hit LLC */ + u32 lcl_hitm; /* count of loads with local HITM */ + u32 rmt_hitm; /* count of loads with remote HITM */ + u32 tot_hitm; /* count of loads with local and remote HITM */ + u32 rmt_hit; /* count of loads with remote hit clean; */ + u32 lcl_dram; /* count of loads miss to local DRAM */ + u32 rmt_dram; /* count of loads miss to remote DRAM */ + u32 nomap; /* count of load/stores with no phys adrs */ + u32 noparse; /* count of unparsable data sources */ +}; + +struct hist_entry; +int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi); +void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add); + #endif /* __PERF_MEM_EVENTS_H */ diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index afc088dd7d20..38fd11504015 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -31,59 +31,51 @@ static const struct branch_mode branch_modes[] = { BRANCH_END }; -int -parse_branch_stack(const struct option *opt, const char *str, int unset) +int parse_branch_str(const char *str, __u64 *mode) { #define ONLY_PLM \ (PERF_SAMPLE_BRANCH_USER |\ PERF_SAMPLE_BRANCH_KERNEL |\ PERF_SAMPLE_BRANCH_HV) - uint64_t *mode = (uint64_t *)opt->value; + int ret = 0; + char *p, *s; + char *os = NULL; const struct branch_mode *br; - char *s, *os = NULL, *p; - int ret = -1; - if (unset) + if (str == NULL) { + *mode = PERF_SAMPLE_BRANCH_ANY; return 0; + } - /* - * cannot set it twice, -b + --branch-filter for instance - */ - if (*mode) + /* because str is read-only */ + s = os = strdup(str); + if (!s) return -1; - /* str may be NULL in case no arg is passed to -b */ - if (str) { - /* because str is read-only */ - s = os = strdup(str); - if (!s) - return -1; - - for (;;) { - p = strchr(s, ','); - if (p) - *p = '\0'; - - for (br = branch_modes; br->name; br++) { - if (!strcasecmp(s, br->name)) - break; - } - if (!br->name) { - ui__warning("unknown branch filter %s," - " check man page\n", s); - goto error; - } - - *mode |= br->mode; - - if (!p) - break; + for (;;) { + p = strchr(s, ','); + if (p) + *p = '\0'; - s = p + 1; + for (br = branch_modes; br->name; br++) { + if (!strcasecmp(s, br->name)) + break; + } + if (!br->name) { + ret = -1; + pr_warning("unknown branch filter %s," + " check man page\n", s); + goto error; } + + *mode |= br->mode; + + if (!p) + break; + + s = p + 1; } - ret = 0; /* default to any branch */ if ((*mode & ~ONLY_PLM) == 0) { @@ -93,3 +85,20 @@ error: free(os); return ret; } + +int +parse_branch_stack(const struct option *opt, const char *str, int unset) +{ + __u64 *mode = (__u64 *)opt->value; + + if (unset) + return 0; + + /* + * cannot set it twice, -b + --branch-filter for instance + */ + if (*mode) + return -1; + + return parse_branch_str(str, mode); +} diff --git a/tools/perf/util/parse-branch-options.h b/tools/perf/util/parse-branch-options.h index b9d9470c2e82..6086fd90eb23 100644 --- a/tools/perf/util/parse-branch-options.h +++ b/tools/perf/util/parse-branch-options.h @@ -1,5 +1,6 @@ #ifndef _PERF_PARSE_BRANCH_OPTIONS_H #define _PERF_PARSE_BRANCH_OPTIONS_H 1 -struct option; +#include <stdint.h> int parse_branch_stack(const struct option *opt, const char *str, int unset); +int parse_branch_str(const char *str, __u64 *mode); #endif /* _PERF_PARSE_BRANCH_OPTIONS_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4e778eae1510..3c876b8ba4de 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -22,6 +22,7 @@ #include "cpumap.h" #include "probe-file.h" #include "asm/bug.h" +#include "util/parse-branch-options.h" #define MAX_NAME_LEN 100 @@ -973,10 +974,13 @@ do { \ CHECK_TYPE_VAL(NUM); break; case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: - /* - * TODO uncomment when the field is available - * attr->branch_sample_type = term->val.num; - */ + CHECK_TYPE_VAL(STR); + if (strcmp(term->val.str, "no") && + parse_branch_str(term->val.str, &attr->branch_sample_type)) { + err->str = strdup("invalid branch sample type"); + err->idx = term->err_val; + return -EINVAL; + } break; case PARSE_EVENTS__TERM_TYPE_TIME: CHECK_TYPE_VAL(NUM); @@ -1119,6 +1123,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str); break; + case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: + ADD_CONFIG_TERM(BRANCH, branch, term->val.str); + break; case PARSE_EVENTS__TERM_TYPE_STACKSIZE: ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num); break; diff --git a/tools/perf/util/perf-hooks-list.h b/tools/perf/util/perf-hooks-list.h new file mode 100644 index 000000000000..2867c07ee84e --- /dev/null +++ b/tools/perf/util/perf-hooks-list.h @@ -0,0 +1,3 @@ +PERF_HOOK(record_start) +PERF_HOOK(record_end) +PERF_HOOK(test) diff --git a/tools/perf/util/perf-hooks.c b/tools/perf/util/perf-hooks.c new file mode 100644 index 000000000000..cb368306b12b --- /dev/null +++ b/tools/perf/util/perf-hooks.c @@ -0,0 +1,88 @@ +/* + * perf_hooks.c + * + * Copyright (C) 2016 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2016 Huawei Inc. + */ + +#include <errno.h> +#include <stdlib.h> +#include <setjmp.h> +#include <linux/err.h> +#include "util/util.h" +#include "util/debug.h" +#include "util/perf-hooks.h" + +static sigjmp_buf jmpbuf; +static const struct perf_hook_desc *current_perf_hook; + +void perf_hooks__invoke(const struct perf_hook_desc *desc) +{ + if (!(desc && desc->p_hook_func && *desc->p_hook_func)) + return; + + if (sigsetjmp(jmpbuf, 1)) { + pr_warning("Fatal error (SEGFAULT) in perf hook '%s'\n", + desc->hook_name); + *(current_perf_hook->p_hook_func) = NULL; + } else { + current_perf_hook = desc; + (**desc->p_hook_func)(desc->hook_ctx); + } + current_perf_hook = NULL; +} + +void perf_hooks__recover(void) +{ + if (current_perf_hook) + siglongjmp(jmpbuf, 1); +} + +#define PERF_HOOK(name) \ +perf_hook_func_t __perf_hook_func_##name = NULL; \ +struct perf_hook_desc __perf_hook_desc_##name = \ + {.hook_name = #name, \ + .p_hook_func = &__perf_hook_func_##name, \ + .hook_ctx = NULL}; +#include "perf-hooks-list.h" +#undef PERF_HOOK + +#define PERF_HOOK(name) \ + &__perf_hook_desc_##name, + +static struct perf_hook_desc *perf_hooks[] = { +#include "perf-hooks-list.h" +}; +#undef PERF_HOOK + +int perf_hooks__set_hook(const char *hook_name, + perf_hook_func_t hook_func, + void *hook_ctx) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(perf_hooks); i++) { + if (strcmp(hook_name, perf_hooks[i]->hook_name) != 0) + continue; + + if (*(perf_hooks[i]->p_hook_func)) + pr_warning("Overwrite existing hook: %s\n", hook_name); + *(perf_hooks[i]->p_hook_func) = hook_func; + perf_hooks[i]->hook_ctx = hook_ctx; + return 0; + } + return -ENOENT; +} + +perf_hook_func_t perf_hooks__get_hook(const char *hook_name) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(perf_hooks); i++) { + if (strcmp(hook_name, perf_hooks[i]->hook_name) != 0) + continue; + + return *(perf_hooks[i]->p_hook_func); + } + return ERR_PTR(-ENOENT); +} diff --git a/tools/perf/util/perf-hooks.h b/tools/perf/util/perf-hooks.h new file mode 100644 index 000000000000..838d5797bc1e --- /dev/null +++ b/tools/perf/util/perf-hooks.h @@ -0,0 +1,39 @@ +#ifndef PERF_UTIL_PERF_HOOKS_H +#define PERF_UTIL_PERF_HOOKS_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void (*perf_hook_func_t)(void *ctx); +struct perf_hook_desc { + const char * const hook_name; + perf_hook_func_t * const p_hook_func; + void *hook_ctx; +}; + +extern void perf_hooks__invoke(const struct perf_hook_desc *); +extern void perf_hooks__recover(void); + +#define PERF_HOOK(name) \ +extern struct perf_hook_desc __perf_hook_desc_##name; \ +static inline void perf_hooks__invoke_##name(void) \ +{ \ + perf_hooks__invoke(&__perf_hook_desc_##name); \ +} + +#include "perf-hooks-list.h" +#undef PERF_HOOK + +extern int +perf_hooks__set_hook(const char *hook_name, + perf_hook_func_t hook_func, + void *hook_ctx); + +extern perf_hook_func_t +perf_hooks__get_hook(const char *hook_name); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index b1474dcadfa2..dc6ccaa4e927 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -504,6 +504,7 @@ static void pmu_add_cpu_aliases(struct list_head *head) struct pmu_events_map *map; struct pmu_event *pe; char *cpuid; + static bool printed; cpuid = getenv("PERF_CPUID"); if (cpuid) @@ -513,7 +514,10 @@ static void pmu_add_cpu_aliases(struct list_head *head) if (!cpuid) return; - pr_debug("Using CPUID %s\n", cpuid); + if (!printed) { + pr_debug("Using CPUID %s\n", cpuid); + printed = true; + } i = 0; while (1) { @@ -1135,9 +1139,11 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, bool is_cpu = !strcmp(pmu->name, "cpu"); if (event_glob != NULL && - !(strglobmatch(name, event_glob) || - (!is_cpu && strglobmatch(alias->name, - event_glob)))) + !(strglobmatch_nocase(name, event_glob) || + (!is_cpu && strglobmatch_nocase(alias->name, + event_glob)) || + (alias->topic && + strglobmatch_nocase(alias->topic, event_glob)))) continue; if (is_cpu && !name_only && !alias->desc) diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 8091d15113f7..5d4e94061402 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -18,6 +18,8 @@ struct probe_conf { extern struct probe_conf probe_conf; extern bool probe_event_dry_run; +struct symbol; + /* kprobe-tracer and uprobe-tracer tracing point */ struct probe_trace_point { char *realname; /* function real name (if needed) */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index b7d4f4aeee61..0546a4304347 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -18,6 +18,7 @@ util/thread_map.c util/util.c util/xyarray.c util/cgroup.c +util/parse-branch-options.c util/rblist.c util/counts.c util/strlist.c diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c index 639d1da2f978..293534c1a474 100644 --- a/tools/perf/util/quote.c +++ b/tools/perf/util/quote.c @@ -54,7 +54,7 @@ int sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen) break; ret = sq_quote_buf(dst, argv[i]); if (maxlen && dst->len > maxlen) - die("Too many or long arguments"); + return -ENOSPC; } return ret; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5d61242a6e64..f268201048a0 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2025,20 +2025,10 @@ out_delete_map: void perf_session__fprintf_info(struct perf_session *session, FILE *fp, bool full) { - struct stat st; - int fd, ret; - if (session == NULL || fp == NULL) return; - fd = perf_data_file__fd(session->file); - - ret = fstat(fd, &st); - if (ret == -1) - return; - fprintf(fp, "# ========\n"); - fprintf(fp, "# captured on: %s", ctime(&st.st_ctime)); perf_header__fprintf_info(session, fp, full); fprintf(fp, "# ========\n#\n"); } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 452e15a10dd2..df622f4e301e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -315,7 +315,7 @@ struct sort_entry sort_sym = { /* --sort srcline */ -static char *hist_entry__get_srcline(struct hist_entry *he) +char *hist_entry__get_srcline(struct hist_entry *he) { struct map *map = he->ms.map; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 099c97557d33..7aff317fc7c4 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -280,4 +280,5 @@ int64_t sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right); int64_t sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right); +char *hist_entry__get_srcline(struct hist_entry *he); #endif /* __PERF_SORT_H */ diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 7f7e072be746..d8dfaf64b32e 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -193,7 +193,8 @@ error: } /* Glob/lazy pattern matching */ -static bool __match_glob(const char *str, const char *pat, bool ignore_space) +static bool __match_glob(const char *str, const char *pat, bool ignore_space, + bool case_ins) { while (*str && *pat && *pat != '*') { if (ignore_space) { @@ -219,8 +220,13 @@ static bool __match_glob(const char *str, const char *pat, bool ignore_space) return false; else if (*pat == '\\') /* Escaped char match as normal char */ pat++; - if (*str++ != *pat++) + if (case_ins) { + if (tolower(*str) != tolower(*pat)) + return false; + } else if (*str != *pat) return false; + str++; + pat++; } /* Check wild card */ if (*pat == '*') { @@ -229,7 +235,7 @@ static bool __match_glob(const char *str, const char *pat, bool ignore_space) if (!*pat) /* Tail wild card matches all */ return true; while (*str) - if (__match_glob(str++, pat, ignore_space)) + if (__match_glob(str++, pat, ignore_space, case_ins)) return true; } return !*str && !*pat; @@ -249,7 +255,12 @@ static bool __match_glob(const char *str, const char *pat, bool ignore_space) */ bool strglobmatch(const char *str, const char *pat) { - return __match_glob(str, pat, false); + return __match_glob(str, pat, false, false); +} + +bool strglobmatch_nocase(const char *str, const char *pat) +{ + return __match_glob(str, pat, false, true); } /** @@ -262,7 +273,7 @@ bool strglobmatch(const char *str, const char *pat) */ bool strlazymatch(const char *str, const char *pat) { - return __match_glob(str, pat, true); + return __match_glob(str, pat, true, false); } /** diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index aecff69a510d..df2482b2ba45 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1962,7 +1962,7 @@ static bool symbol__read_kptr_restrict(void) char line[8]; if (fgets(line, sizeof(line), fp) != NULL) - value = (geteuid() != 0) ? + value = ((geteuid() != 0) || (getuid() != 0)) ? (atoi(line) != 0) : (atoi(line) == 2); @@ -2032,6 +2032,10 @@ int symbol__init(struct perf_env *env) symbol_conf.sym_list_str, "symbol") < 0) goto out_free_tid_list; + if (setup_list(&symbol_conf.bt_stop_list, + symbol_conf.bt_stop_list_str, "symbol") < 0) + goto out_free_sym_list; + /* * A path to symbols of "/" is identical to "" * reset here for simplicity. @@ -2049,6 +2053,8 @@ int symbol__init(struct perf_env *env) symbol_conf.initialized = true; return 0; +out_free_sym_list: + strlist__delete(symbol_conf.sym_list); out_free_tid_list: intlist__delete(symbol_conf.tid_list); out_free_pid_list: @@ -2064,6 +2070,7 @@ void symbol__exit(void) { if (!symbol_conf.initialized) return; + strlist__delete(symbol_conf.bt_stop_list); strlist__delete(symbol_conf.sym_list); strlist__delete(symbol_conf.dso_list); strlist__delete(symbol_conf.comm_list); @@ -2071,6 +2078,7 @@ void symbol__exit(void) intlist__delete(symbol_conf.pid_list); vmlinux_path__exit(); symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL; + symbol_conf.bt_stop_list = NULL; symbol_conf.initialized = false; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index d964844eb314..6c358b7ed336 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -58,6 +58,7 @@ struct symbol { u16 namelen; u8 binding; u8 idle:1; + u8 ignore:1; u8 arch_sym; char name[0]; }; @@ -100,6 +101,7 @@ struct symbol_conf { show_total_period, use_callchain, cumulate_callchain, + show_branchflag_count, exclude_other, show_cpu_utilization, initialized, @@ -130,14 +132,16 @@ struct symbol_conf { *pid_list_str, *tid_list_str, *sym_list_str, - *col_width_list_str; + *col_width_list_str, + *bt_stop_list_str; struct strlist *dso_list, *comm_list, *sym_list, *dso_from_list, *dso_to_list, *sym_from_list, - *sym_to_list; + *sym_to_list, + *bt_stop_list; struct intlist *pid_list, *tid_list; const char *symfs; @@ -281,7 +285,8 @@ int symbol__annotation_init(void); struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); size_t __symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, - bool unknown_as_addr, FILE *fp); + bool unknown_as_addr, + bool print_offsets, FILE *fp); size_t symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, FILE *fp); size_t __symbol__fprintf_symname(const struct symbol *sym, diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c index a680bdaa65dc..7c6b33e8e2d2 100644 --- a/tools/perf/util/symbol_fprintf.c +++ b/tools/perf/util/symbol_fprintf.c @@ -15,14 +15,15 @@ size_t symbol__fprintf(struct symbol *sym, FILE *fp) size_t __symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, - bool unknown_as_addr, FILE *fp) + bool unknown_as_addr, + bool print_offsets, FILE *fp) { unsigned long offset; size_t length; if (sym && sym->name) { length = fprintf(fp, "%s", sym->name); - if (al) { + if (al && print_offsets) { if (al->addr < sym->end) offset = al->addr - sym->start; else @@ -40,19 +41,19 @@ size_t symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, FILE *fp) { - return __symbol__fprintf_symname_offs(sym, al, false, fp); + return __symbol__fprintf_symname_offs(sym, al, false, true, fp); } size_t __symbol__fprintf_symname(const struct symbol *sym, const struct addr_location *al, bool unknown_as_addr, FILE *fp) { - return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, fp); + return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, false, fp); } size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp) { - return __symbol__fprintf_symname_offs(sym, NULL, false, fp); + return __symbol__fprintf_symname_offs(sym, NULL, false, false, fp); } size_t dso__fprintf_symbols_by_name(struct dso *dso, diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c new file mode 100644 index 000000000000..d1b21c72206d --- /dev/null +++ b/tools/perf/util/time-utils.c @@ -0,0 +1,119 @@ +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <linux/time64.h> +#include <time.h> +#include <errno.h> +#include <inttypes.h> + +#include "perf.h" +#include "debug.h" +#include "time-utils.h" + +int parse_nsec_time(const char *str, u64 *ptime) +{ + u64 time_sec, time_nsec; + char *end; + + time_sec = strtoul(str, &end, 10); + if (*end != '.' && *end != '\0') + return -1; + + if (*end == '.') { + int i; + char nsec_buf[10]; + + if (strlen(++end) > 9) + return -1; + + strncpy(nsec_buf, end, 9); + nsec_buf[9] = '\0'; + + /* make it nsec precision */ + for (i = strlen(nsec_buf); i < 9; i++) + nsec_buf[i] = '0'; + + time_nsec = strtoul(nsec_buf, &end, 10); + if (*end != '\0') + return -1; + } else + time_nsec = 0; + + *ptime = time_sec * NSEC_PER_SEC + time_nsec; + return 0; +} + +static int parse_timestr_sec_nsec(struct perf_time_interval *ptime, + char *start_str, char *end_str) +{ + if (start_str && (*start_str != '\0') && + (parse_nsec_time(start_str, &ptime->start) != 0)) { + return -1; + } + + if (end_str && (*end_str != '\0') && + (parse_nsec_time(end_str, &ptime->end) != 0)) { + return -1; + } + + return 0; +} + +int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) +{ + char *start_str, *end_str; + char *d, *str; + int rc = 0; + + if (ostr == NULL || *ostr == '\0') + return 0; + + /* copy original string because we need to modify it */ + str = strdup(ostr); + if (str == NULL) + return -ENOMEM; + + ptime->start = 0; + ptime->end = 0; + + /* str has the format: <start>,<stop> + * variations: <start>, + * ,<stop> + * , + */ + start_str = str; + d = strchr(start_str, ','); + if (d) { + *d = '\0'; + ++d; + } + end_str = d; + + rc = parse_timestr_sec_nsec(ptime, start_str, end_str); + + free(str); + + /* make sure end time is after start time if it was given */ + if (rc == 0 && ptime->end && ptime->end < ptime->start) + return -EINVAL; + + pr_debug("start time %" PRIu64 ", ", ptime->start); + pr_debug("end time %" PRIu64 "\n", ptime->end); + + return rc; +} + +bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) +{ + /* if time is not set don't drop sample */ + if (timestamp == 0) + return false; + + /* otherwise compare sample time to time window */ + if ((ptime->start && timestamp < ptime->start) || + (ptime->end && timestamp > ptime->end)) { + return true; + } + + return false; +} diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h new file mode 100644 index 000000000000..c1f197c4af6c --- /dev/null +++ b/tools/perf/util/time-utils.h @@ -0,0 +1,14 @@ +#ifndef _TIME_UTILS_H_ +#define _TIME_UTILS_H_ + +struct perf_time_interval { + u64 start, end; +}; + +int parse_nsec_time(const char *str, u64 *ptime); + +int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr); + +bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp); + +#endif diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 9df61059a85d..0ac9077f62a2 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -25,6 +25,7 @@ #include <errno.h> #include "../perf.h" +#include "debug.h" #include "util.h" #include "trace-event.h" @@ -86,16 +87,15 @@ struct scripting_ops python_scripting_unsupported_ops = { static void register_python_scripting(struct scripting_ops *scripting_ops) { - int err; - err = script_spec_register("Python", scripting_ops); - if (err) - die("error registering Python script extension"); - - err = script_spec_register("py", scripting_ops); - if (err) - die("error registering py script extension"); - - scripting_context = malloc(sizeof(struct scripting_context)); + if (scripting_context == NULL) + scripting_context = malloc(sizeof(*scripting_context)); + + if (scripting_context == NULL || + script_spec_register("Python", scripting_ops) || + script_spec_register("py", scripting_ops)) { + pr_err("Error registering Python script extension: disabling it\n"); + zfree(&scripting_context); + } } #ifdef NO_LIBPYTHON @@ -150,16 +150,15 @@ struct scripting_ops perl_scripting_unsupported_ops = { static void register_perl_scripting(struct scripting_ops *scripting_ops) { - int err; - err = script_spec_register("Perl", scripting_ops); - if (err) - die("error registering Perl script extension"); - - err = script_spec_register("pl", scripting_ops); - if (err) - die("error registering pl script extension"); - - scripting_context = malloc(sizeof(struct scripting_context)); + if (scripting_context == NULL) + scripting_context = malloc(sizeof(*scripting_context)); + + if (scripting_context == NULL || + script_spec_register("Perl", scripting_ops) || + script_spec_register("pl", scripting_ops)) { + pr_err("Error registering Perl script extension: disabling it\n"); + zfree(&scripting_context); + } } #ifdef NO_LIBPERL diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 20c2e5743903..6fec84dff3f7 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -357,8 +357,8 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, di.format = UNW_INFO_FORMAT_REMOTE_TABLE; di.start_ip = map->start; di.end_ip = map->end; - di.u.rti.segbase = map->start + segbase; - di.u.rti.table_data = map->start + table_data; + di.u.rti.segbase = map->start + segbase - map->pgoff; + di.u.rti.table_data = map->start + table_data - map->pgoff; di.u.rti.table_len = fde_count * sizeof(struct table_entry) / sizeof(unw_word_t); ret = dwarf_search_unwind_table(as, ip, &di, pi, diff --git a/tools/perf/util/util-cxx.h b/tools/perf/util/util-cxx.h new file mode 100644 index 000000000000..0e0e019c9f34 --- /dev/null +++ b/tools/perf/util/util-cxx.h @@ -0,0 +1,26 @@ +/* + * Support C++ source use utilities defined in util.h + */ + +#ifndef PERF_UTIL_UTIL_CXX_H +#define PERF_UTIL_UTIL_CXX_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Now 'new' is the only C++ keyword found in util.h: + * in tools/include/linux/rbtree.h + * + * Other keywords, like class and delete, should be + * redefined if necessary. + */ +#define new _new +#include "util.h" +#undef new + +#ifdef __cplusplus +} +#endif +#endif diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 85c56800f17a..9ddd98827d12 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -400,37 +400,12 @@ void sighandler_dump_stack(int sig) raise(sig); } -int parse_nsec_time(const char *str, u64 *ptime) +int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz) { - u64 time_sec, time_nsec; - char *end; + u64 sec = timestamp / NSEC_PER_SEC; + u64 usec = (timestamp % NSEC_PER_SEC) / NSEC_PER_USEC; - time_sec = strtoul(str, &end, 10); - if (*end != '.' && *end != '\0') - return -1; - - if (*end == '.') { - int i; - char nsec_buf[10]; - - if (strlen(++end) > 9) - return -1; - - strncpy(nsec_buf, end, 9); - nsec_buf[9] = '\0'; - - /* make it nsec precision */ - for (i = strlen(nsec_buf); i < 9; i++) - nsec_buf[i] = '0'; - - time_nsec = strtoul(nsec_buf, &end, 10); - if (*end != '\0') - return -1; - } else - time_nsec = 0; - - *ptime = time_sec * NSEC_PER_SEC + time_nsec; - return 0; + return scnprintf(buf, sz, "%"PRIu64".%06"PRIu64, sec, usec); } unsigned long parse_tag_value(const char *str, struct parse_tag *tags) @@ -629,12 +604,63 @@ bool find_process(const char *name) return ret ? false : true; } +static int +fetch_ubuntu_kernel_version(unsigned int *puint) +{ + ssize_t len; + size_t line_len = 0; + char *ptr, *line = NULL; + int version, patchlevel, sublevel, err; + FILE *vsig = fopen("/proc/version_signature", "r"); + + if (!vsig) { + pr_debug("Open /proc/version_signature failed: %s\n", + strerror(errno)); + return -1; + } + + len = getline(&line, &line_len, vsig); + fclose(vsig); + err = -1; + if (len <= 0) { + pr_debug("Reading from /proc/version_signature failed: %s\n", + strerror(errno)); + goto errout; + } + + ptr = strrchr(line, ' '); + if (!ptr) { + pr_debug("Parsing /proc/version_signature failed: %s\n", line); + goto errout; + } + + err = sscanf(ptr + 1, "%d.%d.%d", + &version, &patchlevel, &sublevel); + if (err != 3) { + pr_debug("Unable to get kernel version from /proc/version_signature '%s'\n", + line); + goto errout; + } + + if (puint) + *puint = (version << 16) + (patchlevel << 8) + sublevel; + err = 0; +errout: + free(line); + return err; +} + int fetch_kernel_version(unsigned int *puint, char *str, size_t str_size) { struct utsname utsname; int version, patchlevel, sublevel, err; + bool int_ver_ready = false; + + if (access("/proc/version_signature", R_OK) == 0) + if (!fetch_ubuntu_kernel_version(puint)) + int_ver_ready = true; if (uname(&utsname)) return -1; @@ -648,12 +674,12 @@ fetch_kernel_version(unsigned int *puint, char *str, &version, &patchlevel, &sublevel); if (err != 3) { - pr_debug("Unablt to get kernel version from uname '%s'\n", + pr_debug("Unable to get kernel version from uname '%s'\n", utsname.release); return -1; } - if (puint) + if (puint && !int_ver_ready) *puint = (version << 16) + (patchlevel << 8) + sublevel; return 0; } diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 43899e0d6fa1..1d639e38aa82 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -179,8 +179,6 @@ static inline void *zalloc(size_t size) #undef tolower #undef toupper -int parse_nsec_time(const char *str, u64 *ptime); - extern unsigned char sane_ctype[256]; #define GIT_SPACE 0x01 #define GIT_DIGIT 0x02 @@ -222,6 +220,7 @@ s64 perf_atoll(const char *str); char **argv_split(const char *str, int *argcp); void argv_free(char **argv); bool strglobmatch(const char *str, const char *pat); +bool strglobmatch_nocase(const char *str, const char *pat); bool strlazymatch(const char *str, const char *pat); static inline bool strisglob(const char *str) { @@ -361,4 +360,7 @@ extern int sched_getcpu(void); #endif int is_printable_array(char *p, unsigned int len); + +int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); + #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 0fb3c1fcd3e6..5074be4ed467 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -2,15 +2,18 @@ #include "util.h" #include "values.h" +#include "debug.h" -void perf_read_values_init(struct perf_read_values *values) +int perf_read_values_init(struct perf_read_values *values) { values->threads_max = 16; values->pid = malloc(values->threads_max * sizeof(*values->pid)); values->tid = malloc(values->threads_max * sizeof(*values->tid)); values->value = malloc(values->threads_max * sizeof(*values->value)); - if (!values->pid || !values->tid || !values->value) - die("failed to allocate read_values threads arrays"); + if (!values->pid || !values->tid || !values->value) { + pr_debug("failed to allocate read_values threads arrays"); + goto out_free_pid; + } values->threads = 0; values->counters_max = 16; @@ -18,9 +21,22 @@ void perf_read_values_init(struct perf_read_values *values) * sizeof(*values->counterrawid)); values->countername = malloc(values->counters_max * sizeof(*values->countername)); - if (!values->counterrawid || !values->countername) - die("failed to allocate read_values counters arrays"); + if (!values->counterrawid || !values->countername) { + pr_debug("failed to allocate read_values counters arrays"); + goto out_free_counter; + } values->counters = 0; + + return 0; + +out_free_counter: + zfree(&values->counterrawid); + zfree(&values->countername); +out_free_pid: + zfree(&values->pid); + zfree(&values->tid); + zfree(&values->value); + return -ENOMEM; } void perf_read_values_destroy(struct perf_read_values *values) @@ -41,17 +57,27 @@ void perf_read_values_destroy(struct perf_read_values *values) zfree(&values->countername); } -static void perf_read_values__enlarge_threads(struct perf_read_values *values) +static int perf_read_values__enlarge_threads(struct perf_read_values *values) { - values->threads_max *= 2; - values->pid = realloc(values->pid, - values->threads_max * sizeof(*values->pid)); - values->tid = realloc(values->tid, - values->threads_max * sizeof(*values->tid)); - values->value = realloc(values->value, - values->threads_max * sizeof(*values->value)); - if (!values->pid || !values->tid || !values->value) - die("failed to enlarge read_values threads arrays"); + int nthreads_max = values->threads_max * 2; + void *npid = realloc(values->pid, nthreads_max * sizeof(*values->pid)), + *ntid = realloc(values->tid, nthreads_max * sizeof(*values->tid)), + *nvalue = realloc(values->value, nthreads_max * sizeof(*values->value)); + + if (!npid || !ntid || !nvalue) + goto out_err; + + values->threads_max = nthreads_max; + values->pid = npid; + values->tid = ntid; + values->value = nvalue; + return 0; +out_err: + free(npid); + free(ntid); + free(nvalue); + pr_debug("failed to enlarge read_values threads arrays"); + return -ENOMEM; } static int perf_read_values__findnew_thread(struct perf_read_values *values, @@ -63,15 +89,21 @@ static int perf_read_values__findnew_thread(struct perf_read_values *values, if (values->pid[i] == pid && values->tid[i] == tid) return i; - if (values->threads == values->threads_max) - perf_read_values__enlarge_threads(values); + if (values->threads == values->threads_max) { + i = perf_read_values__enlarge_threads(values); + if (i < 0) + return i; + } - i = values->threads++; + i = values->threads + 1; + values->value[i] = malloc(values->counters_max * sizeof(**values->value)); + if (!values->value[i]) { + pr_debug("failed to allocate read_values counters array"); + return -ENOMEM; + } values->pid[i] = pid; values->tid[i] = tid; - values->value[i] = malloc(values->counters_max * sizeof(**values->value)); - if (!values->value[i]) - die("failed to allocate read_values counters array"); + values->threads = i; return i; } @@ -115,16 +147,21 @@ static int perf_read_values__findnew_counter(struct perf_read_values *values, return i; } -void perf_read_values_add_value(struct perf_read_values *values, +int perf_read_values_add_value(struct perf_read_values *values, u32 pid, u32 tid, u64 rawid, const char *name, u64 value) { int tindex, cindex; tindex = perf_read_values__findnew_thread(values, pid, tid); + if (tindex < 0) + return tindex; cindex = perf_read_values__findnew_counter(values, rawid, name); + if (cindex < 0) + return cindex; values->value[tindex][cindex] = value; + return 0; } static void perf_read_values__display_pretty(FILE *fp, diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h index b21a80c6cf8d..808ff9c73bf5 100644 --- a/tools/perf/util/values.h +++ b/tools/perf/util/values.h @@ -14,10 +14,10 @@ struct perf_read_values { u64 **value; }; -void perf_read_values_init(struct perf_read_values *values); +int perf_read_values_init(struct perf_read_values *values); void perf_read_values_destroy(struct perf_read_values *values); -void perf_read_values_add_value(struct perf_read_values *values, +int perf_read_values_add_value(struct perf_read_values *values, u32 pid, u32 tid, u64 rawid, const char *name, u64 value); |