From 94f0705eee70cb256d21c9abe7ce44ffbe093555 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 10 May 2023 23:27:24 -0700 Subject: perf annotate: Parse x86 SIB addressing properly When the source argument of the "mov" instruction looks like below, it didn't parse the whole operand and just stopped at the first comma. mov (%rbx,%rax,1),%rcx Fix it by checking the parentheses and move it to the closing one. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230511062725.514752-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 11992cfe271c..b708bbc49c9e 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -558,6 +558,19 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy return -1; *s = '\0'; + + /* + * x86 SIB addressing has something like 0x8(%rax, %rcx, 1) + * then it needs to have the closing parenthesis. + */ + if (strchr(ops->raw, '(')) { + *s = ','; + s = strchr(ops->raw, ')'); + if (s == NULL || s[1] != ',') + return -1; + *++s = '\0'; + } + ops->source.raw = strdup(ops->raw); *s = ','; -- cgit v1.2.3 From d0b35979986e3bd03cb2f2e887e0b8036ae06198 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 24 May 2023 13:50:53 -0700 Subject: perf annotate: Handle x86 instruction suffix generally In AT&T asm syntax, most of x86 instructions can have size suffix like b, w, l or q. Instead of adding all these instructions in the table, we can handle them in a general way. For example, it can try to find an instruction as is. If not found, assuming it has a suffix and it'd try again without the suffix if it's one of the allowed suffixes. This way, we can reduce the instruction table size for duplicated entries of the same instructions with a different suffix. If an instruction xyz and others like xyz are completely different ones, then they both need to be listed in the table so that they can be found before the second attempt (without the suffix). Reviewed-by: Adrian Hunter Reviewed-by: Masami Hiramatsu Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230524205054.3087004-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index b708bbc49c9e..7f05f2a2aa83 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -70,6 +70,7 @@ struct arch { struct ins_ops *(*associate_instruction_ops)(struct arch *arch, const char *name); bool sorted_instructions; bool initialized; + const char *insn_suffix; void *priv; unsigned int model; unsigned int family; @@ -179,6 +180,7 @@ static struct arch architectures[] = { .init = x86__annotate_init, .instructions = x86__instructions, .nr_instructions = ARRAY_SIZE(x86__instructions), + .insn_suffix = "bwlq", .objdump = { .comment_char = '#', }, @@ -720,6 +722,26 @@ static struct ins_ops *__ins__find(struct arch *arch, const char *name) } ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); + if (ins) + return ins->ops; + + if (arch->insn_suffix) { + char tmp[32]; + char suffix; + size_t len = strlen(name); + + if (len == 0 || len >= sizeof(tmp)) + return NULL; + + suffix = name[len - 1]; + if (strchr(arch->insn_suffix, suffix) == NULL) + return NULL; + + strcpy(tmp, name); + tmp[len - 1] = '\0'; /* remove the suffix and check again */ + + ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); + } return ins ? ins->ops : NULL; } -- cgit v1.2.3 From d3d53b2e9617ea606aae91a013163895f037de96 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 8 Jun 2023 16:28:18 -0700 Subject: perf annotate: Fix parse_objdump_line memory leak fileloc is used to hold a previous line, before overwriting it ensure the previous contents is freed. Free the storage once done in symbol__disassemble. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ali Saidi Cc: Andi Kleen Cc: Athira Rajeev Cc: Brian Robbins Cc: Changbin Du Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: Fangrui Song Cc: German Gomez Cc: Ingo Molnar Cc: Ivan Babrou Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: K Prateek Nayak Cc: Kan Liang Cc: Leo Yan Cc: Liam Howlett Cc: Mark Rutland Cc: Miguel Ojeda Cc: Mike Leach Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Suzuki Poulouse Cc: Wenyu Liu Cc: Will Deacon Cc: Yang Jihong Cc: Ye Xingchen Cc: Yuan Can Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230608232823.4027869-22-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 7f05f2a2aa83..57ef616cdbfd 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1546,6 +1546,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, /* /filename:linenr ? Save line number and ignore. */ if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { *line_nr = atoi(parsed_line + match[1].rm_so); + free(*fileloc); *fileloc = strdup(parsed_line); return 0; } @@ -1594,7 +1595,6 @@ static int symbol__parse_objdump_line(struct symbol *sym, } annotation_line__add(&dl->al, ¬es->src->source); - return 0; } @@ -2136,6 +2136,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) nline++; } free(line); + free(fileloc); err = finish_command(&objdump_process); if (err) -- cgit v1.2.3 From 625db36e6c53b39c664b7fcb509207d26ac58ea6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 8 Jun 2023 16:28:22 -0700 Subject: perf srcline: Change free_srcline to zfree_srcline Make use after free more unlikely. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ali Saidi Cc: Andi Kleen Cc: Athira Rajeev Cc: Brian Robbins Cc: Changbin Du Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: Fangrui Song Cc: German Gomez Cc: Ingo Molnar Cc: Ivan Babrou Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: K Prateek Nayak Cc: Kan Liang Cc: Leo Yan Cc: Liam Howlett Cc: Mark Rutland Cc: Miguel Ojeda Cc: Mike Leach Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Suzuki Poulouse Cc: Wenyu Liu Cc: Will Deacon Cc: Yang Jihong Cc: Ye Xingchen Cc: Yuan Can Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230608232823.4027869-26-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 4 ++-- tools/perf/util/annotate.c | 2 +- tools/perf/util/block-info.c | 4 ++-- tools/perf/util/hist.c | 6 +++--- tools/perf/util/map.c | 2 +- tools/perf/util/srcline.c | 15 ++++++++++----- tools/perf/util/srcline.h | 2 +- 7 files changed, 20 insertions(+), 15 deletions(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index ca39657ee407..eec89567ae48 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1387,8 +1387,8 @@ static int cycles_printf(struct hist_entry *he, struct hist_entry *pair, bi->start, bi->end, block_he->diff.cycles); } - free_srcline(start_line); - free_srcline(end_line); + zfree_srcline(&start_line); + zfree_srcline(&end_line); return scnprintf(hpp->buf, hpp->size, "%*s", width, buf); } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 57ef616cdbfd..bde890cfa620 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1218,7 +1218,7 @@ static void annotation_line__init(struct annotation_line *al, static void annotation_line__exit(struct annotation_line *al) { - free_srcline(al->path); + zfree_srcline(&al->path); zfree(&al->line); } diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 16a7b4adcf18..08279b1b65e5 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -305,8 +305,8 @@ static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, bi->start, bi->end); } - free_srcline(start_line); - free_srcline(end_line); + zfree_srcline(&start_line); + zfree_srcline(&end_line); return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf); } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index efaf7ac784fc..be2c134d672f 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1323,8 +1323,8 @@ void hist_entry__delete(struct hist_entry *he) if (he->branch_info) { map__zput(he->branch_info->from.ms.map); map__zput(he->branch_info->to.ms.map); - free_srcline(he->branch_info->srcline_from); - free_srcline(he->branch_info->srcline_to); + zfree_srcline(&he->branch_info->srcline_from); + zfree_srcline(&he->branch_info->srcline_to); zfree(&he->branch_info); } @@ -1342,7 +1342,7 @@ void hist_entry__delete(struct hist_entry *he) zfree(&he->res_samples); zfree(&he->stat_acc); - free_srcline(he->srcline); + zfree_srcline(&he->srcline); if (he->srcfile && he->srcfile[0]) zfree(&he->srcfile); free_callchain(he->callchain); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index ae1d54d4880a..c77e2fce6a37 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -498,7 +498,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, char *srcline = map__srcline(map, addr, NULL); if (strncmp(srcline, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) ret = fprintf(fp, "%s%s", prefix, srcline); - free_srcline(srcline); + zfree_srcline(&srcline); } return ret; } diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index cfca03abd6f8..b8e596528d7e 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -804,10 +804,15 @@ out: return NULL; } -void free_srcline(char *srcline) +void zfree_srcline(char **srcline) { - if (srcline && strcmp(srcline, SRCLINE_UNKNOWN) != 0) - free(srcline); + if (*srcline == NULL) + return; + + if (strcmp(*srcline, SRCLINE_UNKNOWN)) + free(*srcline); + + *srcline = NULL; } char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, @@ -880,7 +885,7 @@ void srcline__tree_delete(struct rb_root_cached *tree) pos = rb_entry(next, struct srcline_node, rb_node); next = rb_next(&pos->rb_node); rb_erase_cached(&pos->rb_node, tree); - free_srcline(pos->srcline); + zfree_srcline(&pos->srcline); zfree(&pos); } } @@ -903,7 +908,7 @@ void inline_node__delete(struct inline_node *node) list_for_each_entry_safe(ilist, tmp, &node->val, list) { list_del_init(&ilist->list); - free_srcline(ilist->srcline); + zfree_srcline(&ilist->srcline); /* only the inlined symbols are owned by the list */ if (ilist->symbol && ilist->symbol->inlined) symbol__delete(ilist->symbol); diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index b11a0aaaa676..a15c7db9058e 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h @@ -15,7 +15,7 @@ char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool show_sym, bool show_addr, bool unwind_inlines, u64 ip); -void free_srcline(char *srcline); +void zfree_srcline(char **srcline); char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line); /* insert the srcline into the DSO, which will take ownership */ -- cgit v1.2.3 From 103b3d2f94732fb1bc796e68e4cdfbcd731bbeaa Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 12 Jun 2023 16:00:26 -0700 Subject: perf annotate: Allow whitespace between insn operands The llvm-objdump adds a space between the operands while GNU objdump does not. Allow a space to handle the both. In GNU objdump: Disassembly of section .text: here | ffffffff81000000 <_stext>: v ffffffff81000000: 48 8d 25 51 1f 40 01 lea 0x1401f51(%rip),%rsp ffffffff81000007: e8 d4 00 00 00 call ffffffff810000e0 ffffffff8100000c: 48 8d 3d ed ff ff ff lea -0x13(%rip),%rdi In llvm-objdump: Disassembly of section .text: here | ffffffff81000000 : v ffffffff81000000: 48 8d 25 51 1f 40 01 leaq 20979537(%rip), %rsp ffffffff81000007: e8 d4 00 00 00 callq 0xffffffff810000e0 ffffffff8100000c: 48 8d 3d ed ff ff ff leaq -19(%rip), %rdi Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230612230026.3887586-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index bde890cfa620..cdd1924a4418 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -579,7 +579,7 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy if (ops->source.raw == NULL) return -1; - target = ++s; + target = skip_spaces(++s); comment = strchr(s, arch->objdump.comment_char); if (comment != NULL) -- cgit v1.2.3 From 66dc1920f6bbc172ee35520d024977d5330df842 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 25 Nov 2022 12:42:09 +0100 Subject: perf annotate: Work with vmlinux outside symfs It is currently possible to use --symfs along with a vmlinux which lies outside of the symfs by passing an absolute path to --vmlinux, thanks to the check in dso__load_vmlinux() which handles this explicitly. However, the annotate code lacks this check and thus 'perf annotate' does not work ("Internal error: Invalid -1 error code") for kernel functions with this combination. Add the missing handling. Signed-off-by: Vincent Whitchurch Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kernel@axis.com Link: https://lore.kernel.org/r/20221125114210.2353820-1-vincent.whitchurch@axis.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index cdd1924a4418..43865601f96c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1731,7 +1731,10 @@ fallback: * cache, or is just a kallsyms file, well, lets hope that this * DSO is the same as when 'perf record' ran. */ - __symbol__join_symfs(filename, filename_size, dso->long_name); + if (dso->kernel && dso->long_name[0] == '/') + snprintf(filename, filename_size, "%s", dso->long_name); + else + __symbol__join_symfs(filename, filename_size, dso->long_name); mutex_lock(&dso->lock); if (access(filename, R_OK) && errno == ENOENT && dso->nsinfo) { -- cgit v1.2.3 From 2e9f9d4a729f12b4bc3fa60406374327b1809abe Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 14 Jun 2023 21:07:15 -0700 Subject: perf annotation: Switch lock from a mutex to a sharded_mutex Remove the "struct mutex lock" variable from annotation that is allocated per symbol. This removes in the region of 40 bytes per symbol allocation. Use a sharded mutex where the number of shards is set to the number of CPUs. Assuming good hashing of the annotation (done based on the pointer), this means in order to contend there needs to be more threads than CPUs, which is not currently true in any perf command. Were contention an issue it is straightforward to increase the number of shards in the mutex. On my Debian/glibc based machine, this reduces the size of struct annotation from 136 bytes to 96 bytes, or nearly 30%. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Andres Freund Cc: Mark Rutland Cc: Yuan Can Cc: Peter Zijlstra Cc: Adrian Hunter Cc: Arnaldo Carvalho de Melo Cc: Huacai Chen Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Alexander Shishkin Cc: Kan Liang Cc: Ingo Molnar Link: https://lore.kernel.org/r/20230615040715.2064350-2-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-top.c | 14 ++++----- tools/perf/ui/browsers/annotate.c | 10 +++--- tools/perf/util/annotate.c | 66 +++++++++++++++++++++++++++++++++------ tools/perf/util/annotate.h | 11 +++++-- 4 files changed, 77 insertions(+), 24 deletions(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index c363c04e16df..1baa2acb3ced 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -137,10 +137,10 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) } notes = symbol__annotation(sym); - mutex_lock(¬es->lock); + annotation__lock(notes); if (!symbol__hists(sym, top->evlist->core.nr_entries)) { - mutex_unlock(¬es->lock); + annotation__unlock(notes); pr_err("Not enough memory for annotating '%s' symbol!\n", sym->name); sleep(1); @@ -156,7 +156,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) pr_err("Couldn't annotate %s: %s\n", sym->name, msg); } - mutex_unlock(¬es->lock); + annotation__unlock(notes); return err; } @@ -211,12 +211,12 @@ static void perf_top__record_precise_ip(struct perf_top *top, notes = symbol__annotation(sym); - if (!mutex_trylock(¬es->lock)) + if (!annotation__trylock(notes)) return; err = hist_entry__inc_addr_samples(he, sample, evsel, ip); - mutex_unlock(¬es->lock); + annotation__unlock(notes); if (unlikely(err)) { /* @@ -253,7 +253,7 @@ static void perf_top__show_details(struct perf_top *top) symbol = he->ms.sym; notes = symbol__annotation(symbol); - mutex_lock(¬es->lock); + annotation__lock(notes); symbol__calc_percent(symbol, evsel); @@ -274,7 +274,7 @@ static void perf_top__show_details(struct perf_top *top) if (more != 0) printf("%d lines not displayed, maybe increase display entries [e]\n", more); out_unlock: - mutex_unlock(¬es->lock); + annotation__unlock(notes); } static void perf_top__resort_hists(struct perf_top *t) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 70bad42b807b..ccdb2cd11fbf 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -314,7 +314,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, browser->entries = RB_ROOT; - mutex_lock(¬es->lock); + annotation__lock(notes); symbol__calc_percent(sym, evsel); @@ -343,7 +343,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, } disasm_rb_tree__insert(browser, &pos->al); } - mutex_unlock(¬es->lock); + annotation__unlock(notes); browser->curr_hot = rb_last(&browser->entries); } @@ -470,10 +470,10 @@ static bool annotate_browser__callq(struct annotate_browser *browser, } notes = symbol__annotation(dl->ops.target.sym); - mutex_lock(¬es->lock); + annotation__lock(notes); if (!symbol__hists(dl->ops.target.sym, evsel->evlist->core.nr_entries)) { - mutex_unlock(¬es->lock); + annotation__unlock(notes); ui__warning("Not enough memory for annotating '%s' symbol!\n", dl->ops.target.sym->name); return true; @@ -482,7 +482,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser, target_ms.maps = ms->maps; target_ms.map = ms->map; target_ms.sym = dl->ops.target.sym; - mutex_unlock(¬es->lock); + annotation__unlock(notes); symbol__tui_annotate(&target_ms, evsel, hbt, browser->opts); sym_title(ms->sym, ms->map, title, sizeof(title), browser->opts->percent_type); ui_browser__show_title(&browser->b, title); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 43865601f96c..77c816400719 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -32,6 +32,7 @@ #include "block-range.h" #include "string2.h" #include "util/event.h" +#include "util/sharded_mutex.h" #include "arch/common.h" #include "namespaces.h" #include @@ -856,7 +857,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); - mutex_lock(¬es->lock); + annotation__lock(notes); if (notes->src != NULL) { memset(notes->src->histograms, 0, notes->src->nr_histograms * notes->src->sizeof_sym_hist); @@ -864,7 +865,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym) memset(notes->src->cycles_hist, 0, symbol__size(sym) * sizeof(struct cyc_hist)); } - mutex_unlock(¬es->lock); + annotation__unlock(notes); } static int __symbol__account_cycles(struct cyc_hist *ch, @@ -1121,7 +1122,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size) notes->hit_insn = 0; notes->cover_insn = 0; - mutex_lock(¬es->lock); + annotation__lock(notes); for (offset = size - 1; offset >= 0; --offset) { struct cyc_hist *ch; @@ -1140,7 +1141,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size) notes->have_cycles = true; } } - mutex_unlock(¬es->lock); + annotation__unlock(notes); } int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, @@ -1291,17 +1292,64 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); } -void annotation__init(struct annotation *notes) +void annotation__exit(struct annotation *notes) { - mutex_init(¬es->lock); + annotated_source__delete(notes->src); } -void annotation__exit(struct annotation *notes) +static struct sharded_mutex *sharded_mutex; + +static void annotation__init_sharded_mutex(void) { - annotated_source__delete(notes->src); - mutex_destroy(¬es->lock); + /* As many mutexes as there are CPUs. */ + sharded_mutex = sharded_mutex__new(cpu__max_present_cpu().cpu); +} + +static size_t annotation__hash(const struct annotation *notes) +{ + return (size_t)notes; } +static struct mutex *annotation__get_mutex(const struct annotation *notes) +{ + static pthread_once_t once = PTHREAD_ONCE_INIT; + + pthread_once(&once, annotation__init_sharded_mutex); + if (!sharded_mutex) + return NULL; + + return sharded_mutex__get_mutex(sharded_mutex, annotation__hash(notes)); +} + +void annotation__lock(struct annotation *notes) + NO_THREAD_SAFETY_ANALYSIS +{ + struct mutex *mutex = annotation__get_mutex(notes); + + if (mutex) + mutex_lock(mutex); +} + +void annotation__unlock(struct annotation *notes) + NO_THREAD_SAFETY_ANALYSIS +{ + struct mutex *mutex = annotation__get_mutex(notes); + + if (mutex) + mutex_unlock(mutex); +} + +bool annotation__trylock(struct annotation *notes) +{ + struct mutex *mutex = annotation__get_mutex(notes); + + if (!mutex) + return false; + + return mutex_trylock(mutex); +} + + static void annotation_line__add(struct annotation_line *al, struct list_head *head) { list_add_tail(&al->node, head); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 1c6335b8333a..962780559176 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -271,8 +271,7 @@ struct annotated_source { struct sym_hist *histograms; }; -struct annotation { - struct mutex lock; +struct LOCKABLE annotation { u64 max_coverage; u64 start; u64 hit_cycles; @@ -298,9 +297,15 @@ struct annotation { struct annotated_source *src; }; -void annotation__init(struct annotation *notes); +static inline void annotation__init(struct annotation *notes __maybe_unused) +{ +} void annotation__exit(struct annotation *notes); +void annotation__lock(struct annotation *notes) EXCLUSIVE_LOCK_FUNCTION(*notes); +void annotation__unlock(struct annotation *notes) UNLOCK_FUNCTION(*notes); +bool annotation__trylock(struct annotation *notes) EXCLUSIVE_TRYLOCK_FUNCTION(true, *notes); + static inline int annotation__cycles_width(struct annotation *notes) { if (notes->have_cycles && notes->options->show_minmax_cycle) -- cgit v1.2.3 From 4ca0d340ce206985d9b9956993d7c81eeb1d3198 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Tue, 20 Jun 2023 21:20:25 +0800 Subject: perf annotate: Fix instruction association and parsing for LoongArch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the perf annotate view for LoongArch, there is no arrowed line pointing to the target from the branch instruction. This issue is caused by incorrect instruction association and parsing. $ perf record alloc-6276705c94ad1398 # rust benchmark $ perf report 0.28 │ ori $a1, $zero, 0x63 │ move $a2, $zero 10.55 │ addi.d $a3, $a2, 1(0x1) │ sltu $a4, $a3, $s7 9.53 │ masknez $a4, $s7, $a4 │ sub.d $a3, $a3, $a4 12.12 │ st.d $a1, $fp, 24(0x18) │ st.d $a3, $fp, 16(0x10) 16.29 │ slli.d $a2, $a2, 0x2 │ ldx.w $a2, $s8, $a2 12.77 │ st.w $a2, $sp, 724(0x2d4) │ st.w $s0, $sp, 720(0x2d0) 7.03 │ addi.d $a2, $sp, 720(0x2d0) │ addi.d $a1, $a1, -1(0xfff) 12.03 │ move $a2, $a3 │ → bne $a1, $s3, -52(0x3ffcc) # 82ce8 2.50 │ addi.d $a0, $a0, 1(0x1) This patch fixes instruction association issues, such as associating branch instructions with jump_ops instead of call_ops, and corrects false instruction matches. It also implements branch instruction parsing specifically for LoongArch. With this patch, we will be able to see the arrowed line. 0.79 │3ec: ori $a1, $zero, 0x63 │ move $a2, $zero 10.32 │3f4:┌─→addi.d $a3, $a2, 1(0x1) │ │ sltu $a4, $a3, $s7 10.44 │ │ masknez $a4, $s7, $a4 │ │ sub.d $a3, $a3, $a4 14.17 │ │ st.d $a1, $fp, 24(0x18) │ │ st.d $a3, $fp, 16(0x10) 13.15 │ │ slli.d $a2, $a2, 0x2 │ │ ldx.w $a2, $s8, $a2 11.00 │ │ st.w $a2, $sp, 724(0x2d4) │ │ st.w $s0, $sp, 720(0x2d0) 8.00 │ │ addi.d $a2, $sp, 720(0x2d0) │ │ addi.d $a1, $a1, -1(0xfff) 11.99 │ │ move $a2, $a3 │ └──bne $a1, $s3, 3f4 3.17 │ addi.d $a0, $a0, 1(0x1) Signed-off-by: WANG Rui Acked-by: Namhyung Kim Cc: Mark Rutland Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Alexander Shishkin Cc: loongarch@lists.linux.dev Cc: loongson-kernel@lists.loongnix.cn Cc: Huacai Chen Cc: Tiezhu Yang Cc: Ingo Molnar Cc: WANG Xuerui Link: https://lore.kernel.org/r/20230620132025.105563-1-wangrui@loongson.cn Signed-off-by: Namhyung Kim --- tools/perf/arch/loongarch/annotate/instructions.c | 116 +++++++++++++++++++--- tools/perf/arch/s390/annotate/instructions.c | 3 - tools/perf/util/annotate.c | 8 +- 3 files changed, 109 insertions(+), 18 deletions(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/arch/loongarch/annotate/instructions.c b/tools/perf/arch/loongarch/annotate/instructions.c index ab21bf122135..98e19c5366ac 100644 --- a/tools/perf/arch/loongarch/annotate/instructions.c +++ b/tools/perf/arch/loongarch/annotate/instructions.c @@ -5,25 +5,115 @@ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited */ +static int loongarch_call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms) +{ + char *c, *endptr, *tok, *name; + struct map *map = ms->map; + struct addr_map_symbol target = { + .ms = { .map = map, }, + }; + + c = strchr(ops->raw, '#'); + if (c++ == NULL) + return -1; + + ops->target.addr = strtoull(c, &endptr, 16); + + name = strchr(endptr, '<'); + name++; + + if (arch->objdump.skip_functions_char && + strchr(name, arch->objdump.skip_functions_char)) + return -1; + + tok = strchr(name, '>'); + if (tok == NULL) + return -1; + + *tok = '\0'; + ops->target.name = strdup(name); + *tok = '>'; + + if (ops->target.name == NULL) + return -1; + + target.addr = map__objdump_2mem(map, ops->target.addr); + + if (maps__find_ams(ms->maps, &target) == 0 && + map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) + ops->target.sym = target.ms.sym; + + return 0; +} + +static struct ins_ops loongarch_call_ops = { + .parse = loongarch_call__parse, + .scnprintf = call__scnprintf, +}; + +static int loongarch_jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms) +{ + struct map *map = ms->map; + struct symbol *sym = ms->sym; + struct addr_map_symbol target = { + .ms = { .map = map, }, + }; + const char *c = strchr(ops->raw, '#'); + u64 start, end; + + ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); + ops->raw_func_start = strchr(ops->raw, '<'); + + if (ops->raw_func_start && c > ops->raw_func_start) + c = NULL; + + if (c++ != NULL) + ops->target.addr = strtoull(c, NULL, 16); + else + ops->target.addr = strtoull(ops->raw, NULL, 16); + + target.addr = map__objdump_2mem(map, ops->target.addr); + start = map__unmap_ip(map, sym->start); + end = map__unmap_ip(map, sym->end); + + ops->target.outside = target.addr < start || target.addr > end; + + if (maps__find_ams(ms->maps, &target) == 0 && + map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) + ops->target.sym = target.ms.sym; + + if (!ops->target.outside) { + ops->target.offset = target.addr - start; + ops->target.offset_avail = true; + } else { + ops->target.offset_avail = false; + } + + return 0; +} + +static struct ins_ops loongarch_jump_ops = { + .parse = loongarch_jump__parse, + .scnprintf = jump__scnprintf, +}; + static struct ins_ops *loongarch__associate_ins_ops(struct arch *arch, const char *name) { struct ins_ops *ops = NULL; - if (!strncmp(name, "beqz", 4) || - !strncmp(name, "bnez", 4) || - !strncmp(name, "beq", 3) || - !strncmp(name, "bne", 3) || - !strncmp(name, "blt", 3) || - !strncmp(name, "bge", 3) || - !strncmp(name, "bltu", 4) || - !strncmp(name, "bgeu", 4) || - !strncmp(name, "bl", 2)) - ops = &call_ops; - else if (!strncmp(name, "jirl", 4)) + if (!strcmp(name, "bl")) + ops = &loongarch_call_ops; + else if (!strcmp(name, "jirl")) ops = &ret_ops; - else if (name[0] == 'b') - ops = &jump_ops; + else if (!strcmp(name, "b") || + !strncmp(name, "beq", 3) || + !strncmp(name, "bne", 3) || + !strncmp(name, "blt", 3) || + !strncmp(name, "bge", 3) || + !strncmp(name, "bltu", 4) || + !strncmp(name, "bgeu", 4)) + ops = &loongarch_jump_ops; else return NULL; diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c index de925b0e35ce..da5aa3e1f04c 100644 --- a/tools/perf/arch/s390/annotate/instructions.c +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -45,9 +45,6 @@ static int s390_call__parse(struct arch *arch, struct ins_operands *ops, return 0; } -static int call__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name); - static struct ins_ops s390_call_ops = { .parse = s390_call__parse, .scnprintf = call__scnprintf, diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 77c816400719..ba988a13dacb 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -62,6 +62,10 @@ static regex_t file_lineno; static struct ins_ops *ins__find(struct arch *arch, const char *name); static void ins__sort(struct arch *arch); static int disasm_line__parse(char *line, const char **namep, char **rawp); +static int call__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name); +static int jump__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name); struct arch { const char *name; @@ -324,7 +328,7 @@ static struct ins_ops call_ops = { bool ins__is_call(const struct ins *ins) { - return ins->ops == &call_ops || ins->ops == &s390_call_ops; + return ins->ops == &call_ops || ins->ops == &s390_call_ops || ins->ops == &loongarch_call_ops; } /* @@ -465,7 +469,7 @@ static struct ins_ops jump_ops = { bool ins__is_jump(const struct ins *ins) { - return ins->ops == &jump_ops; + return ins->ops == &jump_ops || ins->ops == &loongarch_jump_ops; } static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) -- cgit v1.2.3