diff options
author | Ingo Molnar <mingo@kernel.org> | 2016-03-04 14:19:21 +0300 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-03-04 14:19:21 +0300 |
commit | 009668520ae00d52026ccdb3884864e3473c6b65 (patch) | |
tree | 15b2cf5b2f7f6b80773d53247bfcacfa9fcad571 | |
parent | 6f6e151692625e29810f2fe036b4f410540567c8 (diff) | |
parent | fb4605ba47e772ff9d62d1d54218a832ec8b3e1d (diff) | |
download | linux-009668520ae00d52026ccdb3884864e3473c6b65.tar.xz |
Merge tag 'perf-core-for-mingo-20160303' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes:
User visible changes:
- Check existence of frontend/backed stalled cycles in 'perf stat' (Andi Kleen)
- Implement CSV metrics output in 'perf stat' (Andi Kleen)
- Support metrics in 'perf stat' --per-core/socket mode (Andi Kleen)
- Avoid installing .o files from tools/lib/ into the python extension (Jiri Olsa)
- Rename the tracepoint '/format' field that carries the syscall ID from 'nr',
that is also the name of some syscalls arguments, to "__syscall_nr", to
avoid having multiple fields with the same name, that was breaking the
python script skeleton generator from perf.data files (Taeung Song)
- Support converting data from bpf events in 'perf data' (Wang Nan)
- Fix segfault in 'perf test' hists related entries (Arnaldo Carvalho de Melo)
- Fix output of %llu for 64 bit values read on 32 bit machines in libtraceevent (Steven Rostedt)
- Fix time stamp rounding issue in libtraceevent (Chaos.Chen)
Infrastructure changes:
- Fix setlocale() breakage in the pmu parsing code (Jiri Olsa)
- Split libtraceevent's pevent_print_event() (Steven Rostedt)
- Librarize some 'perf record' bits to allow handling multiple perf.data
files per session (Wang Nan)
- Ensure return non-zero rc when mmap fails in 'perf record' (Wang Nan)
- Fix double free on 'command_line' in a error path in 'perf script' (Colin Ian King)
- Initialize struct sigaction 'sa_flags' field in a 'perf test' entry (Colin Ian King)
- Fix various build warnings in turbostat, detected with gcc6 (Colin Ian King)
- Use .s extension for preprocessed assembler code (Masahiro Yamada)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | kernel/trace/trace_syscalls.c | 16 | ||||
-rw-r--r-- | tools/build/Makefile.build | 2 | ||||
-rw-r--r-- | tools/lib/traceevent/event-parse.c | 146 | ||||
-rw-r--r-- | tools/lib/traceevent/event-parse.h | 13 | ||||
-rw-r--r-- | tools/perf/arch/x86/tests/rdpmc.c | 1 | ||||
-rw-r--r-- | tools/perf/builtin-record.c | 168 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 158 | ||||
-rw-r--r-- | tools/perf/builtin-trace.c | 8 | ||||
-rw-r--r-- | tools/perf/util/data-convert-bt.c | 118 | ||||
-rw-r--r-- | tools/perf/util/pmu.c | 13 | ||||
-rw-r--r-- | tools/perf/util/scripting-engines/trace-event-python.c | 4 | ||||
-rw-r--r-- | tools/perf/util/setup.py | 4 | ||||
-rw-r--r-- | tools/perf/util/sort.c | 37 | ||||
-rw-r--r-- | tools/perf/util/stat-shadow.c | 18 | ||||
-rw-r--r-- | tools/perf/util/stat.h | 1 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 8 |
16 files changed, 566 insertions, 149 deletions
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 0655afbea83f..d1663083d903 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -186,11 +186,11 @@ print_syscall_exit(struct trace_iterator *iter, int flags, extern char *__bad_type_size(void); -#define SYSCALL_FIELD(type, name) \ - sizeof(type) != sizeof(trace.name) ? \ +#define SYSCALL_FIELD(type, field, name) \ + sizeof(type) != sizeof(trace.field) ? \ __bad_type_size() : \ - #type, #name, offsetof(typeof(trace), name), \ - sizeof(trace.name), is_signed_type(type) + #type, #name, offsetof(typeof(trace), field), \ + sizeof(trace.field), is_signed_type(type) static int __init __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) @@ -261,7 +261,8 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call) int i; int offset = offsetof(typeof(trace), args); - ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); + ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr), + FILTER_OTHER); if (ret) return ret; @@ -281,11 +282,12 @@ static int __init syscall_exit_define_fields(struct trace_event_call *call) struct syscall_trace_exit trace; int ret; - ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); + ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr), + FILTER_OTHER); if (ret) return ret; - ret = trace_define_field(call, SYSCALL_FIELD(long, ret), + ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret), FILTER_OTHER); return ret; diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 4a96473b180f..ee566e8bd1cf 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -85,7 +85,7 @@ $(OUTPUT)%.i: %.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_i_c) -$(OUTPUT)%.i: %.S FORCE +$(OUTPUT)%.s: %.S FORCE $(call rule_mkdir) $(call if_changed_dep,cc_i_c) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 575e75174087..865dea55454b 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2635,6 +2635,7 @@ process_hex(struct event_format *event, struct print_arg *arg, char **tok) free_field: free_arg(arg->hex.field); + arg->hex.field = NULL; out: *tok = NULL; return EVENT_ERROR; @@ -2659,8 +2660,10 @@ process_int_array(struct event_format *event, struct print_arg *arg, char **tok) free_size: free_arg(arg->int_array.count); + arg->int_array.count = NULL; free_field: free_arg(arg->int_array.field); + arg->int_array.field = NULL; out: *tok = NULL; return EVENT_ERROR; @@ -4975,7 +4978,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event break; } } - if (pevent->long_size == 8 && ls && + if (pevent->long_size == 8 && ls == 1 && sizeof(long) != 8) { char *p; @@ -5339,41 +5342,45 @@ static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock) return false; } -void pevent_print_event(struct pevent *pevent, struct trace_seq *s, - struct pevent_record *record, bool use_trace_clock) +/** + * pevent_find_event_by_record - return the event from a given record + * @pevent: a handle to the pevent + * @record: The record to get the event from + * + * Returns the associated event for a given record, or NULL if non is + * is found. + */ +struct event_format * +pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record) { - static const char *spaces = " "; /* 20 spaces */ - struct event_format *event; - unsigned long secs; - unsigned long usecs; - unsigned long nsecs; - const char *comm; - void *data = record->data; int type; - int pid; - int len; - int p; - bool use_usec_format; - - use_usec_format = is_timestamp_in_us(pevent->trace_clock, - use_trace_clock); - if (use_usec_format) { - secs = record->ts / NSECS_PER_SEC; - nsecs = record->ts - secs * NSECS_PER_SEC; - } if (record->size < 0) { do_warning("ug! negative record size %d", record->size); - return; + return NULL; } - type = trace_parse_common_type(pevent, data); + type = trace_parse_common_type(pevent, record->data); - event = pevent_find_event(pevent, type); - if (!event) { - do_warning("ug! no event found for type %d", type); - return; - } + return pevent_find_event(pevent, type); +} + +/** + * pevent_print_event_task - Write the event task comm, pid and CPU + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @event: the handle to the record's event + * @record: The record to get the event from + * + * Writes the tasks comm, pid and CPU to @s. + */ +void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record) +{ + void *data = record->data; + const char *comm; + int pid; pid = parse_common_pid(pevent, data); comm = find_cmdline(pevent, pid); @@ -5381,9 +5388,43 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, if (pevent->latency_format) { trace_seq_printf(s, "%8.8s-%-5d %3d", comm, pid, record->cpu); - pevent_data_lat_fmt(pevent, s, record); } else trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu); +} + +/** + * pevent_print_event_time - Write the event timestamp + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @event: the handle to the record's event + * @record: The record to get the event from + * @use_trace_clock: Set to parse according to the @pevent->trace_clock + * + * Writes the timestamp of the record into @s. + */ +void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record, + bool use_trace_clock) +{ + unsigned long secs; + unsigned long usecs; + unsigned long nsecs; + int p; + bool use_usec_format; + + use_usec_format = is_timestamp_in_us(pevent->trace_clock, + use_trace_clock); + if (use_usec_format) { + secs = record->ts / NSECS_PER_SEC; + nsecs = record->ts - secs * NSECS_PER_SEC; + } + + if (pevent->latency_format) { + trace_seq_printf(s, " %3d", record->cpu); + pevent_data_lat_fmt(pevent, s, record); + } else + trace_seq_printf(s, " [%03d]", record->cpu); if (use_usec_format) { if (pevent->flags & PEVENT_NSEC_OUTPUT) { @@ -5391,14 +5432,36 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, p = 9; } else { usecs = (nsecs + 500) / NSECS_PER_USEC; + /* To avoid usecs larger than 1 sec */ + if (usecs >= 1000000) { + usecs -= 1000000; + secs++; + } p = 6; } - trace_seq_printf(s, " %5lu.%0*lu: %s: ", - secs, p, usecs, event->name); + trace_seq_printf(s, " %5lu.%0*lu:", secs, p, usecs); } else - trace_seq_printf(s, " %12llu: %s: ", - record->ts, event->name); + trace_seq_printf(s, " %12llu:", record->ts); +} + +/** + * pevent_print_event_data - Write the event data section + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @event: the handle to the record's event + * @record: The record to get the event from + * + * Writes the parsing of the record's data to @s. + */ +void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record) +{ + static const char *spaces = " "; /* 20 spaces */ + int len; + + trace_seq_printf(s, " %s: ", event->name); /* Space out the event names evenly. */ len = strlen(event->name); @@ -5408,6 +5471,23 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, pevent_event_info(s, event, record); } +void pevent_print_event(struct pevent *pevent, struct trace_seq *s, + struct pevent_record *record, bool use_trace_clock) +{ + struct event_format *event; + + event = pevent_find_event_by_record(pevent, record); + if (!event) { + do_warning("ug! no event found for type %d", + trace_parse_common_type(pevent, record->data)); + return; + } + + pevent_print_event_task(pevent, s, event, record); + pevent_print_event_time(pevent, s, event, record, use_trace_clock); + pevent_print_event_data(pevent, s, event, record); +} + static int events_id_cmp(const void *a, const void *b) { struct event_format * const * ea = a; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 706d9bc24066..9ffde377e89d 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -628,6 +628,16 @@ int pevent_register_print_string(struct pevent *pevent, const char *fmt, unsigned long long addr); int pevent_pid_is_registered(struct pevent *pevent, int pid); +void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record); +void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record, + bool use_trace_clock); +void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record); void pevent_print_event(struct pevent *pevent, struct trace_seq *s, struct pevent_record *record, bool use_trace_clock); @@ -694,6 +704,9 @@ struct event_format *pevent_find_event(struct pevent *pevent, int id); struct event_format * pevent_find_event_by_name(struct pevent *pevent, const char *sys, const char *name); +struct event_format * +pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record); + void pevent_data_lat_fmt(struct pevent *pevent, struct trace_seq *s, struct pevent_record *record); int pevent_data_type(struct pevent *pevent, struct pevent_record *rec); diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c index 7bb0d13c235f..7945462851a4 100644 --- a/tools/perf/arch/x86/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -103,6 +103,7 @@ static int __test__rdpmc(void) sigfillset(&sa.sa_mask); sa.sa_sigaction = segfault_handler; + sa.sa_flags = 0; sigaction(SIGSEGV, &sa, NULL); fd = sys_perf_event_open(&attr, 0, -1, -1, diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7d11162b6c41..515510ecc76a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -33,6 +33,7 @@ #include "util/parse-regs-options.h" #include "util/llvm-utils.h" #include "util/bpf-loader.h" +#include "asm/bug.h" #include <unistd.h> #include <sched.h> @@ -323,7 +324,10 @@ try_again: } else { pr_err("failed to mmap with %d (%s)\n", errno, strerror_r(errno, msg, sizeof(msg))); - rc = -errno; + if (errno) + rc = -errno; + else + rc = -EINVAL; } goto out; } @@ -467,6 +471,29 @@ static void record__init_features(struct record *rec) perf_header__clear_feat(&session->header, HEADER_STAT); } +static void +record__finish_output(struct record *rec) +{ + struct perf_data_file *file = &rec->file; + int fd = perf_data_file__fd(file); + + if (file->is_pipe) + return; + + rec->session->header.data_size += rec->bytes_written; + file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); + + if (!rec->no_buildid) { + process_buildids(rec); + + if (rec->buildid_all) + dsos__hit_all(rec->session); + } + perf_session__write_header(rec->session, rec->evlist, fd, true); + + return; +} + static volatile int workload_exec_errno; /* @@ -485,6 +512,74 @@ static void workload_exec_failed_signal(int signo __maybe_unused, static void snapshot_sig_handler(int sig); +static int record__synthesize(struct record *rec) +{ + struct perf_session *session = rec->session; + struct machine *machine = &session->machines.host; + struct perf_data_file *file = &rec->file; + struct record_opts *opts = &rec->opts; + struct perf_tool *tool = &rec->tool; + int fd = perf_data_file__fd(file); + int err = 0; + + if (file->is_pipe) { + err = perf_event__synthesize_attrs(tool, session, + process_synthesized_event); + if (err < 0) { + pr_err("Couldn't synthesize attrs.\n"); + goto out; + } + + if (have_tracepoints(&rec->evlist->entries)) { + /* + * FIXME err <= 0 here actually means that + * there were no tracepoints so its not really + * an error, just that we don't need to + * synthesize anything. We really have to + * return this more properly and also + * propagate errors that now are calling die() + */ + err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, + process_synthesized_event); + if (err <= 0) { + pr_err("Couldn't record tracing data.\n"); + goto out; + } + rec->bytes_written += err; + } + } + + if (rec->opts.full_auxtrace) { + err = perf_event__synthesize_auxtrace_info(rec->itr, tool, + session, process_synthesized_event); + if (err) + goto out; + } + + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, + machine); + WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/kallsyms permission or run as root.\n"); + + err = perf_event__synthesize_modules(tool, process_synthesized_event, + machine); + WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/modules permission or run as root.\n"); + + if (perf_guest) { + machines__process_guests(&session->machines, + perf_event__synthesize_guest_os, tool); + } + + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, + process_synthesized_event, opts->sample_address, + opts->proc_map_timeout); +out: + return err; +} + static int __cmd_record(struct record *rec, int argc, const char **argv) { int err; @@ -579,63 +674,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) machine = &session->machines.host; - if (file->is_pipe) { - err = perf_event__synthesize_attrs(tool, session, - process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize attrs.\n"); - goto out_child; - } - - if (have_tracepoints(&rec->evlist->entries)) { - /* - * FIXME err <= 0 here actually means that - * there were no tracepoints so its not really - * an error, just that we don't need to - * synthesize anything. We really have to - * return this more properly and also - * propagate errors that now are calling die() - */ - err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, - process_synthesized_event); - if (err <= 0) { - pr_err("Couldn't record tracing data.\n"); - goto out_child; - } - rec->bytes_written += err; - } - } - - if (rec->opts.full_auxtrace) { - err = perf_event__synthesize_auxtrace_info(rec->itr, tool, - session, process_synthesized_event); - if (err) - goto out_delete_session; - } - - err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine); - if (err < 0) - pr_err("Couldn't record kernel reference relocation symbol\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/kallsyms permission or run as root.\n"); - - err = perf_event__synthesize_modules(tool, process_synthesized_event, - machine); + err = record__synthesize(rec); if (err < 0) - pr_err("Couldn't record kernel module information.\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/modules permission or run as root.\n"); - - if (perf_guest) { - machines__process_guests(&session->machines, - perf_event__synthesize_guest_os, tool); - } - - err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, - process_synthesized_event, opts->sample_address, - opts->proc_map_timeout); - if (err != 0) goto out_child; if (rec->realtime_prio) { @@ -771,18 +811,8 @@ out_child: /* this will be recalculated during process_buildids() */ rec->samples = 0; - if (!err && !file->is_pipe) { - rec->session->header.data_size += rec->bytes_written; - file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); - - if (!rec->no_buildid) { - process_buildids(rec); - - if (rec->buildid_all) - dsos__hit_all(rec->session); - } - perf_session__write_header(rec->session, rec->evlist, fd, true); - } + if (!err) + record__finish_output(rec); if (!err && !quiet) { char samples[128]; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 8c0bc0fe5179..baa82078c148 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -739,6 +739,9 @@ struct outstate { FILE *fh; bool newline; const char *prefix; + int nfields; + int id, nr; + struct perf_evsel *evsel; }; #define METRIC_LEN 35 @@ -754,12 +757,9 @@ static void do_new_line_std(struct outstate *os) { fputc('\n', os->fh); fputs(os->prefix, os->fh); + aggr_printout(os->evsel, os->id, os->nr); if (stat_config.aggr_mode == AGGR_NONE) fprintf(os->fh, " "); - if (stat_config.aggr_mode == AGGR_CORE) - fprintf(os->fh, " "); - if (stat_config.aggr_mode == AGGR_SOCKET) - fprintf(os->fh, " "); fprintf(os->fh, " "); } @@ -789,6 +789,44 @@ static void print_metric_std(void *ctx, const char *color, const char *fmt, fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); } +static void new_line_csv(void *ctx) +{ + struct outstate *os = ctx; + int i; + + fputc('\n', os->fh); + if (os->prefix) + fprintf(os->fh, "%s%s", os->prefix, csv_sep); + aggr_printout(os->evsel, os->id, os->nr); + for (i = 0; i < os->nfields; i++) + fputs(csv_sep, os->fh); +} + +static void print_metric_csv(void *ctx, + const char *color __maybe_unused, + const char *fmt, const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[64], *vals, *ends; + + if (unit == NULL || fmt == NULL) { + fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep); + return; + } + snprintf(buf, sizeof(buf), fmt, val); + vals = buf; + while (isspace(*vals)) + vals++; + ends = vals; + while (isdigit(*ends) || *ends == '.') + ends++; + *ends = 0; + while (isspace(*unit)) + unit++; + fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); +} + static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -817,6 +855,28 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); } +static int first_shadow_cpu(struct perf_evsel *evsel, int id) +{ + int i; + + if (!aggr_get_id) + return 0; + + if (stat_config.aggr_mode == AGGR_NONE) + return id; + + if (stat_config.aggr_mode == AGGR_GLOBAL) + return 0; + + for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { + int cpu2 = perf_evsel__cpus(evsel)->map[i]; + + if (aggr_get_id(evsel_list->cpus, cpu2) == id) + return cpu2; + } + return 0; +} + static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -853,13 +913,32 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, struct perf_stat_output_ctx out; struct outstate os = { .fh = stat_config.output, - .prefix = prefix ? prefix : "" + .prefix = prefix ? prefix : "", + .id = id, + .nr = nr, + .evsel = counter, }; print_metric_t pm = print_metric_std; void (*nl)(void *); nl = new_line_std; + if (csv_output) { + static int aggr_fields[] = { + [AGGR_GLOBAL] = 0, + [AGGR_THREAD] = 1, + [AGGR_NONE] = 1, + [AGGR_SOCKET] = 2, + [AGGR_CORE] = 2, + }; + + pm = print_metric_csv; + nl = new_line_csv; + os.nfields = 3; + os.nfields += aggr_fields[stat_config.aggr_mode]; + if (counter->cgrp) + os.nfields++; + } if (run == 0 || ena == 0 || counter->counts->scaled == -1) { aggr_printout(counter, id, nr); @@ -880,7 +959,12 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, fprintf(stat_config.output, "%s%s", csv_sep, counter->cgrp->name); + if (!csv_output) + pm(&os, NULL, NULL, "", 0); + print_noise(counter, noise); print_running(run, ena); + if (csv_output) + pm(&os, NULL, NULL, "", 0); return; } @@ -893,14 +977,41 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, out.new_line = nl; out.ctx = &os; - if (!csv_output) - perf_stat__print_shadow_stats(counter, uval, - stat_config.aggr_mode == AGGR_GLOBAL ? 0 : - cpu_map__id_to_cpu(id), + if (csv_output) { + print_noise(counter, noise); + print_running(run, ena); + } + + perf_stat__print_shadow_stats(counter, uval, + first_shadow_cpu(counter, id), &out); + if (!csv_output) { + print_noise(counter, noise); + print_running(run, ena); + } +} + +static void aggr_update_shadow(void) +{ + int cpu, s2, id, s; + u64 val; + struct perf_evsel *counter; - print_noise(counter, noise); - print_running(run, ena); + for (s = 0; s < aggr_map->nr; s++) { + id = aggr_map->map[s]; + evlist__for_each(evsel_list, counter) { + val = 0; + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + s2 = aggr_get_id(evsel_list->cpus, cpu); + if (s2 != id) + continue; + val += perf_counts(counter->counts, cpu, 0)->val; + } + val = val * counter->scale; + perf_stat__update_shadow_stats(counter, &val, + first_shadow_cpu(counter, id)); + } + } } static void print_aggr(char *prefix) @@ -914,6 +1025,8 @@ static void print_aggr(char *prefix) if (!(aggr_map || aggr_get_id)) return; + aggr_update_shadow(); + for (s = 0; s < aggr_map->nr; s++) { id = aggr_map->map[s]; evlist__for_each(evsel_list, counter) { @@ -1441,7 +1554,7 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) */ static int add_default_attributes(void) { - struct perf_event_attr default_attrs[] = { + struct perf_event_attr default_attrs0[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, @@ -1449,8 +1562,14 @@ static int add_default_attributes(void) { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, +}; + struct perf_event_attr frontend_attrs[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, +}; + struct perf_event_attr backend_attrs[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, +}; + struct perf_event_attr default_attrs1[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, @@ -1567,7 +1686,19 @@ static int add_default_attributes(void) } if (!evsel_list->nr_entries) { - if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) + if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) + return -1; + if (pmu_have_event("cpu", "stalled-cycles-frontend")) { + if (perf_evlist__add_default_attrs(evsel_list, + frontend_attrs) < 0) + return -1; + } + if (pmu_have_event("cpu", "stalled-cycles-backend")) { + if (perf_evlist__add_default_attrs(evsel_list, + backend_attrs) < 0) + return -1; + } + if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) return -1; } @@ -1835,6 +1966,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, (const char **) stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); + perf_stat__init_shadow_stats(); if (csv_sep) { csv_output = true; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 26a337f939d8..8dc98c598b1a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1725,8 +1725,12 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc->args = sc->tp_format->format.fields; sc->nr_args = sc->tp_format->format.nr_fields; - /* drop nr field - not relevant here; does not exist on older kernels */ - if (sc->args && strcmp(sc->args->name, "nr") == 0) { + /* + * We need to check and discard the first variable '__syscall_nr' + * or 'nr' that mean the syscall number. It is needless here. + * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels. + */ + if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) { sc->args = sc->args->next; --sc->nr_args; } diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 6729f4d9df7c..811af89ce0bb 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -352,6 +352,84 @@ static int add_tracepoint_values(struct ctf_writer *cw, return ret; } +static int +add_bpf_output_values(struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct perf_sample *sample) +{ + struct bt_ctf_field_type *len_type, *seq_type; + struct bt_ctf_field *len_field, *seq_field; + unsigned int raw_size = sample->raw_size; + unsigned int nr_elements = raw_size / sizeof(u32); + unsigned int i; + int ret; + + if (nr_elements * sizeof(u32) != raw_size) + pr_warning("Incorrect raw_size (%u) in bpf output event, skip %lu bytes\n", + raw_size, nr_elements * sizeof(u32) - raw_size); + + len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len"); + len_field = bt_ctf_field_create(len_type); + if (!len_field) { + pr_err("failed to create 'raw_len' for bpf output event\n"); + ret = -1; + goto put_len_type; + } + + ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements); + if (ret) { + pr_err("failed to set field value for raw_len\n"); + goto put_len_field; + } + ret = bt_ctf_event_set_payload(event, "raw_len", len_field); + if (ret) { + pr_err("failed to set payload to raw_len\n"); + goto put_len_field; + } + + seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data"); + seq_field = bt_ctf_field_create(seq_type); + if (!seq_field) { + pr_err("failed to create 'raw_data' for bpf output event\n"); + ret = -1; + goto put_seq_type; + } + + ret = bt_ctf_field_sequence_set_length(seq_field, len_field); + if (ret) { + pr_err("failed to set length of 'raw_data'\n"); + goto put_seq_field; + } + + for (i = 0; i < nr_elements; i++) { + struct bt_ctf_field *elem_field = + bt_ctf_field_sequence_get_field(seq_field, i); + + ret = bt_ctf_field_unsigned_integer_set_value(elem_field, + ((u32 *)(sample->raw_data))[i]); + + bt_ctf_field_put(elem_field); + if (ret) { + pr_err("failed to set raw_data[%d]\n", i); + goto put_seq_field; + } + } + + ret = bt_ctf_event_set_payload(event, "raw_data", seq_field); + if (ret) + pr_err("failed to set payload for raw_data\n"); + +put_seq_field: + bt_ctf_field_put(seq_field); +put_seq_type: + bt_ctf_field_type_put(seq_type); +put_len_field: + bt_ctf_field_put(len_field); +put_len_type: + bt_ctf_field_type_put(len_type); + return ret; +} + static int add_generic_values(struct ctf_writer *cw, struct bt_ctf_event *event, struct perf_evsel *evsel, @@ -597,6 +675,12 @@ static int process_sample_event(struct perf_tool *tool, return -1; } + if (perf_evsel__is_bpf_output(evsel)) { + ret = add_bpf_output_values(event_class, event, sample); + if (ret) + return -1; + } + cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel)); if (cs) { if (is_flush_needed(cs)) @@ -744,6 +828,25 @@ static int add_tracepoint_types(struct ctf_writer *cw, return ret; } +static int add_bpf_output_types(struct ctf_writer *cw, + struct bt_ctf_event_class *class) +{ + struct bt_ctf_field_type *len_type = cw->data.u32; + struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex; + struct bt_ctf_field_type *seq_type; + int ret; + + ret = bt_ctf_event_class_add_field(class, len_type, "raw_len"); + if (ret) + return ret; + + seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len"); + if (!seq_type) + return -1; + + return bt_ctf_event_class_add_field(class, seq_type, "raw_data"); +} + static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, struct bt_ctf_event_class *event_class) { @@ -755,7 +858,8 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, * ctf event header * PERF_SAMPLE_READ - TODO * PERF_SAMPLE_CALLCHAIN - TODO - * PERF_SAMPLE_RAW - tracepoint fields are handled separately + * PERF_SAMPLE_RAW - tracepoint fields and BPF output + * are handled separately * PERF_SAMPLE_BRANCH_STACK - TODO * PERF_SAMPLE_REGS_USER - TODO * PERF_SAMPLE_STACK_USER - TODO @@ -824,6 +928,12 @@ static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel) goto err; } + if (perf_evsel__is_bpf_output(evsel)) { + ret = add_bpf_output_types(cw, event_class); + if (ret) + goto err; + } + ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class); if (ret) { pr("Failed to add event class into stream.\n"); @@ -970,6 +1080,12 @@ static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex) bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL)) goto err; +#if __BYTE_ORDER == __BIG_ENDIAN + bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN); +#else + bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN); +#endif + pr2("Created type: INTEGER %d-bit %ssigned %s\n", size, sign ? "un" : "", hex ? "hex" : ""); return type; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ce61f79dbaae..d8cd038baed2 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -124,6 +124,17 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * lc = setlocale(LC_NUMERIC, NULL); /* + * The lc string may be allocated in static storage, + * so get a dynamic copy to make it survive setlocale + * call below. + */ + lc = strdup(lc); + if (!lc) { + ret = -ENOMEM; + goto error; + } + + /* * force to C locale to ensure kernel * scale string is converted correctly. * kernel uses default C locale. @@ -135,6 +146,8 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * /* restore locale */ setlocale(LC_NUMERIC, lc); + free((char *) lc); + ret = 0; error: close(fd); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 309d90fa7698..fbd05242b4e5 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1094,8 +1094,6 @@ static int python_start_script(const char *script, int argc, const char **argv) goto error; } - free(command_line); - set_table_handlers(tables); if (tables->db_export_mode) { @@ -1104,6 +1102,8 @@ static int python_start_script(const char *script, int argc, const char **argv) goto error; } + free(command_line); + return err; error: Py_Finalize(); diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 1833103768cb..c8680984d2d6 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -22,6 +22,7 @@ cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ] +src_perf = getenv('srctree') + '/tools/perf' build_lib = getenv('PYTHON_EXTBUILD_LIB') build_tmp = getenv('PYTHON_EXTBUILD_TMP') libtraceevent = getenv('LIBTRACEEVENT') @@ -30,6 +31,9 @@ libapikfs = getenv('LIBAPI') ext_sources = [f.strip() for f in file('util/python-ext-sources') if len(f.strip()) > 0 and f[0] != '#'] +# use full paths with source files +ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , ext_sources) + perf = Extension('perf', sources = ext_sources, include_dirs = ['util/include'], diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 5888bfe9a193..4380a2858802 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2635,25 +2635,14 @@ out: return ret; } -int setup_sorting(struct perf_evlist *evlist) +static void evlist__set_hists_nr_sort_keys(struct perf_evlist *evlist) { - int err; - struct hists *hists; struct perf_evsel *evsel; - struct perf_hpp_fmt *fmt; - - err = __setup_sorting(evlist); - if (err < 0) - return err; - - if (parent_pattern != default_parent_pattern) { - err = sort_dimension__add("parent", evlist); - if (err < 0) - return err; - } evlist__for_each(evlist, evsel) { - hists = evsel__hists(evsel); + struct perf_hpp_fmt *fmt; + struct hists *hists = evsel__hists(evsel); + hists->nr_sort_keys = perf_hpp_list.nr_sort_keys; /* @@ -2667,6 +2656,24 @@ int setup_sorting(struct perf_evlist *evlist) hists->nr_sort_keys--; } } +} + +int setup_sorting(struct perf_evlist *evlist) +{ + int err; + + err = __setup_sorting(evlist); + if (err < 0) + return err; + + if (parent_pattern != default_parent_pattern) { + err = sort_dimension__add("parent", evlist); + if (err < 0) + return err; + } + + if (evlist != NULL) + evlist__set_hists_nr_sort_keys(evlist); reset_dimensions(); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 4d8f18581b9b..b33ffb2af2cf 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -2,6 +2,7 @@ #include "evsel.h" #include "stat.h" #include "color.h" +#include "pmu.h" enum { CTX_BIT_USER = 1 << 0, @@ -14,6 +15,13 @@ enum { #define NUM_CTX CTX_BIT_MAX +/* + * AGGR_GLOBAL: Use CPU 0 + * AGGR_SOCKET: Use first CPU of socket + * AGGR_CORE: Use first CPU of core + * AGGR_NONE: Use matching CPU + * AGGR_THREAD: Not supported? + */ static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; @@ -28,9 +36,15 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; +static bool have_frontend_stalled; struct stats walltime_nsecs_stats; +void perf_stat__init_shadow_stats(void) +{ + have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); +} + static int evsel_context(struct perf_evsel *evsel) { int ctx = 0; @@ -310,13 +324,13 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); - out->new_line(ctxp); if (total && avg) { + out->new_line(ctxp); ratio = total / avg; print_metric(ctxp, NULL, "%7.2f ", "stalled cycles per insn", ratio); - } else { + } else if (have_frontend_stalled) { print_metric(ctxp, NULL, NULL, "stalled cycles per insn", 0); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index f02af68adc04..0150e786ccc7 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -72,6 +72,7 @@ typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, const char *fmt, double val); typedef void (*new_line_t )(void *ctx); +void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, int cpu); diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0dac7e05a6ac..3fa94e291d16 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1970,7 +1970,7 @@ int has_config_tdp(unsigned int family, unsigned int model) } static void -dump_cstate_pstate_config_info(family, model) +dump_cstate_pstate_config_info(unsigned int family, unsigned int model) { if (!do_nhm_platform_info) return; @@ -2142,7 +2142,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ -double get_tdp(model) +double get_tdp(unsigned int model) { unsigned long long msr; @@ -2256,7 +2256,7 @@ void rapl_probe(unsigned int family, unsigned int model) return; } -void perf_limit_reasons_probe(family, model) +void perf_limit_reasons_probe(unsigned int family, unsigned int model) { if (!genuine_intel) return; @@ -2792,7 +2792,7 @@ void process_cpuid() perf_limit_reasons_probe(family, model); if (debug) - dump_cstate_pstate_config_info(); + dump_cstate_pstate_config_info(family, model); if (has_skl_msrs(family, model)) calculate_tsc_tweak(); |