Diffstat (limited to 'tools/perf/util')
43 files changed, 1001 insertions, 622 deletions
diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c index a5e31f826828..7b12bd7a3080 100644 --- a/tools/perf/util/affinity.c +++ b/tools/perf/util/affinity.c @@ -25,11 +25,11 @@ int affinity__setup(struct affinity *a) { int cpu_set_size = get_cpu_set_size(); - a->orig_cpus = bitmap_alloc(cpu_set_size * 8); + a->orig_cpus = bitmap_zalloc(cpu_set_size * 8); if (!a->orig_cpus) return -1; sched_getaffinity(0, cpu_set_size, (cpu_set_t *)a->orig_cpus); - a->sched_cpus = bitmap_alloc(cpu_set_size * 8); + a->sched_cpus = bitmap_zalloc(cpu_set_size * 8); if (!a->sched_cpus) { zfree(&a->orig_cpus); return -1; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index aa04a3655236..0bae061b2d6d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1833,7 +1833,7 @@ static int symbol__disassemble_bpf(struct symbol *sym, ret = 0; out: free(prog_linfo); - free(btf); + btf__free(btf); fclose(s); bfd_close(bfdf); return ret; @@ -2192,8 +2192,10 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, return errno; args.arch = arch = arch__find(arch_name); - if (arch == NULL) + if (arch == NULL) { + pr_err("%s: unsupported arch %s\n", __func__, arch_name); return ENOTSUP; + } if (parch) *parch = arch; @@ -2787,9 +2789,17 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, struct rb_root source_line = RB_ROOT; struct hists *hists = evsel__hists(evsel); char buf[1024]; + int err; - if (symbol__annotate2(ms, evsel, opts, NULL) < 0) + err = symbol__annotate2(ms, evsel, opts, NULL); + if (err) { + char msg[BUFSIZ]; + + dso->annotate_warned = true; + symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); + ui__error("Couldn't annotate %s:\n%s", sym->name, msg); return -1; + } if (opts->print_lines) { srcline_full_filename = opts->full_path; @@ -2813,9 +2823,17 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, struct dso *dso = ms->map->dso; struct symbol *sym = ms->sym; struct rb_root source_line = RB_ROOT; + int err; + + err = symbol__annotate(ms, evsel, opts, NULL); + if (err) { + char msg[BUFSIZ]; - if (symbol__annotate(ms, evsel, opts, NULL) < 0) + dso->annotate_warned = true; + symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); + ui__error("Couldn't annotate %s:\n%s", sym->name, msg); return -1; + } symbol__calc_percent(sym, evsel); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index cb19669d2a5b..8d2865b9ade2 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -130,11 +130,6 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, return 0; } -#if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) - pr_err("Cannot use AUX area tracing mmaps\n"); - return -1; -#endif - pc->aux_offset = mp->offset; pc->aux_size = mp->len; @@ -1674,6 +1669,82 @@ int perf_event__process_auxtrace_error(struct perf_session *session, return 0; } +/* + * In compat mode, the kernel runs in 64-bit mode while the perf tool runs in 32-bit + * mode; the 32-bit perf tool cannot access a 64-bit value atomically, which can lead + * to problems caused by the sequence below on multiple CPUs: when the perf tool + * performs either a load or a store of a 64-bit value, on some architectures the + * operation is divided into two instructions, one accessing the low 32 bits and + * another accessing the high 32 bits; these two user operations therefore give the + * kernel a window in which to access the 64-bit value, leading to unexpected load values.
 + * + * kernel (64-bit) user (32-bit) + * + * if (LOAD ->aux_tail) { --, LOAD ->aux_head_lo + * STORE $aux_data | ,---> + * FLUSH $aux_data | | LOAD ->aux_head_hi + * STORE ->aux_head --|-------` smp_rmb() + * } | LOAD $data + * | smp_mb() + * | STORE ->aux_tail_lo + * `-----------> + * STORE ->aux_tail_hi + * + * For this reason, it's impossible for the perf tool to work correctly when the AUX + * head or tail grows bigger than 4GB (more than 32 bits long); and we cannot simply + * limit the AUX ring buffer to less than 4GB, because the pointers increase + * monotonically: whatever the buffer size, the head and tail will eventually exceed + * 4GB and carry into the high 32 bits. + * + * To mitigate the issue and improve the user experience, we can allow the perf tool + * to work under certain conditions and bail out with an error if an overflow is + * detected that cannot be handled. + * + * For reading the AUX head, the value is read three times and the high 4 bytes of + * the first and last reads are compared; if the kernel did not change the high + * 4 bytes during the user read sequence, it's safe to use the second value. + * + * When compat_auxtrace_mmap__write_tail() detects a carry into the high 32 bits, + * the write would take two store operations in user space and the atomicity of the + * 64-bit write cannot be guaranteed, so it returns '-1' in this case to tell the + * caller that an overflow error has happened. + */ +u64 __weak compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm) +{ + struct perf_event_mmap_page *pc = mm->userpg; + u64 first, second, last; + u64 mask = (u64)(UINT32_MAX) << 32; + + do { + first = READ_ONCE(pc->aux_head); + /* Ensure all reads are done after we read the head */ + smp_rmb(); + second = READ_ONCE(pc->aux_head); + /* Ensure all reads are done after we read the head */ + smp_rmb(); + last = READ_ONCE(pc->aux_head); + } while ((first & mask) != (last & mask)); + + return second; +} + +int __weak compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail) +{ + struct perf_event_mmap_page *pc = mm->userpg; + u64 mask = (u64)(UINT32_MAX) << 32; + + if (tail & mask) + return -1; + + /* Ensure all reads are done before we write the tail out */ + smp_mb(); + WRITE_ONCE(pc->aux_tail, tail); + return 0; +} + static int __auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn, @@ -1685,15 +1756,13 @@ static int __auxtrace_mmap__read(struct mmap *map, size_t size, head_off, old_off, len1, len2, padding; union perf_event ev; void *data1, *data2; + int kernel_is_64_bit = perf_env__kernel_is_64_bit(evsel__env(NULL)); - if (snapshot) { - head = auxtrace_mmap__read_snapshot_head(mm); - if (auxtrace_record__find_snapshot(itr, mm->idx, mm, data, - &head, &old)) - return -1; - } else { - head = auxtrace_mmap__read_head(mm); - } + head = auxtrace_mmap__read_head(mm, kernel_is_64_bit); + + if (snapshot && + auxtrace_record__find_snapshot(itr, mm->idx, mm, data, &head, &old)) + return -1; if (old == head) return 0; @@ -1772,10 +1841,13 @@ static int __auxtrace_mmap__read(struct mmap *map, mm->prev = head; if (!snapshot) { - auxtrace_mmap__write_tail(mm, head); - if (itr->read_finish) { - int err; + int err; + err = auxtrace_mmap__write_tail(mm, head, kernel_is_64_bit); + if (err < 0) + return err; + + if (itr->read_finish) { err = itr->read_finish(itr, mm->idx); if (err < 0) return err;
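For illustration, a minimal sketch of how a 32-bit consumer might use the two compat helpers added above; the consume_aux() wrapper and its bookkeeping are hypothetical, only the two compat_auxtrace_mmap__*() helpers come from this patch:

	/* Hypothetical caller: drain one window of AUX data and propagate the
	 * overflow error from the compat tail write. */
	static int consume_aux(struct auxtrace_mmap *mm)
	{
		u64 head = compat_auxtrace_mmap__read_head(mm);
		u64 old = mm->prev;

		if (old == head)
			return 0;

		/* ... copy out the trace bytes in [old, head) here ... */

		mm->prev = head;
		/* Fails with -1 once the tail would carry into the high 32 bits. */
		return compat_auxtrace_mmap__write_tail(mm, head);
	}

diff --git a/tools/perf/util/auxtrace.h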
b/tools/perf/util/auxtrace.h index cc1c1b9cec9c..5f383908ca6e 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -440,52 +440,39 @@ struct auxtrace_cache; #ifdef HAVE_AUXTRACE_SUPPORT -/* - * In snapshot mode the mmapped page is read-only which makes using - * __sync_val_compare_and_swap() problematic. However, snapshot mode expects - * the buffer is not updated while the snapshot is made (e.g. Intel PT disables - * the event) so there is not a race anyway. - */ -static inline u64 auxtrace_mmap__read_snapshot_head(struct auxtrace_mmap *mm) -{ - struct perf_event_mmap_page *pc = mm->userpg; - u64 head = READ_ONCE(pc->aux_head); - - /* Ensure all reads are done after we read the head */ - smp_rmb(); - return head; -} +u64 compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm); +int compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail); -static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm) +static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm, + int kernel_is_64_bit __maybe_unused) { struct perf_event_mmap_page *pc = mm->userpg; -#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) - u64 head = READ_ONCE(pc->aux_head); -#else - u64 head = __sync_val_compare_and_swap(&pc->aux_head, 0, 0); + u64 head; + +#if BITS_PER_LONG == 32 + if (kernel_is_64_bit) + return compat_auxtrace_mmap__read_head(mm); #endif + head = READ_ONCE(pc->aux_head); /* Ensure all reads are done after we read the head */ smp_rmb(); return head; } -static inline void auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail) +static inline int auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail, + int kernel_is_64_bit __maybe_unused) { struct perf_event_mmap_page *pc = mm->userpg; -#if BITS_PER_LONG != 64 && defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) - u64 old_tail; -#endif +#if BITS_PER_LONG == 32 + if (kernel_is_64_bit) + return compat_auxtrace_mmap__write_tail(mm, tail); +#endif /* Ensure all reads are done before we write the tail out */ smp_mb(); -#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) - pc->aux_tail = tail; -#else - do { - old_tail = __sync_val_compare_and_swap(&pc->aux_tail, 0, 0); - } while (!__sync_bool_compare_and_swap(&pc->aux_tail, old_tail, tail)); -#endif + WRITE_ONCE(pc->aux_tail, tail); + return 0; } int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 63d472b336de..4fb5e90d7a57 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -581,7 +581,10 @@ const char *perf_home_perfconfig(void) static const char *config; static bool failed; - config = failed ? 
NULL : home_perfconfig(); + if (failed || config) + return config; + + config = home_perfconfig(); if (!config) failed = true; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 3e1a05bc82cc..31fa3b45134a 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -13,8 +13,6 @@ #include <linux/zalloc.h> #include <stdlib.h> #include <opencsd/c_api/opencsd_c_api.h> -#include <opencsd/etmv4/trc_pkt_types_etmv4.h> -#include <opencsd/ocsd_if_types.h> #include "cs-etm.h" #include "cs-etm-decoder.h" @@ -35,9 +33,11 @@ struct cs_etm_decoder { void *data; void (*packet_printer)(const char *msg); + bool suppress_printing; dcd_tree_handle_t dcd_tree; cs_etm_mem_cb_type mem_access; ocsd_datapath_resp_t prev_return; + const char *decoder_name; }; static u32 @@ -74,9 +74,10 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder) ocsd_datapath_resp_t dp_ret; decoder->prev_return = OCSD_RESP_CONT; - + decoder->suppress_printing = true; dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET, 0, 0, NULL, NULL); + decoder->suppress_printing = false; if (OCSD_DATA_RESP_IS_FATAL(dp_ret)) return -1; @@ -124,6 +125,21 @@ static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params, return 0; } +#define TRCIDR1_TRCARCHMIN_SHIFT 4 +#define TRCIDR1_TRCARCHMIN_MASK GENMASK(7, 4) +#define TRCIDR1_TRCARCHMIN(x) (((x) & TRCIDR1_TRCARCHMIN_MASK) >> TRCIDR1_TRCARCHMIN_SHIFT) + +static enum _ocsd_arch_version cs_etm_decoder__get_etmv4_arch_ver(u32 reg_idr1) +{ + /* + * For ETMv4 if the trace minor version is 4 or more then we can assume + * the architecture is ARCH_AA64 rather than just V8. + * ARCH_V8 = V8 architecture + * ARCH_AA64 = Min v8r3 plus additional AA64 PE features + */ + return TRCIDR1_TRCARCHMIN(reg_idr1) >= 4 ? 
ARCH_AA64 : ARCH_V8; +} + static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, ocsd_etmv4_cfg *config) { @@ -138,7 +154,21 @@ static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, config->reg_idr11 = 0; config->reg_idr12 = 0; config->reg_idr13 = 0; - config->arch_ver = ARCH_V8; + config->arch_ver = cs_etm_decoder__get_etmv4_arch_ver(params->etmv4.reg_idr1); + config->core_prof = profile_CortexA; +} + +static void cs_etm_decoder__gen_ete_config(struct cs_etm_trace_params *params, + ocsd_ete_cfg *config) +{ + config->reg_configr = params->ete.reg_configr; + config->reg_traceidr = params->ete.reg_traceidr; + config->reg_idr0 = params->ete.reg_idr0; + config->reg_idr1 = params->ete.reg_idr1; + config->reg_idr2 = params->ete.reg_idr2; + config->reg_idr8 = params->ete.reg_idr8; + config->reg_devarch = params->ete.reg_devarch; + config->arch_ver = ARCH_AA64; config->core_prof = profile_CortexA; } @@ -146,8 +176,10 @@ static void cs_etm_decoder__print_str_cb(const void *p_context, const char *msg, const int str_len) { - if (p_context && str_len) - ((struct cs_etm_decoder *)p_context)->packet_printer(msg); + const struct cs_etm_decoder *decoder = p_context; + + if (p_context && str_len && !decoder->suppress_printing) + decoder->packet_printer(msg); } static int @@ -223,55 +255,6 @@ cs_etm_decoder__init_raw_frame_logging( } #endif -static int cs_etm_decoder__create_packet_printer(struct cs_etm_decoder *decoder, - const char *decoder_name, - void *trace_config) -{ - u8 csid; - - if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder_name, - OCSD_CREATE_FLG_PACKET_PROC, - trace_config, &csid)) - return -1; - - if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0)) - return -1; - - return 0; -} - -static int -cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params, - struct cs_etm_decoder *decoder) -{ - const char *decoder_name; - ocsd_etmv3_cfg config_etmv3; - ocsd_etmv4_cfg trace_config_etmv4; - void *trace_config; - - switch (t_params->protocol) { - case CS_ETM_PROTO_ETMV3: - case CS_ETM_PROTO_PTM: - cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3); - decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ? - OCSD_BUILTIN_DCD_ETMV3 : - OCSD_BUILTIN_DCD_PTM; - trace_config = &config_etmv3; - break; - case CS_ETM_PROTO_ETMV4i: - cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); - decoder_name = OCSD_BUILTIN_DCD_ETMV4I; - trace_config = &trace_config_etmv4; - break; - default: - return -1; - } - - return cs_etm_decoder__create_packet_printer(decoder, - decoder_name, - trace_config); -} - static ocsd_datapath_resp_t cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq, struct cs_etm_packet_queue *packet_queue, @@ -324,8 +307,11 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, * underflow. */ packet_queue->cs_timestamp = 0; - WARN_ONCE(true, "Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR - ". Decoding may be improved with --itrace=Z...\n", indx); + if (!cs_etm__etmq_is_timeless(etmq)) + pr_warning_once("Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR + ". 
Decoding may be improved by prepending 'Z' to your current --itrace arguments.\n", + indx); + } else if (packet_queue->instr_count > elem->timestamp) { /* * Sanity check that the elem->timestamp - packet_queue->instr_count would not @@ -625,13 +611,14 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( return resp; } -static int cs_etm_decoder__create_etm_packet_decoder( - struct cs_etm_trace_params *t_params, - struct cs_etm_decoder *decoder) +static int +cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, + struct cs_etm_trace_params *t_params, + struct cs_etm_decoder *decoder) { - const char *decoder_name; ocsd_etmv3_cfg config_etmv3; ocsd_etmv4_cfg trace_config_etmv4; + ocsd_ete_cfg trace_config_ete; void *trace_config; u8 csid; @@ -639,51 +626,55 @@ static int cs_etm_decoder__create_etm_packet_decoder( case CS_ETM_PROTO_ETMV3: case CS_ETM_PROTO_PTM: cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3); - decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ? + decoder->decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ? OCSD_BUILTIN_DCD_ETMV3 : OCSD_BUILTIN_DCD_PTM; trace_config = &config_etmv3; break; case CS_ETM_PROTO_ETMV4i: cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); - decoder_name = OCSD_BUILTIN_DCD_ETMV4I; + decoder->decoder_name = OCSD_BUILTIN_DCD_ETMV4I; trace_config = &trace_config_etmv4; break; + case CS_ETM_PROTO_ETE: + cs_etm_decoder__gen_ete_config(t_params, &trace_config_ete); + decoder->decoder_name = OCSD_BUILTIN_DCD_ETE; + trace_config = &trace_config_ete; + break; default: return -1; } - if (ocsd_dt_create_decoder(decoder->dcd_tree, - decoder_name, - OCSD_CREATE_FLG_FULL_DECODER, - trace_config, &csid)) - return -1; + if (d_params->operation == CS_ETM_OPERATION_DECODE) { + if (ocsd_dt_create_decoder(decoder->dcd_tree, + decoder->decoder_name, + OCSD_CREATE_FLG_FULL_DECODER, + trace_config, &csid)) + return -1; - if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree, - cs_etm_decoder__gen_trace_elem_printer, - decoder)) - return -1; + if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree, + cs_etm_decoder__gen_trace_elem_printer, + decoder)) + return -1; - return 0; -} + return 0; + } else if (d_params->operation == CS_ETM_OPERATION_PRINT) { + if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder->decoder_name, + OCSD_CREATE_FLG_PACKET_PROC, + trace_config, &csid)) + return -1; -static int -cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, - struct cs_etm_trace_params *t_params, - struct cs_etm_decoder *decoder) -{ - if (d_params->operation == CS_ETM_OPERATION_PRINT) - return cs_etm_decoder__create_etm_packet_printer(t_params, - decoder); - else if (d_params->operation == CS_ETM_OPERATION_DECODE) - return cs_etm_decoder__create_etm_packet_decoder(t_params, - decoder); + if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0)) + return -1; + + return 0; + } return -1; } struct cs_etm_decoder * -cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, +cs_etm_decoder__new(int decoders, struct cs_etm_decoder_params *d_params, struct cs_etm_trace_params t_params[]) { struct cs_etm_decoder *decoder; @@ -728,7 +719,7 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, /* init raw frame logging if required */ cs_etm_decoder__init_raw_frame_logging(d_params, decoder); - for (i = 0; i < num_cpu; i++) { + for (i = 0; i < decoders; i++) { ret = cs_etm_decoder__create_etm_decoder(d_params, &t_params[i], decoder); @@ -800,3 +791,8 @@ void 
cs_etm_decoder__free(struct cs_etm_decoder *decoder) decoder->dcd_tree = NULL; free(decoder); } + +const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder) +{ + return decoder->decoder_name; +} diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 11f3391d06f2..92a855fbe5b8 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -37,11 +37,22 @@ struct cs_etmv4_trace_params { u32 reg_traceidr; }; +struct cs_ete_trace_params { + u32 reg_idr0; + u32 reg_idr1; + u32 reg_idr2; + u32 reg_idr8; + u32 reg_configr; + u32 reg_traceidr; + u32 reg_devarch; +}; + struct cs_etm_trace_params { int protocol; union { struct cs_etmv3_trace_params etmv3; struct cs_etmv4_trace_params etmv4; + struct cs_ete_trace_params ete; }; }; @@ -65,6 +76,7 @@ enum { CS_ETM_PROTO_ETMV4i, CS_ETM_PROTO_ETMV4d, CS_ETM_PROTO_PTM, + CS_ETM_PROTO_ETE }; enum cs_etm_decoder_operation { @@ -92,5 +104,6 @@ int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue, struct cs_etm_packet *packet); int cs_etm_decoder__reset(struct cs_etm_decoder *decoder); +const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder); #endif /* INCLUDE__CS_ETM_DECODER_H__ */ diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index bc1f64873c8f..f323adb1af85 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -62,7 +62,6 @@ struct cs_etm_auxtrace { u64 instructions_sample_period; u64 instructions_id; u64 **metadata; - u64 kernel_start; unsigned int pmu_type; }; @@ -97,7 +96,6 @@ struct cs_etm_queue { /* RB tree for quick conversion between traceID and metadata pointers */ static struct intlist *traceid_list; -static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid); @@ -462,14 +460,30 @@ static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; } +static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, + struct cs_etm_auxtrace *etm, int idx) +{ + u64 **metadata = etm->metadata; + + t_params[idx].protocol = CS_ETM_PROTO_ETE; + t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0]; + t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1]; + t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2]; + t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8]; + t_params[idx].ete.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR]; + t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; + t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH]; +} + static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, - struct cs_etm_auxtrace *etm) + struct cs_etm_auxtrace *etm, + int decoders) { int i; u32 etmidr; u64 architecture; - for (i = 0; i < etm->num_cpu; i++) { + for (i = 0; i < decoders; i++) { architecture = etm->metadata[i][CS_ETM_MAGIC]; switch (architecture) { @@ -480,6 +494,9 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, case __perf_cs_etmv4_magic: cs_etm__set_trace_param_etmv4(t_params, etm, i); break; + case __perf_cs_ete_magic: + cs_etm__set_trace_param_ete(t_params, etm, i); + break; default: return -EINVAL; } @@ -490,7 +507,8 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, 
static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, struct cs_etm_queue *etmq, - enum cs_etm_decoder_operation mode) + enum cs_etm_decoder_operation mode, + bool formatted) { int ret = -EINVAL; @@ -500,7 +518,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, d_params->packet_printer = cs_etm__packet_dump; d_params->operation = mode; d_params->data = etmq; - d_params->formatted = true; + d_params->formatted = formatted; d_params->fsyncs = false; d_params->hsyncs = false; d_params->frame_aligned = true; @@ -510,44 +528,23 @@ out: return ret; } -static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, +static void cs_etm__dump_event(struct cs_etm_queue *etmq, struct auxtrace_buffer *buffer) { int ret; const char *color = PERF_COLOR_BLUE; - struct cs_etm_decoder_params d_params; - struct cs_etm_trace_params *t_params; - struct cs_etm_decoder *decoder; size_t buffer_used = 0; fprintf(stdout, "\n"); color_fprintf(stdout, color, - ". ... CoreSight ETM Trace data: size %zu bytes\n", - buffer->size); - - /* Use metadata to fill in trace parameters for trace decoder */ - t_params = zalloc(sizeof(*t_params) * etm->num_cpu); - - if (!t_params) - return; - - if (cs_etm__init_trace_params(t_params, etm)) - goto out_free; + ". ... CoreSight %s Trace data: size %zu bytes\n", + cs_etm_decoder__get_name(etmq->decoder), buffer->size); - /* Set decoder parameters to simply print the trace packets */ - if (cs_etm__init_decoder_params(&d_params, NULL, - CS_ETM_OPERATION_PRINT)) - goto out_free; - - decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); - - if (!decoder) - goto out_free; do { size_t consumed; ret = cs_etm_decoder__process_data_block( - decoder, buffer->offset, + etmq->decoder, buffer->offset, &((u8 *)buffer->data)[buffer_used], buffer->size - buffer_used, &consumed); if (ret) @@ -556,16 +553,12 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, buffer_used += consumed; } while (buffer_used < buffer->size); - cs_etm_decoder__free(decoder); - -out_free: - zfree(&t_params); + cs_etm_decoder__reset(etmq->decoder); } static int cs_etm__flush_events(struct perf_session *session, struct perf_tool *tool) { - int ret; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); @@ -575,11 +568,6 @@ static int cs_etm__flush_events(struct perf_session *session, if (!tool->ordered_events) return -EINVAL; - ret = cs_etm__update_queues(etm); - - if (ret < 0) - return ret; - if (etm->timeless_decoding) return cs_etm__process_timeless_queues(etm, -1); @@ -691,7 +679,7 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) machine = etmq->etm->machine; - if (address >= etmq->etm->kernel_start) { + if (address >= machine__kernel_start(machine)) { if (machine__is_host(machine)) return PERF_RECORD_MISC_KERNEL; else @@ -746,17 +734,32 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size); - if (len <= 0) + if (len <= 0) { + ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n" + " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n"); + if (!al.map->dso->auxtrace_warned) { + pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n", + address, + al.map->dso->long_name ? 
al.map->dso->long_name : "Unknown"); + al.map->dso->auxtrace_warned = true; + } return 0; + } return len; } -static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) +static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, + bool formatted) { struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params = NULL; struct cs_etm_queue *etmq; + /* + * Each queue can only contain data from one CPU when unformatted, so only one decoder is + * needed. + */ + int decoders = formatted ? etm->num_cpu : 1; etmq = zalloc(sizeof(*etmq)); if (!etmq) @@ -767,20 +770,23 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) goto out_free; /* Use metadata to fill in trace parameters for trace decoder */ - t_params = zalloc(sizeof(*t_params) * etm->num_cpu); + t_params = zalloc(sizeof(*t_params) * decoders); if (!t_params) goto out_free; - if (cs_etm__init_trace_params(t_params, etm)) + if (cs_etm__init_trace_params(t_params, etm, decoders)) goto out_free; /* Set decoder parameters to decode trace packets */ if (cs_etm__init_decoder_params(&d_params, etmq, - CS_ETM_OPERATION_DECODE)) + dump_trace ? CS_ETM_OPERATION_PRINT : + CS_ETM_OPERATION_DECODE, + formatted)) goto out_free; - etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); + etmq->decoder = cs_etm_decoder__new(decoders, &d_params, + t_params); if (!etmq->decoder) goto out_free; @@ -808,31 +814,35 @@ out_free: static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, struct auxtrace_queue *queue, - unsigned int queue_nr) + unsigned int queue_nr, + bool formatted) { - int ret = 0; - unsigned int cs_queue_nr; - u8 trace_chan_id; - u64 cs_timestamp; struct cs_etm_queue *etmq = queue->priv; if (list_empty(&queue->head) || etmq) - goto out; + return 0; - etmq = cs_etm__alloc_queue(etm); + etmq = cs_etm__alloc_queue(etm, formatted); - if (!etmq) { - ret = -ENOMEM; - goto out; - } + if (!etmq) + return -ENOMEM; queue->priv = etmq; etmq->etm = etm; etmq->queue_nr = queue_nr; etmq->offset = 0; - if (etm->timeless_decoding) - goto out; + return 0; +} + +static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm, + struct cs_etm_queue *etmq, + unsigned int queue_nr) +{ + int ret = 0; + unsigned int cs_queue_nr; + u8 trace_chan_id; + u64 cs_timestamp; /* * We are under a CPU-wide trace scenario. 
As such we need to know @@ -896,33 +906,6 @@ out: return ret; } -static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) -{ - unsigned int i; - int ret; - - if (!etm->kernel_start) - etm->kernel_start = machine__kernel_start(etm->machine); - - for (i = 0; i < etm->queues.nr_queues; i++) { - ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); - if (ret) - return ret; - } - - return 0; -} - -static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) -{ - if (etm->queues.new_data) { - etm->queues.new_data = false; - return cs_etm__setup_queues(etm); - } - - return 0; -} - static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq) @@ -2222,13 +2205,27 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) { int ret = 0; - unsigned int cs_queue_nr, queue_nr; + unsigned int cs_queue_nr, queue_nr, i; u8 trace_chan_id; u64 cs_timestamp; struct auxtrace_queue *queue; struct cs_etm_queue *etmq; struct cs_etm_traceid_queue *tidq; + /* + * Pre-populate the heap with one entry from each queue so that we can + * start processing in time order across all queues. + */ + for (i = 0; i < etm->queues.nr_queues; i++) { + etmq = etm->queues.queue_array[i].priv; + if (!etmq) + continue; + + ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i); + if (ret) + return ret; + } + while (1) { if (!etm->heap.heap_cnt) goto out; @@ -2382,7 +2379,6 @@ static int cs_etm__process_event(struct perf_session *session, struct perf_sample *sample, struct perf_tool *tool) { - int err = 0; u64 sample_kernel_timestamp; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, @@ -2401,12 +2397,6 @@ static int cs_etm__process_event(struct perf_session *session, else sample_kernel_timestamp = 0; - if (sample_kernel_timestamp || etm->timeless_decoding) { - err = cs_etm__update_queues(etm); - if (err) - return err; - } - /* * Don't wait for cs_etm__flush_events() in per-thread/timeless mode to start the decode. We * need the tid of the PERF_RECORD_EXIT event to assign to the synthesised samples because @@ -2447,7 +2437,7 @@ static void dump_queued_data(struct cs_etm_auxtrace *etm, for (i = 0; i < etm->queues.nr_queues; ++i) list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) if (buf->reference == event->reference) - cs_etm__dump_event(etm, buf); + cs_etm__dump_event(etm->queues.queue_array[i].priv, buf); } static int cs_etm__process_auxtrace_event(struct perf_session *session, @@ -2463,6 +2453,7 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, int fd = perf_data__fd(session->data); bool is_pipe = perf_data__is_pipe(session->data); int err; + int idx = event->auxtrace.idx; if (is_pipe) data_offset = 0; @@ -2477,9 +2468,20 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, if (err) return err; + /* + * Knowing if the trace is formatted or not requires a lookup of + * the aux record so only works in non-piped mode where data is + * queued in cs_etm__queue_aux_records(). Always assume + * formatted in piped mode (true). 
+ */ + err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], + idx, true); + if (err) + return err; + if (dump_trace) if (auxtrace_buffer__get_data(buffer, fd)) { - cs_etm__dump_event(etm, buffer); + cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer); auxtrace_buffer__put_data(buffer); } } else if (dump_trace) @@ -2537,6 +2539,7 @@ static const char * const cs_etmv4_priv_fmts[] = { [CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n", [CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n", [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", + [CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n" }; static const char * const param_unk_fmt = @@ -2596,10 +2599,15 @@ static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset) else fprintf(stdout, cs_etm_priv_fmts[j], val[i]); } - } else if (magic == __perf_cs_etmv4_magic) { + } else if (magic == __perf_cs_etmv4_magic || magic == __perf_cs_ete_magic) { + /* + * ETE and ETMv4 can be printed in the same block because the number of parameters + * is saved and they share the list of parameter names. ETE is also only supported + * in V1 files. + */ for (j = 0; j < total_params; j++, i++) { /* if newer record - could be excess params */ - if (j >= CS_ETMV4_PRIV_MAX) + if (j >= CS_ETE_PRIV_MAX) fprintf(stdout, param_unk_fmt, j, val[i]); else fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); @@ -2719,6 +2727,8 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o struct perf_record_auxtrace *auxtrace_event; union perf_event auxtrace_fragment; __u64 aux_offset, aux_size; + __u32 idx; + bool formatted; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, @@ -2780,8 +2790,15 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64 " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu); - return auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, - file_offset, NULL); + err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, + file_offset, NULL); + if (err) + return err; + + idx = auxtrace_event->idx; + formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW); + return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], + idx, formatted); } /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ @@ -2959,6 +2976,16 @@ int cs_etm__process_auxtrace_info(union perf_event *event, /* The traceID is our handle */ trcidr_idx = CS_ETMV4_TRCTRACEIDR; + } else if (ptr[i] == __perf_cs_ete_magic) { + metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1); + + /* ETE shares first part of metadata with ETMv4 */ + trcidr_idx = CS_ETMV4_TRCTRACEIDR; + } else { + ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n", + ptr[i]); + err = -EINVAL; + goto err_free_metadata; } if (!metadata[j]) { @@ -3070,6 +3097,13 @@ int cs_etm__process_auxtrace_info(union perf_event *event, goto err_delete_thread; etm->data_queued = etm->queues.populated; + /* + * Print warning in pipe mode, see cs_etm__process_auxtrace_event() and + * cs_etm__queue_aux_fragment() for details relating to limitations. 
+ */ + if (!etm->data_queued) + pr_warning("CS ETM warning: Coresight decode and TRBE support requires random file access.\n" + "Continuing with best effort decoding in piped mode.\n\n"); return 0; diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index d65c7b19407d..90c83f932d9a 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -77,6 +77,15 @@ enum { #define CS_ETMV4_NR_TRC_PARAMS_V0 (CS_ETMV4_TRCAUTHSTATUS - CS_ETMV4_TRCCONFIGR + 1) /* + * ETE metadata is ETMv4 plus TRCDEVARCH register and doesn't support header V0 since it was + * added in header V1 + */ +enum { + CS_ETE_TRCDEVARCH = CS_ETMV4_PRIV_MAX, + CS_ETE_PRIV_MAX +}; + +/* * ETMv3 exception encoding number: * See Embedded Trace Macrocell specification (ARM IHI 0014Q) * table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors. @@ -187,8 +196,10 @@ struct cs_etm_packet_queue { #define __perf_cs_etmv3_magic 0x3030303030303030ULL #define __perf_cs_etmv4_magic 0x4040404040404040ULL +#define __perf_cs_ete_magic 0x5050505050505050ULL #define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64)) #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) +#define CS_ETE_PRIV_SIZE (CS_ETE_PRIV_MAX * sizeof(u64)) #ifdef HAVE_CSTRACE_SUPPORT int cs_etm__process_auxtrace_info(union perf_event *event, diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index cace349fb700..aa862a26d95c 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1634,7 +1634,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, err = -1; /* perf.data session */ - session = perf_session__new(&data, 0, &c.tool); + session = perf_session__new(&data, &c.tool); if (IS_ERR(session)) return PTR_ERR(session); diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c index 355cd1948bdf..f1ab6edba446 100644 --- a/tools/perf/util/data-convert-json.c +++ b/tools/perf/util/data-convert-json.c @@ -334,7 +334,7 @@ int bt_convert__perf2json(const char *input_name, const char *output_name, goto err; } - session = perf_session__new(&data, false, &c.tool); + session = perf_session__new(&data, &c.tool); if (IS_ERR(session)) { fprintf(stderr, "Error creating perf session!\n"); goto err_fclose; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 48f631966067..f99468a7f681 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -22,6 +22,13 @@ extern int debug_data_convert; eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) #define pr_warning(fmt, ...) \ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warning_once(fmt, ...) ({ \ + static int __warned; \ + if (unlikely(!__warned)) { \ + pr_warning(fmt, ##__VA_ARGS__); \ + __warned = 1; \ + } \ +}) #define pr_info(fmt, ...) \ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) #define pr_debug(fmt, ...) \ @@ -55,6 +62,13 @@ void trace_event(union perf_event *event); int ui__error(const char *format, ...) __printf(1, 2); int ui__warning(const char *format, ...) __printf(1, 2); +#define ui__warning_once(format, ...) 
({ \ + static int __warned; \ + if (unlikely(!__warned)) { \ + ui__warning(format, ##__VA_ARGS__); \ + __warned = 1; \ + } \ +}) void pr_stat(const char *fmt, ...); diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c index ca33fbc5efde..db964d5a52af 100644 --- a/tools/perf/util/dlfilter.c +++ b/tools/perf/util/dlfilter.c @@ -21,7 +21,7 @@ #include "symbol.h" #include "srcline.h" #include "dlfilter.h" -#include "perf_dlfilter.h" +#include "../include/perf/perf_dlfilter.h" static void al_to_d_al(struct addr_location *al, struct perf_dlfilter_al *d_al) { @@ -530,8 +530,8 @@ int dlfilter__do_filter_event(struct dlfilter *d, return ret; } -static bool get_filter_desc(const char *dirname, const char *name, - char **desc, char **long_desc) +bool get_filter_desc(const char *dirname, const char *name, char **desc, + char **long_desc) { char path[PATH_MAX]; void *handle; diff --git a/tools/perf/util/dlfilter.h b/tools/perf/util/dlfilter.h index 505980442360..cc4bb9657d05 100644 --- a/tools/perf/util/dlfilter.h +++ b/tools/perf/util/dlfilter.h @@ -93,5 +93,7 @@ static inline int dlfilter__filter_event_early(struct dlfilter *d, } int list_available_dlfilters(const struct option *opt, const char *s, int unset); +bool get_filter_desc(const char *dirname, const char *name, char **desc, + char **long_desc); #endif diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 52e7101c5609..83723ba11dc8 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -170,6 +170,7 @@ struct dso { u8 has_srcline:1; u8 hit:1; u8 annotate_warned:1; + u8 auxtrace_warned:1; u8 short_name_allocated:1; u8 long_name_allocated:1; u8 is_64_bit:1; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index cec2e6cad8aa..8f7ff0035c41 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -219,13 +219,35 @@ void perf_env__exit(struct perf_env *env) zfree(&env->hybrid_cpc_nodes); } -void perf_env__init(struct perf_env *env __maybe_unused) +void perf_env__init(struct perf_env *env) { #ifdef HAVE_LIBBPF_SUPPORT env->bpf_progs.infos = RB_ROOT; env->bpf_progs.btfs = RB_ROOT; init_rwsem(&env->bpf_progs.lock); #endif + env->kernel_is_64_bit = -1; +} + +static void perf_env__init_kernel_mode(struct perf_env *env) +{ + const char *arch = perf_env__raw_arch(env); + + if (!strncmp(arch, "x86_64", 6) || !strncmp(arch, "aarch64", 7) || + !strncmp(arch, "arm64", 5) || !strncmp(arch, "mips64", 6) || + !strncmp(arch, "parisc64", 8) || !strncmp(arch, "riscv64", 7) || + !strncmp(arch, "s390x", 5) || !strncmp(arch, "sparc64", 7)) + env->kernel_is_64_bit = 1; + else + env->kernel_is_64_bit = 0; +} + +int perf_env__kernel_is_64_bit(struct perf_env *env) +{ + if (env->kernel_is_64_bit == -1) + perf_env__init_kernel_mode(env); + + return env->kernel_is_64_bit; } int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) @@ -349,7 +371,7 @@ static const char *normalize_arch(char *arch) return "x86"; if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5)) return "sparc"; - if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64")) + if (!strncmp(arch, "aarch64", 7) || !strncmp(arch, "arm64", 5)) return "arm64"; if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110")) return "arm"; diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 6824a7423a2d..1f5175820a05 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -61,6 +61,7 @@ struct perf_env { unsigned long long total_mem; unsigned int msr_pmu_type; unsigned int max_branches; + int kernel_is_64_bit; int nr_cmdline; 
int nr_sibling_cores; @@ -143,6 +144,8 @@ extern struct perf_env perf_env; void perf_env__exit(struct perf_env *env); +int perf_env__kernel_is_64_bit(struct perf_env *env); + int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpuid(struct perf_env *env); diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h index 3480bafd414b..1b0006092265 100644 --- a/tools/perf/util/events_stats.h +++ b/tools/perf/util/events_stats.h @@ -30,6 +30,7 @@ struct events_stats { u64 total_lost_samples; u64 total_aux_lost; u64 total_aux_partial; + u64 total_aux_collision; u64 total_invalid_chains; u32 nr_events[PERF_RECORD_HEADER_MAX]; u32 nr_lost_warned; diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c index db3f5fbdebe1..7c554234b43d 100644 --- a/tools/perf/util/evlist-hybrid.c +++ b/tools/perf/util/evlist-hybrid.c @@ -86,3 +86,76 @@ bool evlist__has_hybrid(struct evlist *evlist) return false; } + +int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list) +{ + struct perf_cpu_map *cpus; + struct evsel *evsel, *tmp; + struct perf_pmu *pmu; + int ret, unmatched_count = 0, events_nr = 0; + + if (!perf_pmu__has_hybrid() || !cpu_list) + return 0; + + cpus = perf_cpu_map__new(cpu_list); + if (!cpus) + return -1; + + /* + * The evsels are created with hybrid pmu's cpus. But now we + * need to check and adjust the cpus of evsel by cpu_list because + * cpu_list may cause conflicts with cpus of evsel. For example, + * cpus of evsel is cpu0-7, but the cpu_list is cpu6-8, we need + * to adjust the cpus of evsel to cpu6-7. And then propagate maps + * in evlist__create_maps(). + */ + evlist__for_each_entry_safe(evlist, tmp, evsel) { + struct perf_cpu_map *matched_cpus, *unmatched_cpus; + char buf1[128], buf2[128]; + + pmu = perf_pmu__find_hybrid_pmu(evsel->pmu_name); + if (!pmu) + continue; + + ret = perf_pmu__cpus_match(pmu, cpus, &matched_cpus, + &unmatched_cpus); + if (ret) + goto out; + + events_nr++; + + if (matched_cpus->nr > 0 && (unmatched_cpus->nr > 0 || + matched_cpus->nr < cpus->nr || + matched_cpus->nr < pmu->cpus->nr)) { + perf_cpu_map__put(evsel->core.cpus); + perf_cpu_map__put(evsel->core.own_cpus); + evsel->core.cpus = perf_cpu_map__get(matched_cpus); + evsel->core.own_cpus = perf_cpu_map__get(matched_cpus); + + if (unmatched_cpus->nr > 0) { + cpu_map__snprint(matched_cpus, buf1, sizeof(buf1)); + pr_warning("WARNING: use %s in '%s' for '%s', skip other cpus in list.\n", + buf1, pmu->name, evsel->name); + } + } + + if (matched_cpus->nr == 0) { + evlist__remove(evlist, evsel); + evsel__delete(evsel); + + cpu_map__snprint(cpus, buf1, sizeof(buf1)); + cpu_map__snprint(pmu->cpus, buf2, sizeof(buf2)); + pr_warning("WARNING: %s isn't a '%s', please use a CPU list in the '%s' range (%s)\n", + buf1, pmu->name, pmu->name, buf2); + unmatched_count++; + } + + perf_cpu_map__put(matched_cpus); + perf_cpu_map__put(unmatched_cpus); + } + + ret = (unmatched_count == events_nr) ? 
-1 : 0; +out: + perf_cpu_map__put(cpus); + return ret; +} diff --git a/tools/perf/util/evlist-hybrid.h b/tools/perf/util/evlist-hybrid.h index 19f74b4c340a..aacdb1b0f948 100644 --- a/tools/perf/util/evlist-hybrid.h +++ b/tools/perf/util/evlist-hybrid.h @@ -10,5 +10,6 @@ int evlist__add_default_hybrid(struct evlist *evlist, bool precise); void evlist__warn_hybrid_group(struct evlist *evlist); bool evlist__has_hybrid(struct evlist *evlist); +int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list); #endif /* __PERF_EVLIST_HYBRID_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 47581a237c7a..5f92319ce258 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -27,6 +27,7 @@ #include "util/perf_api_probe.h" #include "util/evsel_fprintf.h" #include "util/evlist-hybrid.h" +#include "util/pmu.h" #include <signal.h> #include <unistd.h> #include <sched.h> @@ -1002,7 +1003,7 @@ int evlist__create_maps(struct evlist *evlist, struct target *target) if (!cpus) goto out_delete_threads; - evlist->core.has_user_cpus = !!target->cpu_list; + evlist->core.has_user_cpus = !!target->cpu_list && !target->hybrid; perf_evlist__set_maps(&evlist->core, cpus, threads); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 5c22383489ae..97bfb8d0be4f 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -276,6 +276,22 @@ void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel); __evlist__for_each_entry_continue(&(evlist)->core.entries, evsel) /** + * __evlist__for_each_entry_from - continue iteration from @evsel (included) + * @list: list_head instance to iterate + * @evsel: struct evsel iterator + */ +#define __evlist__for_each_entry_from(list, evsel) \ + list_for_each_entry_from(evsel, list, core.node) + +/** + * evlist__for_each_entry_from - continue iteration from @evsel (included) + * @evlist: evlist instance to iterate + * @evsel: struct evsel iterator + */ +#define evlist__for_each_entry_from(evlist, evsel) \ + __evlist__for_each_entry_from(&(evlist)->core.entries, evsel) + +/** * __evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order * @list: list_head instance to iterate * @evsel: struct evsel iterator diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f61e5dd53f5d..54d251327b5b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1656,7 +1656,7 @@ static int update_fds(struct evsel *evsel, return 0; } -static bool ignore_missing_thread(struct evsel *evsel, +bool evsel__ignore_missing_thread(struct evsel *evsel, int nr_cpus, int cpu, struct perf_thread_map *threads, int thread, int err) @@ -1709,59 +1709,43 @@ static void display_attr(struct perf_event_attr *attr) } } -static int perf_event_open(struct evsel *evsel, - pid_t pid, int cpu, int group_fd, - unsigned long flags) +bool evsel__precise_ip_fallback(struct evsel *evsel) { - int precise_ip = evsel->core.attr.precise_ip; - int fd; - - while (1) { - pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", - pid, cpu, group_fd, flags); - - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, group_fd, flags); - if (fd >= 0) - break; - - /* Do not try less precise if not requested. */ - if (!evsel->precise_max) - break; - - /* - * We tried all the precise_ip values, and it's - * still failing, so leave it to standard fallback. - */ - if (!evsel->core.attr.precise_ip) { - evsel->core.attr.precise_ip = precise_ip; - break; - } + /* Do not try less precise if not requested. 
*/ + if (!evsel->precise_max) + return false; - pr_debug2_peo("\nsys_perf_event_open failed, error %d\n", -ENOTSUP); - evsel->core.attr.precise_ip--; - pr_debug2_peo("decreasing precise_ip by one (%d)\n", evsel->core.attr.precise_ip); - display_attr(&evsel->core.attr); + /* + * We tried all the precise_ip values, and it's + * still failing, so leave it to standard fallback. + */ + if (!evsel->core.attr.precise_ip) { + evsel->core.attr.precise_ip = evsel->precise_ip_original; + return false; } - return fd; + if (!evsel->precise_ip_original) + evsel->precise_ip_original = evsel->core.attr.precise_ip; + + evsel->core.attr.precise_ip--; + pr_debug2_peo("decreasing precise_ip by one (%d)\n", evsel->core.attr.precise_ip); + display_attr(&evsel->core.attr); + return true; }
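As a worked example of evsel__precise_ip_fallback() above (the values are assumed for illustration, not from the patch): for an event opened with evsel->precise_max set and attr.precise_ip starting at 3, successive calls after failed opens behave as follows:

	/* call 1: saves precise_ip_original = 3, lowers precise_ip to 2, returns true
	 * call 2: lowers precise_ip to 1, returns true
	 * call 3: lowers precise_ip to 0, returns true
	 * call 4: precise_ip is 0, so precise_ip is restored to 3 and the helper
	 *         returns false, leaving the error to the standard fallback path
	 */

-static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, - struct perf_thread_map *threads, - int start_cpu, int end_cpu) +static struct perf_cpu_map *empty_cpu_map; +static struct perf_thread_map *empty_thread_map; + +static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, + struct perf_thread_map *threads) { - int cpu, thread, nthreads; - unsigned long flags = PERF_FLAG_FD_CLOEXEC; - int pid = -1, err, old_errno; - enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE; + int nthreads; if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) || (perf_missing_features.aux_output && evsel->core.attr.aux_output)) return -EINVAL; if (cpus == NULL) { - static struct perf_cpu_map *empty_cpu_map; - if (empty_cpu_map == NULL) { empty_cpu_map = perf_cpu_map__dummy_new(); if (empty_cpu_map == NULL) @@ -1772,8 +1756,6 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, if (threads == NULL) { - static struct perf_thread_map *empty_thread_map; - if (empty_thread_map == NULL) { empty_thread_map = thread_map__new_by_tid(-1); if (empty_thread_map == NULL) @@ -1792,12 +1774,15 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, perf_evsel__alloc_fd(&evsel->core, cpus->nr, nthreads) < 0) return -ENOMEM; - if (evsel->cgrp) { - flags |= PERF_FLAG_PID_CGROUP; - pid = evsel->cgrp->fd; - } + evsel->open_flags = PERF_FLAG_FD_CLOEXEC; + if (evsel->cgrp) + evsel->open_flags |= PERF_FLAG_PID_CGROUP; -fallback_missing_features: + return 0; +} + +static void evsel__disable_missing_features(struct evsel *evsel) +{ if (perf_missing_features.weight_struct) { evsel__set_sample_bit(evsel, WEIGHT); evsel__reset_sample_bit(evsel, WEIGHT_STRUCT); @@ -1809,7 +1794,7 @@ fallback_missing_features: evsel->core.attr.clockid = 0; } if (perf_missing_features.cloexec) - flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC; + evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC; if (perf_missing_features.mmap2) evsel->core.attr.mmap2 = 0; if (perf_missing_features.exclude_guest) @@ -1825,119 +1810,26 @@ fallback_missing_features: evsel->core.attr.bpf_event = 0; if (perf_missing_features.branch_hw_idx) evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX; -retry_sample_id: if (perf_missing_features.sample_id_all) evsel->core.attr.sample_id_all = 0; +} - display_attr(&evsel->core.attr); - - for (cpu = start_cpu; cpu < end_cpu; cpu++) { - - for (thread = 0; thread < nthreads; thread++) { - int fd, group_fd; - - if (!evsel->cgrp && !evsel->core.system_wide) - pid = perf_thread_map__pid(threads, thread); - - group_fd = get_group_fd(evsel, cpu, thread); -retry_open: - test_attr__ready(); - - fd = perf_event_open(evsel, pid, 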
cpus->map[cpu], - group_fd, flags); - - FD(evsel, cpu, thread) = fd; - - bpf_counter__install_pe(evsel, cpu, fd); - - if (unlikely(test_attr__enabled)) { - test_attr__open(&evsel->core.attr, pid, cpus->map[cpu], - fd, group_fd, flags); - } - - if (fd < 0) { - err = -errno; - - if (ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) { - /* - * We just removed 1 thread, so take a step - * back on thread index and lower the upper - * nthreads limit. - */ - nthreads--; - thread--; - - /* ... and pretend like nothing have happened. */ - err = 0; - continue; - } - - pr_debug2_peo("\nsys_perf_event_open failed, error %d\n", - err); - goto try_fallback; - } - - pr_debug2_peo(" = %d\n", fd); - - if (evsel->bpf_fd >= 0) { - int evt_fd = fd; - int bpf_fd = evsel->bpf_fd; - - err = ioctl(evt_fd, - PERF_EVENT_IOC_SET_BPF, - bpf_fd); - if (err && errno != EEXIST) { - pr_err("failed to attach bpf fd %d: %s\n", - bpf_fd, strerror(errno)); - err = -EINVAL; - goto out_close; - } - } - - set_rlimit = NO_CHANGE; - - /* - * If we succeeded but had to kill clockid, fail and - * have evsel__open_strerror() print us a nice error. - */ - if (perf_missing_features.clockid || - perf_missing_features.clockid_wrong) { - err = -EINVAL; - goto out_close; - } - } - } - - return 0; +int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, + struct perf_thread_map *threads) +{ + int err; -try_fallback: - /* - * perf stat needs between 5 and 22 fds per CPU. When we run out - * of them try to increase the limits. - */ - if (err == -EMFILE && set_rlimit < INCREASED_MAX) { - struct rlimit l; + err = __evsel__prepare_open(evsel, cpus, threads); + if (err) + return err; - old_errno = errno; - if (getrlimit(RLIMIT_NOFILE, &l) == 0) { - if (set_rlimit == NO_CHANGE) - l.rlim_cur = l.rlim_max; - else { - l.rlim_cur = l.rlim_max + 1000; - l.rlim_max = l.rlim_cur; - } - if (setrlimit(RLIMIT_NOFILE, &l) == 0) { - set_rlimit++; - errno = old_errno; - goto retry_open; - } - } - errno = old_errno; - } + evsel__disable_missing_features(evsel); - if (err != -EINVAL || cpu > 0 || thread > 0) - goto out_close; + return err; +} +bool evsel__detect_missing_features(struct evsel *evsel) +{ /* * Must probe features in the order they were added to the * perf_event_attr interface. 
@@ -1946,82 +1838,239 @@ try_fallback: (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) { perf_missing_features.weight_struct = true; pr_debug2("switching off weight struct support\n"); - goto fallback_missing_features; + return true; } else if (!perf_missing_features.code_page_size && (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) { perf_missing_features.code_page_size = true; pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n"); - goto out_close; + return false; } else if (!perf_missing_features.data_page_size && (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) { perf_missing_features.data_page_size = true; pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n"); - goto out_close; + return false; } else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { perf_missing_features.cgroup = true; pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n"); - goto out_close; - } else if (!perf_missing_features.branch_hw_idx && + return false; + } else if (!perf_missing_features.branch_hw_idx && (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { perf_missing_features.branch_hw_idx = true; pr_debug2("switching off branch HW index support\n"); - goto fallback_missing_features; + return true; } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { perf_missing_features.aux_output = true; pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n"); - goto out_close; + return false; } else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) { perf_missing_features.bpf = true; pr_debug2_peo("switching off bpf_event\n"); - goto fallback_missing_features; + return true; } else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) { perf_missing_features.ksymbol = true; pr_debug2_peo("switching off ksymbol\n"); - goto fallback_missing_features; + return true; } else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) { perf_missing_features.write_backward = true; pr_debug2_peo("switching off write_backward\n"); - goto out_close; + return false; } else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) { perf_missing_features.clockid_wrong = true; pr_debug2_peo("switching off clockid\n"); - goto fallback_missing_features; + return true; } else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) { perf_missing_features.clockid = true; pr_debug2_peo("switching off use_clockid\n"); - goto fallback_missing_features; - } else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) { + return true; + } else if (!perf_missing_features.cloexec && (evsel->open_flags & PERF_FLAG_FD_CLOEXEC)) { perf_missing_features.cloexec = true; pr_debug2_peo("switching off cloexec flag\n"); - goto fallback_missing_features; + return true; } else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) { perf_missing_features.mmap2 = true; pr_debug2_peo("switching off mmap2\n"); - goto fallback_missing_features; + return true; } else if (!perf_missing_features.exclude_guest && (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) { perf_missing_features.exclude_guest = true; pr_debug2_peo("switching off exclude_guest, exclude_host\n"); - goto fallback_missing_features; + return true; } else if (!perf_missing_features.sample_id_all) { perf_missing_features.sample_id_all = true; pr_debug2_peo("switching off sample_id_all\n"); - goto retry_sample_id; + 
return true; } else if (!perf_missing_features.lbr_flags && (evsel->core.attr.branch_sample_type & (PERF_SAMPLE_BRANCH_NO_CYCLES | PERF_SAMPLE_BRANCH_NO_FLAGS))) { perf_missing_features.lbr_flags = true; pr_debug2_peo("switching off branch sample type no (cycles/flags)\n"); - goto fallback_missing_features; + return true; } else if (!perf_missing_features.group_read && evsel->core.attr.inherit && (evsel->core.attr.read_format & PERF_FORMAT_GROUP) && evsel__is_group_leader(evsel)) { perf_missing_features.group_read = true; pr_debug2_peo("switching off group read\n"); - goto fallback_missing_features; + return true; + } else { + return false; + } +} + +bool evsel__increase_rlimit(enum rlimit_action *set_rlimit) +{ + int old_errno; + struct rlimit l; + + if (*set_rlimit < INCREASED_MAX) { + old_errno = errno; + + if (getrlimit(RLIMIT_NOFILE, &l) == 0) { + if (*set_rlimit == NO_CHANGE) { + l.rlim_cur = l.rlim_max; + } else { + l.rlim_cur = l.rlim_max + 1000; + l.rlim_max = l.rlim_cur; + } + if (setrlimit(RLIMIT_NOFILE, &l) == 0) { + (*set_rlimit) += 1; + errno = old_errno; + return true; + } + } + errno = old_errno; } + + return false; +} + +static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, + struct perf_thread_map *threads, + int start_cpu, int end_cpu) +{ + int cpu, thread, nthreads; + int pid = -1, err, old_errno; + enum rlimit_action set_rlimit = NO_CHANGE; + + err = __evsel__prepare_open(evsel, cpus, threads); + if (err) + return err; + + if (cpus == NULL) + cpus = empty_cpu_map; + + if (threads == NULL) + threads = empty_thread_map; + + if (evsel->core.system_wide) + nthreads = 1; + else + nthreads = threads->nr; + + if (evsel->cgrp) + pid = evsel->cgrp->fd; + +fallback_missing_features: + evsel__disable_missing_features(evsel); + + display_attr(&evsel->core.attr); + + for (cpu = start_cpu; cpu < end_cpu; cpu++) { + + for (thread = 0; thread < nthreads; thread++) { + int fd, group_fd; +retry_open: + if (thread >= nthreads) + break; + + if (!evsel->cgrp && !evsel->core.system_wide) + pid = perf_thread_map__pid(threads, thread); + + group_fd = get_group_fd(evsel, cpu, thread); + + test_attr__ready(); + + pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", + pid, cpus->map[cpu], group_fd, evsel->open_flags); + + fd = sys_perf_event_open(&evsel->core.attr, pid, cpus->map[cpu], + group_fd, evsel->open_flags); + + FD(evsel, cpu, thread) = fd; + + if (fd < 0) { + err = -errno; + + pr_debug2_peo("\nsys_perf_event_open failed, error %d\n", + err); + goto try_fallback; + } + + bpf_counter__install_pe(evsel, cpu, fd); + + if (unlikely(test_attr__enabled)) { + test_attr__open(&evsel->core.attr, pid, cpus->map[cpu], + fd, group_fd, evsel->open_flags); + } + + pr_debug2_peo(" = %d\n", fd); + + if (evsel->bpf_fd >= 0) { + int evt_fd = fd; + int bpf_fd = evsel->bpf_fd; + + err = ioctl(evt_fd, + PERF_EVENT_IOC_SET_BPF, + bpf_fd); + if (err && errno != EEXIST) { + pr_err("failed to attach bpf fd %d: %s\n", + bpf_fd, strerror(errno)); + err = -EINVAL; + goto out_close; + } + } + + set_rlimit = NO_CHANGE; + + /* + * If we succeeded but had to kill clockid, fail and + * have evsel__open_strerror() print us a nice error. 
+ */ + if (perf_missing_features.clockid || + perf_missing_features.clockid_wrong) { + err = -EINVAL; + goto out_close; + } + } + } + + return 0; + +try_fallback: + if (evsel__precise_ip_fallback(evsel)) + goto retry_open; + + if (evsel__ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) { + /* We just removed 1 thread, so lower the upper nthreads limit. */ + nthreads--; + + /* ... and pretend like nothing has happened. */ + err = 0; + goto retry_open; + } + /* + * perf stat needs between 5 and 22 fds per CPU. When we run out + * of them try to increase the limits. + */ + if (err == -EMFILE && evsel__increase_rlimit(&set_rlimit)) + goto retry_open; + + if (err != -EINVAL || cpu > 0 || thread > 0) + goto out_close; + + if (evsel__detect_missing_features(evsel)) + goto fallback_missing_features; out_close: if (err) threads->err_thread = thread; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 80383096d51c..1b3eeab5f188 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -150,6 +150,8 @@ struct evsel { struct bperf_leader_bpf *leader_skel; struct bperf_follower_bpf *follower_skel; }; + unsigned long open_flags; + int precise_ip_original; }; struct perf_missing_features { @@ -286,6 +288,18 @@ int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads) int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); void evsel__close(struct evsel *evsel); +int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, + struct perf_thread_map *threads); +bool evsel__detect_missing_features(struct evsel *evsel); + +enum rlimit_action { NO_CHANGE, SET_TO_MAX, INCREASED_MAX }; +bool evsel__increase_rlimit(enum rlimit_action *set_rlimit); + +bool evsel__ignore_missing_thread(struct evsel *evsel, + int nr_cpus, int cpu, + struct perf_thread_map *threads, + int thread, int err); +bool evsel__precise_ip_fallback(struct evsel *evsel); struct perf_sample; diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c index b205d929245f..e68935e9ac8c 100644 --- a/tools/perf/util/get_current_dir_name.c +++ b/tools/perf/util/get_current_dir_name.c @@ -3,8 +3,9 @@ // #ifndef HAVE_GET_CURRENT_DIR_NAME #include "get_current_dir_name.h" +#include <limits.h> +#include <string.h> #include <unistd.h> -#include <stdlib.h> /* Android's 'bionic' library, for one, doesn't have this */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 44249027507a..1c7414f66655 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -278,7 +278,7 @@ static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize) if (ret) return ret; - set = bitmap_alloc(size); + set = bitmap_zalloc(size); if (!set) return -ENOMEM; @@ -1284,7 +1284,7 @@ static int memory_node__read(struct memory_node *n, unsigned long idx) dir = opendir(path); if (!dir) { - pr_warning("failed: cant' open memory sysfs data\n"); + pr_warning("failed: can't open memory sysfs data\n"); return -1; } @@ -1294,7 +1294,7 @@ static int memory_node__read(struct memory_node *n, unsigned long idx) size++; - n->set = bitmap_alloc(size); + n->set = bitmap_zalloc(size); if (!n->set) { closedir(dir); return -ENOMEM; @@ -3865,10 +3865,10 @@ static int perf_file_section__process(struct perf_file_section *section, static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, struct perf_header *ph, struct perf_data* data, - bool repipe) + bool repipe, int repipe_fd) { struct
feat_fd ff = { - .fd = STDOUT_FILENO, + .fd = repipe_fd, .ph = ph, }; ssize_t ret; @@ -3891,13 +3891,13 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, return 0; } -static int perf_header__read_pipe(struct perf_session *session) +static int perf_header__read_pipe(struct perf_session *session, int repipe_fd) { struct perf_header *header = &session->header; struct perf_pipe_file_header f_header; if (perf_file_header__read_pipe(&f_header, header, session->data, - session->repipe) < 0) { + session->repipe, repipe_fd) < 0) { pr_debug("incompatible file format\n"); return -EINVAL; } @@ -3995,7 +3995,7 @@ static int evlist__prepare_tracepoint_events(struct evlist *evlist, struct tep_h return 0; } -int perf_session__read_header(struct perf_session *session) +int perf_session__read_header(struct perf_session *session, int repipe_fd) { struct perf_data *data = session->data; struct perf_header *header = &session->header; @@ -4016,7 +4016,7 @@ int perf_session__read_header(struct perf_session *session) * We can read 'pipe' data event from regular file, * check for the pipe header regardless of source. */ - err = perf_header__read_pipe(session); + err = perf_header__read_pipe(session, repipe_fd); if (!err || perf_data__is_pipe(data)) { data->is_pipe = true; return err; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index ae6b1cf19a7d..c9e3265832d9 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -115,7 +115,7 @@ struct perf_session; struct perf_tool; union perf_event; -int perf_session__read_header(struct perf_session *session); +int perf_session__read_header(struct perf_session *session, int repipe_fd); int perf_session__write_header(struct perf_session *session, struct evlist *evlist, int fd, bool at_exit); diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index cbd9b268f168..96c8ef60f4f8 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -38,6 +38,8 @@ struct llvm_param llvm_param = { .user_set_param = false, }; +static void version_notice(void); + int perf_llvm_config(const char *var, const char *value) { if (!strstarts(var, "llvm.")) @@ -108,6 +110,21 @@ search_program(const char *def, const char *name, return ret; } +static int search_program_and_warn(const char *def, const char *name, + char *output) +{ + int ret = search_program(def, name, output); + + if (ret) { + pr_err("ERROR:\tunable to find %s.\n" + "Hint:\tTry to install latest clang/llvm to support BPF. 
Check your $PATH\n" + " \tand '%s-path' option in [llvm] section of ~/.perfconfig.\n", + name, name); + version_notice(); + } + return ret; +} + #define READ_SIZE 4096 static int read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz) @@ -217,7 +234,7 @@ version_notice(void) " \t\tgit clone http://llvm.org/git/clang.git\n\n" " \tOr fetch the latest clang/llvm 3.7 from pre-built llvm packages for\n" " \tdebian/ubuntu:\n" -" \t\thttp://llvm.org/apt\n\n" +" \t\thttps://apt.llvm.org/\n\n" " \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n" " \toption in [llvm] section of ~/.perfconfig to:\n\n" " \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS \\\n" @@ -458,16 +475,10 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, if (!template) template = CLANG_BPF_CMD_DEFAULT_TEMPLATE; - err = search_program(llvm_param.clang_path, + err = search_program_and_warn(llvm_param.clang_path, "clang", clang_path); - if (err) { - pr_err( -"ERROR:\tunable to find clang.\n" -"Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n" -" \tand 'clang-path' option in [llvm] section of ~/.perfconfig.\n"); - version_notice(); + if (err) return -ENOENT; - } /* * This is an optional work. Even it fail we can continue our @@ -495,14 +506,9 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, force_set_env("WORKING_DIR", kbuild_dir ? : "."); if (opts) { - err = search_program(llvm_param.llc_path, "llc", llc_path); - if (err) { - pr_err("ERROR:\tunable to find llc.\n" - "Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n" - " \tand 'llc-path' option in [llvm] section of ~/.perfconfig.\n"); - version_notice(); + err = search_program_and_warn(llvm_param.llc_path, "llc", llc_path); + if (err) goto errout; - } err = -ENOMEM; if (asprintf(&pipe_template, "%s -emit-llvm | %s -march=bpf %s -filetype=obj -o -", @@ -579,5 +585,5 @@ int llvm__search_clang(void) { char clang_path[PATH_MAX]; - return search_program(llvm_param.clang_path, "clang", clang_path); + return search_program_and_warn(llvm_param.clang_path, "clang", clang_path); } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 99d047c5ead0..29b747ac31c1 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -313,7 +313,7 @@ static int metricgroup__setup_events(struct list_head *groups, struct evsel *evsel, *tmp; unsigned long *evlist_used; - evlist_used = bitmap_alloc(perf_evlist->core.nr_entries); + evlist_used = bitmap_zalloc(perf_evlist->core.nr_entries); if (!evlist_used) return -ENOMEM; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index ab7108d22428..512dc8b9c168 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -106,7 +106,7 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity) data = map->aio.data[idx]; mmap_len = mmap__mmap_len(map); node_index = cpu__get_node(cpu); - node_mask = bitmap_alloc(node_index + 1); + node_mask = bitmap_zalloc(node_index + 1); if (!node_mask) { pr_err("Failed to allocate node mask for mbind: error %m\n"); return -1; @@ -258,7 +258,7 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask) static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp) { map->affinity_mask.nbits = cpu__max_cpu(); - map->affinity_mask.bits = bitmap_alloc(map->affinity_mask.nbits); + map->affinity_mask.bits = bitmap_zalloc(map->affinity_mask.nbits); if (!map->affinity_mask.bits) return -1; 
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 9d5f589f02ae..af33118354dd 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -6,6 +6,7 @@ #include <linux/refcount.h> #include <linux/types.h> #include <linux/ring_buffer.h> +#include <linux/bitops.h> #include <stdbool.h> #include <pthread.h> // for cpu_set_t #ifdef HAVE_AIO_SUPPORT diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 9321bd0e2f76..d94e48e1ff9b 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -316,7 +316,8 @@ event_pmu_name opt_pmu_config if (!strncmp(name, "uncore_", 7) && strncmp($1, "uncore_", 7)) name += 7; - if (!perf_pmu__match(pattern, name, $1)) { + if (!perf_pmu__match(pattern, name, $1) || + !perf_pmu__match(pattern, pmu->alias_name, $1)) { if (parse_events_copy_term_list(orig_terms, &terms)) CLEANUP_YYABORT; if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true, false)) diff --git a/tools/perf/util/parse-sublevel-options.h b/tools/perf/util/parse-sublevel-options.h index 9b9efcc2aaad..578b18ef03bb 100644 --- a/tools/perf/util/parse-sublevel-options.h +++ b/tools/perf/util/parse-sublevel-options.h @@ -8,4 +8,4 @@ struct sublevel_option { int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts); -#endif
\ No newline at end of file +#endif diff --git a/tools/perf/util/perf_dlfilter.h b/tools/perf/util/perf_dlfilter.h deleted file mode 100644 index 3eef03d661b4..000000000000 --- a/tools/perf/util/perf_dlfilter.h +++ /dev/null @@ -1,150 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * perf_dlfilter.h: API for perf --dlfilter shared object - * Copyright (c) 2021, Intel Corporation. - */ -#ifndef _LINUX_PERF_DLFILTER_H -#define _LINUX_PERF_DLFILTER_H - -#include <linux/perf_event.h> -#include <linux/types.h> - -/* Definitions for perf_dlfilter_sample flags */ -enum { - PERF_DLFILTER_FLAG_BRANCH = 1ULL << 0, - PERF_DLFILTER_FLAG_CALL = 1ULL << 1, - PERF_DLFILTER_FLAG_RETURN = 1ULL << 2, - PERF_DLFILTER_FLAG_CONDITIONAL = 1ULL << 3, - PERF_DLFILTER_FLAG_SYSCALLRET = 1ULL << 4, - PERF_DLFILTER_FLAG_ASYNC = 1ULL << 5, - PERF_DLFILTER_FLAG_INTERRUPT = 1ULL << 6, - PERF_DLFILTER_FLAG_TX_ABORT = 1ULL << 7, - PERF_DLFILTER_FLAG_TRACE_BEGIN = 1ULL << 8, - PERF_DLFILTER_FLAG_TRACE_END = 1ULL << 9, - PERF_DLFILTER_FLAG_IN_TX = 1ULL << 10, - PERF_DLFILTER_FLAG_VMENTRY = 1ULL << 11, - PERF_DLFILTER_FLAG_VMEXIT = 1ULL << 12, -}; - -/* - * perf sample event information (as per perf script and <linux/perf_event.h>) - */ -struct perf_dlfilter_sample { - __u32 size; /* Size of this structure (for compatibility checking) */ - __u16 ins_lat; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */ - __u16 p_stage_cyc; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */ - __u64 ip; - __s32 pid; - __s32 tid; - __u64 time; - __u64 addr; - __u64 id; - __u64 stream_id; - __u64 period; - __u64 weight; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */ - __u64 transaction; /* Refer PERF_SAMPLE_TRANSACTION in <linux/perf_event.h> */ - __u64 insn_cnt; /* For instructions-per-cycle (IPC) */ - __u64 cyc_cnt; /* For instructions-per-cycle (IPC) */ - __s32 cpu; - __u32 flags; /* Refer PERF_DLFILTER_FLAG_* above */ - __u64 data_src; /* Refer PERF_SAMPLE_DATA_SRC in <linux/perf_event.h> */ - __u64 phys_addr; /* Refer PERF_SAMPLE_PHYS_ADDR in <linux/perf_event.h> */ - __u64 data_page_size; /* Refer PERF_SAMPLE_DATA_PAGE_SIZE in <linux/perf_event.h> */ - __u64 code_page_size; /* Refer PERF_SAMPLE_CODE_PAGE_SIZE in <linux/perf_event.h> */ - __u64 cgroup; /* Refer PERF_SAMPLE_CGROUP in <linux/perf_event.h> */ - __u8 cpumode; /* Refer CPUMODE_MASK etc in <linux/perf_event.h> */ - __u8 addr_correlates_sym; /* True => resolve_addr() can be called */ - __u16 misc; /* Refer perf_event_header in <linux/perf_event.h> */ - __u32 raw_size; /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */ - const void *raw_data; /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */ - __u64 brstack_nr; /* Number of brstack entries */ - const struct perf_branch_entry *brstack; /* Refer <linux/perf_event.h> */ - __u64 raw_callchain_nr; /* Number of raw_callchain entries */ - const __u64 *raw_callchain; /* Refer <linux/perf_event.h> */ - const char *event; -}; - -/* - * Address location (as per perf script) - */ -struct perf_dlfilter_al { - __u32 size; /* Size of this structure (for compatibility checking) */ - __u32 symoff; - const char *sym; - __u64 addr; /* Mapped address (from dso) */ - __u64 sym_start; - __u64 sym_end; - const char *dso; - __u8 sym_binding; /* STB_LOCAL, STB_GLOBAL or STB_WEAK, refer <elf.h> */ - __u8 is_64_bit; /* Only valid if dso is not NULL */ - __u8 is_kernel_ip; /* True if in kernel space */ - __u32 buildid_size; - __u8 *buildid; - /* Below members are only populated by resolve_ip() */ - __u8 filtered; /* 
True if this sample event will be filtered out */ - const char *comm; -}; - -struct perf_dlfilter_fns { - /* Return information about ip */ - const struct perf_dlfilter_al *(*resolve_ip)(void *ctx); - /* Return information about addr (if addr_correlates_sym) */ - const struct perf_dlfilter_al *(*resolve_addr)(void *ctx); - /* Return arguments from --dlarg option */ - char **(*args)(void *ctx, int *dlargc); - /* - * Return information about address (al->size must be set before - * calling). Returns 0 on success, -1 otherwise. - */ - __s32 (*resolve_address)(void *ctx, __u64 address, struct perf_dlfilter_al *al); - /* Return instruction bytes and length */ - const __u8 *(*insn)(void *ctx, __u32 *length); - /* Return source file name and line number */ - const char *(*srcline)(void *ctx, __u32 *line_number); - /* Return perf_event_attr, refer <linux/perf_event.h> */ - struct perf_event_attr *(*attr)(void *ctx); - /* Read object code, return numbers of bytes read */ - __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len); - /* Reserved */ - void *(*reserved[120])(void *); -}; - -/* - * If implemented, 'start' will be called at the beginning, - * before any calls to 'filter_event'. Return 0 to indicate success, - * or return a negative error code. '*data' can be assigned for use - * by other functions. 'ctx' is needed for calls to perf_dlfilter_fns, - * but most perf_dlfilter_fns are not valid when called from 'start'. - */ -int start(void **data, void *ctx); - -/* - * If implemented, 'stop' will be called at the end, - * after any calls to 'filter_event'. Return 0 to indicate success, or - * return a negative error code. 'data' is set by start(). 'ctx' is - * needed for calls to perf_dlfilter_fns, but most perf_dlfilter_fns - * are not valid when called from 'stop'. - */ -int stop(void *data, void *ctx); - -/* - * If implemented, 'filter_event' will be called for each sample - * event. Return 0 to keep the sample event, 1 to filter it out, or - * return a negative error code. 'data' is set by start(). 'ctx' is - * needed for calls to perf_dlfilter_fns. - */ -int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx); - -/* - * The same as 'filter_event' except it is called before internal - * filtering. - */ -int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx); - -/* - * If implemented, return a one-line description of the filter, and optionally - * a longer description. 
- */ -const char *filter_description(const char **long_description); - -#endif diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index fc683bc41715..bdabd62170d2 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -843,8 +843,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, break; } - if (pmu_is_uncore(name) && - pmu_uncore_alias_match(pname, name)) + if (pmu->is_uncore && pmu_uncore_alias_match(pname, name)) goto new_alias; if (strcmp(pname, name)) @@ -927,7 +926,7 @@ static int pmu_add_sys_aliases_iter_fn(struct pmu_event *pe, void *data) return 0; } -static void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu) +void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu) { struct pmu_sys_event_iter_data idata = { .head = head, @@ -946,6 +945,18 @@ perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) return NULL; } +char * __weak +pmu_find_real_name(const char *name) +{ + return (char *)name; +} + +char * __weak +pmu_find_alias_name(const char *name __maybe_unused) +{ + return NULL; +} + static int pmu_max_precise(const char *name) { char path[PATH_MAX]; @@ -959,13 +970,15 @@ static int pmu_max_precise(const char *name) return max_precise; } -static struct perf_pmu *pmu_lookup(const char *name) +static struct perf_pmu *pmu_lookup(const char *lookup_name) { struct perf_pmu *pmu; LIST_HEAD(format); LIST_HEAD(aliases); __u32 type; + char *name = pmu_find_real_name(lookup_name); bool is_hybrid = perf_pmu__hybrid_mounted(name); + char *alias_name; /* * Check pmu name for hybrid and the pmu may be invalid in sysfs @@ -996,6 +1009,16 @@ static struct perf_pmu *pmu_lookup(const char *name) pmu->cpus = pmu_cpumask(name); pmu->name = strdup(name); + if (!pmu->name) + goto err; + + alias_name = pmu_find_alias_name(name); + if (alias_name) { + pmu->alias_name = strdup(alias_name); + if (!pmu->alias_name) + goto err; + } + pmu->type = type; pmu->is_uncore = pmu_is_uncore(name); if (pmu->is_uncore) @@ -1018,15 +1041,22 @@ static struct perf_pmu *pmu_lookup(const char *name) pmu->default_config = perf_pmu__get_default_config(pmu); return pmu; +err: + if (pmu->name) + free(pmu->name); + free(pmu); + return NULL; } static struct perf_pmu *pmu_find(const char *name) { struct perf_pmu *pmu; - list_for_each_entry(pmu, &pmus, list) - if (!strcmp(pmu->name, name)) + list_for_each_entry(pmu, &pmus, list) { + if (!strcmp(pmu->name, name) || + (pmu->alias_name && !strcmp(pmu->alias_name, name))) return pmu; + } return NULL; } @@ -1920,6 +1950,9 @@ bool perf_pmu__has_hybrid(void) int perf_pmu__match(char *pattern, char *name, char *tok) { + if (!name) + return -1; + if (fnmatch(pattern, name, 0)) return -1; @@ -1928,3 +1961,38 @@ int perf_pmu__match(char *pattern, char *name, char *tok) return 0; } + +int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus, + struct perf_cpu_map **mcpus_ptr, + struct perf_cpu_map **ucpus_ptr) +{ + struct perf_cpu_map *pmu_cpus = pmu->cpus; + struct perf_cpu_map *matched_cpus, *unmatched_cpus; + int matched_nr = 0, unmatched_nr = 0; + + matched_cpus = perf_cpu_map__default_new(); + if (!matched_cpus) + return -1; + + unmatched_cpus = perf_cpu_map__default_new(); + if (!unmatched_cpus) { + perf_cpu_map__put(matched_cpus); + return -1; + } + + for (int i = 0; i < cpus->nr; i++) { + int cpu; + + cpu = perf_cpu_map__idx(pmu_cpus, cpus->map[i]); + if (cpu == -1) + unmatched_cpus->map[unmatched_nr++] = cpus->map[i]; + else + matched_cpus->map[matched_nr++] = cpus->map[i]; + } + 
+ unmatched_cpus->nr = unmatched_nr; + matched_cpus->nr = matched_nr; + *mcpus_ptr = matched_cpus; + *ucpus_ptr = unmatched_cpus; + return 0; +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 926da483a141..394898b07fd9 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -11,6 +11,7 @@ #include "pmu-events/pmu-events.h" struct evsel_config_term; +struct perf_cpu_map; enum { PERF_PMU_FORMAT_VALUE_CONFIG, @@ -21,6 +22,7 @@ enum { #define PERF_PMU_FORMAT_BITS 64 #define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" #define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" +#define MAX_PMU_NAME_LEN 128 struct perf_event_attr; @@ -32,6 +34,7 @@ struct perf_pmu_caps { struct perf_pmu { char *name; + char *alias_name; char *id; __u32 type; bool selectable; @@ -81,6 +84,7 @@ struct perf_pmu_alias { struct perf_pmu *perf_pmu__find(const char *name); struct perf_pmu *perf_pmu__find_by_type(unsigned int type); +void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu); int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, struct list_head *head_terms, struct parse_events_error *error); @@ -135,4 +139,10 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, bool perf_pmu__has_hybrid(void); int perf_pmu__match(char *pattern, char *name, char *tok); +int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus, + struct perf_cpu_map **mcpus_ptr, + struct perf_cpu_map **ucpus_ptr); + +char *pmu_find_real_name(const char *name); +char *pmu_find_alias_name(const char *name); #endif /* __PMU_H */ diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 69129e2aa7a1..c0c010350bc2 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1422,6 +1422,37 @@ static void python_process_event(union perf_event *event, } } +static void python_process_throttle(union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + const char *handler_name; + PyObject *handler, *t; + + if (event->header.type == PERF_RECORD_THROTTLE) + handler_name = "throttle"; + else + handler_name = "unthrottle"; + handler = get_handler(handler_name); + if (!handler) + return; + + t = tuple_new(6); + if (!t) + return; + + tuple_set_u64(t, 0, event->throttle.time); + tuple_set_u64(t, 1, event->throttle.id); + tuple_set_u64(t, 2, event->throttle.stream_id); + tuple_set_s32(t, 3, sample->cpu); + tuple_set_s32(t, 4, sample->pid); + tuple_set_s32(t, 5, sample->tid); + + call_object(handler, t, handler_name); + + Py_DECREF(t); +} + static void python_do_process_switch(union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -2079,5 +2110,6 @@ struct scripting_ops python_scripting_ops = { .process_auxtrace_error = python_process_auxtrace_error, .process_stat = python_process_stat, .process_stat_interval = python_process_stat_interval, + .process_throttle = python_process_throttle, .generate_script = python_generate_script, }; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 51f727402912..069c2cfdd3be 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -102,11 +102,11 @@ static int perf_session__deliver_event(struct perf_session *session, struct perf_tool *tool, u64 file_offset); -static int perf_session__open(struct perf_session *session) +static int perf_session__open(struct perf_session *session, int 
repipe_fd) { struct perf_data *data = session->data; - if (perf_session__read_header(session) < 0) { + if (perf_session__read_header(session, repipe_fd) < 0) { pr_err("incompatible file format (rerun with -v to learn more)\n"); return -1; } @@ -185,8 +185,9 @@ static int ordered_events__deliver_event(struct ordered_events *oe, session->tool, event->file_offset); } -struct perf_session *perf_session__new(struct perf_data *data, - bool repipe, struct perf_tool *tool) +struct perf_session *__perf_session__new(struct perf_data *data, + bool repipe, int repipe_fd, + struct perf_tool *tool) { int ret = -ENOMEM; struct perf_session *session = zalloc(sizeof(*session)); @@ -210,7 +211,7 @@ struct perf_session *perf_session__new(struct perf_data *data, session->data = data; if (perf_data__is_read(data)) { - ret = perf_session__open(session); + ret = perf_session__open(session, repipe_fd); if (ret < 0) goto out_delete; @@ -1540,6 +1541,8 @@ static int machines__deliver_event(struct machines *machines, evlist->stats.total_aux_lost += 1; if (event->aux.flags & PERF_AUX_FLAG_PARTIAL) evlist->stats.total_aux_partial += 1; + if (event->aux.flags & PERF_AUX_FLAG_COLLISION) + evlist->stats.total_aux_collision += 1; } return tool->aux(tool, event, sample, machine); case PERF_RECORD_ITRACE_START: @@ -1895,6 +1898,13 @@ static void perf_session__warn_about_errors(const struct perf_session *session) ""); } + if (session->tool->aux == perf_event__process_aux && + stats->total_aux_collision != 0) { + ui__warning("AUX data detected collision %" PRIu64 " times out of %u!\n\n", + stats->total_aux_collision, + stats->nr_events[PERF_RECORD_AUX]); + } + if (stats->nr_unknown_events != 0) { ui__warning("Found %u unknown events!\n\n" "Is this an older tool processing a perf.data " diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index e31ba4c92a6c..5d8bd14a0a39 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -54,8 +54,16 @@ struct decomp { struct perf_tool; -struct perf_session *perf_session__new(struct perf_data *data, - bool repipe, struct perf_tool *tool); +struct perf_session *__perf_session__new(struct perf_data *data, + bool repipe, int repipe_fd, + struct perf_tool *tool); + +static inline struct perf_session *perf_session__new(struct perf_data *data, + struct perf_tool *tool) +{ + return __perf_session__new(data, false, -1, tool); +} + void perf_session__delete(struct perf_session *session); void perf_event_header__bswap(struct perf_event_header *hdr); diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 35aa0c0f7cd9..a7e981b2d7de 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +#include "util/cgroup.h" +#include "util/data.h" #include "util/debug.h" #include "util/dso.h" #include "util/event.h" @@ -16,7 +18,6 @@ #include "util/synthetic-events.h" #include "util/target.h" #include "util/time-utils.h" -#include "util/cgroup.h" #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/string.h> @@ -2179,3 +2180,53 @@ int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session free(ff.buf); return ret; } + +int perf_event__synthesize_for_pipe(struct perf_tool *tool, + struct perf_session *session, + struct perf_data *data, + perf_event__handler_t process) +{ + int err; + int ret = 0; + struct evlist *evlist = session->evlist; + + /* + * We need to synthesize events first, because some + * features work on top
of them (on report side). + */ + err = perf_event__synthesize_attrs(tool, evlist, process); + if (err < 0) { + pr_err("Couldn't synthesize attrs.\n"); + return err; + } + ret += err; + + err = perf_event__synthesize_features(tool, session, evlist, process); + if (err < 0) { + pr_err("Couldn't synthesize features.\n"); + return err; + } + ret += err; + + if (have_tracepoints(&evlist->core.entries)) { + int fd = perf_data__fd(data); + + /* + * FIXME err <= 0 here actually means that + * there were no tracepoints so it's not really + * an error, just that we don't need to + * synthesize anything. We really have to + * return this more properly and also + * propagate errors that now are calling die() + */ + err = perf_event__synthesize_tracing_data(tool, fd, evlist, + process); + if (err <= 0) { + pr_err("Couldn't record tracing data.\n"); + return err; + } + ret += err; + } + + return ret; +} diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index e7a3e9589738..c845e2b9b444 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -14,6 +14,7 @@ struct evsel; struct machine; struct perf_counts_values; struct perf_cpu_map; +struct perf_data; struct perf_event_attr; struct perf_event_mmap_page; struct perf_sample; @@ -101,4 +102,9 @@ static inline int perf_event__synthesize_bpf_events(struct perf_session *session } #endif // HAVE_LIBBPF_SUPPORT +int perf_event__synthesize_for_pipe(struct perf_tool *tool, + struct perf_session *session, + struct perf_data *data, + perf_event__handler_t process); + #endif // __PERF_SYNTHETIC_EVENTS_H diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 4ff56217f2a6..daec6cba500d 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -17,6 +17,7 @@ struct target { bool default_per_cpu; bool per_thread; bool use_bpf; + bool hybrid; const char *attr_map; }; diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 54aadeedf28c..640981105788 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -90,6 +90,9 @@ struct scripting_ops { void (*process_stat)(struct perf_stat_config *config, struct evsel *evsel, u64 tstamp); void (*process_stat_interval)(u64 tstamp); + void (*process_throttle)(union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int (*generate_script) (struct tep_handle *pevent, const char *outfile); };
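Finally, a convention worth noting in the new perf_event__synthesize_for_pipe() above: each synthesis stage returns the number of bytes it wrote on success or a negative error, and the caller accumulates the running byte total while bailing out on the first failure. A compact sketch of that accumulate-or-fail shape (the stage_* functions are hypothetical stand-ins, not perf APIs):

/* Each stage reports bytes written (>= 0) or a negative errno-style error. */
static int stage_attrs(void)
{
	return 64;	/* pretend: 64 bytes of attr events written */
}

static int stage_features(void)
{
	return 128;	/* pretend: 128 bytes of feature events written */
}

int synthesize_all(void)
{
	int ret = 0, err;

	err = stage_attrs();
	if (err < 0)
		return err;	/* propagate the error, drop the partial total */
	ret += err;

	err = stage_features();
	if (err < 0)
		return err;
	ret += err;

	return ret;	/* total bytes synthesized across all stages */
}

The tracepoint leg of the real function bends this rule: it treats err <= 0 as failure even though, as its FIXME concedes, zero only means there were no tracepoints to synthesize.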