diff options
-rwxr-xr-x | tools/perf/tests/shell/test_arm_coresight.sh | 24 | ||||
-rw-r--r-- | tools/perf/util/cs-etm.c | 162 |
2 files changed, 148 insertions, 38 deletions
diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 565ce525c40b..482009e17bda 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -150,6 +150,8 @@ arm_cs_etm_system_wide_test() { echo "Recording trace with system wide mode" perf record -o ${perfdata} -e cs_etm// -a -- ls > /dev/null 2>&1 + # System-wide mode should include perf samples so test for that + # instead of ls perf_script_branch_samples perf && perf_report_branch_samples perf && perf_report_instruction_samples perf @@ -182,7 +184,29 @@ arm_cs_etm_snapshot_test() { arm_cs_report "CoreSight snapshot testing" $err } +arm_cs_etm_basic_test() { + echo "Recording trace with '$*'" + perf record -o ${perfdata} "$@" -- ls > /dev/null 2>&1 + + perf_script_branch_samples ls && + perf_report_branch_samples ls && + perf_report_instruction_samples ls + + err=$? + arm_cs_report "CoreSight basic testing with '$*'" $err +} + arm_cs_etm_traverse_path_test arm_cs_etm_system_wide_test arm_cs_etm_snapshot_test + +# Test all combinations of per-thread, system-wide and normal mode with +# and without timestamps +arm_cs_etm_basic_test -e cs_etm/timestamp=0/ --per-thread +arm_cs_etm_basic_test -e cs_etm/timestamp=1/ --per-thread +arm_cs_etm_basic_test -e cs_etm/timestamp=0/ -a +arm_cs_etm_basic_test -e cs_etm/timestamp=1/ -a +arm_cs_etm_basic_test -e cs_etm/timestamp=0/ +arm_cs_etm_basic_test -e cs_etm/timestamp=1/ + exit $glb_err diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index e048949bf655..456994564d6e 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -50,7 +50,22 @@ struct cs_etm_auxtrace { struct thread *unknown_thread; struct perf_tsc_conversion tc; + /* + * Timeless has no timestamps in the trace so overlapping mmap lookups + * are less accurate but produces smaller trace data. We use context IDs + * in the trace instead of matching timestamps with fork records so + * they're not really needed in the general case. Overlapping mmaps + * happen in cases like between a fork and an exec. + */ bool timeless_decoding; + + /* + * Per-thread ignores the trace channel ID and instead assumes that + * everything in a buffer comes from the same process regardless of + * which CPU it ran on. It also implies no context IDs so the TID is + * taken from the auxtrace buffer. + */ + bool per_thread_decoding; bool snapshot_mode; bool data_queued; bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */ @@ -98,7 +113,7 @@ struct cs_etm_queue { /* RB tree for quick conversion between traceID and metadata pointers */ static struct intlist *traceid_list; -static int cs_etm__process_queues(struct cs_etm_auxtrace *etm); +static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid); static int cs_etm__get_data_block(struct cs_etm_queue *etmq); @@ -492,7 +507,7 @@ static struct cs_etm_traceid_queue struct cs_etm_traceid_queue *tidq, **traceid_queues; struct cs_etm_auxtrace *etm = etmq->etm; - if (etm->timeless_decoding) + if (etm->per_thread_decoding) trace_chan_id = CS_ETM_PER_THREAD_TRACEID; traceid_queues_list = etmq->traceid_queues_list; @@ -731,10 +746,15 @@ static int cs_etm__flush_events(struct perf_session *session, if (!tool->ordered_events) return -EINVAL; - if (etm->timeless_decoding) + if (etm->timeless_decoding) { + /* + * Pass tid = -1 to process all queues. But likely they will have + * already been processed on PERF_RECORD_EXIT anyway. + */ return cs_etm__process_timeless_queues(etm, -1); + } - return cs_etm__process_queues(etm); + return cs_etm__process_timestamped_queues(etm); } static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) @@ -1066,7 +1086,7 @@ static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm, * chronological order. * * Note that packets decoded above are still in the traceID's packet - * queue and will be processed in cs_etm__process_queues(). + * queue and will be processed in cs_etm__process_timestamped_queues(). */ cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); @@ -1347,9 +1367,7 @@ static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq, struct cs_etm_auxtrace *etm = etmq->etm; struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue; - if (etm->timeless_decoding) - return 0; - else if (etm->has_virtual_ts) + if (!etm->timeless_decoding && etm->has_virtual_ts) return packet_queue->cs_timestamp; else return etm->latest_kernel_timestamp; @@ -2329,7 +2347,7 @@ static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq) } } -static int cs_etm__run_decoder(struct cs_etm_queue *etmq) +static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq) { int err = 0; struct cs_etm_traceid_queue *tidq; @@ -2367,6 +2385,51 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) return err; } +static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq) +{ + int idx, err = 0; + struct cs_etm_traceid_queue *tidq; + struct int_node *inode; + + /* Go through each buffer in the queue and decode them one by one */ + while (1) { + err = cs_etm__get_data_block(etmq); + if (err <= 0) + return err; + + /* Run trace decoder until buffer consumed or end of trace */ + do { + err = cs_etm__decode_data_block(etmq); + if (err) + return err; + + /* + * cs_etm__run_per_thread_timeless_decoder() runs on a + * single traceID queue because each TID has a separate + * buffer. But here in per-cpu mode we need to iterate + * over each channel instead. + */ + intlist__for_each_entry(inode, + etmq->traceid_queues_list) { + idx = (int)(intptr_t)inode->priv; + tidq = etmq->traceid_queues[idx]; + cs_etm__process_traceid_queue(etmq, tidq); + } + } while (etmq->buf_len); + + intlist__for_each_entry(inode, etmq->traceid_queues_list) { + idx = (int)(intptr_t)inode->priv; + tidq = etmq->traceid_queues[idx]; + /* Flush any remaining branch stack entries */ + err = cs_etm__end_block(etmq, tidq); + if (err) + return err; + } + } + + return err; +} + static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid) { @@ -2381,22 +2444,30 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, if (!etmq) continue; - tidq = cs_etm__etmq_get_traceid_queue(etmq, - CS_ETM_PER_THREAD_TRACEID); + /* + * Per-cpu mode has contextIDs in the trace and the decoder + * calls cs_etm__set_pid_tid_cpu() automatically so no need + * to do this here + */ + if (etm->per_thread_decoding) { + tidq = cs_etm__etmq_get_traceid_queue( + etmq, CS_ETM_PER_THREAD_TRACEID); - if (!tidq) - continue; + if (!tidq) + continue; - if ((tid == -1) || (tidq->tid == tid)) { - cs_etm__set_pid_tid_cpu(etm, tidq); - cs_etm__run_decoder(etmq); - } + if ((tid == -1) || (tidq->tid == tid)) { + cs_etm__set_pid_tid_cpu(etm, tidq); + cs_etm__run_per_thread_timeless_decoder(etmq); + } + } else + cs_etm__run_per_cpu_timeless_decoder(etmq); } return 0; } -static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) +static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm) { int ret = 0; unsigned int cs_queue_nr, queue_nr, i; @@ -2573,7 +2644,6 @@ static int cs_etm__process_event(struct perf_session *session, struct perf_sample *sample, struct perf_tool *tool) { - u64 sample_kernel_timestamp; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); @@ -2586,33 +2656,39 @@ static int cs_etm__process_event(struct perf_session *session, return -EINVAL; } - if (sample->time && (sample->time != (u64) -1)) - sample_kernel_timestamp = sample->time; - else - sample_kernel_timestamp = 0; - - /* - * Don't wait for cs_etm__flush_events() in per-thread/timeless mode to start the decode. We - * need the tid of the PERF_RECORD_EXIT event to assign to the synthesised samples because - * ETM_OPT_CTXTID is not enabled. - */ - if (etm->timeless_decoding && - event->header.type == PERF_RECORD_EXIT) - return cs_etm__process_timeless_queues(etm, - event->fork.tid); + switch (event->header.type) { + case PERF_RECORD_EXIT: + /* + * Don't need to wait for cs_etm__flush_events() in per-thread mode to + * start the decode because we know there will be no more trace from + * this thread. All this does is emit samples earlier than waiting for + * the flush in other modes, but with timestamps it makes sense to wait + * for flush so that events from different threads are interleaved + * properly. + */ + if (etm->per_thread_decoding && etm->timeless_decoding) + return cs_etm__process_timeless_queues(etm, + event->fork.tid); + break; - if (event->header.type == PERF_RECORD_ITRACE_START) + case PERF_RECORD_ITRACE_START: return cs_etm__process_itrace_start(etm, event); - else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) + + case PERF_RECORD_SWITCH_CPU_WIDE: return cs_etm__process_switch_cpu_wide(etm, event); - if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) { + case PERF_RECORD_AUX: /* * Record the latest kernel timestamp available in the header * for samples so that synthesised samples occur from this point * onwards. */ - etm->latest_kernel_timestamp = sample_kernel_timestamp; + if (sample->time && (sample->time != (u64)-1)) + etm->latest_kernel_timestamp = sample->time; + break; + + default: + break; } return 0; @@ -2821,10 +2897,20 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o * Return 'not found' if mismatch. */ if (auxtrace_event->cpu == (__u32) -1) { + etm->per_thread_decoding = true; if (auxtrace_event->tid != sample->tid) return 1; - } else if (auxtrace_event->cpu != sample->cpu) + } else if (auxtrace_event->cpu != sample->cpu) { + if (etm->per_thread_decoding) { + /* + * Found a per-cpu buffer after a per-thread one was + * already found + */ + pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n"); + return -EINVAL; + } return 1; + } if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) { /* |