summaryrefslogtreecommitdiff
path: root/tools/perf/util/cs-etm.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/util/cs-etm.c')
-rw-r--r--tools/perf/util/cs-etm.c162
1 files changed, 124 insertions, 38 deletions
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index e048949bf655..456994564d6e 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -50,7 +50,22 @@ struct cs_etm_auxtrace {
struct thread *unknown_thread;
struct perf_tsc_conversion tc;
+ /*
+ * Timeless has no timestamps in the trace so overlapping mmap lookups
+ * are less accurate but produces smaller trace data. We use context IDs
+ * in the trace instead of matching timestamps with fork records so
+ * they're not really needed in the general case. Overlapping mmaps
+ * happen in cases like between a fork and an exec.
+ */
bool timeless_decoding;
+
+ /*
+ * Per-thread ignores the trace channel ID and instead assumes that
+ * everything in a buffer comes from the same process regardless of
+ * which CPU it ran on. It also implies no context IDs so the TID is
+ * taken from the auxtrace buffer.
+ */
+ bool per_thread_decoding;
bool snapshot_mode;
bool data_queued;
bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
@@ -98,7 +113,7 @@ struct cs_etm_queue {
/* RB tree for quick conversion between traceID and metadata pointers */
static struct intlist *traceid_list;
-static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
+static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
pid_t tid);
static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
@@ -492,7 +507,7 @@ static struct cs_etm_traceid_queue
struct cs_etm_traceid_queue *tidq, **traceid_queues;
struct cs_etm_auxtrace *etm = etmq->etm;
- if (etm->timeless_decoding)
+ if (etm->per_thread_decoding)
trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
traceid_queues_list = etmq->traceid_queues_list;
@@ -731,10 +746,15 @@ static int cs_etm__flush_events(struct perf_session *session,
if (!tool->ordered_events)
return -EINVAL;
- if (etm->timeless_decoding)
+ if (etm->timeless_decoding) {
+ /*
+ * Pass tid = -1 to process all queues. But likely they will have
+ * already been processed on PERF_RECORD_EXIT anyway.
+ */
return cs_etm__process_timeless_queues(etm, -1);
+ }
- return cs_etm__process_queues(etm);
+ return cs_etm__process_timestamped_queues(etm);
}
static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
@@ -1066,7 +1086,7 @@ static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
* chronological order.
*
* Note that packets decoded above are still in the traceID's packet
- * queue and will be processed in cs_etm__process_queues().
+ * queue and will be processed in cs_etm__process_timestamped_queues().
*/
cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
@@ -1347,9 +1367,7 @@ static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
struct cs_etm_auxtrace *etm = etmq->etm;
struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
- if (etm->timeless_decoding)
- return 0;
- else if (etm->has_virtual_ts)
+ if (!etm->timeless_decoding && etm->has_virtual_ts)
return packet_queue->cs_timestamp;
else
return etm->latest_kernel_timestamp;
@@ -2329,7 +2347,7 @@ static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
}
}
-static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
+static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
{
int err = 0;
struct cs_etm_traceid_queue *tidq;
@@ -2367,6 +2385,51 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
return err;
}
+static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
+{
+ int idx, err = 0;
+ struct cs_etm_traceid_queue *tidq;
+ struct int_node *inode;
+
+ /* Go through each buffer in the queue and decode them one by one */
+ while (1) {
+ err = cs_etm__get_data_block(etmq);
+ if (err <= 0)
+ return err;
+
+ /* Run trace decoder until buffer consumed or end of trace */
+ do {
+ err = cs_etm__decode_data_block(etmq);
+ if (err)
+ return err;
+
+ /*
+ * cs_etm__run_per_thread_timeless_decoder() runs on a
+ * single traceID queue because each TID has a separate
+ * buffer. But here in per-cpu mode we need to iterate
+ * over each channel instead.
+ */
+ intlist__for_each_entry(inode,
+ etmq->traceid_queues_list) {
+ idx = (int)(intptr_t)inode->priv;
+ tidq = etmq->traceid_queues[idx];
+ cs_etm__process_traceid_queue(etmq, tidq);
+ }
+ } while (etmq->buf_len);
+
+ intlist__for_each_entry(inode, etmq->traceid_queues_list) {
+ idx = (int)(intptr_t)inode->priv;
+ tidq = etmq->traceid_queues[idx];
+ /* Flush any remaining branch stack entries */
+ err = cs_etm__end_block(etmq, tidq);
+ if (err)
+ return err;
+ }
+ }
+
+ return err;
+}
+
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
pid_t tid)
{
@@ -2381,22 +2444,30 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
if (!etmq)
continue;
- tidq = cs_etm__etmq_get_traceid_queue(etmq,
- CS_ETM_PER_THREAD_TRACEID);
+ /*
+ * Per-cpu mode has contextIDs in the trace and the decoder
+ * calls cs_etm__set_pid_tid_cpu() automatically so no need
+ * to do this here
+ */
+ if (etm->per_thread_decoding) {
+ tidq = cs_etm__etmq_get_traceid_queue(
+ etmq, CS_ETM_PER_THREAD_TRACEID);
- if (!tidq)
- continue;
+ if (!tidq)
+ continue;
- if ((tid == -1) || (tidq->tid == tid)) {
- cs_etm__set_pid_tid_cpu(etm, tidq);
- cs_etm__run_decoder(etmq);
- }
+ if ((tid == -1) || (tidq->tid == tid)) {
+ cs_etm__set_pid_tid_cpu(etm, tidq);
+ cs_etm__run_per_thread_timeless_decoder(etmq);
+ }
+ } else
+ cs_etm__run_per_cpu_timeless_decoder(etmq);
}
return 0;
}
-static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
+static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
{
int ret = 0;
unsigned int cs_queue_nr, queue_nr, i;
@@ -2573,7 +2644,6 @@ static int cs_etm__process_event(struct perf_session *session,
struct perf_sample *sample,
struct perf_tool *tool)
{
- u64 sample_kernel_timestamp;
struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
struct cs_etm_auxtrace,
auxtrace);
@@ -2586,33 +2656,39 @@ static int cs_etm__process_event(struct perf_session *session,
return -EINVAL;
}
- if (sample->time && (sample->time != (u64) -1))
- sample_kernel_timestamp = sample->time;
- else
- sample_kernel_timestamp = 0;
-
- /*
- * Don't wait for cs_etm__flush_events() in per-thread/timeless mode to start the decode. We
- * need the tid of the PERF_RECORD_EXIT event to assign to the synthesised samples because
- * ETM_OPT_CTXTID is not enabled.
- */
- if (etm->timeless_decoding &&
- event->header.type == PERF_RECORD_EXIT)
- return cs_etm__process_timeless_queues(etm,
- event->fork.tid);
+ switch (event->header.type) {
+ case PERF_RECORD_EXIT:
+ /*
+ * Don't need to wait for cs_etm__flush_events() in per-thread mode to
+ * start the decode because we know there will be no more trace from
+ * this thread. All this does is emit samples earlier than waiting for
+ * the flush in other modes, but with timestamps it makes sense to wait
+ * for flush so that events from different threads are interleaved
+ * properly.
+ */
+ if (etm->per_thread_decoding && etm->timeless_decoding)
+ return cs_etm__process_timeless_queues(etm,
+ event->fork.tid);
+ break;
- if (event->header.type == PERF_RECORD_ITRACE_START)
+ case PERF_RECORD_ITRACE_START:
return cs_etm__process_itrace_start(etm, event);
- else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
+
+ case PERF_RECORD_SWITCH_CPU_WIDE:
return cs_etm__process_switch_cpu_wide(etm, event);
- if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) {
+ case PERF_RECORD_AUX:
/*
* Record the latest kernel timestamp available in the header
* for samples so that synthesised samples occur from this point
* onwards.
*/
- etm->latest_kernel_timestamp = sample_kernel_timestamp;
+ if (sample->time && (sample->time != (u64)-1))
+ etm->latest_kernel_timestamp = sample->time;
+ break;
+
+ default:
+ break;
}
return 0;
@@ -2821,10 +2897,20 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
* Return 'not found' if mismatch.
*/
if (auxtrace_event->cpu == (__u32) -1) {
+ etm->per_thread_decoding = true;
if (auxtrace_event->tid != sample->tid)
return 1;
- } else if (auxtrace_event->cpu != sample->cpu)
+ } else if (auxtrace_event->cpu != sample->cpu) {
+ if (etm->per_thread_decoding) {
+ /*
+ * Found a per-cpu buffer after a per-thread one was
+ * already found
+ */
+ pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
+ return -EINVAL;
+ }
return 1;
+ }
if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
/*