diff options
Diffstat (limited to 'tools/perf/builtin-sched.c')
-rw-r--r-- | tools/perf/builtin-sched.c | 1118 |
1 files changed, 1091 insertions, 27 deletions
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index f5503ca22e1c..1a3f1be93372 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -13,12 +13,16 @@ #include "util/cloexec.h" #include "util/thread_map.h" #include "util/color.h" +#include "util/stat.h" +#include "util/callchain.h" +#include "util/time-utils.h" #include <subcmd/parse-options.h> #include "util/trace-event.h" #include "util/debug.h" +#include <linux/log2.h> #include <sys/prctl.h> #include <sys/resource.h> @@ -192,8 +196,45 @@ struct perf_sched { bool force; bool skip_merge; struct perf_sched_map map; + + /* options for timehist command */ + bool summary; + bool summary_only; + bool show_callchain; + unsigned int max_stack; + bool show_cpu_visual; + bool show_wakeups; + bool show_migrations; + u64 skipped_samples; + const char *time_str; + struct perf_time_interval ptime; +}; + +/* per thread run time data */ +struct thread_runtime { + u64 last_time; /* time of previous sched in/out event */ + u64 dt_run; /* run time */ + u64 dt_wait; /* time between CPU access (off cpu) */ + u64 dt_delay; /* time between wakeup and sched-in */ + u64 ready_to_run; /* time of wakeup */ + + struct stats run_stats; + u64 total_run_time; + + u64 migrations; +}; + +/* per event run time data */ +struct evsel_runtime { + u64 *last_time; /* time this event was last seen per cpu */ + u32 ncpu; /* highest cpu slot allocated */ }; +/* track idle times per cpu */ +static struct thread **idle_threads; +static int idle_max_cpu; +static char idle_comm[] = "<idle>"; + static u64 get_nsecs(void) { struct timespec ts; @@ -1191,6 +1232,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_ int i; int ret; u64 avg; + char max_lat_at[32]; if (!work_list->nb_atoms) return; @@ -1212,12 +1254,13 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_ printf(" "); avg = work_list->total_lat / work_list->nb_atoms; + timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at)); - printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13.6f s\n", + printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n", (double)work_list->total_runtime / NSEC_PER_MSEC, work_list->nb_atoms, (double)avg / NSEC_PER_MSEC, (double)work_list->max_lat / NSEC_PER_MSEC, - (double)work_list->max_lat_at / NSEC_PER_SEC); + max_lat_at); } static int pid_cmp(struct work_atoms *l, struct work_atoms *r) @@ -1402,6 +1445,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, int cpus_nr; bool new_cpu = false; const char *color = PERF_COLOR_NORMAL; + char stimestamp[32]; BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); @@ -1479,7 +1523,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, cpu_color = COLOR_CPUS; if (cpu != this_cpu) - color_fprintf(stdout, cpu_color, " "); + color_fprintf(stdout, color, " "); else color_fprintf(stdout, cpu_color, "*"); @@ -1492,8 +1536,9 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu)) goto out; - color_fprintf(stdout, color, " %12.6f secs ", (double)timestamp / NSEC_PER_SEC); - if (new_shortname) { + timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); + color_fprintf(stdout, color, " %12s secs ", stimestamp); + if (new_shortname || (verbose && sched_in->tid)) { const char *pid_color = color; if (thread__has_color(sched_in)) @@ -1650,6 +1695,988 @@ out_delete: return rc; } +/* + * scheduling times are printed as msec.usec + */ +static inline void print_sched_time(unsigned long long nsecs, int width) +{ + unsigned long msecs; + unsigned long usecs; + + msecs = nsecs / NSEC_PER_MSEC; + nsecs -= msecs * NSEC_PER_MSEC; + usecs = nsecs / NSEC_PER_USEC; + printf("%*lu.%03lu ", width, msecs, usecs); +} + +/* + * returns runtime data for event, allocating memory for it the + * first time it is used. + */ +static struct evsel_runtime *perf_evsel__get_runtime(struct perf_evsel *evsel) +{ + struct evsel_runtime *r = evsel->priv; + + if (r == NULL) { + r = zalloc(sizeof(struct evsel_runtime)); + evsel->priv = r; + } + + return r; +} + +/* + * save last time event was seen per cpu + */ +static void perf_evsel__save_time(struct perf_evsel *evsel, + u64 timestamp, u32 cpu) +{ + struct evsel_runtime *r = perf_evsel__get_runtime(evsel); + + if (r == NULL) + return; + + if ((cpu >= r->ncpu) || (r->last_time == NULL)) { + int i, n = __roundup_pow_of_two(cpu+1); + void *p = r->last_time; + + p = realloc(r->last_time, n * sizeof(u64)); + if (!p) + return; + + r->last_time = p; + for (i = r->ncpu; i < n; ++i) + r->last_time[i] = (u64) 0; + + r->ncpu = n; + } + + r->last_time[cpu] = timestamp; +} + +/* returns last time this event was seen on the given cpu */ +static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu) +{ + struct evsel_runtime *r = perf_evsel__get_runtime(evsel); + + if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu)) + return 0; + + return r->last_time[cpu]; +} + +static int comm_width = 20; + +static char *timehist_get_commstr(struct thread *thread) +{ + static char str[32]; + const char *comm = thread__comm_str(thread); + pid_t tid = thread->tid; + pid_t pid = thread->pid_; + int n; + + if (pid == 0) + n = scnprintf(str, sizeof(str), "%s", comm); + + else if (tid != pid) + n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid); + + else + n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid); + + if (n > comm_width) + comm_width = n; + + return str; +} + +static void timehist_header(struct perf_sched *sched) +{ + u32 ncpus = sched->max_cpu + 1; + u32 i, j; + + printf("%15s %6s ", "time", "cpu"); + + if (sched->show_cpu_visual) { + printf(" "); + for (i = 0, j = 0; i < ncpus; ++i) { + printf("%x", j++); + if (j > 15) + j = 0; + } + printf(" "); + } + + printf(" %-20s %9s %9s %9s", + "task name", "wait time", "sch delay", "run time"); + + printf("\n"); + + /* + * units row + */ + printf("%15s %-6s ", "", ""); + + if (sched->show_cpu_visual) + printf(" %*s ", ncpus, ""); + + printf(" %-20s %9s %9s %9s\n", "[tid/pid]", "(msec)", "(msec)", "(msec)"); + + /* + * separator + */ + printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line); + + if (sched->show_cpu_visual) + printf(" %.*s ", ncpus, graph_dotted_line); + + printf(" %.20s %.9s %.9s %.9s", + graph_dotted_line, graph_dotted_line, graph_dotted_line, + graph_dotted_line); + + printf("\n"); +} + +static void timehist_print_sample(struct perf_sched *sched, + struct perf_sample *sample, + struct addr_location *al, + struct thread *thread, + u64 t) +{ + struct thread_runtime *tr = thread__priv(thread); + u32 max_cpus = sched->max_cpu + 1; + char tstr[64]; + + timestamp__scnprintf_usec(t, tstr, sizeof(tstr)); + printf("%15s [%04d] ", tstr, sample->cpu); + + if (sched->show_cpu_visual) { + u32 i; + char c; + + printf(" "); + for (i = 0; i < max_cpus; ++i) { + /* flag idle times with 'i'; others are sched events */ + if (i == sample->cpu) + c = (thread->tid == 0) ? 'i' : 's'; + else + c = ' '; + printf("%c", c); + } + printf(" "); + } + + printf(" %-*s ", comm_width, timehist_get_commstr(thread)); + + print_sched_time(tr->dt_wait, 6); + print_sched_time(tr->dt_delay, 6); + print_sched_time(tr->dt_run, 6); + + if (sched->show_wakeups) + printf(" %-*s", comm_width, ""); + + if (thread->tid == 0) + goto out; + + if (sched->show_callchain) + printf(" "); + + sample__fprintf_sym(sample, al, 0, + EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE | + EVSEL__PRINT_CALLCHAIN_ARROW | + EVSEL__PRINT_SKIP_IGNORED, + &callchain_cursor, stdout); + +out: + printf("\n"); +} + +/* + * Explanation of delta-time stats: + * + * t = time of current schedule out event + * tprev = time of previous sched out event + * also time of schedule-in event for current task + * last_time = time of last sched change event for current task + * (i.e, time process was last scheduled out) + * ready_to_run = time of wakeup for current task + * + * -----|------------|------------|------------|------ + * last ready tprev t + * time to run + * + * |-------- dt_wait --------| + * |- dt_delay -|-- dt_run --| + * + * dt_run = run time of current task + * dt_wait = time between last schedule out event for task and tprev + * represents time spent off the cpu + * dt_delay = time between wakeup and schedule-in of task + */ + +static void timehist_update_runtime_stats(struct thread_runtime *r, + u64 t, u64 tprev) +{ + r->dt_delay = 0; + r->dt_wait = 0; + r->dt_run = 0; + if (tprev) { + r->dt_run = t - tprev; + if (r->ready_to_run) { + if (r->ready_to_run > tprev) + pr_debug("time travel: wakeup time for task > previous sched_switch event\n"); + else + r->dt_delay = tprev - r->ready_to_run; + } + + if (r->last_time > tprev) + pr_debug("time travel: last sched out time for task > previous sched_switch event\n"); + else if (r->last_time) + r->dt_wait = tprev - r->last_time; + } + + update_stats(&r->run_stats, r->dt_run); + r->total_run_time += r->dt_run; +} + +static bool is_idle_sample(struct perf_sched *sched, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct thread *thread; + struct callchain_cursor *cursor = &callchain_cursor; + + /* pid 0 == swapper == idle task */ + if (sample->pid == 0) + return true; + + if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) { + if (perf_evsel__intval(evsel, sample, "prev_pid") == 0) + return true; + } + + /* want main thread for process - has maps */ + thread = machine__findnew_thread(machine, sample->pid, sample->pid); + if (thread == NULL) { + pr_debug("Failed to get thread for pid %d.\n", sample->pid); + return false; + } + + if (!symbol_conf.use_callchain || sample->callchain == NULL) + return false; + + if (thread__resolve_callchain(thread, cursor, evsel, sample, + NULL, NULL, sched->max_stack + 2) != 0) { + if (verbose) + error("Failed to resolve callchain. Skipping\n"); + + return false; + } + + callchain_cursor_commit(cursor); + + while (true) { + struct callchain_cursor_node *node; + struct symbol *sym; + + node = callchain_cursor_current(cursor); + if (node == NULL) + break; + + sym = node->sym; + if (sym && sym->name) { + if (!strcmp(sym->name, "schedule") || + !strcmp(sym->name, "__schedule") || + !strcmp(sym->name, "preempt_schedule")) + sym->ignore = 1; + } + + callchain_cursor_advance(cursor); + } + + return false; +} + +/* + * Track idle stats per cpu by maintaining a local thread + * struct for the idle task on each cpu. + */ +static int init_idle_threads(int ncpu) +{ + int i; + + idle_threads = zalloc(ncpu * sizeof(struct thread *)); + if (!idle_threads) + return -ENOMEM; + + idle_max_cpu = ncpu; + + /* allocate the actual thread struct if needed */ + for (i = 0; i < ncpu; ++i) { + idle_threads[i] = thread__new(0, 0); + if (idle_threads[i] == NULL) + return -ENOMEM; + + thread__set_comm(idle_threads[i], idle_comm, 0); + } + + return 0; +} + +static void free_idle_threads(void) +{ + int i; + + if (idle_threads == NULL) + return; + + for (i = 0; i < idle_max_cpu; ++i) { + if ((idle_threads[i])) + thread__delete(idle_threads[i]); + } + + free(idle_threads); +} + +static struct thread *get_idle_thread(int cpu) +{ + /* + * expand/allocate array of pointers to local thread + * structs if needed + */ + if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) { + int i, j = __roundup_pow_of_two(cpu+1); + void *p; + + p = realloc(idle_threads, j * sizeof(struct thread *)); + if (!p) + return NULL; + + idle_threads = (struct thread **) p; + for (i = idle_max_cpu; i < j; ++i) + idle_threads[i] = NULL; + + idle_max_cpu = j; + } + + /* allocate a new thread struct if needed */ + if (idle_threads[cpu] == NULL) { + idle_threads[cpu] = thread__new(0, 0); + + if (idle_threads[cpu]) { + idle_threads[cpu]->tid = 0; + thread__set_comm(idle_threads[cpu], idle_comm, 0); + } + } + + return idle_threads[cpu]; +} + +/* + * handle runtime stats saved per thread + */ +static struct thread_runtime *thread__init_runtime(struct thread *thread) +{ + struct thread_runtime *r; + + r = zalloc(sizeof(struct thread_runtime)); + if (!r) + return NULL; + + init_stats(&r->run_stats); + thread__set_priv(thread, r); + + return r; +} + +static struct thread_runtime *thread__get_runtime(struct thread *thread) +{ + struct thread_runtime *tr; + + tr = thread__priv(thread); + if (tr == NULL) { + tr = thread__init_runtime(thread); + if (tr == NULL) + pr_debug("Failed to malloc memory for runtime data.\n"); + } + + return tr; +} + +static struct thread *timehist_get_thread(struct perf_sched *sched, + struct perf_sample *sample, + struct machine *machine, + struct perf_evsel *evsel) +{ + struct thread *thread; + + if (is_idle_sample(sched, sample, evsel, machine)) { + thread = get_idle_thread(sample->cpu); + if (thread == NULL) + pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); + + } else { + /* there were samples with tid 0 but non-zero pid */ + thread = machine__findnew_thread(machine, sample->pid, + sample->tid ?: sample->pid); + if (thread == NULL) { + pr_debug("Failed to get thread for tid %d. skipping sample.\n", + sample->tid); + } + } + + return thread; +} + +static bool timehist_skip_sample(struct perf_sched *sched, + struct thread *thread) +{ + bool rc = false; + + if (thread__is_filtered(thread)) { + rc = true; + sched->skipped_samples++; + } + + return rc; +} + +static void timehist_print_wakeup_event(struct perf_sched *sched, + struct perf_sample *sample, + struct machine *machine, + struct thread *awakened) +{ + struct thread *thread; + char tstr[64]; + + thread = machine__findnew_thread(machine, sample->pid, sample->tid); + if (thread == NULL) + return; + + /* show wakeup unless both awakee and awaker are filtered */ + if (timehist_skip_sample(sched, thread) && + timehist_skip_sample(sched, awakened)) { + return; + } + + timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); + printf("%15s [%04d] ", tstr, sample->cpu); + if (sched->show_cpu_visual) + printf(" %*s ", sched->max_cpu + 1, ""); + + printf(" %-*s ", comm_width, timehist_get_commstr(thread)); + + /* dt spacer */ + printf(" %9s %9s %9s ", "", "", ""); + + printf("awakened: %s", timehist_get_commstr(awakened)); + + printf("\n"); +} + +static int timehist_sched_wakeup_event(struct perf_tool *tool, + union perf_event *event __maybe_unused, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); + struct thread *thread; + struct thread_runtime *tr = NULL; + /* want pid of awakened task not pid in sample */ + const u32 pid = perf_evsel__intval(evsel, sample, "pid"); + + thread = machine__findnew_thread(machine, 0, pid); + if (thread == NULL) + return -1; + + tr = thread__get_runtime(thread); + if (tr == NULL) + return -1; + + if (tr->ready_to_run == 0) + tr->ready_to_run = sample->time; + + /* show wakeups if requested */ + if (sched->show_wakeups && + !perf_time__skip_sample(&sched->ptime, sample->time)) + timehist_print_wakeup_event(sched, sample, machine, thread); + + return 0; +} + +static void timehist_print_migration_event(struct perf_sched *sched, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine, + struct thread *migrated) +{ + struct thread *thread; + char tstr[64]; + u32 max_cpus = sched->max_cpu + 1; + u32 ocpu, dcpu; + + if (sched->summary_only) + return; + + max_cpus = sched->max_cpu + 1; + ocpu = perf_evsel__intval(evsel, sample, "orig_cpu"); + dcpu = perf_evsel__intval(evsel, sample, "dest_cpu"); + + thread = machine__findnew_thread(machine, sample->pid, sample->tid); + if (thread == NULL) + return; + + if (timehist_skip_sample(sched, thread) && + timehist_skip_sample(sched, migrated)) { + return; + } + + timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); + printf("%15s [%04d] ", tstr, sample->cpu); + + if (sched->show_cpu_visual) { + u32 i; + char c; + + printf(" "); + for (i = 0; i < max_cpus; ++i) { + c = (i == sample->cpu) ? 'm' : ' '; + printf("%c", c); + } + printf(" "); + } + + printf(" %-*s ", comm_width, timehist_get_commstr(thread)); + + /* dt spacer */ + printf(" %9s %9s %9s ", "", "", ""); + + printf("migrated: %s", timehist_get_commstr(migrated)); + printf(" cpu %d => %d", ocpu, dcpu); + + printf("\n"); +} + +static int timehist_migrate_task_event(struct perf_tool *tool, + union perf_event *event __maybe_unused, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); + struct thread *thread; + struct thread_runtime *tr = NULL; + /* want pid of migrated task not pid in sample */ + const u32 pid = perf_evsel__intval(evsel, sample, "pid"); + + thread = machine__findnew_thread(machine, 0, pid); + if (thread == NULL) + return -1; + + tr = thread__get_runtime(thread); + if (tr == NULL) + return -1; + + tr->migrations++; + + /* show migrations if requested */ + timehist_print_migration_event(sched, evsel, sample, machine, thread); + + return 0; +} + +static int timehist_sched_change_event(struct perf_tool *tool, + union perf_event *event, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); + struct perf_time_interval *ptime = &sched->ptime; + struct addr_location al; + struct thread *thread; + struct thread_runtime *tr = NULL; + u64 tprev, t = sample->time; + int rc = 0; + + if (machine__resolve(machine, &al, sample) < 0) { + pr_err("problem processing %d event. skipping it\n", + event->header.type); + rc = -1; + goto out; + } + + thread = timehist_get_thread(sched, sample, machine, evsel); + if (thread == NULL) { + rc = -1; + goto out; + } + + if (timehist_skip_sample(sched, thread)) + goto out; + + tr = thread__get_runtime(thread); + if (tr == NULL) { + rc = -1; + goto out; + } + + tprev = perf_evsel__get_time(evsel, sample->cpu); + + /* + * If start time given: + * - sample time is under window user cares about - skip sample + * - tprev is under window user cares about - reset to start of window + */ + if (ptime->start && ptime->start > t) + goto out; + + if (ptime->start > tprev) + tprev = ptime->start; + + /* + * If end time given: + * - previous sched event is out of window - we are done + * - sample time is beyond window user cares about - reset it + * to close out stats for time window interest + */ + if (ptime->end) { + if (tprev > ptime->end) + goto out; + + if (t > ptime->end) + t = ptime->end; + } + + timehist_update_runtime_stats(tr, t, tprev); + + if (!sched->summary_only) + timehist_print_sample(sched, sample, &al, thread, t); + +out: + if (tr) { + /* time of this sched_switch event becomes last time task seen */ + tr->last_time = sample->time; + + /* sched out event for task so reset ready to run time */ + tr->ready_to_run = 0; + } + + perf_evsel__save_time(evsel, sample->time, sample->cpu); + + return rc; +} + +static int timehist_sched_switch_event(struct perf_tool *tool, + union perf_event *event, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine __maybe_unused) +{ + return timehist_sched_change_event(tool, event, evsel, sample, machine); +} + +static int process_lost(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine __maybe_unused) +{ + char tstr[64]; + + timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); + printf("%15s ", tstr); + printf("lost %" PRIu64 " events on cpu %d\n", event->lost.lost, sample->cpu); + + return 0; +} + + +static void print_thread_runtime(struct thread *t, + struct thread_runtime *r) +{ + double mean = avg_stats(&r->run_stats); + float stddev; + + printf("%*s %5d %9" PRIu64 " ", + comm_width, timehist_get_commstr(t), t->ppid, + (u64) r->run_stats.n); + + print_sched_time(r->total_run_time, 8); + stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean); + print_sched_time(r->run_stats.min, 6); + printf(" "); + print_sched_time((u64) mean, 6); + printf(" "); + print_sched_time(r->run_stats.max, 6); + printf(" "); + printf("%5.2f", stddev); + printf(" %5" PRIu64, r->migrations); + printf("\n"); +} + +struct total_run_stats { + u64 sched_count; + u64 task_count; + u64 total_run_time; +}; + +static int __show_thread_runtime(struct thread *t, void *priv) +{ + struct total_run_stats *stats = priv; + struct thread_runtime *r; + + if (thread__is_filtered(t)) + return 0; + + r = thread__priv(t); + if (r && r->run_stats.n) { + stats->task_count++; + stats->sched_count += r->run_stats.n; + stats->total_run_time += r->total_run_time; + print_thread_runtime(t, r); + } + + return 0; +} + +static int show_thread_runtime(struct thread *t, void *priv) +{ + if (t->dead) + return 0; + + return __show_thread_runtime(t, priv); +} + +static int show_deadthread_runtime(struct thread *t, void *priv) +{ + if (!t->dead) + return 0; + + return __show_thread_runtime(t, priv); +} + +static void timehist_print_summary(struct perf_sched *sched, + struct perf_session *session) +{ + struct machine *m = &session->machines.host; + struct total_run_stats totals; + u64 task_count; + struct thread *t; + struct thread_runtime *r; + int i; + + memset(&totals, 0, sizeof(totals)); + + if (comm_width < 30) + comm_width = 30; + + printf("\nRuntime summary\n"); + printf("%*s parent sched-in ", comm_width, "comm"); + printf(" run-time min-run avg-run max-run stddev migrations\n"); + printf("%*s (count) ", comm_width, ""); + printf(" (msec) (msec) (msec) (msec) %%\n"); + printf("%.117s\n", graph_dotted_line); + + machine__for_each_thread(m, show_thread_runtime, &totals); + task_count = totals.task_count; + if (!task_count) + printf("<no still running tasks>\n"); + + printf("\nTerminated tasks:\n"); + machine__for_each_thread(m, show_deadthread_runtime, &totals); + if (task_count == totals.task_count) + printf("<no terminated tasks>\n"); + + /* CPU idle stats not tracked when samples were skipped */ + if (sched->skipped_samples) + return; + + printf("\nIdle stats:\n"); + for (i = 0; i < idle_max_cpu; ++i) { + t = idle_threads[i]; + if (!t) + continue; + + r = thread__priv(t); + if (r && r->run_stats.n) { + totals.sched_count += r->run_stats.n; + printf(" CPU %2d idle for ", i); + print_sched_time(r->total_run_time, 6); + printf(" msec\n"); + } else + printf(" CPU %2d idle entire time window\n", i); + } + + printf("\n" + " Total number of unique tasks: %" PRIu64 "\n" + "Total number of context switches: %" PRIu64 "\n" + " Total run time (msec): ", + totals.task_count, totals.sched_count); + + print_sched_time(totals.total_run_time, 2); + printf("\n"); +} + +typedef int (*sched_handler)(struct perf_tool *tool, + union perf_event *event, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine); + +static int perf_timehist__process_sample(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); + int err = 0; + int this_cpu = sample->cpu; + + if (this_cpu > sched->max_cpu) + sched->max_cpu = this_cpu; + + if (evsel->handler != NULL) { + sched_handler f = evsel->handler; + + err = f(tool, event, evsel, sample, machine); + } + + return err; +} + +static int timehist_check_attr(struct perf_sched *sched, + struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + struct evsel_runtime *er; + + list_for_each_entry(evsel, &evlist->entries, node) { + er = perf_evsel__get_runtime(evsel); + if (er == NULL) { + pr_err("Failed to allocate memory for evsel runtime data\n"); + return -1; + } + + if (sched->show_callchain && + !(evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) { + pr_info("Samples do not have callchains.\n"); + sched->show_callchain = 0; + symbol_conf.use_callchain = 0; + } + } + + return 0; +} + +static int perf_sched__timehist(struct perf_sched *sched) +{ + const struct perf_evsel_str_handler handlers[] = { + { "sched:sched_switch", timehist_sched_switch_event, }, + { "sched:sched_wakeup", timehist_sched_wakeup_event, }, + { "sched:sched_wakeup_new", timehist_sched_wakeup_event, }, + }; + const struct perf_evsel_str_handler migrate_handlers[] = { + { "sched:sched_migrate_task", timehist_migrate_task_event, }, + }; + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + .force = sched->force, + }; + + struct perf_session *session; + struct perf_evlist *evlist; + int err = -1; + + /* + * event handlers for timehist option + */ + sched->tool.sample = perf_timehist__process_sample; + sched->tool.mmap = perf_event__process_mmap; + sched->tool.comm = perf_event__process_comm; + sched->tool.exit = perf_event__process_exit; + sched->tool.fork = perf_event__process_fork; + sched->tool.lost = process_lost; + sched->tool.attr = perf_event__process_attr; + sched->tool.tracing_data = perf_event__process_tracing_data; + sched->tool.build_id = perf_event__process_build_id; + + sched->tool.ordered_events = true; + sched->tool.ordering_requires_timestamps = true; + + symbol_conf.use_callchain = sched->show_callchain; + + session = perf_session__new(&file, false, &sched->tool); + if (session == NULL) + return -ENOMEM; + + evlist = session->evlist; + + symbol__init(&session->header.env); + + if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) { + pr_err("Invalid time string\n"); + return -EINVAL; + } + + if (timehist_check_attr(sched, evlist) != 0) + goto out; + + setup_pager(); + + /* setup per-evsel handlers */ + if (perf_session__set_tracepoints_handlers(session, handlers)) + goto out; + + /* sched_switch event at a minimum needs to exist */ + if (!perf_evlist__find_tracepoint_by_name(session->evlist, + "sched:sched_switch")) { + pr_err("No sched_switch events found. Have you run 'perf sched record'?\n"); + goto out; + } + + if (sched->show_migrations && + perf_session__set_tracepoints_handlers(session, migrate_handlers)) + goto out; + + /* pre-allocate struct for per-CPU idle stats */ + sched->max_cpu = session->header.env.nr_cpus_online; + if (sched->max_cpu == 0) + sched->max_cpu = 4; + if (init_idle_threads(sched->max_cpu)) + goto out; + + /* summary_only implies summary option, but don't overwrite summary if set */ + if (sched->summary_only) + sched->summary = sched->summary_only; + + if (!sched->summary_only) + timehist_header(sched); + + err = perf_session__process_events(session); + if (err) { + pr_err("Failed to process events, error %d", err); + goto out; + } + + sched->nr_events = evlist->stats.nr_events[0]; + sched->nr_lost_events = evlist->stats.total_lost; + sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST]; + + if (sched->summary) + timehist_print_summary(sched, session); + +out: + free_idle_threads(); + perf_session__delete(session); + + return err; +} + + static void print_bad_events(struct perf_sched *sched) { if (sched->nr_unordered_timestamps && sched->nr_timestamps) { @@ -1953,38 +2980,32 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) .next_shortname1 = 'A', .next_shortname2 = '0', .skip_merge = 0, + .show_callchain = 1, + .max_stack = 5, + }; + const struct option sched_options[] = { + OPT_STRING('i', "input", &input_name, "file", + "input file name"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show symbol address, etc)"), + OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, + "dump raw trace in ASCII"), + OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), + OPT_END() }; const struct option latency_options[] = { OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", "sort by key(s): runtime, switch, avg, max"), - OPT_INCR('v', "verbose", &verbose, - "be more verbose (show symbol address, etc)"), OPT_INTEGER('C', "CPU", &sched.profile_cpu, "CPU to profile on"), - OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, - "dump raw trace in ASCII"), OPT_BOOLEAN('p', "pids", &sched.skip_merge, "latency stats per pid instead of per comm"), - OPT_END() + OPT_PARENT(sched_options) }; const struct option replay_options[] = { OPT_UINTEGER('r', "repeat", &sched.replay_repeat, "repeat the workload replay N times (-1: infinite)"), - OPT_INCR('v', "verbose", &verbose, - "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, - "dump raw trace in ASCII"), - OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), - OPT_END() - }; - const struct option sched_options[] = { - OPT_STRING('i', "input", &input_name, "file", - "input file name"), - OPT_INCR('v', "verbose", &verbose, - "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, - "dump raw trace in ASCII"), - OPT_END() + OPT_PARENT(sched_options) }; const struct option map_options[] = { OPT_BOOLEAN(0, "compact", &sched.map.comp, @@ -1995,8 +3016,31 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) "highlight given CPUs in map"), OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus", "display given CPUs in map"), - OPT_END() + OPT_PARENT(sched_options) }; + const struct option timehist_options[] = { + OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), + OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, + "file", "kallsyms pathname"), + OPT_BOOLEAN('g', "call-graph", &sched.show_callchain, + "Display call chains if present (default on)"), + OPT_UINTEGER(0, "max-stack", &sched.max_stack, + "Maximum number of functions to display backtrace."), + OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", + "Look for files with symbols relative to this directory"), + OPT_BOOLEAN('s', "summary", &sched.summary_only, + "Show only syscall summary with statistics"), + OPT_BOOLEAN('S', "with-summary", &sched.summary, + "Show all syscalls and summary with statistics"), + OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), + OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"), + OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), + OPT_STRING(0, "time", &sched.time_str, "str", + "Time span for analysis (start,stop)"), + OPT_PARENT(sched_options) + }; + const char * const latency_usage[] = { "perf sched latency [<options>]", NULL @@ -2009,8 +3053,13 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) "perf sched map [<options>]", NULL }; + const char * const timehist_usage[] = { + "perf sched timehist [<options>]", + NULL + }; const char *const sched_subcommands[] = { "record", "latency", "map", - "replay", "script", NULL }; + "replay", "script", + "timehist", NULL }; const char *sched_usage[] = { NULL, NULL @@ -2073,6 +3122,21 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) usage_with_options(replay_usage, replay_options); } return perf_sched__replay(&sched); + } else if (!strcmp(argv[0], "timehist")) { + if (argc) { + argc = parse_options(argc, argv, timehist_options, + timehist_usage, 0); + if (argc) + usage_with_options(timehist_usage, timehist_options); + } + if (sched.show_wakeups && sched.summary_only) { + pr_err(" Error: -s and -w are mutually exclusive.\n"); + parse_options_usage(timehist_usage, timehist_options, "s", true); + parse_options_usage(NULL, timehist_options, "w", true); + return -EINVAL; + } + + return perf_sched__timehist(&sched); } else { usage_with_options(sched_usage, sched_options); } |