Diffstat (limited to 'tools/perf/util/thread-stack.c')
-rw-r--r-- | tools/perf/util/thread-stack.c | 227
1 file changed, 164 insertions(+), 63 deletions(-)
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 61a4286a74dc..d52f27f373ce 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -15,6 +15,7 @@
 
 #include <linux/rbtree.h>
 #include <linux/list.h>
+#include <linux/log2.h>
 #include <errno.h>
 #include "thread.h"
 #include "event.h"
@@ -60,6 +61,7 @@ struct thread_stack_entry {
  * @last_time: last timestamp
  * @crp: call/return processor
  * @comm: current comm
+ * @arr_sz: size of array if this is the first element of an array
  */
 struct thread_stack {
 	struct thread_stack_entry *stack;
@@ -71,8 +73,19 @@ struct thread_stack {
 	u64 last_time;
 	struct call_return_processor *crp;
 	struct comm *comm;
+	unsigned int arr_sz;
 };
 
+/*
+ * Assume pid == tid == 0 identifies the idle task as defined by
+ * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
+ * and therefore requires a stack for each cpu.
+ */
+static inline bool thread_stack__per_cpu(struct thread *thread)
+{
+	return !(thread->tid || thread->pid_);
+}
+
 static int thread_stack__grow(struct thread_stack *ts)
 {
 	struct thread_stack_entry *new_stack;
@@ -91,19 +104,14 @@ static int thread_stack__grow(struct thread_stack *ts)
 	return 0;
 }
 
-static struct thread_stack *thread_stack__new(struct thread *thread,
-					      struct call_return_processor *crp)
+static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
+			      struct call_return_processor *crp)
 {
-	struct thread_stack *ts;
-
-	ts = zalloc(sizeof(struct thread_stack));
-	if (!ts)
-		return NULL;
+	int err;
 
-	if (thread_stack__grow(ts)) {
-		free(ts);
-		return NULL;
-	}
+	err = thread_stack__grow(ts);
+	if (err)
+		return err;
 
 	if (thread->mg && thread->mg->machine)
 		ts->kernel_start = machine__kernel_start(thread->mg->machine);
@@ -111,9 +119,72 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
 		ts->kernel_start = 1ULL << 63;
 	ts->crp = crp;
 
+	return 0;
+}
+
+static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
+					      struct call_return_processor *crp)
+{
+	struct thread_stack *ts = thread->ts, *new_ts;
+	unsigned int old_sz = ts ? ts->arr_sz : 0;
+	unsigned int new_sz = 1;
+
+	if (thread_stack__per_cpu(thread) && cpu > 0)
+		new_sz = roundup_pow_of_two(cpu + 1);
+
+	if (!ts || new_sz > old_sz) {
+		new_ts = calloc(new_sz, sizeof(*ts));
+		if (!new_ts)
+			return NULL;
+		if (ts)
+			memcpy(new_ts, ts, old_sz * sizeof(*ts));
+		new_ts->arr_sz = new_sz;
+		zfree(&thread->ts);
+		thread->ts = new_ts;
+		ts = new_ts;
+	}
+
+	if (thread_stack__per_cpu(thread) && cpu > 0 &&
+	    (unsigned int)cpu < ts->arr_sz)
+		ts += cpu;
+
+	if (!ts->stack &&
+	    thread_stack__init(ts, thread, crp))
+		return NULL;
+
 	return ts;
 }
 
+static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
+{
+	struct thread_stack *ts = thread->ts;
+
+	if (cpu < 0)
+		cpu = 0;
+
+	if (!ts || (unsigned int)cpu >= ts->arr_sz)
+		return NULL;
+
+	ts += cpu;
+
+	if (!ts->stack)
+		return NULL;
+
+	return ts;
+}
+
+static inline struct thread_stack *thread__stack(struct thread *thread,
+						 int cpu)
+{
+	if (!thread)
+		return NULL;
+
+	if (thread_stack__per_cpu(thread))
+		return thread__cpu_stack(thread, cpu);
+
+	return thread->ts;
+}
+
 static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
 			      bool trace_end)
 {
@@ -226,25 +297,37 @@ static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
 
 int thread_stack__flush(struct thread *thread)
 {
-	if (thread->ts)
-		return __thread_stack__flush(thread, thread->ts);
+	struct thread_stack *ts = thread->ts;
+	unsigned int pos;
+	int err = 0;
 
-	return 0;
+	if (ts) {
+		for (pos = 0; pos < ts->arr_sz; pos++) {
+			int ret = __thread_stack__flush(thread, ts + pos);
+
+			if (ret)
+				err = ret;
+		}
+	}
+
+	return err;
 }
 
-int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
 			u64 to_ip, u16 insn_len, u64 trace_nr)
 {
+	struct thread_stack *ts = thread__stack(thread, cpu);
+
 	if (!thread)
 		return -EINVAL;
 
-	if (!thread->ts) {
-		thread->ts = thread_stack__new(thread, NULL);
-		if (!thread->ts) {
+	if (!ts) {
+		ts = thread_stack__new(thread, cpu, NULL);
+		if (!ts) {
 			pr_warning("Out of memory: no thread stack\n");
 			return -ENOMEM;
 		}
-		thread->ts->trace_nr = trace_nr;
+		ts->trace_nr = trace_nr;
 	}
 
 	/*
@@ -252,14 +335,14 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 	 * the stack might be completely invalid. Better to report nothing than
 	 * to report something misleading, so flush the stack.
 	 */
-	if (trace_nr != thread->ts->trace_nr) {
-		if (thread->ts->trace_nr)
-			__thread_stack__flush(thread, thread->ts);
-		thread->ts->trace_nr = trace_nr;
+	if (trace_nr != ts->trace_nr) {
+		if (ts->trace_nr)
+			__thread_stack__flush(thread, ts);
+		ts->trace_nr = trace_nr;
 	}
 
 	/* Stop here if thread_stack__process() is in use */
-	if (thread->ts->crp)
+	if (ts->crp)
 		return 0;
 
 	if (flags & PERF_IP_FLAG_CALL) {
@@ -270,7 +353,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 		ret_addr = from_ip + insn_len;
 		if (ret_addr == to_ip)
 			return 0; /* Zero-length calls are excluded */
-		return thread_stack__push(thread->ts, ret_addr,
+		return thread_stack__push(ts, ret_addr,
 					  flags & PERF_IP_FLAG_TRACE_END);
 	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
 		/*
@@ -280,32 +363,52 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 		 * address, so try to pop that. Also, do not expect a call made
 		 * when the trace ended, to return, so pop that.
 		 */
-		thread_stack__pop(thread->ts, to_ip);
-		thread_stack__pop_trace_end(thread->ts);
+		thread_stack__pop(ts, to_ip);
+		thread_stack__pop_trace_end(ts);
 	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
-		thread_stack__pop(thread->ts, to_ip);
+		thread_stack__pop(ts, to_ip);
 	}
 
 	return 0;
 }
 
-void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
 {
-	if (!thread || !thread->ts)
+	struct thread_stack *ts = thread__stack(thread, cpu);
+
+	if (!ts)
 		return;
 
-	if (trace_nr != thread->ts->trace_nr) {
-		if (thread->ts->trace_nr)
-			__thread_stack__flush(thread, thread->ts);
-		thread->ts->trace_nr = trace_nr;
+	if (trace_nr != ts->trace_nr) {
+		if (ts->trace_nr)
+			__thread_stack__flush(thread, ts);
+		ts->trace_nr = trace_nr;
	}
 }
 
+static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
+{
+	__thread_stack__flush(thread, ts);
+	zfree(&ts->stack);
+}
+
+static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
+{
+	unsigned int arr_sz = ts->arr_sz;
+
+	__thread_stack__free(thread, ts);
+	memset(ts, 0, sizeof(*ts));
+	ts->arr_sz = arr_sz;
+}
+
 void thread_stack__free(struct thread *thread)
 {
-	if (thread->ts) {
-		__thread_stack__flush(thread, thread->ts);
-		zfree(&thread->ts->stack);
+	struct thread_stack *ts = thread->ts;
+	unsigned int pos;
+
+	if (ts) {
+		for (pos = 0; pos < ts->arr_sz; pos++)
+			__thread_stack__free(thread, ts + pos);
 		zfree(&thread->ts);
 	}
 }
@@ -315,9 +418,11 @@ static inline u64 callchain_context(u64 ip, u64 kernel_start)
 	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
 }
 
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+void thread_stack__sample(struct thread *thread, int cpu,
+			  struct ip_callchain *chain,
 			  size_t sz, u64 ip, u64 kernel_start)
 {
+	struct thread_stack *ts = thread__stack(thread, cpu);
 	u64 context = callchain_context(ip, kernel_start);
 	u64 last_context;
 	size_t i, j;
@@ -330,15 +435,15 @@ void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
 	chain->ips[0] = context;
 	chain->ips[1] = ip;
 
-	if (!thread || !thread->ts) {
+	if (!ts) {
 		chain->nr = 2;
 		return;
 	}
 
 	last_context = context;
 
-	for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) {
-		ip = thread->ts->stack[thread->ts->cnt - j].ret_addr;
+	for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
+		ip = ts->stack[ts->cnt - j].ret_addr;
 		context = callchain_context(ip, kernel_start);
 		if (context != last_context) {
 			if (i >= sz - 1)
@@ -449,7 +554,7 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
 	return 1;
 }
 
-static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
+static int thread_stack__bottom(struct thread_stack *ts,
 				struct perf_sample *sample,
 				struct addr_location *from_al,
 				struct addr_location *to_al, u64 ref)
@@ -474,7 +579,7 @@ static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
 	if (!cp)
 		return -ENOMEM;
 
-	return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
+	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
 				     true, false);
 }
 
@@ -590,24 +695,19 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 			  struct addr_location *to_al, u64 ref,
 			  struct call_return_processor *crp)
 {
-	struct thread_stack *ts = thread->ts;
+	struct thread_stack *ts = thread__stack(thread, sample->cpu);
 	int err = 0;
 
-	if (ts) {
-		if (!ts->crp) {
-			/* Supersede thread_stack__event() */
-			thread_stack__free(thread);
-			thread->ts = thread_stack__new(thread, crp);
-			if (!thread->ts)
-				return -ENOMEM;
-			ts = thread->ts;
-			ts->comm = comm;
-		}
-	} else {
-		thread->ts = thread_stack__new(thread, crp);
-		if (!thread->ts)
+	if (ts && !ts->crp) {
+		/* Supersede thread_stack__event() */
+		thread_stack__reset(thread, ts);
+		ts = NULL;
+	}
+
+	if (!ts) {
+		ts = thread_stack__new(thread, sample->cpu, crp);
+		if (!ts)
 			return -ENOMEM;
-		ts = thread->ts;
 		ts->comm = comm;
 	}
 
@@ -621,8 +721,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 
 	/* If the stack is empty, put the current symbol on the stack */
 	if (!ts->cnt) {
-		err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
-					   ref);
+		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
 		if (err)
 			return err;
 	}
@@ -671,9 +770,11 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 	return err;
 }
 
-size_t thread_stack__depth(struct thread *thread)
+size_t thread_stack__depth(struct thread *thread, int cpu)
 {
-	if (!thread->ts)
+	struct thread_stack *ts = thread__stack(thread, cpu);
+
+	if (!ts)
 		return 0;
-	return thread->ts->cnt;
+	return ts->cnt;
 }