Lines Matching +full:sample +full:- +full:time
1 // SPDX-License-Identifier: GPL-2.0-only
3 * thread-stack.c: Synthesize a thread's stack using call / return events
21 #include "call-path.h"
22 #include "thread-stack.h"
40 * struct thread_stack_entry - thread stack entry.
43 * @ref: external reference (e.g. db_id of sample)
47 * @db_id: id used for db-export
68 * struct thread_stack - thread stack constructed from 'call' and 'return'
123 new_sz = ts->sz + STACK_GROWTH; in thread_stack__grow()
126 new_stack = realloc(ts->stack, sz); in thread_stack__grow()
128 return -ENOMEM; in thread_stack__grow()
130 ts->stack = new_stack; in thread_stack__grow()
131 ts->sz = new_sz; in thread_stack__grow()
152 ts->br_stack_rb = zalloc(sz); in thread_stack__init()
153 if (!ts->br_stack_rb) in thread_stack__init()
154 return -ENOMEM; in thread_stack__init()
155 ts->br_stack_sz = br_stack_sz; in thread_stack__init()
160 const char *arch = perf_env__arch(machine->env); in thread_stack__init()
162 ts->kernel_start = machine__kernel_start(machine); in thread_stack__init()
164 ts->rstate = X86_RETPOLINE_POSSIBLE; in thread_stack__init()
166 ts->kernel_start = 1ULL << 63; in thread_stack__init()
168 ts->crp = crp; in thread_stack__init()
179 unsigned int old_sz = ts ? ts->arr_sz : 0; in thread_stack__new()
191 new_ts->arr_sz = new_sz; in thread_stack__new()
198 (unsigned int)cpu < ts->arr_sz) in thread_stack__new()
201 if (!ts->stack && in thread_stack__new()
215 if (!ts || (unsigned int)cpu >= ts->arr_sz) in thread__cpu_stack()
220 if (!ts->stack) in thread__cpu_stack()
243 if (ts->cnt == ts->sz) { in thread_stack__push()
247 ts->cnt = 0; in thread_stack__push()
251 ts->stack[ts->cnt].trace_end = trace_end; in thread_stack__push()
252 ts->stack[ts->cnt++].ret_addr = ret_addr; in thread_stack__push()
270 for (i = ts->cnt; i; ) { in thread_stack__pop()
271 if (ts->stack[--i].ret_addr == ret_addr) { in thread_stack__pop()
272 ts->cnt = i; in thread_stack__pop()
282 for (i = ts->cnt; i; ) { in thread_stack__pop_trace_end()
283 if (ts->stack[--i].trace_end) in thread_stack__pop_trace_end()
284 ts->cnt = i; in thread_stack__pop_trace_end()
292 if (!ts->cnt) in thread_stack__in_kernel()
295 return ts->stack[ts->cnt - 1].cp->in_kernel; in thread_stack__in_kernel()
302 struct call_return_processor *crp = ts->crp; in thread_stack__call_return()
306 .comm = ts->comm, in thread_stack__call_return()
311 tse = &ts->stack[idx]; in thread_stack__call_return()
312 cr.cp = tse->cp; in thread_stack__call_return()
313 cr.call_time = tse->timestamp; in thread_stack__call_return()
315 cr.branch_count = ts->branch_count - tse->branch_count; in thread_stack__call_return()
316 cr.insn_count = ts->insn_count - tse->insn_count; in thread_stack__call_return()
317 cr.cyc_count = ts->cyc_count - tse->cyc_count; in thread_stack__call_return()
318 cr.db_id = tse->db_id; in thread_stack__call_return()
319 cr.call_ref = tse->ref; in thread_stack__call_return()
321 if (tse->no_call) in thread_stack__call_return()
325 if (tse->non_call) in thread_stack__call_return()
333 parent_db_id = idx ? &(tse - 1)->db_id : NULL; in thread_stack__call_return()
335 return crp->process(&cr, parent_db_id, crp->data); in thread_stack__call_return()
340 struct call_return_processor *crp = ts->crp; in __thread_stack__flush()
344 ts->cnt = 0; in __thread_stack__flush()
345 ts->br_stack_pos = 0; in __thread_stack__flush()
346 if (ts->br_stack_rb) in __thread_stack__flush()
347 ts->br_stack_rb->nr = 0; in __thread_stack__flush()
351 while (ts->cnt) { in __thread_stack__flush()
352 err = thread_stack__call_return(thread, ts, --ts->cnt, in __thread_stack__flush()
353 ts->last_time, 0, true); in __thread_stack__flush()
356 ts->cnt = 0; in __thread_stack__flush()
371 for (pos = 0; pos < ts->arr_sz; pos++) { in thread_stack__flush()
385 struct branch_stack *bs = ts->br_stack_rb; in thread_stack__update_br_stack()
388 if (!ts->br_stack_pos) in thread_stack__update_br_stack()
389 ts->br_stack_pos = ts->br_stack_sz; in thread_stack__update_br_stack()
391 ts->br_stack_pos -= 1; in thread_stack__update_br_stack()
393 be = &bs->entries[ts->br_stack_pos]; in thread_stack__update_br_stack()
394 be->from = from_ip; in thread_stack__update_br_stack()
395 be->to = to_ip; in thread_stack__update_br_stack()
396 be->flags.value = 0; in thread_stack__update_br_stack()
397 be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT); in thread_stack__update_br_stack()
398 be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX); in thread_stack__update_br_stack()
400 be->flags.mispred = ts->mispred_all; in thread_stack__update_br_stack()
402 if (bs->nr < ts->br_stack_sz) in thread_stack__update_br_stack()
403 bs->nr += 1; in thread_stack__update_br_stack()
413 return -EINVAL; in thread_stack__event()
419 return -ENOMEM; in thread_stack__event()
421 ts->trace_nr = trace_nr; in thread_stack__event()
422 ts->mispred_all = mispred_all; in thread_stack__event()
430 if (trace_nr != ts->trace_nr) { in thread_stack__event()
431 if (ts->trace_nr) in thread_stack__event()
433 ts->trace_nr = trace_nr; in thread_stack__event()
443 if (ts->crp || !callstack) in thread_stack__event()
453 return 0; /* Zero-length calls are excluded */ in thread_stack__event()
480 if (trace_nr != ts->trace_nr) { in thread_stack__set_trace_nr()
481 if (ts->trace_nr) in thread_stack__set_trace_nr()
483 ts->trace_nr = trace_nr; in thread_stack__set_trace_nr()
490 zfree(&ts->stack); in __thread_stack__free()
491 zfree(&ts->br_stack_rb); in __thread_stack__free()
496 unsigned int arr_sz = ts->arr_sz; in thread_stack__reset()
500 ts->arr_sz = arr_sz; in thread_stack__reset()
509 for (pos = 0; pos < ts->arr_sz; pos++) in thread_stack__free()
531 chain->nr = 0; in thread_stack__sample()
535 chain->ips[0] = context; in thread_stack__sample()
536 chain->ips[1] = ip; in thread_stack__sample()
539 chain->nr = 2; in thread_stack__sample()
545 for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) { in thread_stack__sample()
546 ip = ts->stack[ts->cnt - j].ret_addr; in thread_stack__sample()
549 if (i >= sz - 1) in thread_stack__sample()
551 chain->ips[i++] = context; in thread_stack__sample()
554 chain->ips[i] = ip; in thread_stack__sample()
557 chain->nr = i; in thread_stack__sample()
561 * Hardware sample records, created some time after the event occurred, need to
574 chain->nr = 0; in thread_stack__sample_late()
586 for (j = 1; j <= ts->cnt; j++) { in thread_stack__sample_late()
587 ip = ts->stack[ts->cnt - j].ret_addr; in thread_stack__sample_late()
596 for (; nr < sz && j <= ts->cnt; nr++, j++) { in thread_stack__sample_late()
597 ip = ts->stack[ts->cnt - j].ret_addr; in thread_stack__sample_late()
600 if (nr >= sz - 1) in thread_stack__sample_late()
602 chain->ips[nr++] = context; in thread_stack__sample_late()
605 chain->ips[nr] = ip; in thread_stack__sample_late()
609 chain->nr = nr; in thread_stack__sample_late()
611 chain->ips[0] = sample_context; in thread_stack__sample_late()
612 chain->ips[1] = sample_ip; in thread_stack__sample_late()
613 chain->nr = 2; in thread_stack__sample_late()
626 dst->nr = 0; in thread_stack__br_sample()
631 src = ts->br_stack_rb; in thread_stack__br_sample()
632 if (!src->nr) in thread_stack__br_sample()
635 dst->nr = min((unsigned int)src->nr, sz); in thread_stack__br_sample()
637 be = &dst->entries[0]; in thread_stack__br_sample()
638 nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr); in thread_stack__br_sample()
639 memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr); in thread_stack__br_sample()
641 if (src->nr >= ts->br_stack_sz) { in thread_stack__br_sample()
642 sz -= nr; in thread_stack__br_sample()
643 be = &dst->entries[nr]; in thread_stack__br_sample()
644 nr = min(ts->br_stack_pos, sz); in thread_stack__br_sample()
645 memcpy(be, &src->entries[0], bsz * ts->br_stack_pos); in thread_stack__br_sample()
653 *start = be->to && be->to < kernel_start; in us_start()
666 *start = (nb && sample_ip >= be->to && sample_ip <= nb->from) || in ks_start()
667 be->from < kernel_start || in ks_start()
668 (be->to && be->to < kernel_start); in ks_start()
675 * Hardware sample records, created some time after the event occurred, need to
688 dst->nr = 0; in thread_stack__br_sample_late()
693 src = ts->br_stack_rb; in thread_stack__br_sample_late()
694 if (!src->nr) in thread_stack__br_sample_late()
697 spos = &src->entries[ts->br_stack_pos]; in thread_stack__br_sample_late()
698 ssz = &src->entries[ts->br_stack_sz]; in thread_stack__br_sample_late()
700 d = &dst->entries[0]; in thread_stack__br_sample_late()
705 * User space sample: start copying branch entries when the in thread_stack__br_sample_late()
715 if (src->nr >= ts->br_stack_sz) { in thread_stack__br_sample_late()
716 for (s = &src->entries[0]; s < spos && nr < sz; s++) { in thread_stack__br_sample_late()
727 * Kernel space sample: start copying branch entries when the ip in thread_stack__br_sample_late()
739 if (src->nr >= ts->br_stack_sz) { in thread_stack__br_sample_late()
740 for (s = &src->entries[0]; s < spos && nr < sz; s++) { in thread_stack__br_sample_late()
750 dst->nr = nr; in thread_stack__br_sample_late()
762 crp->cpr = call_path_root__new(); in call_return_processor__new()
763 if (!crp->cpr) in call_return_processor__new()
765 crp->process = process; in call_return_processor__new()
766 crp->data = data; in call_return_processor__new()
777 call_path_root__free(crp->cpr); in call_return_processor__free()
790 return -ENOMEM; in thread_stack__push_cp()
792 if (ts->cnt == ts->sz) { in thread_stack__push_cp()
798 tse = &ts->stack[ts->cnt++]; in thread_stack__push_cp()
799 tse->ret_addr = ret_addr; in thread_stack__push_cp()
800 tse->timestamp = timestamp; in thread_stack__push_cp()
801 tse->ref = ref; in thread_stack__push_cp()
802 tse->branch_count = ts->branch_count; in thread_stack__push_cp()
803 tse->insn_count = ts->insn_count; in thread_stack__push_cp()
804 tse->cyc_count = ts->cyc_count; in thread_stack__push_cp()
805 tse->cp = cp; in thread_stack__push_cp()
806 tse->no_call = no_call; in thread_stack__push_cp()
807 tse->trace_end = trace_end; in thread_stack__push_cp()
808 tse->non_call = false; in thread_stack__push_cp()
809 tse->db_id = 0; in thread_stack__push_cp()
820 if (!ts->cnt) in thread_stack__pop_cp()
823 if (ts->cnt == 1) { in thread_stack__pop_cp()
824 struct thread_stack_entry *tse = &ts->stack[0]; in thread_stack__pop_cp()
826 if (tse->cp->sym == sym) in thread_stack__pop_cp()
827 return thread_stack__call_return(thread, ts, --ts->cnt, in thread_stack__pop_cp()
831 if (ts->stack[ts->cnt - 1].ret_addr == ret_addr && in thread_stack__pop_cp()
832 !ts->stack[ts->cnt - 1].non_call) { in thread_stack__pop_cp()
833 return thread_stack__call_return(thread, ts, --ts->cnt, in thread_stack__pop_cp()
836 size_t i = ts->cnt - 1; in thread_stack__pop_cp()
838 while (i--) { in thread_stack__pop_cp()
839 if (ts->stack[i].ret_addr != ret_addr || in thread_stack__pop_cp()
840 ts->stack[i].non_call) in thread_stack__pop_cp()
843 while (ts->cnt > i) { in thread_stack__pop_cp()
845 --ts->cnt, in thread_stack__pop_cp()
851 return thread_stack__call_return(thread, ts, --ts->cnt, in thread_stack__pop_cp()
860 struct perf_sample *sample, in thread_stack__bottom() argument
864 struct call_path_root *cpr = ts->crp->cpr; in thread_stack__bottom()
869 if (sample->ip) { in thread_stack__bottom()
870 ip = sample->ip; in thread_stack__bottom()
871 sym = from_al->sym; in thread_stack__bottom()
872 } else if (sample->addr) { in thread_stack__bottom()
873 ip = sample->addr; in thread_stack__bottom()
874 sym = to_al->sym; in thread_stack__bottom()
879 cp = call_path__findnew(cpr, &cpr->call_path, sym, ip, in thread_stack__bottom()
880 ts->kernel_start); in thread_stack__bottom()
882 return thread_stack__push_cp(ts, ip, sample->time, ref, cp, in thread_stack__bottom()
887 struct perf_sample *sample, u64 ref) in thread_stack__pop_ks() argument
889 u64 tm = sample->time; in thread_stack__pop_ks()
894 err = thread_stack__call_return(thread, ts, --ts->cnt, in thread_stack__pop_ks()
905 struct perf_sample *sample, in thread_stack__no_call_return() argument
909 struct call_path_root *cpr = ts->crp->cpr; in thread_stack__no_call_return()
910 struct call_path *root = &cpr->call_path; in thread_stack__no_call_return()
911 struct symbol *fsym = from_al->sym; in thread_stack__no_call_return()
912 struct symbol *tsym = to_al->sym; in thread_stack__no_call_return()
914 u64 ks = ts->kernel_start; in thread_stack__no_call_return()
915 u64 addr = sample->addr; in thread_stack__no_call_return()
916 u64 tm = sample->time; in thread_stack__no_call_return()
917 u64 ip = sample->ip; in thread_stack__no_call_return()
922 err = thread_stack__pop_ks(thread, ts, sample, ref); in thread_stack__no_call_return()
927 if (!ts->cnt) { in thread_stack__no_call_return()
934 err = thread_stack__pop_ks(thread, ts, sample, ref); in thread_stack__no_call_return()
939 if (ts->cnt) in thread_stack__no_call_return()
940 parent = ts->stack[ts->cnt - 1].cp; in thread_stack__no_call_return()
944 if (parent->sym == from_al->sym) { in thread_stack__no_call_return()
950 if (ts->cnt == 1) { in thread_stack__no_call_return()
951 err = thread_stack__call_return(thread, ts, --ts->cnt, in thread_stack__no_call_return()
957 if (!ts->cnt) { in thread_stack__no_call_return()
972 ts->stack[ts->cnt - 1].non_call = true; in thread_stack__no_call_return()
994 return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false); in thread_stack__no_call_return()
1004 if (!ts->cnt) in thread_stack__trace_begin()
1008 tse = &ts->stack[ts->cnt - 1]; in thread_stack__trace_begin()
1009 if (tse->trace_end) { in thread_stack__trace_begin()
1010 err = thread_stack__call_return(thread, ts, --ts->cnt, in thread_stack__trace_begin()
1020 struct perf_sample *sample, u64 ref) in thread_stack__trace_end() argument
1022 struct call_path_root *cpr = ts->crp->cpr; in thread_stack__trace_end()
1027 if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref)) in thread_stack__trace_end()
1030 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0, in thread_stack__trace_end()
1031 ts->kernel_start); in thread_stack__trace_end()
1033 ret_addr = sample->ip + sample->insn_len; in thread_stack__trace_end()
1035 return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp, in thread_stack__trace_end()
1050 struct perf_sample *sample, in thread_stack__x86_retpoline() argument
1053 struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1]; in thread_stack__x86_retpoline()
1054 struct call_path_root *cpr = ts->crp->cpr; in thread_stack__x86_retpoline()
1055 struct symbol *sym = tse->cp->sym; in thread_stack__x86_retpoline()
1056 struct symbol *tsym = to_al->sym; in thread_stack__x86_retpoline()
1059 if (sym && is_x86_retpoline(sym->name)) { in thread_stack__x86_retpoline()
1063 * not itself mean anything. Here the top-of-stack is removed, in thread_stack__x86_retpoline()
1065 * resulting top-of-stack is replaced with the actual target. in thread_stack__x86_retpoline()
1070 ts->cnt -= 1; in thread_stack__x86_retpoline()
1071 sym = ts->stack[ts->cnt - 2].cp->sym; in thread_stack__x86_retpoline()
1072 if (sym && sym == tsym && to_al->addr != tsym->start) { in thread_stack__x86_retpoline()
1078 ts->cnt -= 1; in thread_stack__x86_retpoline()
1086 ts->cnt -= 1; in thread_stack__x86_retpoline()
1090 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym, in thread_stack__x86_retpoline()
1091 sample->addr, ts->kernel_start); in thread_stack__x86_retpoline()
1093 return -ENOMEM; in thread_stack__x86_retpoline()
1095 /* Replace the top-of-stack with the actual target */ in thread_stack__x86_retpoline()
1096 ts->stack[ts->cnt - 1].cp = cp; in thread_stack__x86_retpoline()
1102 struct perf_sample *sample, in thread_stack__process() argument
1107 struct thread_stack *ts = thread__stack(thread, sample->cpu); in thread_stack__process()
1111 if (ts && !ts->crp) { in thread_stack__process()
1118 ts = thread_stack__new(thread, sample->cpu, crp, true, 0); in thread_stack__process()
1120 return -ENOMEM; in thread_stack__process()
1121 ts->comm = comm; in thread_stack__process()
1124 rstate = ts->rstate; in thread_stack__process()
1126 ts->rstate = X86_RETPOLINE_POSSIBLE; in thread_stack__process()
1129 if (ts->comm != comm && thread__pid(thread) == thread__tid(thread)) { in thread_stack__process()
1133 ts->comm = comm; in thread_stack__process()
1137 if (!ts->cnt) { in thread_stack__process()
1138 err = thread_stack__bottom(ts, sample, from_al, to_al, ref); in thread_stack__process()
1143 ts->branch_count += 1; in thread_stack__process()
1144 ts->insn_count += sample->insn_cnt; in thread_stack__process()
1145 ts->cyc_count += sample->cyc_cnt; in thread_stack__process()
1146 ts->last_time = sample->time; in thread_stack__process()
1148 if (sample->flags & PERF_IP_FLAG_CALL) { in thread_stack__process()
1149 bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END; in thread_stack__process()
1150 struct call_path_root *cpr = ts->crp->cpr; in thread_stack__process()
1154 if (!sample->ip || !sample->addr) in thread_stack__process()
1157 ret_addr = sample->ip + sample->insn_len; in thread_stack__process()
1158 if (ret_addr == sample->addr) in thread_stack__process()
1159 return 0; /* Zero-length calls are excluded */ in thread_stack__process()
1161 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, in thread_stack__process()
1162 to_al->sym, sample->addr, in thread_stack__process()
1163 ts->kernel_start); in thread_stack__process()
1164 err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, in thread_stack__process()
1171 if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym && in thread_stack__process()
1172 from_al->sym == to_al->sym && in thread_stack__process()
1173 to_al->addr != to_al->sym->start) in thread_stack__process()
1174 ts->rstate = X86_RETPOLINE_DETECTED; in thread_stack__process()
1176 } else if (sample->flags & PERF_IP_FLAG_RETURN) { in thread_stack__process()
1177 if (!sample->addr) { in thread_stack__process()
1181 if (!(sample->flags & return_from_kernel)) in thread_stack__process()
1185 return thread_stack__pop_ks(thread, ts, sample, ref); in thread_stack__process()
1188 if (!sample->ip) in thread_stack__process()
1192 if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 && in thread_stack__process()
1193 ts->stack[ts->cnt - 1].ret_addr != sample->addr) in thread_stack__process()
1194 return thread_stack__x86_retpoline(ts, sample, to_al); in thread_stack__process()
1196 err = thread_stack__pop_cp(thread, ts, sample->addr, in thread_stack__process()
1197 sample->time, ref, from_al->sym); in thread_stack__process()
1201 err = thread_stack__no_call_return(thread, ts, sample, in thread_stack__process()
1204 } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) { in thread_stack__process()
1205 err = thread_stack__trace_begin(thread, ts, sample->time, ref); in thread_stack__process()
1206 } else if (sample->flags & PERF_IP_FLAG_TRACE_END) { in thread_stack__process()
1207 err = thread_stack__trace_end(ts, sample, ref); in thread_stack__process()
1208 } else if (sample->flags & PERF_IP_FLAG_BRANCH && in thread_stack__process()
1209 from_al->sym != to_al->sym && to_al->sym && in thread_stack__process()
1210 to_al->addr == to_al->sym->start) { in thread_stack__process()
1211 struct call_path_root *cpr = ts->crp->cpr; in thread_stack__process()
1220 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, in thread_stack__process()
1221 to_al->sym, sample->addr, in thread_stack__process()
1222 ts->kernel_start); in thread_stack__process()
1223 err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false, in thread_stack__process()
1226 ts->stack[ts->cnt - 1].non_call = true; in thread_stack__process()
1238 return ts->cnt; in thread_stack__depth()