// SPDX-License-Identifier: GPL-2.0-only
/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <linux/zalloc.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "env.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048

/*
 * State of retpoline detection.
 *
 * RETPOLINE_NONE: no retpoline detection
 * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
 * X86_RETPOLINE_DETECTED: x86 retpoline detected
 */
enum retpoline_state_t {
	RETPOLINE_NONE,
	X86_RETPOLINE_POSSIBLE,
	X86_RETPOLINE_DETECTED,
};

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @insn_count: the instruction count when the entry was created
 * @cyc_count: the cycle count when the entry was created
 * @db_id: id used for db-export
 * @cp: call path
 * @no_call: a 'call' was not seen
 * @trace_end: a 'call' but trace ended
 * @non_call: a branch but not a 'call' to the start of a different symbol
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	u64 insn_count;
	u64 cyc_count;
	u64 db_id;
	struct call_path *cp;
	bool no_call;
	bool trace_end;
	bool non_call;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @insn_count: running instruction count
 * @cyc_count: running cycle count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 * @arr_sz: size of array if this is the first element of an array
 * @rstate: used to detect retpolines
 * @br_stack_rb: branch stack (ring buffer)
 * @br_stack_sz: maximum branch stack size
 * @br_stack_pos: current position in @br_stack_rb
 * @mispred_all: mark all branches as mispredicted
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 insn_count;
	u64 cyc_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
	unsigned int arr_sz;
	enum retpoline_state_t rstate;
	struct branch_stack *br_stack_rb;
	unsigned int br_stack_sz;
	unsigned int br_stack_pos;
	bool mispred_all;
};

/*
 * Assume pid == tid == 0 identifies the idle task as defined by
 * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
 * and therefore requires a stack for each cpu.
 */
static inline bool thread_stack__per_cpu(struct thread *thread)
{
	return !(thread__tid(thread) || thread__pid(thread));
}

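/*
 * Grow the pre-allocated entry array by STACK_GROWTH entries, preserving the
 * existing entries. Used both at initialization and whenever a push finds the
 * array full.
 */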
static int thread_stack__grow(struct thread_stack *ts)
{
	struct thread_stack_entry *new_stack;
	size_t sz, new_sz;

	new_sz = ts->sz + STACK_GROWTH;
	sz = new_sz * sizeof(struct thread_stack_entry);

	new_stack = realloc(ts->stack, sz);
	if (!new_stack)
		return -ENOMEM;

	ts->stack = new_stack;
	ts->sz = new_sz;

	return 0;
}

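/*
 * Initialize a thread stack: pre-allocate the entry array if call stacks are
 * being recorded, allocate the branch stack ring buffer if requested, and
 * record the kernel start address (assumed to be 1ULL << 63 if the machine is
 * not known). On x86 the retpoline state starts as "possible" so that
 * retpoline thunks can later be detected and filtered from the call graph.
 */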
static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
			      struct call_return_processor *crp,
			      bool callstack, unsigned int br_stack_sz)
{
	int err;

	if (callstack) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	if (br_stack_sz) {
		size_t sz = sizeof(struct branch_stack);

		sz += br_stack_sz * sizeof(struct branch_entry);
		ts->br_stack_rb = zalloc(sz);
		if (!ts->br_stack_rb)
			return -ENOMEM;
		ts->br_stack_sz = br_stack_sz;
	}

	if (thread__maps(thread) && maps__machine(thread__maps(thread))) {
		struct machine *machine = maps__machine(thread__maps(thread));
		const char *arch = perf_env__arch(machine->env);

		ts->kernel_start = machine__kernel_start(machine);
		if (!strcmp(arch, "x86"))
			ts->rstate = X86_RETPOLINE_POSSIBLE;
	} else {
		ts->kernel_start = 1ULL << 63;
	}
	ts->crp = crp;

	return 0;
}

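/*
 * Get the thread stack for a thread, allocating or growing as necessary.
 * For the idle task there is one thread stack per cpu, kept as an array
 * indexed by cpu and sized to the next power of 2, so the array may need to
 * be reallocated when a higher-numbered cpu is seen.
 */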
static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
					      struct call_return_processor *crp,
					      bool callstack,
					      unsigned int br_stack_sz)
{
	struct thread_stack *ts = thread__ts(thread), *new_ts;
	unsigned int old_sz = ts ? ts->arr_sz : 0;
	unsigned int new_sz = 1;

	if (thread_stack__per_cpu(thread) && cpu > 0)
		new_sz = roundup_pow_of_two(cpu + 1);

	if (!ts || new_sz > old_sz) {
		new_ts = calloc(new_sz, sizeof(*ts));
		if (!new_ts)
			return NULL;
		if (ts)
			memcpy(new_ts, ts, old_sz * sizeof(*ts));
		new_ts->arr_sz = new_sz;
		free(thread__ts(thread));
		thread__set_ts(thread, new_ts);
		ts = new_ts;
	}

	if (thread_stack__per_cpu(thread) && cpu > 0 &&
	    (unsigned int)cpu < ts->arr_sz)
		ts += cpu;

	if (!ts->stack &&
	    thread_stack__init(ts, thread, crp, callstack, br_stack_sz))
		return NULL;

	return ts;
}

static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread__ts(thread);

	if (cpu < 0)
		cpu = 0;

	if (!ts || (unsigned int)cpu >= ts->arr_sz)
		return NULL;

	ts += cpu;

	if (!ts->stack)
		return NULL;

	return ts;
}

static inline struct thread_stack *thread__stack(struct thread *thread,
						    int cpu)
{
	if (!thread)
		return NULL;

	if (thread_stack__per_cpu(thread))
		return thread__cpu_stack(thread, cpu);

	return thread__ts(thread);
}

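/*
 * Push a return address. If growing the entry array fails, the existing
 * entries are discarded and the error is reported, but the new entry is
 * still recorded in the space that remains.
 */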
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
			      bool trace_end)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			ts->cnt = 0;
		}
	}

	ts->stack[ts->cnt].trace_end = trace_end;
	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t i;

	/*
	 * In some cases there may be functions which are not seen to return.
	 * For example when setjmp / longjmp has been used.  Or the perf context
	 * switch in the kernel which doesn't stop and start tracing in exactly
	 * the same code path.  When that happens the return address will be
	 * further down the stack.  If the return address is not found at all,
	 * we assume the opposite (i.e. this is a return for a call that wasn't
	 * seen for some reason) and leave the stack alone.
	 */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			return;
		}
	}
}

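/*
 * Pop consecutive 'trace end' entries from the top of the stack. Calls made
 * just before the trace ended are not expected to return.
 */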
static void thread_stack__pop_trace_end(struct thread_stack *ts)
{
	size_t i;

	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].trace_end)
			ts->cnt = i;
		else
			return;
	}
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
	if (!ts->cnt)
		return false;

	return ts->stack[ts->cnt - 1].cp->in_kernel;
}

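/*
 * Construct a call_return record for the stack entry at @idx and hand it to
 * the call/return processor. Deltas of the running branch, instruction and
 * cycle counts give the counts for the duration of the call. A pointer to
 * the parent entry's db_id is also passed so the exporter can fill it in
 * before the child is exported.
 */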
static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct call_return_processor *crp = ts->crp;
	struct thread_stack_entry *tse;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.db_id = 0,
	};
	u64 *parent_db_id;

	tse = &ts->stack[idx];
	cr.cp = tse->cp;
	cr.call_time = tse->timestamp;
	cr.return_time = timestamp;
	cr.branch_count = ts->branch_count - tse->branch_count;
	cr.insn_count = ts->insn_count - tse->insn_count;
	cr.cyc_count = ts->cyc_count - tse->cyc_count;
	cr.db_id = tse->db_id;
	cr.call_ref = tse->ref;
	cr.return_ref = ref;
	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;
	if (tse->non_call)
		cr.flags |= CALL_RETURN_NON_CALL;

	/*
	 * The parent db_id must be assigned before exporting the child. Note
	 * it is not possible to export the parent first because its information
	 * is not yet complete: its 'return' has not yet been processed.
	 */
	parent_db_id = idx ? &(tse - 1)->db_id : NULL;

	return crp->process(&cr, parent_db_id, crp->data);
}

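/*
 * Flush a single thread stack. Without a call/return processor that simply
 * means resetting the stack and the branch stack ring buffer. With one, a
 * "no return" call/return is emitted for every entry still on the stack.
 */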
static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
	struct call_return_processor *crp = ts->crp;
	int err;

	if (!crp) {
		ts->cnt = 0;
		ts->br_stack_pos = 0;
		if (ts->br_stack_rb)
			ts->br_stack_rb->nr = 0;
		return 0;
	}

	while (ts->cnt) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						ts->last_time, 0, true);
		if (err) {
			pr_err("Error flushing thread stack!\n");
			ts->cnt = 0;
			return err;
		}
	}

	return 0;
}

int thread_stack__flush(struct thread *thread)
{
	struct thread_stack *ts = thread__ts(thread);
	unsigned int pos;
	int err = 0;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++) {
			int ret = __thread_stack__flush(thread, ts + pos);

			if (ret)
				err = ret;
		}
	}

	return err;
}

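/*
 * Record a branch in the branch stack ring buffer. The buffer is filled
 * backwards from the end, so @br_stack_pos always indexes the most recent
 * entry and the entries from @br_stack_pos onwards are ordered most recent
 * first, which is the order expected in a branch stack sample.
 */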
static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags,
					  u64 from_ip, u64 to_ip)
{
	struct branch_stack *bs = ts->br_stack_rb;
	struct branch_entry *be;

	if (!ts->br_stack_pos)
		ts->br_stack_pos = ts->br_stack_sz;

	ts->br_stack_pos -= 1;

	be              = &bs->entries[ts->br_stack_pos];
	be->from        = from_ip;
	be->to          = to_ip;
	be->flags.value = 0;
	be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT);
	be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX);
	/* No support for mispredict */
	be->flags.mispred = ts->mispred_all;

	if (bs->nr < ts->br_stack_sz)
		bs->nr += 1;
}

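/*
 * Process a branch event for the thread stack. The branch stack ring buffer
 * is always updated if one was requested. The call stack is only maintained
 * here (push on 'call', pop on 'return') when a call/return processor is not
 * in use, i.e. when thread_stack__process() is not being used instead. A
 * change of trace number means the trace is discontinuous, so the stack is
 * flushed first.
 */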
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
			unsigned int br_stack_sz, bool mispred_all)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!thread)
		return -EINVAL;

	if (!ts) {
		ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz);
		if (!ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		ts->trace_nr = trace_nr;
		ts->mispred_all = mispred_all;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes.  In that case
	 * the stack might be completely invalid.  Better to report nothing than
	 * to report something misleading, so flush the stack.
	 */
	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}

	if (br_stack_sz)
		thread_stack__update_br_stack(ts, flags, from_ip, to_ip);

	/*
	 * Stop here if thread_stack__process() is in use, or not recording call
	 * stack.
	 */
	if (ts->crp || !callstack)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(ts, ret_addr,
					  flags & PERF_IP_FLAG_TRACE_END);
	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
		/*
		 * If the caller did not change the trace number (which would
		 * have flushed the stack) then try to make sense of the stack.
		 * Possibly, tracing began after returning to the current
		 * address, so try to pop that. Also, a call made when the trace
		 * ended is not expected to return, so pop that too.
		 */
		thread_stack__pop(ts, to_ip);
		thread_stack__pop_trace_end(ts);
	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
		thread_stack__pop(ts, to_ip);
	}

	return 0;
}

void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return;

	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}
}

static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
{
	__thread_stack__flush(thread, ts);
	zfree(&ts->stack);
	zfree(&ts->br_stack_rb);
}

static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
{
	unsigned int arr_sz = ts->arr_sz;

	__thread_stack__free(thread, ts);
	memset(ts, 0, sizeof(*ts));
	ts->arr_sz = arr_sz;
}

void thread_stack__free(struct thread *thread)
{
	struct thread_stack *ts = thread__ts(thread);
	unsigned int pos;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++)
			__thread_stack__free(thread, ts + pos);
		free(thread__ts(thread));
		thread__set_ts(thread, NULL);
	}
}

static inline u64 callchain_context(u64 ip, u64 kernel_start)
{
	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
}

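/*
 * Create a call chain from the thread stack. The layout is a context marker
 * (PERF_CONTEXT_USER or PERF_CONTEXT_KERNEL) followed by the sample ip, then
 * the stacked return addresses, most recent first, with a new context marker
 * inserted each time the addresses cross the user/kernel boundary.
 */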
void thread_stack__sample(struct thread *thread, int cpu,
			  struct ip_callchain *chain,
			  size_t sz, u64 ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	u64 context = callchain_context(ip, kernel_start);
	u64 last_context;
	size_t i, j;

	if (sz < 2) {
		chain->nr = 0;
		return;
	}

	chain->ips[0] = context;
	chain->ips[1] = ip;

	if (!ts) {
		chain->nr = 2;
		return;
	}

	last_context = context;

	for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
		ip = ts->stack[ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context != last_context) {
			if (i >= sz - 1)
				break;
			chain->ips[i++] = context;
			last_context = context;
		}
		chain->ips[i] = ip;
	}

	chain->nr = i;
}

/*
 * Hardware sample records, created some time after the event occurred, need to
 * have subsequent addresses removed from the call chain.
 */
void thread_stack__sample_late(struct thread *thread, int cpu,
			       struct ip_callchain *chain, size_t sz,
			       u64 sample_ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	u64 sample_context = callchain_context(sample_ip, kernel_start);
	u64 last_context, context, ip;
	size_t nr = 0, j;

	if (sz < 2) {
		chain->nr = 0;
		return;
	}

	if (!ts)
		goto out;

	/*
	 * When tracing kernel space, kernel addresses occur at the top of the
	 * call chain after the event occurred but before tracing stopped.
	 * Skip them.
	 */
	for (j = 1; j <= ts->cnt; j++) {
		ip = ts->stack[ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context == PERF_CONTEXT_USER ||
		    (context == sample_context && ip == sample_ip))
			break;
	}

	last_context = sample_ip; /* Use sample_ip as an invalid context */

	for (; nr < sz && j <= ts->cnt; nr++, j++) {
		ip = ts->stack[ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context != last_context) {
			if (nr >= sz - 1)
				break;
			chain->ips[nr++] = context;
			last_context = context;
		}
		chain->ips[nr] = ip;
	}
out:
	if (nr) {
		chain->nr = nr;
	} else {
		chain->ips[0] = sample_context;
		chain->ips[1] = sample_ip;
		chain->nr = 2;
	}
}

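/*
 * Copy the branch stack ring buffer into a branch stack sample, most recent
 * entry first. Two copies are needed to unwrap the ring buffer: first from
 * the current position to the end of the buffer, then, if the buffer has
 * wrapped, from the start of the buffer up to the current position.
 */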
void thread_stack__br_sample(struct thread *thread, int cpu,
			     struct branch_stack *dst, unsigned int sz)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	const size_t bsz = sizeof(struct branch_entry);
	struct branch_stack *src;
	struct branch_entry *be;
	unsigned int nr;

	dst->nr = 0;

	if (!ts)
		return;

	src = ts->br_stack_rb;
	if (!src->nr)
		return;

	dst->nr = min((unsigned int)src->nr, sz);

	be = &dst->entries[0];
	nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr);
	memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr);

	if (src->nr >= ts->br_stack_sz) {
		sz -= nr;
		be = &dst->entries[nr];
		nr = min(ts->br_stack_pos, sz);
		memcpy(be, &src->entries[0], bsz * nr);
	}
}

/* Start of user space branch entries */
static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start)
{
	if (!*start)
		*start = be->to && be->to < kernel_start;

	return *start;
}

/*
 * Start of branch entries after the ip fell in between 2 branches, or user
 * space branch entries.
 */
static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start,
		     bool *start, struct branch_entry *nb)
{
	if (!*start) {
		*start = (nb && sample_ip >= be->to && sample_ip <= nb->from) ||
			 be->from < kernel_start ||
			 (be->to && be->to < kernel_start);
	}

	return *start;
}

/*
 * Hardware sample records, created some time after the event occurred, need to
 * have subsequent addresses removed from the branch stack.
 */
void thread_stack__br_sample_late(struct thread *thread, int cpu,
				  struct branch_stack *dst, unsigned int sz,
				  u64 ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	struct branch_entry *d, *s, *spos, *ssz;
	struct branch_stack *src;
	unsigned int nr = 0;
	bool start = false;

	dst->nr = 0;

	if (!ts)
		return;

	src = ts->br_stack_rb;
	if (!src->nr)
		return;

	spos = &src->entries[ts->br_stack_pos];
	ssz  = &src->entries[ts->br_stack_sz];

	d = &dst->entries[0];
	s = spos;

	if (ip < kernel_start) {
		/*
		 * User space sample: start copying branch entries when the
		 * branch is in user space.
		 */
		for (s = spos; s < ssz && nr < sz; s++) {
			if (us_start(s, kernel_start, &start)) {
				*d++ = *s;
				nr += 1;
			}
		}

		if (src->nr >= ts->br_stack_sz) {
			for (s = &src->entries[0]; s < spos && nr < sz; s++) {
				if (us_start(s, kernel_start, &start)) {
					*d++ = *s;
					nr += 1;
				}
			}
		}
	} else {
		struct branch_entry *nb = NULL;

		/*
		 * Kernel space sample: start copying branch entries when the ip
		 * falls in between 2 branches (or the branch is in user space
		 * because then the start must have been missed).
		 */
		for (s = spos; s < ssz && nr < sz; s++) {
			if (ks_start(s, ip, kernel_start, &start, nb)) {
				*d++ = *s;
				nr += 1;
			}
			nb = s;
		}

		if (src->nr >= ts->br_stack_sz) {
			for (s = &src->entries[0]; s < spos && nr < sz; s++) {
				if (ks_start(s, ip, kernel_start, &start, nb)) {
					*d++ = *s;
					nr += 1;
				}
				nb = s;
			}
		}
	}

	dst->nr = nr;
}

struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
			   void *data)
{
	struct call_return_processor *crp;

	crp = zalloc(sizeof(struct call_return_processor));
	if (!crp)
		return NULL;
	crp->cpr = call_path_root__new();
	if (!crp->cpr)
		goto out_free;
	crp->process = process;
	crp->data = data;
	return crp;

out_free:
	free(crp);
	return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
	if (crp) {
		call_path_root__free(crp->cpr);
		free(crp);
	}
}

static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call, bool trace_end)
{
	struct thread_stack_entry *tse;
	int err;

	if (!cp)
		return -ENOMEM;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	tse = &ts->stack[ts->cnt++];
	tse->ret_addr = ret_addr;
	tse->timestamp = timestamp;
	tse->ref = ref;
	tse->branch_count = ts->branch_count;
	tse->insn_count = ts->insn_count;
	tse->cyc_count = ts->cyc_count;
	tse->cp = cp;
	tse->no_call = no_call;
	tse->trace_end = trace_end;
	tse->non_call = false;
	tse->db_id = 0;

	return 0;
}

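/*
 * Pop an entry matching the return address. If the only entry has the same
 * symbol, it is assumed to be the matching call even if the return address
 * differs. If the matching entry is further down the stack, the entries above
 * it are emitted as "no return" call/returns first. Returns 1 if no matching
 * entry was found, 0 on success, or a negative error code.
 */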
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
	    !ts->stack[ts->cnt - 1].non_call) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		size_t i = ts->cnt - 1;

		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr ||
			    ts->stack[i].non_call)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}

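/*
 * The stack is empty, so create a bottom entry for the current symbol (from
 * the sample ip, or the sample address if there is no ip) so that subsequent
 * calls have a parent. The entry is marked "no call" because the call that
 * led here was not seen.
 */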
static int thread_stack__bottom(struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);

	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
				     true, false);
}

static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts,
				struct perf_sample *sample, u64 ref)
{
	u64 tm = sample->time;
	int err;

	/* Return to userspace, so pop all kernel addresses */
	while (thread_stack__in_kernel(ts)) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						tm, ref, true);
		if (err)
			return err;
	}

	return 0;
}

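/*
 * Handle a 'return' that does not match anything on the stack, i.e. a return
 * for a call that was not seen. Kernel addresses are popped when returning to
 * user space, then the missing call is reconstructed as well as possible:
 * either the 'to' symbol replaces the bottom of the stack, or it is pushed as
 * a non-call entry (a retpoline-style jump), or 'to' is pushed and 'from' is
 * pushed and popped to represent the unseen call and its return.
 */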
static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *root = &cpr->call_path;
	struct symbol *fsym = from_al->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	u64 addr = sample->addr;
	u64 tm = sample->time;
	u64 ip = sample->ip;
	int err;

	if (ip >= ks && addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		err = thread_stack__pop_ks(thread, ts, sample, ref);
		if (err)
			return err;

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);
			return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
						     false);
		}
	} else if (thread_stack__in_kernel(ts) && ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		err = thread_stack__pop_ks(thread, ts, sample, ref);
		if (err)
			return err;
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = root;

	if (parent->sym == from_al->sym) {
		/*
		 * At the bottom of the stack, assume the missing 'call' was
		 * before the trace started. So, pop the current symbol and push
		 * the 'to' symbol.
		 */
		if (ts->cnt == 1) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							tm, ref, false);
			if (err)
				return err;
		}

		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);

			return thread_stack__push_cp(ts, addr, tm, ref, cp,
						     true, false);
		}

		/*
		 * Otherwise assume the 'return' is being used as a jump (e.g.
		 * retpoline) and just push the 'to' symbol.
		 */
		cp = call_path__findnew(cpr, parent, tsym, addr, ks);

		err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;

		return err;
	}

	/*
	 * Assume 'parent' has not yet returned, so push 'to', and then push and
	 * pop 'from'.
	 */

	cp = call_path__findnew(cpr, parent, tsym, addr, ks);

	err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
	if (err)
		return err;

	cp = call_path__findnew(cpr, cp, fsym, ip, ks);

	err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
	if (err)
		return err;

	return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
}

static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->trace_end) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}

static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false, true);
}

static bool is_x86_retpoline(const char *name)
{
	return strstr(name, "__x86_indirect_thunk_") == name;
}

/*
 * x86 retpoline functions pollute the call graph. This function removes them.
 * This does not handle function return thunks, nor is there any improvement
 * for the handling of inline thunks or extern thunks.
 */
static int thread_stack__x86_retpoline(struct thread_stack *ts,
				       struct perf_sample *sample,
				       struct addr_location *to_al)
{
	struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
	struct call_path_root *cpr = ts->crp->cpr;
	struct symbol *sym = tse->cp->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp;

	if (sym && is_x86_retpoline(sym->name)) {
		/*
		 * This is a x86 retpoline fn. It pollutes the call graph by
		 * showing up everywhere there is an indirect branch, but does
		 * not itself mean anything. Here the top-of-stack is removed,
		 * by decrementing the stack count, and then further down, the
		 * resulting top-of-stack is replaced with the actual target.
		 * The result is that the retpoline functions will no longer
		 * appear in the call graph. Note this only affects the call
		 * graph, since all the original branches are left unchanged.
		 */
		ts->cnt -= 1;
		sym = ts->stack[ts->cnt - 2].cp->sym;
		if (sym && sym == tsym && to_al->addr != tsym->start) {
			/*
			 * Target is back to the middle of the symbol we came
			 * from so assume it is an indirect jmp and forget it
			 * altogether.
			 */
			ts->cnt -= 1;
			return 0;
		}
	} else if (sym && sym == tsym) {
		/*
		 * Target is back to the symbol we came from so assume it is an
		 * indirect jmp and forget it altogether.
		 */
		ts->cnt -= 1;
		return 0;
	}

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
				sample->addr, ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	/* Replace the top-of-stack with the actual target */
	ts->stack[ts->cnt - 1].cp = cp;

	return 0;
}

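/*
 * Process a branch sample with the call/return processor: maintain the thread
 * stack from 'call', 'return', 'trace begin', 'trace end' and jump-to-symbol
 * branches, and emit a call_return for every matched pair. This supersedes
 * thread_stack__event() for the thread, and also handles x86 retpolines,
 * flushing on exec (comm change), and jumps that the compiler used in place
 * of a call/return pair.
 */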
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread__stack(thread, sample->cpu);
	enum retpoline_state_t rstate;
	int err = 0;

	if (ts && !ts->crp) {
		/* Supersede thread_stack__event() */
		thread_stack__reset(thread, ts);
		ts = NULL;
	}

	if (!ts) {
		ts = thread_stack__new(thread, sample->cpu, crp, true, 0);
		if (!ts)
			return -ENOMEM;
		ts->comm = comm;
	}

	rstate = ts->rstate;
	if (rstate == X86_RETPOLINE_DETECTED)
		ts->rstate = X86_RETPOLINE_POSSIBLE;

	/* Flush stack on exec */
	if (ts->comm != comm && thread__pid(thread) == thread__tid(thread)) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->insn_count += sample->insn_cnt;
	ts->cyc_count += sample->cyc_cnt;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false, trace_end);

		/*
		 * A call to the same symbol but not the start of the symbol
		 * may be the start of an x86 retpoline.
		 */
		if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
		    from_al->sym == to_al->sym &&
		    to_al->addr != to_al->sym->start)
			ts->rstate = X86_RETPOLINE_DETECTED;

	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->addr) {
			u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET |
						 PERF_IP_FLAG_INTERRUPT;

			if (!(sample->flags & return_from_kernel))
				return 0;

			/* Pop kernel stack */
			return thread_stack__pop_ks(thread, ts, sample, ref);
		}

		if (!sample->ip)
			return 0;

		/* x86 retpoline 'return' doesn't match the stack */
		if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
		    ts->stack[ts->cnt - 1].ret_addr != sample->addr)
			return thread_stack__x86_retpoline(ts, sample, to_al);

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	} else if (sample->flags & PERF_IP_FLAG_BRANCH &&
		   from_al->sym != to_al->sym && to_al->sym &&
		   to_al->addr == to_al->sym->start) {
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;

		/*
		 * The compiler might optimize a call/ret combination by making
		 * it a jmp. Make that visible by recording on the stack a
		 * branch to the start of a different symbol. Note, that means
		 * when a ret pops the stack, all jmps must be popped off first.
		 */
		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
					    false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;
	}

	return err;
}

size_t thread_stack__depth(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return 0;
	return ts->cnt;
}