// SPDX-License-Identifier: GPL-2.0
/*
 * Performance events callchain code, extracted from core.c:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
 *  Copyright  ©  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 */

#include <linux/perf_event.h>
#include <linux/slab.h>
#include <linux/sched/task_stack.h>
#include <linux/uprobes.h>

#include "internal.h"

struct callchain_cpus_entries {
	struct rcu_head			rcu_head;
	struct perf_callchain_entry	*cpu_entries[];
};

int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
int sysctl_perf_event_max_contexts_per_stack __read_mostly = PERF_MAX_CONTEXTS_PER_STACK;

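/*
 * Size of one callchain entry: the fixed header plus one __u64 slot for
 * each stack frame and each context marker allowed by the sysctls above.
 */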
static inline size_t perf_callchain_entry__sizeof(void)
{
	return (sizeof(struct perf_callchain_entry) +
		sizeof(__u64) * (sysctl_perf_event_max_stack +
				 sysctl_perf_event_max_contexts_per_stack));
}

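/*
 * Per-CPU recursion guards (one slot per context level), the count of
 * callchain users, and the lazily allocated per-CPU entry buffers.
 */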
static DEFINE_PER_CPU(u8, callchain_recursion[PERF_NR_CONTEXTS]);
static atomic_t nr_callchain_events;
static DEFINE_MUTEX(callchain_mutex);
static struct callchain_cpus_entries *callchain_cpus_entries;


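/*
 * Weak stubs: architectures that support callchain sampling override
 * these with their actual unwinders.
 */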
__weak void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
				  struct pt_regs *regs)
{
}

__weak void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
				struct pt_regs *regs)
{
}

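/* RCU callback: free each CPU's entry buffer, then the container itself. */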
static void release_callchain_buffers_rcu(struct rcu_head *head)
{
	struct callchain_cpus_entries *entries;
	int cpu;

	entries = container_of(head, struct callchain_cpus_entries, rcu_head);

	for_each_possible_cpu(cpu)
		kfree(entries->cpu_entries[cpu]);

	kfree(entries);
}

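/*
 * Unpublish the buffers and defer freeing them until after a grace
 * period, so that concurrent (possibly NMI) readers that looked up the
 * pointer under RCU have finished with it.
 */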
static void release_callchain_buffers(void)
{
	struct callchain_cpus_entries *entries;

	entries = callchain_cpus_entries;
	RCU_INIT_POINTER(callchain_cpus_entries, NULL);
	call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
}

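/*
 * Allocate one perf_callchain_entry per context level for every possible
 * CPU and publish the result via RCU.  Called with callchain_mutex held,
 * by the first callchain user only.
 */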
static int alloc_callchain_buffers(void)
{
	int cpu;
	int size;
	struct callchain_cpus_entries *entries;

	/*
	 * We can't use the percpu allocation API for data that can be
	 * accessed from NMI. Use a temporary manual per cpu allocation
	 * until that gets sorted out.
	 */
	size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);

	entries = kzalloc(size, GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

	size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS;

	for_each_possible_cpu(cpu) {
		entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
							 cpu_to_node(cpu));
		if (!entries->cpu_entries[cpu])
			goto fail;
	}

	rcu_assign_pointer(callchain_cpus_entries, entries);

	return 0;

fail:
	for_each_possible_cpu(cpu)
		kfree(entries->cpu_entries[cpu]);
	kfree(entries);

	return -ENOMEM;
}

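/*
 * Take a reference on the callchain buffers, allocating them for the
 * first user.  Returns -EOVERFLOW if the event asks for a deeper stack
 * than the global sysctl allows; the reference is dropped again on any
 * error.
 */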
int get_callchain_buffers(int event_max_stack)
{
	int err = 0;
	int count;

	mutex_lock(&callchain_mutex);

	count = atomic_inc_return(&nr_callchain_events);
	if (WARN_ON_ONCE(count < 1)) {
		err = -EINVAL;
		goto exit;
	}

	/*
	 * If the event requests a deeper stack than the global cap,
	 * return a different error to help userspace figure this out.
	 *
	 * Also do the check here so that callchain_mutex is held.
	 */
	if (event_max_stack > sysctl_perf_event_max_stack) {
		err = -EOVERFLOW;
		goto exit;
	}

	if (count == 1)
		err = alloc_callchain_buffers();
exit:
	if (err)
		atomic_dec(&nr_callchain_events);

	mutex_unlock(&callchain_mutex);

	return err;
}

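/* Drop a reference; the last user schedules the buffers for release. */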
void put_callchain_buffers(void)
{
	if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
		release_callchain_buffers();
		mutex_unlock(&callchain_mutex);
	}
}

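/*
 * Grab this CPU's entry slot for the current context level, marking the
 * level busy via the recursion counters.  Returns NULL when recursing or
 * when no buffers are allocated; otherwise the caller must release the
 * slot with put_callchain_entry(rctx).
 */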
struct perf_callchain_entry *get_callchain_entry(int *rctx)
{
	int cpu;
	struct callchain_cpus_entries *entries;

	*rctx = get_recursion_context(this_cpu_ptr(callchain_recursion));
	if (*rctx == -1)
		return NULL;

	entries = rcu_dereference(callchain_cpus_entries);
	if (!entries) {
		put_recursion_context(this_cpu_ptr(callchain_recursion), *rctx);
		return NULL;
	}

	cpu = smp_processor_id();

	return (((void *)entries->cpu_entries[cpu]) +
		(*rctx * perf_callchain_entry__sizeof()));
}

void
put_callchain_entry(int rctx)
{
	put_recursion_context(this_cpu_ptr(callchain_recursion), rctx);
}

static void fixup_uretprobe_trampoline_entries(struct perf_callchain_entry *entry,
					       int start_entry_idx)
{
#ifdef CONFIG_UPROBES
	struct uprobe_task *utask = current->utask;
	struct return_instance *ri;
	__u64 *cur_ip, *last_ip, tramp_addr;

	if (likely(!utask || !utask->return_instances))
		return;

	cur_ip = &entry->ip[start_entry_idx];
	last_ip = &entry->ip[entry->nr - 1];
	ri = utask->return_instances;
	tramp_addr = uprobe_get_trampoline_vaddr();

	/*
	 * If there are pending uretprobes for the current thread, they are
	 * recorded in a list inside utask->return_instances; each such
	 * pending uretprobe replaces the traced user function's return
	 * address on the stack, so when the stack trace is captured, instead
	 * of the actual function's return address we get one or more
	 * uretprobe trampoline addresses, which are unhelpful and misleading
	 * to users.
	 * So here we walk the list of pending uretprobes and replace each
	 * encountered trampoline address with the actual return address.
	 */
	while (ri && cur_ip <= last_ip) {
		if (*cur_ip == tramp_addr) {
			*cur_ip = ri->orig_ret_vaddr;
			ri = ri->next;
		}
		cur_ip++;
	}
#endif
}

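/*
 * Capture a callchain into a per-CPU entry.  @init_nr seeds the entry,
 * @kernel and @user select which portions to record, @add_mark inserts
 * PERF_CONTEXT_* separators, and cross-task user unwinding is skipped.
 */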
struct perf_callchain_entry *
get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
		   u32 max_stack, bool crosstask, bool add_mark)
{
	struct perf_callchain_entry *entry;
	struct perf_callchain_entry_ctx ctx;
	int rctx, start_entry_idx;

	entry = get_callchain_entry(&rctx);
	if (!entry)
		return NULL;

	ctx.entry     = entry;
	ctx.max_stack = max_stack;
	ctx.nr	      = entry->nr = init_nr;
	ctx.contexts       = 0;
	ctx.contexts_maxed = false;

	if (kernel && !user_mode(regs)) {
		if (add_mark)
			perf_callchain_store_context(&ctx, PERF_CONTEXT_KERNEL);
		perf_callchain_kernel(&ctx, regs);
	}

	if (user) {
		if (!user_mode(regs)) {
			if (current->mm)
				regs = task_pt_regs(current);
			else
				regs = NULL;
		}

		if (regs) {
			if (crosstask)
				goto exit_put;

			if (add_mark)
				perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);

			start_entry_idx = entry->nr;
			perf_callchain_user(&ctx, regs);
			fixup_uretprobe_trampoline_entries(entry, start_entry_idx);
		}
	}

exit_put:
	put_callchain_entry(rctx);

	return entry;
}

/*
 * Used for sysctl_perf_event_max_stack and
 * sysctl_perf_event_max_contexts_per_stack.
 */
int perf_event_max_stack_handler(const struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	int *value = table->data;
	int new_value = *value, ret;
	struct ctl_table new_table = *table;

	new_table.data = &new_value;
	ret = proc_dointvec_minmax(&new_table, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	mutex_lock(&callchain_mutex);
	if (atomic_read(&nr_callchain_events))
		ret = -EBUSY;
	else
		*value = new_value;

	mutex_unlock(&callchain_mutex);

	return ret;
}