1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * ring buffer based function tracer
4   *
5   * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6   * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7   *
8   * Originally taken from the RT patch by:
9   *    Arnaldo Carvalho de Melo <acme@redhat.com>
10   *
11   * Based on code from the latency_tracer, that is:
12   *  Copyright (C) 2004-2006 Ingo Molnar
13   *  Copyright (C) 2004 Nadia Yvette Chambers
14   */
15  #include <linux/ring_buffer.h>
16  #include <linux/utsname.h>
17  #include <linux/stacktrace.h>
18  #include <linux/writeback.h>
19  #include <linux/kallsyms.h>
20  #include <linux/security.h>
21  #include <linux/seq_file.h>
22  #include <linux/irqflags.h>
23  #include <linux/debugfs.h>
24  #include <linux/tracefs.h>
25  #include <linux/pagemap.h>
26  #include <linux/hardirq.h>
27  #include <linux/linkage.h>
28  #include <linux/uaccess.h>
29  #include <linux/vmalloc.h>
30  #include <linux/ftrace.h>
31  #include <linux/module.h>
32  #include <linux/percpu.h>
33  #include <linux/splice.h>
34  #include <linux/kdebug.h>
35  #include <linux/string.h>
36  #include <linux/mount.h>
37  #include <linux/rwsem.h>
38  #include <linux/slab.h>
39  #include <linux/ctype.h>
40  #include <linux/init.h>
41  #include <linux/panic_notifier.h>
42  #include <linux/poll.h>
43  #include <linux/nmi.h>
44  #include <linux/fs.h>
45  #include <linux/trace.h>
46  #include <linux/sched/clock.h>
47  #include <linux/sched/rt.h>
48  #include <linux/fsnotify.h>
49  #include <linux/irq_work.h>
50  #include <linux/workqueue.h>
51  
52  #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53  
54  #include "trace.h"
55  #include "trace_output.h"
56  
57  #ifdef CONFIG_FTRACE_STARTUP_TEST
58  /*
59   * We need to change this state when a selftest is running.
60   * A selftest will look into the ring buffer to count the
61   * entries inserted during the selftest, although some concurrent
62   * insertions into the ring buffer, such as trace_printk(), could occur
63   * at the same time, giving false positive or negative results.
64   */
65  static bool __read_mostly tracing_selftest_running;
66  
67  /*
68   * If boot-time tracing including tracers/events via kernel cmdline
69   * is running, we do not want to run SELFTEST.
70   */
71  bool __read_mostly tracing_selftest_disabled;
72  
73  void __init disable_tracing_selftest(const char *reason)
74  {
75  	if (!tracing_selftest_disabled) {
76  		tracing_selftest_disabled = true;
77  		pr_info("Ftrace startup test is disabled due to %s\n", reason);
78  	}
79  }
80  #else
81  #define tracing_selftest_running	0
82  #define tracing_selftest_disabled	0
83  #endif
84  
85  /* Pipe tracepoints to printk */
86  static struct trace_iterator *tracepoint_print_iter;
87  int tracepoint_printk;
88  static bool tracepoint_printk_stop_on_boot __initdata;
89  static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90  
91  /* For tracers that don't implement custom flags */
92  static struct tracer_opt dummy_tracer_opt[] = {
93  	{ }
94  };
95  
96  static int
97  dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98  {
99  	return 0;
100  }
101  
102  /*
103   * To prevent the comm cache from being overwritten when no
104   * tracing is active, only save the comm when a trace event
105   * occurred.
106   */
107  DEFINE_PER_CPU(bool, trace_taskinfo_save);
108  
109  /*
110   * Kill all tracing for good (never come back).
111   * It is initialized to 1 but will turn to zero if the initialization
112   * of the tracer is successful. But that is the only place that sets
113   * this back to zero.
114   */
115  static int tracing_disabled = 1;
116  
117  cpumask_var_t __read_mostly	tracing_buffer_mask;
118  
119  /*
120   * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121   *
122   * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123   * is set, then ftrace_dump is called. This will output the contents
124   * of the ftrace buffers to the console.  This is very useful for
125   * capturing traces that lead to crashes and outputting them to a
126   * serial console.
127   *
128   * It is off by default, but you can enable it either by specifying
129   * "ftrace_dump_on_oops" on the kernel command line or by setting
130   * /proc/sys/kernel/ftrace_dump_on_oops.
131   * Set it to 1 if you want to dump the buffers of all CPUs.
132   * Set it to 2 if you want to dump only the buffer of the CPU that triggered the oops.
133   * Set it to an instance name if you want to dump that specific trace instance.
134   * Multiple instance dumps are also supported; instances are separated
135   * by commas.
136   */
137  /* Set to string format zero to disable by default */
138  char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
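/*
 * Usage sketch, based only on the comment above (the values shown are
 * illustrative):
 *   ftrace_dump_on_oops           dump the buffers of all CPUs
 *   ftrace_dump_on_oops=2         dump only the CPU that triggered the oops
 *   ftrace_dump_on_oops=foo,bar   dump the hypothetical instances "foo" and "bar"
 */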
139  
140  /* When set, tracing will stop when a WARN*() is hit */
141  int __disable_trace_on_warning;
142  
143  #ifdef CONFIG_TRACE_EVAL_MAP_FILE
144  /* Map of enums to their values, for "eval_map" file */
145  struct trace_eval_map_head {
146  	struct module			*mod;
147  	unsigned long			length;
148  };
149  
150  union trace_eval_map_item;
151  
152  struct trace_eval_map_tail {
153  	/*
154  	 * "end" is first and points to NULL as it must be different
155  	 * from "mod" or "eval_string"
156  	 */
157  	union trace_eval_map_item	*next;
158  	const char			*end;	/* points to NULL */
159  };
160  
161  static DEFINE_MUTEX(trace_eval_mutex);
162  
163  /*
164   * The trace_eval_maps are saved in an array with two extra elements,
165   * one at the beginning, and one at the end. The beginning item contains
166   * the count of the saved maps (head.length), and the module they
167   * belong to if not built in (head.mod). The ending item contains a
168   * pointer to the next array of saved eval_map items.
169   */
170  union trace_eval_map_item {
171  	struct trace_eval_map		map;
172  	struct trace_eval_map_head	head;
173  	struct trace_eval_map_tail	tail;
174  };
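/*
 * Layout sketch of one saved array, following the description above
 * (N is the number of maps in that chunk):
 *
 *   item[0]      head  (head.length = N, head.mod = owning module or NULL)
 *   item[1..N]   map   (the saved trace_eval_map entries)
 *   item[N+1]    tail  (tail.next points to the next saved array, or NULL)
 */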
175  
176  static union trace_eval_map_item *trace_eval_maps;
177  #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
178  
179  int tracing_set_tracer(struct trace_array *tr, const char *buf);
180  static void ftrace_trace_userstack(struct trace_array *tr,
181  				   struct trace_buffer *buffer,
182  				   unsigned int trace_ctx);
183  
184  static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185  static char *default_bootup_tracer;
186  
187  static bool allocate_snapshot;
188  static bool snapshot_at_boot;
189  
190  static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
191  static int boot_instance_index;
192  
193  static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
194  static int boot_snapshot_index;
195  
196  static int __init set_cmdline_ftrace(char *str)
197  {
198  	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
199  	default_bootup_tracer = bootup_tracer_buf;
200  	/* We are using ftrace early, expand it */
201  	trace_set_ring_buffer_expanded(NULL);
202  	return 1;
203  }
204  __setup("ftrace=", set_cmdline_ftrace);
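/*
 * For example (tracer name is illustrative): booting with "ftrace=function"
 * stores "function" in bootup_tracer_buf and expands the ring buffer early;
 * the tracer itself is selected later once tracers are registered.
 */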
205  
206  int ftrace_dump_on_oops_enabled(void)
207  {
208  	if (!strcmp("0", ftrace_dump_on_oops))
209  		return 0;
210  	else
211  		return 1;
212  }
213  
214  static int __init set_ftrace_dump_on_oops(char *str)
215  {
216  	if (!*str) {
217  		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
218  		return 1;
219  	}
220  
221  	if (*str == ',') {
222  		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
223  		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
224  		return 1;
225  	}
226  
227  	if (*str++ == '=') {
228  		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
229  		return 1;
230  	}
231  
232  	return 0;
233  }
234  __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
235  
236  static int __init stop_trace_on_warning(char *str)
237  {
238  	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
239  		__disable_trace_on_warning = 1;
240  	return 1;
241  }
242  __setup("traceoff_on_warning", stop_trace_on_warning);
243  
244  static int __init boot_alloc_snapshot(char *str)
245  {
246  	char *slot = boot_snapshot_info + boot_snapshot_index;
247  	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
248  	int ret;
249  
250  	if (str[0] == '=') {
251  		str++;
252  		if (strlen(str) >= left)
253  			return -1;
254  
255  		ret = snprintf(slot, left, "%s\t", str);
256  		boot_snapshot_index += ret;
257  	} else {
258  		allocate_snapshot = true;
259  		/* We also need the main ring buffer expanded */
260  		trace_set_ring_buffer_expanded(NULL);
261  	}
262  	return 1;
263  }
264  __setup("alloc_snapshot", boot_alloc_snapshot);
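/*
 * Boot parameter sketch (instance name is hypothetical): "alloc_snapshot"
 * pre-allocates the snapshot buffer for the top-level instance, while
 * "alloc_snapshot=foo" only records "foo" in boot_snapshot_info so that
 * instance's snapshot buffer can be allocated when it is created.
 */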
265  
266  
267  static int __init boot_snapshot(char *str)
268  {
269  	snapshot_at_boot = true;
270  	boot_alloc_snapshot(str);
271  	return 1;
272  }
273  __setup("ftrace_boot_snapshot", boot_snapshot);
274  
275  
276  static int __init boot_instance(char *str)
277  {
278  	char *slot = boot_instance_info + boot_instance_index;
279  	int left = sizeof(boot_instance_info) - boot_instance_index;
280  	int ret;
281  
282  	if (strlen(str) >= left)
283  		return -1;
284  
285  	ret = snprintf(slot, left, "%s\t", str);
286  	boot_instance_index += ret;
287  
288  	return 1;
289  }
290  __setup("trace_instance=", boot_instance);
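/*
 * Illustrative only: "trace_instance=foo" appends the hypothetical name
 * "foo" (tab-terminated) to boot_instance_info, so the instance can be
 * created once tracing is initialized.
 */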
291  
292  
293  static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
294  
295  static int __init set_trace_boot_options(char *str)
296  {
297  	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
298  	return 1;
299  }
300  __setup("trace_options=", set_trace_boot_options);
301  
302  static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
303  static char *trace_boot_clock __initdata;
304  
305  static int __init set_trace_boot_clock(char *str)
306  {
307  	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
308  	trace_boot_clock = trace_boot_clock_buf;
309  	return 1;
310  }
311  __setup("trace_clock=", set_trace_boot_clock);
312  
313  static int __init set_tracepoint_printk(char *str)
314  {
315  	/* Ignore the "tp_printk_stop_on_boot" param */
316  	if (*str == '_')
317  		return 0;
318  
319  	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
320  		tracepoint_printk = 1;
321  	return 1;
322  }
323  __setup("tp_printk", set_tracepoint_printk);
324  
325  static int __init set_tracepoint_printk_stop(char *str)
326  {
327  	tracepoint_printk_stop_on_boot = true;
328  	return 1;
329  }
330  __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
331  
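/* Convert nanoseconds to microseconds, rounding to the nearest microsecond */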
332  unsigned long long ns2usecs(u64 nsec)
333  {
334  	nsec += 500;
335  	do_div(nsec, 1000);
336  	return nsec;
337  }
338  
339  static void
340  trace_process_export(struct trace_export *export,
341  	       struct ring_buffer_event *event, int flag)
342  {
343  	struct trace_entry *entry;
344  	unsigned int size = 0;
345  
346  	if (export->flags & flag) {
347  		entry = ring_buffer_event_data(event);
348  		size = ring_buffer_event_length(event);
349  		export->write(export, entry, size);
350  	}
351  }
352  
353  static DEFINE_MUTEX(ftrace_export_lock);
354  
355  static struct trace_export __rcu *ftrace_exports_list __read_mostly;
356  
357  static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
358  static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
359  static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
360  
361  static inline void ftrace_exports_enable(struct trace_export *export)
362  {
363  	if (export->flags & TRACE_EXPORT_FUNCTION)
364  		static_branch_inc(&trace_function_exports_enabled);
365  
366  	if (export->flags & TRACE_EXPORT_EVENT)
367  		static_branch_inc(&trace_event_exports_enabled);
368  
369  	if (export->flags & TRACE_EXPORT_MARKER)
370  		static_branch_inc(&trace_marker_exports_enabled);
371  }
372  
373  static inline void ftrace_exports_disable(struct trace_export *export)
374  {
375  	if (export->flags & TRACE_EXPORT_FUNCTION)
376  		static_branch_dec(&trace_function_exports_enabled);
377  
378  	if (export->flags & TRACE_EXPORT_EVENT)
379  		static_branch_dec(&trace_event_exports_enabled);
380  
381  	if (export->flags & TRACE_EXPORT_MARKER)
382  		static_branch_dec(&trace_marker_exports_enabled);
383  }
384  
385  static void ftrace_exports(struct ring_buffer_event *event, int flag)
386  {
387  	struct trace_export *export;
388  
389  	preempt_disable_notrace();
390  
391  	export = rcu_dereference_raw_check(ftrace_exports_list);
392  	while (export) {
393  		trace_process_export(export, event, flag);
394  		export = rcu_dereference_raw_check(export->next);
395  	}
396  
397  	preempt_enable_notrace();
398  }
399  
400  static inline void
401  add_trace_export(struct trace_export **list, struct trace_export *export)
402  {
403  	rcu_assign_pointer(export->next, *list);
404  	/*
405  	 * We are entering export into the list but another
406  	 * CPU might be walking that list. We need to make sure
407  	 * the export->next pointer is valid before another CPU sees
408  	 * the export pointer included in the list.
409  	 */
410  	rcu_assign_pointer(*list, export);
411  }
412  
413  static inline int
414  rm_trace_export(struct trace_export **list, struct trace_export *export)
415  {
416  	struct trace_export **p;
417  
418  	for (p = list; *p != NULL; p = &(*p)->next)
419  		if (*p == export)
420  			break;
421  
422  	if (*p != export)
423  		return -1;
424  
425  	rcu_assign_pointer(*p, (*p)->next);
426  
427  	return 0;
428  }
429  
430  static inline void
431  add_ftrace_export(struct trace_export **list, struct trace_export *export)
432  {
433  	ftrace_exports_enable(export);
434  
435  	add_trace_export(list, export);
436  }
437  
438  static inline int
439  rm_ftrace_export(struct trace_export **list, struct trace_export *export)
440  {
441  	int ret;
442  
443  	ret = rm_trace_export(list, export);
444  	ftrace_exports_disable(export);
445  
446  	return ret;
447  }
448  
449  int register_ftrace_export(struct trace_export *export)
450  {
451  	if (WARN_ON_ONCE(!export->write))
452  		return -1;
453  
454  	mutex_lock(&ftrace_export_lock);
455  
456  	add_ftrace_export(&ftrace_exports_list, export);
457  
458  	mutex_unlock(&ftrace_export_lock);
459  
460  	return 0;
461  }
462  EXPORT_SYMBOL_GPL(register_ftrace_export);
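/*
 * Minimal usage sketch for an exporter module; the names are hypothetical
 * and the trace_export layout is assumed from <linux/trace.h>:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int len)
 *	{
 *		// forward 'len' bytes of the raw trace entry somewhere
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */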
463  
464  int unregister_ftrace_export(struct trace_export *export)
465  {
466  	int ret;
467  
468  	mutex_lock(&ftrace_export_lock);
469  
470  	ret = rm_ftrace_export(&ftrace_exports_list, export);
471  
472  	mutex_unlock(&ftrace_export_lock);
473  
474  	return ret;
475  }
476  EXPORT_SYMBOL_GPL(unregister_ftrace_export);
477  
478  /* trace_flags holds trace_options default values */
479  #define TRACE_DEFAULT_FLAGS						\
480  	(FUNCTION_DEFAULT_FLAGS |					\
481  	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
482  	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
483  	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
484  	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
485  	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)
486  
487  /* trace_options that are only supported by global_trace */
488  #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
489  	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
490  
491  /* trace_flags that are default zero for instances */
492  #define ZEROED_TRACE_FLAGS \
493  	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)
494  
495  /*
496   * The global_trace is the descriptor that holds the top-level tracing
497   * buffers for the live tracing.
498   */
499  static struct trace_array global_trace = {
500  	.trace_flags = TRACE_DEFAULT_FLAGS,
501  };
502  
503  static struct trace_array *printk_trace = &global_trace;
504  
505  static __always_inline bool printk_binsafe(struct trace_array *tr)
506  {
507  	/*
508  	 * The binary format of traceprintk can cause a crash if used
509  	 * by a buffer from another boot. Force the use of the
510  	 * non binary version of trace_printk if the trace_printk
511  	 * buffer is a boot mapped ring buffer.
512  	 */
513  	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
514  }
515  
516  static void update_printk_trace(struct trace_array *tr)
517  {
518  	if (printk_trace == tr)
519  		return;
520  
521  	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
522  	printk_trace = tr;
523  	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
524  }
525  
526  void trace_set_ring_buffer_expanded(struct trace_array *tr)
527  {
528  	if (!tr)
529  		tr = &global_trace;
530  	tr->ring_buffer_expanded = true;
531  }
532  
533  LIST_HEAD(ftrace_trace_arrays);
534  
535  int trace_array_get(struct trace_array *this_tr)
536  {
537  	struct trace_array *tr;
538  	int ret = -ENODEV;
539  
540  	mutex_lock(&trace_types_lock);
541  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
542  		if (tr == this_tr) {
543  			tr->ref++;
544  			ret = 0;
545  			break;
546  		}
547  	}
548  	mutex_unlock(&trace_types_lock);
549  
550  	return ret;
551  }
552  
553  static void __trace_array_put(struct trace_array *this_tr)
554  {
555  	WARN_ON(!this_tr->ref);
556  	this_tr->ref--;
557  }
558  
559  /**
560   * trace_array_put - Decrement the reference counter for this trace array.
561   * @this_tr : pointer to the trace array
562   *
563   * NOTE: Use this when we no longer need the trace array returned by
564   * trace_array_get_by_name(). This ensures the trace array can be later
565   * destroyed.
566   *
567   */
568  void trace_array_put(struct trace_array *this_tr)
569  {
570  	if (!this_tr)
571  		return;
572  
573  	mutex_lock(&trace_types_lock);
574  	__trace_array_put(this_tr);
575  	mutex_unlock(&trace_types_lock);
576  }
577  EXPORT_SYMBOL_GPL(trace_array_put);
578  
579  int tracing_check_open_get_tr(struct trace_array *tr)
580  {
581  	int ret;
582  
583  	ret = security_locked_down(LOCKDOWN_TRACEFS);
584  	if (ret)
585  		return ret;
586  
587  	if (tracing_disabled)
588  		return -ENODEV;
589  
590  	if (tr && trace_array_get(tr) < 0)
591  		return -ENODEV;
592  
593  	return 0;
594  }
595  
596  int call_filter_check_discard(struct trace_event_call *call, void *rec,
597  			      struct trace_buffer *buffer,
598  			      struct ring_buffer_event *event)
599  {
600  	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
601  	    !filter_match_preds(call->filter, rec)) {
602  		__trace_event_discard_commit(buffer, event);
603  		return 1;
604  	}
605  
606  	return 0;
607  }
608  
609  /**
610   * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
611   * @filtered_pids: The list of pids to check
612   * @search_pid: The PID to find in @filtered_pids
613   *
614   * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
615   */
616  bool
617  trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
618  {
619  	return trace_pid_list_is_set(filtered_pids, search_pid);
620  }
621  
622  /**
623   * trace_ignore_this_task - should a task be ignored for tracing
624   * @filtered_pids: The list of pids to check
625   * @filtered_no_pids: The list of pids not to be traced
626   * @task: The task that should be ignored if not filtered
627   *
628   * Checks if @task should be traced or not from @filtered_pids.
629   * Returns true if @task should *NOT* be traced.
630   * Returns false if @task should be traced.
631   */
632  bool
633  trace_ignore_this_task(struct trace_pid_list *filtered_pids,
634  		       struct trace_pid_list *filtered_no_pids,
635  		       struct task_struct *task)
636  {
637  	/*
638  	 * If filtered_no_pids is not empty, and the task's pid is listed
639  	 * in filtered_no_pids, then return true.
640  	 * Otherwise, if filtered_pids is empty, that means we can
641  	 * trace all tasks. If it has content, then only trace pids
642  	 * within filtered_pids.
643  	 */
644  
645  	return (filtered_pids &&
646  		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
647  		(filtered_no_pids &&
648  		 trace_find_filtered_pid(filtered_no_pids, task->pid));
649  }
650  
651  /**
652   * trace_filter_add_remove_task - Add or remove a task from a pid_list
653   * @pid_list: The list to modify
654   * @self: The current task for fork or NULL for exit
655   * @task: The task to add or remove
656   *
657   * If adding a task, if @self is defined, the task is only added if @self
658   * is also included in @pid_list. This happens on fork and tasks should
659   * only be added when the parent is listed. If @self is NULL, then the
660   * @task pid will be removed from the list, which would happen on exit
661   * of a task.
662   */
663  void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
664  				  struct task_struct *self,
665  				  struct task_struct *task)
666  {
667  	if (!pid_list)
668  		return;
669  
670  	/* For forks, we only add if the forking task is listed */
671  	if (self) {
672  		if (!trace_find_filtered_pid(pid_list, self->pid))
673  			return;
674  	}
675  
676  	/* "self" is set for forks, and NULL for exits */
677  	if (self)
678  		trace_pid_list_set(pid_list, task->pid);
679  	else
680  		trace_pid_list_clear(pid_list, task->pid);
681  }
682  
683  /**
684   * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
685   * @pid_list: The pid list to show
686   * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
687   * @pos: The position of the file
688   *
689   * This is used by the seq_file "next" operation to iterate the pids
690   * listed in a trace_pid_list structure.
691   *
692   * Returns the pid+1 as we want to display pid of zero, but NULL would
693   * stop the iteration.
694   */
695  void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
696  {
697  	long pid = (unsigned long)v;
698  	unsigned int next;
699  
700  	(*pos)++;
701  
702  	/* pid already is +1 of the actual previous bit */
703  	if (trace_pid_list_next(pid_list, pid, &next) < 0)
704  		return NULL;
705  
706  	pid = next;
707  
708  	/* Return pid + 1 to allow zero to be represented */
709  	return (void *)(pid + 1);
710  }
711  
712  /**
713   * trace_pid_start - Used for seq_file to start reading pid lists
714   * @pid_list: The pid list to show
715   * @pos: The position of the file
716   *
717   * This is used by seq_file "start" operation to start the iteration
718   * of listing pids.
719   *
720   * Returns the pid+1 as we want to display pid of zero, but NULL would
721   * stop the iteration.
722   */
723  void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
724  {
725  	unsigned long pid;
726  	unsigned int first;
727  	loff_t l = 0;
728  
729  	if (trace_pid_list_first(pid_list, &first) < 0)
730  		return NULL;
731  
732  	pid = first;
733  
734  	/* Return pid + 1 so that zero can be the exit value */
735  	for (pid++; pid && l < *pos;
736  	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
737  		;
738  	return (void *)pid;
739  }
740  
741  /**
742   * trace_pid_show - show the current pid in seq_file processing
743   * @m: The seq_file structure to write into
744   * @v: A void pointer of the pid (+1) value to display
745   *
746   * Can be directly used by seq_file operations to display the current
747   * pid value.
748   */
749  int trace_pid_show(struct seq_file *m, void *v)
750  {
751  	unsigned long pid = (unsigned long)v - 1;
752  
753  	seq_printf(m, "%lu\n", pid);
754  	return 0;
755  }
756  
757  /* 128 should be much more than enough */
758  #define PID_BUF_SIZE		127
759  
760  int trace_pid_write(struct trace_pid_list *filtered_pids,
761  		    struct trace_pid_list **new_pid_list,
762  		    const char __user *ubuf, size_t cnt)
763  {
764  	struct trace_pid_list *pid_list;
765  	struct trace_parser parser;
766  	unsigned long val;
767  	int nr_pids = 0;
768  	ssize_t read = 0;
769  	ssize_t ret;
770  	loff_t pos;
771  	pid_t pid;
772  
773  	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
774  		return -ENOMEM;
775  
776  	/*
777  	 * Always recreate a new array. The write is an all or nothing
778  	 * operation. Always create a new array when adding new pids by
779  	 * the user. If the operation fails, then the current list is
780  	 * not modified.
781  	 */
782  	pid_list = trace_pid_list_alloc();
783  	if (!pid_list) {
784  		trace_parser_put(&parser);
785  		return -ENOMEM;
786  	}
787  
788  	if (filtered_pids) {
789  		/* copy the current bits to the new max */
790  		ret = trace_pid_list_first(filtered_pids, &pid);
791  		while (!ret) {
792  			trace_pid_list_set(pid_list, pid);
793  			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
794  			nr_pids++;
795  		}
796  	}
797  
798  	ret = 0;
799  	while (cnt > 0) {
800  
801  		pos = 0;
802  
803  		ret = trace_get_user(&parser, ubuf, cnt, &pos);
804  		if (ret < 0)
805  			break;
806  
807  		read += ret;
808  		ubuf += ret;
809  		cnt -= ret;
810  
811  		if (!trace_parser_loaded(&parser))
812  			break;
813  
814  		ret = -EINVAL;
815  		if (kstrtoul(parser.buffer, 0, &val))
816  			break;
817  
818  		pid = (pid_t)val;
819  
820  		if (trace_pid_list_set(pid_list, pid) < 0) {
821  			ret = -1;
822  			break;
823  		}
824  		nr_pids++;
825  
826  		trace_parser_clear(&parser);
827  		ret = 0;
828  	}
829  	trace_parser_put(&parser);
830  
831  	if (ret < 0) {
832  		trace_pid_list_free(pid_list);
833  		return ret;
834  	}
835  
836  	if (!nr_pids) {
837  		/* Cleared the list of pids */
838  		trace_pid_list_free(pid_list);
839  		pid_list = NULL;
840  	}
841  
842  	*new_pid_list = pid_list;
843  
844  	return read;
845  }
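/*
 * For example (illustrative): writing "123 456" to a pid filter file builds
 * a brand new pid_list with pids 123 and 456 set, plus whatever was already
 * in @filtered_pids if the caller passed it in. If no pids end up set, the
 * new list is freed and *new_pid_list is set to NULL, clearing the filter.
 */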
846  
847  static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
848  {
849  	u64 ts;
850  
851  	/* Early boot up does not have a buffer yet */
852  	if (!buf->buffer)
853  		return trace_clock_local();
854  
855  	ts = ring_buffer_time_stamp(buf->buffer);
856  	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
857  
858  	return ts;
859  }
860  
861  u64 ftrace_now(int cpu)
862  {
863  	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
864  }
865  
866  /**
867   * tracing_is_enabled - Show if global_trace has been enabled
868   *
869   * Shows if the global trace has been enabled or not. It uses the
870   * mirror flag "buffer_disabled" to be used in fast paths such as for
871   * the irqsoff tracer. But it may be inaccurate due to races. If you
872   * need to know the accurate state, use tracing_is_on() which is a little
873   * slower, but accurate.
874   */
875  int tracing_is_enabled(void)
876  {
877  	/*
878  	 * For quick access (irqsoff uses this in fast path), just
879  	 * return the mirror variable of the state of the ring buffer.
880  	 * It's a little racy, but we don't really care.
881  	 */
882  	smp_rmb();
883  	return !global_trace.buffer_disabled;
884  }
885  
886  /*
887   * trace_buf_size is the size in bytes that is allocated
888   * for a buffer. Note, the number of bytes is always rounded
889   * to page size.
890   *
891   * This number is purposely set to a low number of 16384.
892   * If a dump on oops happens, it is much appreciated not to have
893   * to wait for all that output. Anyway, this is configurable at
894   * both boot time and run time.
895   */
896  #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
897  
898  static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
899  
900  /* trace_types holds a link list of available tracers. */
901  static struct tracer		*trace_types __read_mostly;
902  
903  /*
904   * trace_types_lock is used to protect the trace_types list.
905   */
906  DEFINE_MUTEX(trace_types_lock);
907  
908  /*
909   * serialize access to the ring buffer
910   *
911   * The ring buffer serializes readers, but that is only low-level protection.
912   * The validity of the events (which are returned by ring_buffer_peek() etc.)
913   * is not protected by the ring buffer.
914   *
915   * The content of events may become garbage if we allow other processes to
916   * consume these events concurrently:
917   *   A) the page of the consumed events may become a normal page
918   *      (not a reader page) in the ring buffer, and this page will be
919   *      rewritten by the events producer.
920   *   B) The page of the consumed events may become a page for splice_read,
921   *      and this page will be returned to the system.
922   *
923   * These primitives allow multi-process access to different cpu ring buffers
924   * concurrently.
925   *
926   * These primitives don't distinguish read-only and read-consume access.
927   * Multiple read-only accesses are also serialized.
928   */
929  
930  #ifdef CONFIG_SMP
931  static DECLARE_RWSEM(all_cpu_access_lock);
932  static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
933  
934  static inline void trace_access_lock(int cpu)
935  {
936  	if (cpu == RING_BUFFER_ALL_CPUS) {
937  		/* gain it for accessing the whole ring buffer. */
938  		down_write(&all_cpu_access_lock);
939  	} else {
940  		/* gain it for accessing a cpu ring buffer. */
941  
942  		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
943  		down_read(&all_cpu_access_lock);
944  
945  		/* Secondly block other access to this @cpu ring buffer. */
946  		mutex_lock(&per_cpu(cpu_access_lock, cpu));
947  	}
948  }
949  
950  static inline void trace_access_unlock(int cpu)
951  {
952  	if (cpu == RING_BUFFER_ALL_CPUS) {
953  		up_write(&all_cpu_access_lock);
954  	} else {
955  		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
956  		up_read(&all_cpu_access_lock);
957  	}
958  }
959  
960  static inline void trace_access_lock_init(void)
961  {
962  	int cpu;
963  
964  	for_each_possible_cpu(cpu)
965  		mutex_init(&per_cpu(cpu_access_lock, cpu));
966  }
967  
968  #else
969  
970  static DEFINE_MUTEX(access_lock);
971  
972  static inline void trace_access_lock(int cpu)
973  {
974  	(void)cpu;
975  	mutex_lock(&access_lock);
976  }
977  
978  static inline void trace_access_unlock(int cpu)
979  {
980  	(void)cpu;
981  	mutex_unlock(&access_lock);
982  }
983  
984  static inline void trace_access_lock_init(void)
985  {
986  }
987  
988  #endif
989  
990  #ifdef CONFIG_STACKTRACE
991  static void __ftrace_trace_stack(struct trace_buffer *buffer,
992  				 unsigned int trace_ctx,
993  				 int skip, struct pt_regs *regs);
994  static inline void ftrace_trace_stack(struct trace_array *tr,
995  				      struct trace_buffer *buffer,
996  				      unsigned int trace_ctx,
997  				      int skip, struct pt_regs *regs);
998  
999  #else
1000  static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
1001  					unsigned int trace_ctx,
1002  					int skip, struct pt_regs *regs)
1003  {
1004  }
1005  static inline void ftrace_trace_stack(struct trace_array *tr,
1006  				      struct trace_buffer *buffer,
1007  				      unsigned long trace_ctx,
1008  				      int skip, struct pt_regs *regs)
1009  {
1010  }
1011  
1012  #endif
1013  
1014  static __always_inline void
1015  trace_event_setup(struct ring_buffer_event *event,
1016  		  int type, unsigned int trace_ctx)
1017  {
1018  	struct trace_entry *ent = ring_buffer_event_data(event);
1019  
1020  	tracing_generic_entry_update(ent, type, trace_ctx);
1021  }
1022  
1023  static __always_inline struct ring_buffer_event *
1024  __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1025  			  int type,
1026  			  unsigned long len,
1027  			  unsigned int trace_ctx)
1028  {
1029  	struct ring_buffer_event *event;
1030  
1031  	event = ring_buffer_lock_reserve(buffer, len);
1032  	if (event != NULL)
1033  		trace_event_setup(event, type, trace_ctx);
1034  
1035  	return event;
1036  }
1037  
1038  void tracer_tracing_on(struct trace_array *tr)
1039  {
1040  	if (tr->array_buffer.buffer)
1041  		ring_buffer_record_on(tr->array_buffer.buffer);
1042  	/*
1043  	 * This flag is looked at when buffers haven't been allocated
1044  	 * yet, or by some tracers (like irqsoff), that just want to
1045  	 * know if the ring buffer has been disabled, but it can handle
1046  	 * races of where it gets disabled but we still do a record.
1047  	 * As the check is in the fast path of the tracers, it is more
1048  	 * important to be fast than accurate.
1049  	 */
1050  	tr->buffer_disabled = 0;
1051  	/* Make the flag seen by readers */
1052  	smp_wmb();
1053  }
1054  
1055  /**
1056   * tracing_on - enable tracing buffers
1057   *
1058   * This function enables tracing buffers that may have been
1059   * disabled with tracing_off.
1060   */
1061  void tracing_on(void)
1062  {
1063  	tracer_tracing_on(&global_trace);
1064  }
1065  EXPORT_SYMBOL_GPL(tracing_on);
1066  
1067  
1068  static __always_inline void
1069  __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1070  {
1071  	__this_cpu_write(trace_taskinfo_save, true);
1072  
1073  	/* If this is the temp buffer, we need to commit fully */
1074  	if (this_cpu_read(trace_buffered_event) == event) {
1075  		/* Length is in event->array[0] */
1076  		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1077  		/* Release the temp buffer */
1078  		this_cpu_dec(trace_buffered_event_cnt);
1079  		/* ring_buffer_unlock_commit() enables preemption */
1080  		preempt_enable_notrace();
1081  	} else
1082  		ring_buffer_unlock_commit(buffer);
1083  }
1084  
1085  int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1086  		       const char *str, int size)
1087  {
1088  	struct ring_buffer_event *event;
1089  	struct trace_buffer *buffer;
1090  	struct print_entry *entry;
1091  	unsigned int trace_ctx;
1092  	int alloc;
1093  
1094  	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1095  		return 0;
1096  
1097  	if (unlikely(tracing_selftest_running && tr == &global_trace))
1098  		return 0;
1099  
1100  	if (unlikely(tracing_disabled))
1101  		return 0;
1102  
1103  	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1104  
1105  	trace_ctx = tracing_gen_ctx();
1106  	buffer = tr->array_buffer.buffer;
1107  	ring_buffer_nest_start(buffer);
1108  	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1109  					    trace_ctx);
1110  	if (!event) {
1111  		size = 0;
1112  		goto out;
1113  	}
1114  
1115  	entry = ring_buffer_event_data(event);
1116  	entry->ip = ip;
1117  
1118  	memcpy(&entry->buf, str, size);
1119  
1120  	/* Add a newline if necessary */
1121  	if (entry->buf[size - 1] != '\n') {
1122  		entry->buf[size] = '\n';
1123  		entry->buf[size + 1] = '\0';
1124  	} else
1125  		entry->buf[size] = '\0';
1126  
1127  	__buffer_unlock_commit(buffer, event);
1128  	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1129   out:
1130  	ring_buffer_nest_end(buffer);
1131  	return size;
1132  }
1133  EXPORT_SYMBOL_GPL(__trace_array_puts);
1134  
1135  /**
1136   * __trace_puts - write a constant string into the trace buffer.
1137   * @ip:	   The address of the caller
1138   * @str:   The constant string to write
1139   * @size:  The size of the string.
1140   */
1141  int __trace_puts(unsigned long ip, const char *str, int size)
1142  {
1143  	return __trace_array_puts(printk_trace, ip, str, size);
1144  }
1145  EXPORT_SYMBOL_GPL(__trace_puts);
1146  
1147  /**
1148   * __trace_bputs - write the pointer to a constant string into trace buffer
1149   * @ip:	   The address of the caller
1150   * @str:   The constant string to write to the buffer
1151   */
1152  int __trace_bputs(unsigned long ip, const char *str)
1153  {
1154  	struct trace_array *tr = READ_ONCE(printk_trace);
1155  	struct ring_buffer_event *event;
1156  	struct trace_buffer *buffer;
1157  	struct bputs_entry *entry;
1158  	unsigned int trace_ctx;
1159  	int size = sizeof(struct bputs_entry);
1160  	int ret = 0;
1161  
1162  	if (!printk_binsafe(tr))
1163  		return __trace_puts(ip, str, strlen(str));
1164  
1165  	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1166  		return 0;
1167  
1168  	if (unlikely(tracing_selftest_running || tracing_disabled))
1169  		return 0;
1170  
1171  	trace_ctx = tracing_gen_ctx();
1172  	buffer = tr->array_buffer.buffer;
1173  
1174  	ring_buffer_nest_start(buffer);
1175  	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1176  					    trace_ctx);
1177  	if (!event)
1178  		goto out;
1179  
1180  	entry = ring_buffer_event_data(event);
1181  	entry->ip			= ip;
1182  	entry->str			= str;
1183  
1184  	__buffer_unlock_commit(buffer, event);
1185  	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1186  
1187  	ret = 1;
1188   out:
1189  	ring_buffer_nest_end(buffer);
1190  	return ret;
1191  }
1192  EXPORT_SYMBOL_GPL(__trace_bputs);
1193  
1194  #ifdef CONFIG_TRACER_SNAPSHOT
1195  static void tracing_snapshot_instance_cond(struct trace_array *tr,
1196  					   void *cond_data)
1197  {
1198  	struct tracer *tracer = tr->current_trace;
1199  	unsigned long flags;
1200  
1201  	if (in_nmi()) {
1202  		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1203  		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1204  		return;
1205  	}
1206  
1207  	if (!tr->allocated_snapshot) {
1208  		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1209  		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1210  		tracer_tracing_off(tr);
1211  		return;
1212  	}
1213  
1214  	/* Note, snapshot can not be used when the tracer uses it */
1215  	if (tracer->use_max_tr) {
1216  		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1217  		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1218  		return;
1219  	}
1220  
1221  	if (tr->mapped) {
1222  		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1223  		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1224  		return;
1225  	}
1226  
1227  	local_irq_save(flags);
1228  	update_max_tr(tr, current, smp_processor_id(), cond_data);
1229  	local_irq_restore(flags);
1230  }
1231  
1232  void tracing_snapshot_instance(struct trace_array *tr)
1233  {
1234  	tracing_snapshot_instance_cond(tr, NULL);
1235  }
1236  
1237  /**
1238   * tracing_snapshot - take a snapshot of the current buffer.
1239   *
1240   * This causes a swap between the snapshot buffer and the current live
1241   * tracing buffer. You can use this to take snapshots of the live
1242   * trace when some condition is triggered, but continue to trace.
1243   *
1244   * Note, make sure to allocate the snapshot with either
1245   * a tracing_snapshot_alloc(), or by doing it manually
1246   * with: echo 1 > /sys/kernel/tracing/snapshot
1247   *
1248   * If the snapshot buffer is not allocated, it will stop tracing.
1249   * Basically making a permanent snapshot.
1250   */
1251  void tracing_snapshot(void)
1252  {
1253  	struct trace_array *tr = &global_trace;
1254  
1255  	tracing_snapshot_instance(tr);
1256  }
1257  EXPORT_SYMBOL_GPL(tracing_snapshot);
1258  
1259  /**
1260   * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1261   * @tr:		The tracing instance to snapshot
1262   * @cond_data:	The data to be tested conditionally, and possibly saved
1263   *
1264   * This is the same as tracing_snapshot() except that the snapshot is
1265   * conditional - the snapshot will only happen if the
1266   * cond_snapshot.update() implementation receiving the cond_data
1267   * returns true, which means that the trace array's cond_snapshot
1268   * update() operation used the cond_data to determine whether the
1269   * snapshot should be taken, and if it was, presumably saved it along
1270   * with the snapshot.
1271   */
1272  void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1273  {
1274  	tracing_snapshot_instance_cond(tr, cond_data);
1275  }
1276  EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1277  
1278  /**
1279   * tracing_cond_snapshot_data - get the user data associated with a snapshot
1280   * @tr:		The tracing instance
1281   *
1282   * When the user enables a conditional snapshot using
1283   * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1284   * with the snapshot.  This accessor is used to retrieve it.
1285   *
1286   * Should not be called from cond_snapshot.update(), since it takes
1287   * the tr->max_lock lock, which the code calling
1288   * cond_snapshot.update() has already done.
1289   *
1290   * Returns the cond_data associated with the trace array's snapshot.
1291   */
1292  void *tracing_cond_snapshot_data(struct trace_array *tr)
1293  {
1294  	void *cond_data = NULL;
1295  
1296  	local_irq_disable();
1297  	arch_spin_lock(&tr->max_lock);
1298  
1299  	if (tr->cond_snapshot)
1300  		cond_data = tr->cond_snapshot->cond_data;
1301  
1302  	arch_spin_unlock(&tr->max_lock);
1303  	local_irq_enable();
1304  
1305  	return cond_data;
1306  }
1307  EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1308  
1309  static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1310  					struct array_buffer *size_buf, int cpu_id);
1311  static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1312  
1313  int tracing_alloc_snapshot_instance(struct trace_array *tr)
1314  {
1315  	int order;
1316  	int ret;
1317  
1318  	if (!tr->allocated_snapshot) {
1319  
1320  		/* Make the snapshot buffer have the same order as main buffer */
1321  		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1322  		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1323  		if (ret < 0)
1324  			return ret;
1325  
1326  		/* allocate spare buffer */
1327  		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1328  				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1329  		if (ret < 0)
1330  			return ret;
1331  
1332  		tr->allocated_snapshot = true;
1333  	}
1334  
1335  	return 0;
1336  }
1337  
1338  static void free_snapshot(struct trace_array *tr)
1339  {
1340  	/*
1341  	 * We don't free the ring buffer. Instead, we resize it because
1342  	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1343  	 * we want to preserve it.
1344  	 */
1345  	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1346  	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1347  	set_buffer_entries(&tr->max_buffer, 1);
1348  	tracing_reset_online_cpus(&tr->max_buffer);
1349  	tr->allocated_snapshot = false;
1350  }
1351  
1352  static int tracing_arm_snapshot_locked(struct trace_array *tr)
1353  {
1354  	int ret;
1355  
1356  	lockdep_assert_held(&trace_types_lock);
1357  
1358  	spin_lock(&tr->snapshot_trigger_lock);
1359  	if (tr->snapshot == UINT_MAX || tr->mapped) {
1360  		spin_unlock(&tr->snapshot_trigger_lock);
1361  		return -EBUSY;
1362  	}
1363  
1364  	tr->snapshot++;
1365  	spin_unlock(&tr->snapshot_trigger_lock);
1366  
1367  	ret = tracing_alloc_snapshot_instance(tr);
1368  	if (ret) {
1369  		spin_lock(&tr->snapshot_trigger_lock);
1370  		tr->snapshot--;
1371  		spin_unlock(&tr->snapshot_trigger_lock);
1372  	}
1373  
1374  	return ret;
1375  }
1376  
1377  int tracing_arm_snapshot(struct trace_array *tr)
1378  {
1379  	int ret;
1380  
1381  	mutex_lock(&trace_types_lock);
1382  	ret = tracing_arm_snapshot_locked(tr);
1383  	mutex_unlock(&trace_types_lock);
1384  
1385  	return ret;
1386  }
1387  
1388  void tracing_disarm_snapshot(struct trace_array *tr)
1389  {
1390  	spin_lock(&tr->snapshot_trigger_lock);
1391  	if (!WARN_ON(!tr->snapshot))
1392  		tr->snapshot--;
1393  	spin_unlock(&tr->snapshot_trigger_lock);
1394  }
1395  
1396  /**
1397   * tracing_alloc_snapshot - allocate snapshot buffer.
1398   *
1399   * This only allocates the snapshot buffer if it isn't already
1400   * allocated - it doesn't also take a snapshot.
1401   *
1402   * This is meant to be used in cases where the snapshot buffer needs
1403   * to be set up for events that can't sleep but need to be able to
1404   * trigger a snapshot.
1405   */
1406  int tracing_alloc_snapshot(void)
1407  {
1408  	struct trace_array *tr = &global_trace;
1409  	int ret;
1410  
1411  	ret = tracing_alloc_snapshot_instance(tr);
1412  	WARN_ON(ret < 0);
1413  
1414  	return ret;
1415  }
1416  EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1417  
1418  /**
1419   * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1420   *
1421   * This is similar to tracing_snapshot(), but it will allocate the
1422   * snapshot buffer if it isn't already allocated. Use this only
1423   * where it is safe to sleep, as the allocation may sleep.
1424   *
1425   * This causes a swap between the snapshot buffer and the current live
1426   * tracing buffer. You can use this to take snapshots of the live
1427   * trace when some condition is triggered, but continue to trace.
1428   */
1429  void tracing_snapshot_alloc(void)
1430  {
1431  	int ret;
1432  
1433  	ret = tracing_alloc_snapshot();
1434  	if (ret < 0)
1435  		return;
1436  
1437  	tracing_snapshot();
1438  }
1439  EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1440  
1441  /**
1442   * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1443   * @tr:		The tracing instance
1444   * @cond_data:	User data to associate with the snapshot
1445   * @update:	Implementation of the cond_snapshot update function
1446   *
1447   * Check whether the conditional snapshot for the given instance has
1448   * already been enabled, or if the current tracer is already using a
1449   * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1450   * save the cond_data and update function inside.
1451   *
1452   * Returns 0 if successful, error otherwise.
1453   */
1454  int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1455  				 cond_update_fn_t update)
1456  {
1457  	struct cond_snapshot *cond_snapshot;
1458  	int ret = 0;
1459  
1460  	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1461  	if (!cond_snapshot)
1462  		return -ENOMEM;
1463  
1464  	cond_snapshot->cond_data = cond_data;
1465  	cond_snapshot->update = update;
1466  
1467  	mutex_lock(&trace_types_lock);
1468  
1469  	if (tr->current_trace->use_max_tr) {
1470  		ret = -EBUSY;
1471  		goto fail_unlock;
1472  	}
1473  
1474  	/*
1475  	 * The cond_snapshot can only change to NULL without the
1476  	 * trace_types_lock. We don't care if we race with it going
1477  	 * to NULL, but we want to make sure that it's not set to
1478  	 * something other than NULL when we get here, which we can
1479  	 * do safely with only holding the trace_types_lock and not
1480  	 * having to take the max_lock.
1481  	 */
1482  	if (tr->cond_snapshot) {
1483  		ret = -EBUSY;
1484  		goto fail_unlock;
1485  	}
1486  
1487  	ret = tracing_arm_snapshot_locked(tr);
1488  	if (ret)
1489  		goto fail_unlock;
1490  
1491  	local_irq_disable();
1492  	arch_spin_lock(&tr->max_lock);
1493  	tr->cond_snapshot = cond_snapshot;
1494  	arch_spin_unlock(&tr->max_lock);
1495  	local_irq_enable();
1496  
1497  	mutex_unlock(&trace_types_lock);
1498  
1499  	return ret;
1500  
1501   fail_unlock:
1502  	mutex_unlock(&trace_types_lock);
1503  	kfree(cond_snapshot);
1504  	return ret;
1505  }
1506  EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1507  
1508  /**
1509   * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1510   * @tr:		The tracing instance
1511   *
1512   * Check whether the conditional snapshot for the given instance is
1513   * enabled; if so, free the cond_snapshot associated with it,
1514   * otherwise return -EINVAL.
1515   *
1516   * Returns 0 if successful, error otherwise.
1517   */
1518  int tracing_snapshot_cond_disable(struct trace_array *tr)
1519  {
1520  	int ret = 0;
1521  
1522  	local_irq_disable();
1523  	arch_spin_lock(&tr->max_lock);
1524  
1525  	if (!tr->cond_snapshot)
1526  		ret = -EINVAL;
1527  	else {
1528  		kfree(tr->cond_snapshot);
1529  		tr->cond_snapshot = NULL;
1530  	}
1531  
1532  	arch_spin_unlock(&tr->max_lock);
1533  	local_irq_enable();
1534  
1535  	tracing_disarm_snapshot(tr);
1536  
1537  	return ret;
1538  }
1539  EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1540  #else
1541  void tracing_snapshot(void)
1542  {
1543  	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1544  }
1545  EXPORT_SYMBOL_GPL(tracing_snapshot);
1546  void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1547  {
1548  	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1549  }
1550  EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1551  int tracing_alloc_snapshot(void)
1552  {
1553  	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1554  	return -ENODEV;
1555  }
1556  EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1557  void tracing_snapshot_alloc(void)
1558  {
1559  	/* Give warning */
1560  	tracing_snapshot();
1561  }
1562  EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1563  void *tracing_cond_snapshot_data(struct trace_array *tr)
1564  {
1565  	return NULL;
1566  }
1567  EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1568  int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1569  {
1570  	return -ENODEV;
1571  }
1572  EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1573  int tracing_snapshot_cond_disable(struct trace_array *tr)
1574  {
1575  	return false;
1576  }
1577  EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1578  #define free_snapshot(tr)	do { } while (0)
1579  #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1580  #endif /* CONFIG_TRACER_SNAPSHOT */
1581  
1582  void tracer_tracing_off(struct trace_array *tr)
1583  {
1584  	if (tr->array_buffer.buffer)
1585  		ring_buffer_record_off(tr->array_buffer.buffer);
1586  	/*
1587  	 * This flag is looked at when buffers haven't been allocated
1588  	 * yet, or by some tracers (like irqsoff), that just want to
1589  	 * know if the ring buffer has been disabled, but it can handle
1590  	 * races of where it gets disabled but we still do a record.
1591  	 * As the check is in the fast path of the tracers, it is more
1592  	 * important to be fast than accurate.
1593  	 */
1594  	tr->buffer_disabled = 1;
1595  	/* Make the flag seen by readers */
1596  	smp_wmb();
1597  }
1598  
1599  /**
1600   * tracing_off - turn off tracing buffers
1601   *
1602   * This function stops the tracing buffers from recording data.
1603   * It does not disable any overhead the tracers themselves may
1604   * be causing. This function simply causes all recording to
1605   * the ring buffers to fail.
1606   */
1607  void tracing_off(void)
1608  {
1609  	tracer_tracing_off(&global_trace);
1610  }
1611  EXPORT_SYMBOL_GPL(tracing_off);
1612  
1613  void disable_trace_on_warning(void)
1614  {
1615  	if (__disable_trace_on_warning) {
1616  		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1617  			"Disabling tracing due to warning\n");
1618  		tracing_off();
1619  	}
1620  }
1621  
1622  /**
1623   * tracer_tracing_is_on - show real state of ring buffer enabled
1624   * @tr : the trace array to know if ring buffer is enabled
1625   *
1626   * Shows real state of the ring buffer if it is enabled or not.
1627   */
1628  bool tracer_tracing_is_on(struct trace_array *tr)
1629  {
1630  	if (tr->array_buffer.buffer)
1631  		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1632  	return !tr->buffer_disabled;
1633  }
1634  
1635  /**
1636   * tracing_is_on - show state of ring buffers enabled
1637   */
1638  int tracing_is_on(void)
1639  {
1640  	return tracer_tracing_is_on(&global_trace);
1641  }
1642  EXPORT_SYMBOL_GPL(tracing_is_on);
1643  
1644  static int __init set_buf_size(char *str)
1645  {
1646  	unsigned long buf_size;
1647  
1648  	if (!str)
1649  		return 0;
1650  	buf_size = memparse(str, &str);
1651  	/*
1652  	 * nr_entries can not be zero and the startup
1653  	 * tests require some buffer space. Therefore
1654  	 * ensure we have at least 4096 bytes of buffer.
1655  	 */
1656  	trace_buf_size = max(4096UL, buf_size);
1657  	return 1;
1658  }
1659  __setup("trace_buf_size=", set_buf_size);
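/*
 * Example (illustrative): "trace_buf_size=1441792", or "trace_buf_size=1M"
 * since memparse() accepts size suffixes, requests the buffer size at boot;
 * anything below 4096 bytes is raised to 4096.
 */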
1660  
1661  static int __init set_tracing_thresh(char *str)
1662  {
1663  	unsigned long threshold;
1664  	int ret;
1665  
1666  	if (!str)
1667  		return 0;
1668  	ret = kstrtoul(str, 0, &threshold);
1669  	if (ret < 0)
1670  		return 0;
1671  	tracing_thresh = threshold * 1000;
1672  	return 1;
1673  }
1674  __setup("tracing_thresh=", set_tracing_thresh);
1675  
1676  unsigned long nsecs_to_usecs(unsigned long nsecs)
1677  {
1678  	return nsecs / 1000;
1679  }
1680  
1681  /*
1682   * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1683   * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1684   * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1685   * of strings in the order that the evals (enum) were defined.
1686   */
1687  #undef C
1688  #define C(a, b) b
1689  
1690  /* These must match the bit positions in trace_iterator_flags */
1691  static const char *trace_options[] = {
1692  	TRACE_FLAGS
1693  	NULL
1694  };
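
/*
 * Worked example (editorial addition): with "C(a, b) b", every
 * TRACE_FLAGS entry collapses to its string, so an entry such as
 *
 *	C(PRINT_PARENT, "print-parent"),
 *
 * contributes just "print-parent" to trace_options[]. Other users of
 * TRACE_FLAGS redefine C(a, b) differently, for instance to build the
 * matching enum of bit numbers.
 */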
1695  
1696  static struct {
1697  	u64 (*func)(void);
1698  	const char *name;
1699  	int in_ns;		/* is this clock in nanoseconds? */
1700  } trace_clocks[] = {
1701  	{ trace_clock_local,		"local",	1 },
1702  	{ trace_clock_global,		"global",	1 },
1703  	{ trace_clock_counter,		"counter",	0 },
1704  	{ trace_clock_jiffies,		"uptime",	0 },
1705  	{ trace_clock,			"perf",		1 },
1706  	{ ktime_get_mono_fast_ns,	"mono",		1 },
1707  	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1708  	{ ktime_get_boot_fast_ns,	"boot",		1 },
1709  	{ ktime_get_tai_fast_ns,	"tai",		1 },
1710  	ARCH_TRACE_CLOCKS
1711  };
1712  
1713  bool trace_clock_in_ns(struct trace_array *tr)
1714  {
1715  	if (trace_clocks[tr->clock_id].in_ns)
1716  		return true;
1717  
1718  	return false;
1719  }
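
/*
 * Illustrative usage (editorial addition): the clock is picked by name
 * from the table above through the tracefs "trace_clock" file, e.g.
 *
 *	echo mono > /sys/kernel/tracing/trace_clock
 *
 * "counter" and "uptime" are the two entries with in_ns == 0, which is
 * what trace_clock_in_ns() reports so that their timestamps are not
 * printed as if they were nanoseconds.
 */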
1720  
1721  /*
1722   * trace_parser_get_init - gets the buffer for trace parser
1723   */
1724  int trace_parser_get_init(struct trace_parser *parser, int size)
1725  {
1726  	memset(parser, 0, sizeof(*parser));
1727  
1728  	parser->buffer = kmalloc(size, GFP_KERNEL);
1729  	if (!parser->buffer)
1730  		return 1;
1731  
1732  	parser->size = size;
1733  	return 0;
1734  }
1735  
1736  /*
1737   * trace_parser_put - frees the buffer for trace parser
1738   */
1739  void trace_parser_put(struct trace_parser *parser)
1740  {
1741  	kfree(parser->buffer);
1742  	parser->buffer = NULL;
1743  }
1744  
1745  /*
1746   * trace_get_user - reads the user input string separated by space
1747   * (matched by isspace(ch))
1748   *
1749   * For each string found the 'struct trace_parser' is updated,
1750   * and the function returns.
1751   *
1752   * Returns number of bytes read.
1753   *
1754   * See kernel/trace/trace.h for 'struct trace_parser' details.
1755   */
1756  int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1757  	size_t cnt, loff_t *ppos)
1758  {
1759  	char ch;
1760  	size_t read = 0;
1761  	ssize_t ret;
1762  
1763  	if (!*ppos)
1764  		trace_parser_clear(parser);
1765  
1766  	ret = get_user(ch, ubuf++);
1767  	if (ret)
1768  		goto out;
1769  
1770  	read++;
1771  	cnt--;
1772  
1773  	/*
1774  	 * The parser is not finished with the last write,
1775  	 * continue reading the user input without skipping spaces.
1776  	 */
1777  	if (!parser->cont) {
1778  		/* skip white space */
1779  		while (cnt && isspace(ch)) {
1780  			ret = get_user(ch, ubuf++);
1781  			if (ret)
1782  				goto out;
1783  			read++;
1784  			cnt--;
1785  		}
1786  
1787  		parser->idx = 0;
1788  
1789  		/* only spaces were written */
1790  		if (isspace(ch) || !ch) {
1791  			*ppos += read;
1792  			ret = read;
1793  			goto out;
1794  		}
1795  	}
1796  
1797  	/* read the non-space input */
1798  	while (cnt && !isspace(ch) && ch) {
1799  		if (parser->idx < parser->size - 1)
1800  			parser->buffer[parser->idx++] = ch;
1801  		else {
1802  			ret = -EINVAL;
1803  			goto out;
1804  		}
1805  		ret = get_user(ch, ubuf++);
1806  		if (ret)
1807  			goto out;
1808  		read++;
1809  		cnt--;
1810  	}
1811  
1812  	/* We either got finished input or we have to wait for another call. */
1813  	if (isspace(ch) || !ch) {
1814  		parser->buffer[parser->idx] = 0;
1815  		parser->cont = false;
1816  	} else if (parser->idx < parser->size - 1) {
1817  		parser->cont = true;
1818  		parser->buffer[parser->idx++] = ch;
1819  		/* Make sure the parsed string always terminates with '\0'. */
1820  		parser->buffer[parser->idx] = 0;
1821  	} else {
1822  		ret = -EINVAL;
1823  		goto out;
1824  	}
1825  
1826  	*ppos += read;
1827  	ret = read;
1828  
1829  out:
1830  	return ret;
1831  }
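
/*
 * Worked example (editorial addition, simplified): if user space writes
 * "foo bar\n" to a file whose handler uses trace_get_user(), the first
 * call consumes "foo " (returns 4) and leaves "foo" NUL-terminated in
 * parser->buffer; a second call over the remaining "bar\n" returns
 * "bar". If a word is split across writes ("fo" then "o "), parser->cont
 * makes the second call append to the partial token instead of starting
 * over.
 */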
1832  
1833  /* TODO add a seq_buf_to_buffer() */
1834  static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1835  {
1836  	int len;
1837  
1838  	if (trace_seq_used(s) <= s->readpos)
1839  		return -EBUSY;
1840  
1841  	len = trace_seq_used(s) - s->readpos;
1842  	if (cnt > len)
1843  		cnt = len;
1844  	memcpy(buf, s->buffer + s->readpos, cnt);
1845  
1846  	s->readpos += cnt;
1847  	return cnt;
1848  }
1849  
1850  unsigned long __read_mostly	tracing_thresh;
1851  
1852  #ifdef CONFIG_TRACER_MAX_TRACE
1853  static const struct file_operations tracing_max_lat_fops;
1854  
1855  #ifdef LATENCY_FS_NOTIFY
1856  
1857  static struct workqueue_struct *fsnotify_wq;
1858  
1859  static void latency_fsnotify_workfn(struct work_struct *work)
1860  {
1861  	struct trace_array *tr = container_of(work, struct trace_array,
1862  					      fsnotify_work);
1863  	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1864  }
1865  
1866  static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1867  {
1868  	struct trace_array *tr = container_of(iwork, struct trace_array,
1869  					      fsnotify_irqwork);
1870  	queue_work(fsnotify_wq, &tr->fsnotify_work);
1871  }
1872  
1873  static void trace_create_maxlat_file(struct trace_array *tr,
1874  				     struct dentry *d_tracer)
1875  {
1876  	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1877  	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1878  	tr->d_max_latency = trace_create_file("tracing_max_latency",
1879  					      TRACE_MODE_WRITE,
1880  					      d_tracer, tr,
1881  					      &tracing_max_lat_fops);
1882  }
1883  
1884  __init static int latency_fsnotify_init(void)
1885  {
1886  	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1887  				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1888  	if (!fsnotify_wq) {
1889  		pr_err("Unable to allocate tr_max_lat_wq\n");
1890  		return -ENOMEM;
1891  	}
1892  	return 0;
1893  }
1894  
1895  late_initcall_sync(latency_fsnotify_init);
1896  
1897  void latency_fsnotify(struct trace_array *tr)
1898  {
1899  	if (!fsnotify_wq)
1900  		return;
1901  	/*
1902  	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1903  	 * possible that we are called from __schedule() or do_idle(), which
1904  	 * could cause a deadlock.
1905  	 */
1906  	irq_work_queue(&tr->fsnotify_irqwork);
1907  }
1908  
1909  #else /* !LATENCY_FS_NOTIFY */
1910  
1911  #define trace_create_maxlat_file(tr, d_tracer)				\
1912  	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1913  			  d_tracer, tr, &tracing_max_lat_fops)
1914  
1915  #endif
1916  
1917  /*
1918   * Copy the new maximum trace into the separate maximum-trace
1919   * structure. (this way the maximum trace is permanently saved,
1920   * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1921   */
1922  static void
1923  __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1924  {
1925  	struct array_buffer *trace_buf = &tr->array_buffer;
1926  	struct array_buffer *max_buf = &tr->max_buffer;
1927  	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1928  	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1929  
1930  	max_buf->cpu = cpu;
1931  	max_buf->time_start = data->preempt_timestamp;
1932  
1933  	max_data->saved_latency = tr->max_latency;
1934  	max_data->critical_start = data->critical_start;
1935  	max_data->critical_end = data->critical_end;
1936  
1937  	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1938  	max_data->pid = tsk->pid;
1939  	/*
1940  	 * If tsk == current, then use current_uid(), as that does not use
1941  	 * RCU. The irq tracer can be called out of RCU scope.
1942  	 */
1943  	if (tsk == current)
1944  		max_data->uid = current_uid();
1945  	else
1946  		max_data->uid = task_uid(tsk);
1947  
1948  	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1949  	max_data->policy = tsk->policy;
1950  	max_data->rt_priority = tsk->rt_priority;
1951  
1952  	/* record this task's comm */
1953  	tracing_record_cmdline(tsk);
1954  	latency_fsnotify(tr);
1955  }
1956  
1957  /**
1958   * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1959   * @tr: tracer
1960   * @tsk: the task with the latency
1961   * @cpu: The cpu that initiated the trace.
1962   * @cond_data: User data associated with a conditional snapshot
1963   *
1964   * Flip the buffers between the @tr and the max_tr and record information
1965   * about which task was the cause of this latency.
1966   */
1967  void
1968  update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1969  	      void *cond_data)
1970  {
1971  	if (tr->stop_count)
1972  		return;
1973  
1974  	WARN_ON_ONCE(!irqs_disabled());
1975  
1976  	if (!tr->allocated_snapshot) {
1977  		/* Only the nop tracer should hit this when disabling */
1978  		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1979  		return;
1980  	}
1981  
1982  	arch_spin_lock(&tr->max_lock);
1983  
1984  	/* Inherit the recordable setting from array_buffer */
1985  	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1986  		ring_buffer_record_on(tr->max_buffer.buffer);
1987  	else
1988  		ring_buffer_record_off(tr->max_buffer.buffer);
1989  
1990  #ifdef CONFIG_TRACER_SNAPSHOT
1991  	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1992  		arch_spin_unlock(&tr->max_lock);
1993  		return;
1994  	}
1995  #endif
1996  	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1997  
1998  	__update_max_tr(tr, tsk, cpu);
1999  
2000  	arch_spin_unlock(&tr->max_lock);
2001  
2002  	/* Any waiters on the old snapshot buffer need to wake up */
2003  	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2004  }
2005  
2006  /**
2007   * update_max_tr_single - only copy one trace over, and reset the rest
2008   * @tr: tracer
2009   * @tsk: task with the latency
2010   * @cpu: the cpu of the buffer to copy.
2011   *
2012   * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2013   */
2014  void
2015  update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2016  {
2017  	int ret;
2018  
2019  	if (tr->stop_count)
2020  		return;
2021  
2022  	WARN_ON_ONCE(!irqs_disabled());
2023  	if (!tr->allocated_snapshot) {
2024  		/* Only the nop tracer should hit this when disabling */
2025  		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2026  		return;
2027  	}
2028  
2029  	arch_spin_lock(&tr->max_lock);
2030  
2031  	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2032  
2033  	if (ret == -EBUSY) {
2034  		/*
2035  		 * We failed to swap the buffer due to a commit taking
2036  		 * place on this CPU. We fail to record, but we reset
2037  		 * the max trace buffer (no one writes directly to it)
2038  		 * and flag that it failed.
2039  		 * Another reason is resize is in progress.
2040  		 */
2041  		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2042  			"Failed to swap buffers due to commit or resize in progress\n");
2043  	}
2044  
2045  	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2046  
2047  	__update_max_tr(tr, tsk, cpu);
2048  	arch_spin_unlock(&tr->max_lock);
2049  }
2050  
2051  #endif /* CONFIG_TRACER_MAX_TRACE */
2052  
2053  struct pipe_wait {
2054  	struct trace_iterator		*iter;
2055  	int				wait_index;
2056  };
2057  
2058  static bool wait_pipe_cond(void *data)
2059  {
2060  	struct pipe_wait *pwait = data;
2061  	struct trace_iterator *iter = pwait->iter;
2062  
2063  	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2064  		return true;
2065  
2066  	return iter->closed;
2067  }
2068  
2069  static int wait_on_pipe(struct trace_iterator *iter, int full)
2070  {
2071  	struct pipe_wait pwait;
2072  	int ret;
2073  
2074  	/* Iterators are static, they should be filled or empty */
2075  	if (trace_buffer_iter(iter, iter->cpu_file))
2076  		return 0;
2077  
2078  	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2079  	pwait.iter = iter;
2080  
2081  	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2082  			       wait_pipe_cond, &pwait);
2083  
2084  #ifdef CONFIG_TRACER_MAX_TRACE
2085  	/*
2086  	 * Make sure this is still the snapshot buffer, as if a snapshot were
2087  	 * to happen, this would now be the main buffer.
2088  	 */
2089  	if (iter->snapshot)
2090  		iter->array_buffer = &iter->tr->max_buffer;
2091  #endif
2092  	return ret;
2093  }
2094  
2095  #ifdef CONFIG_FTRACE_STARTUP_TEST
2096  static bool selftests_can_run;
2097  
2098  struct trace_selftests {
2099  	struct list_head		list;
2100  	struct tracer			*type;
2101  };
2102  
2103  static LIST_HEAD(postponed_selftests);
2104  
2105  static int save_selftest(struct tracer *type)
2106  {
2107  	struct trace_selftests *selftest;
2108  
2109  	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2110  	if (!selftest)
2111  		return -ENOMEM;
2112  
2113  	selftest->type = type;
2114  	list_add(&selftest->list, &postponed_selftests);
2115  	return 0;
2116  }
2117  
2118  static int run_tracer_selftest(struct tracer *type)
2119  {
2120  	struct trace_array *tr = &global_trace;
2121  	struct tracer *saved_tracer = tr->current_trace;
2122  	int ret;
2123  
2124  	if (!type->selftest || tracing_selftest_disabled)
2125  		return 0;
2126  
2127  	/*
2128  	 * If a tracer registers early in boot up (before scheduling is
2129  	 * initialized and such), then do not run its selftests yet.
2130  	 * Instead, run it a little later in the boot process.
2131  	 */
2132  	if (!selftests_can_run)
2133  		return save_selftest(type);
2134  
2135  	if (!tracing_is_on()) {
2136  		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2137  			type->name);
2138  		return 0;
2139  	}
2140  
2141  	/*
2142  	 * Run a selftest on this tracer.
2143  	 * Here we reset the trace buffer, and set the current
2144  	 * tracer to be this tracer. The tracer can then run some
2145  	 * internal tracing to verify that everything is in order.
2146  	 * If we fail, we do not register this tracer.
2147  	 */
2148  	tracing_reset_online_cpus(&tr->array_buffer);
2149  
2150  	tr->current_trace = type;
2151  
2152  #ifdef CONFIG_TRACER_MAX_TRACE
2153  	if (type->use_max_tr) {
2154  		/* If we expanded the buffers, make sure the max is expanded too */
2155  		if (tr->ring_buffer_expanded)
2156  			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2157  					   RING_BUFFER_ALL_CPUS);
2158  		tr->allocated_snapshot = true;
2159  	}
2160  #endif
2161  
2162  	/* the test is responsible for initializing and enabling */
2163  	pr_info("Testing tracer %s: ", type->name);
2164  	ret = type->selftest(type, tr);
2165  	/* the test is responsible for resetting too */
2166  	tr->current_trace = saved_tracer;
2167  	if (ret) {
2168  		printk(KERN_CONT "FAILED!\n");
2169  		/* Add the warning after printing 'FAILED' */
2170  		WARN_ON(1);
2171  		return -1;
2172  	}
2173  	/* Only reset on passing, to avoid touching corrupted buffers */
2174  	tracing_reset_online_cpus(&tr->array_buffer);
2175  
2176  #ifdef CONFIG_TRACER_MAX_TRACE
2177  	if (type->use_max_tr) {
2178  		tr->allocated_snapshot = false;
2179  
2180  		/* Shrink the max buffer again */
2181  		if (tr->ring_buffer_expanded)
2182  			ring_buffer_resize(tr->max_buffer.buffer, 1,
2183  					   RING_BUFFER_ALL_CPUS);
2184  	}
2185  #endif
2186  
2187  	printk(KERN_CONT "PASSED\n");
2188  	return 0;
2189  }
2190  
2191  static int do_run_tracer_selftest(struct tracer *type)
2192  {
2193  	int ret;
2194  
2195  	/*
2196  	 * Tests can take a long time, especially if they are run one after the
2197  	 * other, as does happen during bootup when all the tracers are
2198  	 * registered. This could cause the soft lockup watchdog to trigger.
2199  	 */
2200  	cond_resched();
2201  
2202  	tracing_selftest_running = true;
2203  	ret = run_tracer_selftest(type);
2204  	tracing_selftest_running = false;
2205  
2206  	return ret;
2207  }
2208  
2209  static __init int init_trace_selftests(void)
2210  {
2211  	struct trace_selftests *p, *n;
2212  	struct tracer *t, **last;
2213  	int ret;
2214  
2215  	selftests_can_run = true;
2216  
2217  	mutex_lock(&trace_types_lock);
2218  
2219  	if (list_empty(&postponed_selftests))
2220  		goto out;
2221  
2222  	pr_info("Running postponed tracer tests:\n");
2223  
2224  	tracing_selftest_running = true;
2225  	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2226  		/* This loop can take minutes when sanitizers are enabled, so
2227  		 * let's make sure we allow RCU processing.
2228  		 */
2229  		cond_resched();
2230  		ret = run_tracer_selftest(p->type);
2231  		/* If the test fails, then warn and remove from available_tracers */
2232  		if (ret < 0) {
2233  			WARN(1, "tracer: %s failed selftest, disabling\n",
2234  			     p->type->name);
2235  			last = &trace_types;
2236  			for (t = trace_types; t; t = t->next) {
2237  				if (t == p->type) {
2238  					*last = t->next;
2239  					break;
2240  				}
2241  				last = &t->next;
2242  			}
2243  		}
2244  		list_del(&p->list);
2245  		kfree(p);
2246  	}
2247  	tracing_selftest_running = false;
2248  
2249   out:
2250  	mutex_unlock(&trace_types_lock);
2251  
2252  	return 0;
2253  }
2254  core_initcall(init_trace_selftests);
2255  #else
2256  static inline int do_run_tracer_selftest(struct tracer *type)
2257  {
2258  	return 0;
2259  }
2260  #endif /* CONFIG_FTRACE_STARTUP_TEST */
2261  
2262  static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2263  
2264  static void __init apply_trace_boot_options(void);
2265  
2266  /**
2267   * register_tracer - register a tracer with the ftrace system.
2268   * @type: the plugin for the tracer
2269   *
2270   * Register a new plugin tracer.
2271   */
2272  int __init register_tracer(struct tracer *type)
2273  {
2274  	struct tracer *t;
2275  	int ret = 0;
2276  
2277  	if (!type->name) {
2278  		pr_info("Tracer must have a name\n");
2279  		return -1;
2280  	}
2281  
2282  	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2283  		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2284  		return -1;
2285  	}
2286  
2287  	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2288  		pr_warn("Can not register tracer %s due to lockdown\n",
2289  			   type->name);
2290  		return -EPERM;
2291  	}
2292  
2293  	mutex_lock(&trace_types_lock);
2294  
2295  	for (t = trace_types; t; t = t->next) {
2296  		if (strcmp(type->name, t->name) == 0) {
2297  			/* already found */
2298  			pr_info("Tracer %s already registered\n",
2299  				type->name);
2300  			ret = -1;
2301  			goto out;
2302  		}
2303  	}
2304  
2305  	if (!type->set_flag)
2306  		type->set_flag = &dummy_set_flag;
2307  	if (!type->flags) {
2308  		/* allocate a dummy tracer_flags */
2309  		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2310  		if (!type->flags) {
2311  			ret = -ENOMEM;
2312  			goto out;
2313  		}
2314  		type->flags->val = 0;
2315  		type->flags->opts = dummy_tracer_opt;
2316  	} else
2317  		if (!type->flags->opts)
2318  			type->flags->opts = dummy_tracer_opt;
2319  
2320  	/* store the tracer for __set_tracer_option */
2321  	type->flags->trace = type;
2322  
2323  	ret = do_run_tracer_selftest(type);
2324  	if (ret < 0)
2325  		goto out;
2326  
2327  	type->next = trace_types;
2328  	trace_types = type;
2329  	add_tracer_options(&global_trace, type);
2330  
2331   out:
2332  	mutex_unlock(&trace_types_lock);
2333  
2334  	if (ret || !default_bootup_tracer)
2335  		goto out_unlock;
2336  
2337  	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2338  		goto out_unlock;
2339  
2340  	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2341  	/* Do we want this tracer to start on bootup? */
2342  	tracing_set_tracer(&global_trace, type->name);
2343  	default_bootup_tracer = NULL;
2344  
2345  	apply_trace_boot_options();
2346  
2347  	/* disable other selftests, since this will break it. */
2348  	disable_tracing_selftest("running a tracer");
2349  
2350   out_unlock:
2351  	return ret;
2352  }
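
/*
 * Minimal registration sketch (editorial addition; the "foo" tracer and
 * its callbacks are hypothetical, error handling trimmed):
 *
 *	static struct tracer foo_tracer __read_mostly = {
 *		.name	= "foo",
 *		.init	= foo_tracer_init,
 *		.reset	= foo_tracer_reset,
 *	};
 *
 *	static __init int init_foo_tracer(void)
 *	{
 *		return register_tracer(&foo_tracer);
 *	}
 *	core_initcall(init_foo_tracer);
 *
 * Once registered, "foo" shows up in available_tracers and is selected
 * by writing its name to current_tracer.
 */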
2353  
2354  static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2355  {
2356  	struct trace_buffer *buffer = buf->buffer;
2357  
2358  	if (!buffer)
2359  		return;
2360  
2361  	ring_buffer_record_disable(buffer);
2362  
2363  	/* Make sure all commits have finished */
2364  	synchronize_rcu();
2365  	ring_buffer_reset_cpu(buffer, cpu);
2366  
2367  	ring_buffer_record_enable(buffer);
2368  }
2369  
2370  void tracing_reset_online_cpus(struct array_buffer *buf)
2371  {
2372  	struct trace_buffer *buffer = buf->buffer;
2373  
2374  	if (!buffer)
2375  		return;
2376  
2377  	ring_buffer_record_disable(buffer);
2378  
2379  	/* Make sure all commits have finished */
2380  	synchronize_rcu();
2381  
2382  	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2383  
2384  	ring_buffer_reset_online_cpus(buffer);
2385  
2386  	ring_buffer_record_enable(buffer);
2387  }
2388  
2389  static void tracing_reset_all_cpus(struct array_buffer *buf)
2390  {
2391  	struct trace_buffer *buffer = buf->buffer;
2392  
2393  	if (!buffer)
2394  		return;
2395  
2396  	ring_buffer_record_disable(buffer);
2397  
2398  	/* Make sure all commits have finished */
2399  	synchronize_rcu();
2400  
2401  	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2402  
2403  	ring_buffer_reset(buffer);
2404  
2405  	ring_buffer_record_enable(buffer);
2406  }
2407  
2408  /* Must have trace_types_lock held */
2409  void tracing_reset_all_online_cpus_unlocked(void)
2410  {
2411  	struct trace_array *tr;
2412  
2413  	lockdep_assert_held(&trace_types_lock);
2414  
2415  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2416  		if (!tr->clear_trace)
2417  			continue;
2418  		tr->clear_trace = false;
2419  		tracing_reset_online_cpus(&tr->array_buffer);
2420  #ifdef CONFIG_TRACER_MAX_TRACE
2421  		tracing_reset_online_cpus(&tr->max_buffer);
2422  #endif
2423  	}
2424  }
2425  
2426  void tracing_reset_all_online_cpus(void)
2427  {
2428  	mutex_lock(&trace_types_lock);
2429  	tracing_reset_all_online_cpus_unlocked();
2430  	mutex_unlock(&trace_types_lock);
2431  }
2432  
2433  int is_tracing_stopped(void)
2434  {
2435  	return global_trace.stop_count;
2436  }
2437  
2438  static void tracing_start_tr(struct trace_array *tr)
2439  {
2440  	struct trace_buffer *buffer;
2441  	unsigned long flags;
2442  
2443  	if (tracing_disabled)
2444  		return;
2445  
2446  	raw_spin_lock_irqsave(&tr->start_lock, flags);
2447  	if (--tr->stop_count) {
2448  		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2449  			/* Someone screwed up their debugging */
2450  			tr->stop_count = 0;
2451  		}
2452  		goto out;
2453  	}
2454  
2455  	/* Prevent the buffers from switching */
2456  	arch_spin_lock(&tr->max_lock);
2457  
2458  	buffer = tr->array_buffer.buffer;
2459  	if (buffer)
2460  		ring_buffer_record_enable(buffer);
2461  
2462  #ifdef CONFIG_TRACER_MAX_TRACE
2463  	buffer = tr->max_buffer.buffer;
2464  	if (buffer)
2465  		ring_buffer_record_enable(buffer);
2466  #endif
2467  
2468  	arch_spin_unlock(&tr->max_lock);
2469  
2470   out:
2471  	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2472  }
2473  
2474  /**
2475   * tracing_start - quick start of the tracer
2476   *
2477   * If tracing is enabled but was stopped by tracing_stop,
2478   * this will start the tracer back up.
2479   */
2480  void tracing_start(void)
2481  
2482  {
2483  	return tracing_start_tr(&global_trace);
2484  }
2485  
2486  static void tracing_stop_tr(struct trace_array *tr)
2487  {
2488  	struct trace_buffer *buffer;
2489  	unsigned long flags;
2490  
2491  	raw_spin_lock_irqsave(&tr->start_lock, flags);
2492  	if (tr->stop_count++)
2493  		goto out;
2494  
2495  	/* Prevent the buffers from switching */
2496  	arch_spin_lock(&tr->max_lock);
2497  
2498  	buffer = tr->array_buffer.buffer;
2499  	if (buffer)
2500  		ring_buffer_record_disable(buffer);
2501  
2502  #ifdef CONFIG_TRACER_MAX_TRACE
2503  	buffer = tr->max_buffer.buffer;
2504  	if (buffer)
2505  		ring_buffer_record_disable(buffer);
2506  #endif
2507  
2508  	arch_spin_unlock(&tr->max_lock);
2509  
2510   out:
2511  	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2512  }
2513  
2514  /**
2515   * tracing_stop - quick stop of the tracer
2516   *
2517   * Light weight way to stop tracing. Use in conjunction with
2518   * tracing_start.
2519   */
2520  void tracing_stop(void)
2521  {
2522  	return tracing_stop_tr(&global_trace);
2523  }
2524  
2525  /*
2526   * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2527   * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2528   * simplifies those functions and keeps them in sync.
2529   */
2530  enum print_line_t trace_handle_return(struct trace_seq *s)
2531  {
2532  	return trace_seq_has_overflowed(s) ?
2533  		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2534  }
2535  EXPORT_SYMBOL_GPL(trace_handle_return);
2536  
2537  static unsigned short migration_disable_value(void)
2538  {
2539  #if defined(CONFIG_SMP)
2540  	return current->migration_disabled;
2541  #else
2542  	return 0;
2543  #endif
2544  }
2545  
2546  unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2547  {
2548  	unsigned int trace_flags = irqs_status;
2549  	unsigned int pc;
2550  
2551  	pc = preempt_count();
2552  
2553  	if (pc & NMI_MASK)
2554  		trace_flags |= TRACE_FLAG_NMI;
2555  	if (pc & HARDIRQ_MASK)
2556  		trace_flags |= TRACE_FLAG_HARDIRQ;
2557  	if (in_serving_softirq())
2558  		trace_flags |= TRACE_FLAG_SOFTIRQ;
2559  	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2560  		trace_flags |= TRACE_FLAG_BH_OFF;
2561  
2562  	if (tif_need_resched())
2563  		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2564  	if (test_preempt_need_resched())
2565  		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2566  	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2567  		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2568  }
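
/*
 * Worked example of the packing above (editorial addition): bits 0-3
 * hold the (clamped) preemption depth, bits 4-7 the (clamped)
 * migration-disable depth, and the TRACE_FLAG_* bits sit at bit 16 and
 * up. So in a hard interrupt with preempt depth 2 and one level of
 * migration disable, the result is roughly
 *
 *	(trace_flags << 16) | (1 << 4) | 2
 *
 * which the output code later unpacks into the per-event flag columns
 * (irqs-off, need-resched, and so on).
 */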
2569  
2570  struct ring_buffer_event *
2571  trace_buffer_lock_reserve(struct trace_buffer *buffer,
2572  			  int type,
2573  			  unsigned long len,
2574  			  unsigned int trace_ctx)
2575  {
2576  	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2577  }
2578  
2579  DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2580  DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2581  static int trace_buffered_event_ref;
2582  
2583  /**
2584   * trace_buffered_event_enable - enable buffering events
2585   *
2586   * When events are being filtered, it is quicker to use a temporary
2587   * buffer to write the event data into if there's a likely chance
2588   * that it will not be committed. The discard of the ring buffer
2589   * is not as fast as committing, and is much slower than copying
2590   * a commit.
2591   *
2592   * When an event is to be filtered, allocate per cpu buffers to
2593   * write the event data into, and if the event is filtered and discarded
2594   * it is simply dropped, otherwise, the entire data is to be committed
2595   * in one shot.
2596   */
2597  void trace_buffered_event_enable(void)
2598  {
2599  	struct ring_buffer_event *event;
2600  	struct page *page;
2601  	int cpu;
2602  
2603  	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2604  
2605  	if (trace_buffered_event_ref++)
2606  		return;
2607  
2608  	for_each_tracing_cpu(cpu) {
2609  		page = alloc_pages_node(cpu_to_node(cpu),
2610  					GFP_KERNEL | __GFP_NORETRY, 0);
2611  		/* This is just an optimization and can handle failures */
2612  		if (!page) {
2613  			pr_err("Failed to allocate event buffer\n");
2614  			break;
2615  		}
2616  
2617  		event = page_address(page);
2618  		memset(event, 0, sizeof(*event));
2619  
2620  		per_cpu(trace_buffered_event, cpu) = event;
2621  
2622  		preempt_disable();
2623  		if (cpu == smp_processor_id() &&
2624  		    __this_cpu_read(trace_buffered_event) !=
2625  		    per_cpu(trace_buffered_event, cpu))
2626  			WARN_ON_ONCE(1);
2627  		preempt_enable();
2628  	}
2629  }
2630  
2631  static void enable_trace_buffered_event(void *data)
2632  {
2633  	/* Probably not needed, but do it anyway */
2634  	smp_rmb();
2635  	this_cpu_dec(trace_buffered_event_cnt);
2636  }
2637  
2638  static void disable_trace_buffered_event(void *data)
2639  {
2640  	this_cpu_inc(trace_buffered_event_cnt);
2641  }
2642  
2643  /**
2644   * trace_buffered_event_disable - disable buffering events
2645   *
2646   * When a filter is removed, it is faster to not use the buffered
2647   * events, and to commit directly into the ring buffer. Free up
2648   * the temp buffers when there are no more users. This requires
2649   * special synchronization with current events.
2650   */
2651  void trace_buffered_event_disable(void)
2652  {
2653  	int cpu;
2654  
2655  	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2656  
2657  	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2658  		return;
2659  
2660  	if (--trace_buffered_event_ref)
2661  		return;
2662  
2663  	/* For each CPU, set the buffer as used. */
2664  	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2665  			 NULL, true);
2666  
2667  	/* Wait for all current users to finish */
2668  	synchronize_rcu();
2669  
2670  	for_each_tracing_cpu(cpu) {
2671  		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2672  		per_cpu(trace_buffered_event, cpu) = NULL;
2673  	}
2674  
2675  	/*
2676  	 * Wait for all CPUs that potentially started checking if they can use
2677  	 * their event buffer only after the previous synchronize_rcu() call and
2678  	 * they still read a valid pointer from trace_buffered_event. It must be
2679  	 * ensured they don't see cleared trace_buffered_event_cnt else they
2680  	 * could wrongly decide to use the pointed-to buffer which is now freed.
2681  	 */
2682  	synchronize_rcu();
2683  
2684  	/* For each CPU, relinquish the buffer */
2685  	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2686  			 true);
2687  }
2688  
2689  static struct trace_buffer *temp_buffer;
2690  
2691  struct ring_buffer_event *
2692  trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2693  			  struct trace_event_file *trace_file,
2694  			  int type, unsigned long len,
2695  			  unsigned int trace_ctx)
2696  {
2697  	struct ring_buffer_event *entry;
2698  	struct trace_array *tr = trace_file->tr;
2699  	int val;
2700  
2701  	*current_rb = tr->array_buffer.buffer;
2702  
2703  	if (!tr->no_filter_buffering_ref &&
2704  	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2705  		preempt_disable_notrace();
2706  		/*
2707  		 * Filtering is on, so try to use the per cpu buffer first.
2708  		 * This buffer will simulate a ring_buffer_event,
2709  		 * where the type_len is zero and the array[0] will
2710  		 * hold the full length.
2711  		 * (see include/linux/ring-buffer.h for details on
2712  		 *  how the ring_buffer_event is structured).
2713  		 *
2714  		 * Using a temp buffer during filtering and copying it
2715  		 * on a matched filter is quicker than writing directly
2716  		 * into the ring buffer and then discarding it when
2717  		 * it doesn't match. That is because the discard
2718  		 * requires several atomic operations to get right.
2719  		 * Copying on match and doing nothing on a failed match
2720  		 * is still quicker than no copy on match, but having
2721  		 * to discard out of the ring buffer on a failed match.
2722  		 */
2723  		if ((entry = __this_cpu_read(trace_buffered_event))) {
2724  			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2725  
2726  			val = this_cpu_inc_return(trace_buffered_event_cnt);
2727  
2728  			/*
2729  			 * Preemption is disabled, but interrupts and NMIs
2730  			 * can still come in now. If that happens after
2731  			 * the above increment, then it will have to go
2732  			 * back to the old method of allocating the event
2733  			 * on the ring buffer, and if the filter fails, it
2734  			 * will have to call ring_buffer_discard_commit()
2735  			 * to remove it.
2736  			 *
2737  			 * Need to also check the unlikely case that the
2738  			 * length is bigger than the temp buffer size.
2739  			 * If that happens, then the reserve is pretty much
2740  			 * guaranteed to fail, as the ring buffer currently
2741  			 * only allows events less than a page. But that may
2742  			 * change in the future, so let the ring buffer reserve
2743  			 * handle the failure in that case.
2744  			 */
2745  			if (val == 1 && likely(len <= max_len)) {
2746  				trace_event_setup(entry, type, trace_ctx);
2747  				entry->array[0] = len;
2748  				/* Return with preemption disabled */
2749  				return entry;
2750  			}
2751  			this_cpu_dec(trace_buffered_event_cnt);
2752  		}
2753  		/* __trace_buffer_lock_reserve() disables preemption */
2754  		preempt_enable_notrace();
2755  	}
2756  
2757  	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2758  					    trace_ctx);
2759  	/*
2760  	 * If tracing is off, but we have triggers enabled
2761  	 * we still need to look at the event data. Use the temp_buffer
2762  	 * to store the trace event for the trigger to use. It's recursive
2763  	 * safe and will not be recorded anywhere.
2764  	 */
2765  	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2766  		*current_rb = temp_buffer;
2767  		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2768  						    trace_ctx);
2769  	}
2770  	return entry;
2771  }
2772  EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2773  
2774  static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2775  static DEFINE_MUTEX(tracepoint_printk_mutex);
2776  
2777  static void output_printk(struct trace_event_buffer *fbuffer)
2778  {
2779  	struct trace_event_call *event_call;
2780  	struct trace_event_file *file;
2781  	struct trace_event *event;
2782  	unsigned long flags;
2783  	struct trace_iterator *iter = tracepoint_print_iter;
2784  
2785  	/* We should never get here if iter is NULL */
2786  	if (WARN_ON_ONCE(!iter))
2787  		return;
2788  
2789  	event_call = fbuffer->trace_file->event_call;
2790  	if (!event_call || !event_call->event.funcs ||
2791  	    !event_call->event.funcs->trace)
2792  		return;
2793  
2794  	file = fbuffer->trace_file;
2795  	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2796  	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2797  	     !filter_match_preds(file->filter, fbuffer->entry)))
2798  		return;
2799  
2800  	event = &fbuffer->trace_file->event_call->event;
2801  
2802  	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2803  	trace_seq_init(&iter->seq);
2804  	iter->ent = fbuffer->entry;
2805  	event_call->event.funcs->trace(iter, 0, event);
2806  	trace_seq_putc(&iter->seq, 0);
2807  	printk("%s", iter->seq.buffer);
2808  
2809  	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2810  }
2811  
2812  int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2813  			     void *buffer, size_t *lenp,
2814  			     loff_t *ppos)
2815  {
2816  	int save_tracepoint_printk;
2817  	int ret;
2818  
2819  	mutex_lock(&tracepoint_printk_mutex);
2820  	save_tracepoint_printk = tracepoint_printk;
2821  
2822  	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2823  
2824  	/*
2825  	 * This will force exiting early, as tracepoint_printk
2826  	 * is always zero when tracepoint_printk_iter is not allocated
2827  	 */
2828  	if (!tracepoint_print_iter)
2829  		tracepoint_printk = 0;
2830  
2831  	if (save_tracepoint_printk == tracepoint_printk)
2832  		goto out;
2833  
2834  	if (tracepoint_printk)
2835  		static_key_enable(&tracepoint_printk_key.key);
2836  	else
2837  		static_key_disable(&tracepoint_printk_key.key);
2838  
2839   out:
2840  	mutex_unlock(&tracepoint_printk_mutex);
2841  
2842  	return ret;
2843  }
2844  
2845  void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2846  {
2847  	enum event_trigger_type tt = ETT_NONE;
2848  	struct trace_event_file *file = fbuffer->trace_file;
2849  
2850  	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2851  			fbuffer->entry, &tt))
2852  		goto discard;
2853  
2854  	if (static_key_false(&tracepoint_printk_key.key))
2855  		output_printk(fbuffer);
2856  
2857  	if (static_branch_unlikely(&trace_event_exports_enabled))
2858  		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2859  
2860  	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2861  			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2862  
2863  discard:
2864  	if (tt)
2865  		event_triggers_post_call(file, tt);
2866  
2867  }
2868  EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2869  
2870  /*
2871   * Skip 3:
2872   *
2873   *   trace_buffer_unlock_commit_regs()
2874   *   trace_event_buffer_commit()
2875   *   trace_event_raw_event_xxx()
2876   */
2877  # define STACK_SKIP 3
2878  
2879  void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2880  				     struct trace_buffer *buffer,
2881  				     struct ring_buffer_event *event,
2882  				     unsigned int trace_ctx,
2883  				     struct pt_regs *regs)
2884  {
2885  	__buffer_unlock_commit(buffer, event);
2886  
2887  	/*
2888  	 * If regs is not set, then skip the necessary functions.
2889  	 * Note, we can still get here via blktrace, wakeup tracer
2890  	 * and mmiotrace, but that's ok if they lose a function or
2891  	 * two. They are not that meaningful.
2892  	 */
2893  	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2894  	ftrace_trace_userstack(tr, buffer, trace_ctx);
2895  }
2896  
2897  /*
2898   * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2899   */
2900  void
2901  trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2902  				   struct ring_buffer_event *event)
2903  {
2904  	__buffer_unlock_commit(buffer, event);
2905  }
2906  
2907  void
2908  trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2909  	       parent_ip, unsigned int trace_ctx)
2910  {
2911  	struct trace_event_call *call = &event_function;
2912  	struct trace_buffer *buffer = tr->array_buffer.buffer;
2913  	struct ring_buffer_event *event;
2914  	struct ftrace_entry *entry;
2915  
2916  	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2917  					    trace_ctx);
2918  	if (!event)
2919  		return;
2920  	entry	= ring_buffer_event_data(event);
2921  	entry->ip			= ip;
2922  	entry->parent_ip		= parent_ip;
2923  
2924  	if (!call_filter_check_discard(call, entry, buffer, event)) {
2925  		if (static_branch_unlikely(&trace_function_exports_enabled))
2926  			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2927  		__buffer_unlock_commit(buffer, event);
2928  	}
2929  }
2930  
2931  #ifdef CONFIG_STACKTRACE
2932  
2933  /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2934  #define FTRACE_KSTACK_NESTING	4
2935  
2936  #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2937  
2938  struct ftrace_stack {
2939  	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2940  };
2941  
2942  
2943  struct ftrace_stacks {
2944  	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2945  };
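
/*
 * Size sketch (editorial addition): assuming 4 KiB pages and 64-bit
 * longs, FTRACE_KSTACK_ENTRIES is 4096 / 4 = 1024 slots, so each
 * ftrace_stack is 8 KiB and the per-CPU ftrace_stacks (one stack per
 * nesting context: normal, softirq, irq, NMI) comes to 32 KiB.
 */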
2946  
2947  static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2948  static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2949  
2950  static void __ftrace_trace_stack(struct trace_buffer *buffer,
2951  				 unsigned int trace_ctx,
2952  				 int skip, struct pt_regs *regs)
2953  {
2954  	struct trace_event_call *call = &event_kernel_stack;
2955  	struct ring_buffer_event *event;
2956  	unsigned int size, nr_entries;
2957  	struct ftrace_stack *fstack;
2958  	struct stack_entry *entry;
2959  	int stackidx;
2960  
2961  	/*
2962  	 * Add one, for this function and the call to save_stack_trace()
2963  	 * If regs is set, then these functions will not be in the way.
2964  	 */
2965  #ifndef CONFIG_UNWINDER_ORC
2966  	if (!regs)
2967  		skip++;
2968  #endif
2969  
2970  	preempt_disable_notrace();
2971  
2972  	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2973  
2974  	/* This should never happen. If it does, yell once and skip */
2975  	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2976  		goto out;
2977  
2978  	/*
2979  	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2980  	 * interrupt will either see the value pre increment or post
2981  	 * increment. If the interrupt happens pre increment it will have
2982  	 * restored the counter when it returns.  We just need a barrier to
2983  	 * keep gcc from moving things around.
2984  	 */
2985  	barrier();
2986  
2987  	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2988  	size = ARRAY_SIZE(fstack->calls);
2989  
2990  	if (regs) {
2991  		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2992  						   size, skip);
2993  	} else {
2994  		nr_entries = stack_trace_save(fstack->calls, size, skip);
2995  	}
2996  
2997  	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2998  				    struct_size(entry, caller, nr_entries),
2999  				    trace_ctx);
3000  	if (!event)
3001  		goto out;
3002  	entry = ring_buffer_event_data(event);
3003  
3004  	entry->size = nr_entries;
3005  	memcpy(&entry->caller, fstack->calls,
3006  	       flex_array_size(entry, caller, nr_entries));
3007  
3008  	if (!call_filter_check_discard(call, entry, buffer, event))
3009  		__buffer_unlock_commit(buffer, event);
3010  
3011   out:
3012  	/* Again, don't let gcc optimize things here */
3013  	barrier();
3014  	__this_cpu_dec(ftrace_stack_reserve);
3015  	preempt_enable_notrace();
3016  
3017  }
3018  
3019  static inline void ftrace_trace_stack(struct trace_array *tr,
3020  				      struct trace_buffer *buffer,
3021  				      unsigned int trace_ctx,
3022  				      int skip, struct pt_regs *regs)
3023  {
3024  	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3025  		return;
3026  
3027  	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3028  }
3029  
3030  void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3031  		   int skip)
3032  {
3033  	struct trace_buffer *buffer = tr->array_buffer.buffer;
3034  
3035  	if (rcu_is_watching()) {
3036  		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3037  		return;
3038  	}
3039  
3040  	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3041  		return;
3042  
3043  	/*
3044  	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3045  	 * but if the above rcu_is_watching() failed, then the NMI
3046  	 * triggered someplace critical, and ct_irq_enter() should
3047  	 * not be called from NMI.
3048  	 */
3049  	if (unlikely(in_nmi()))
3050  		return;
3051  
3052  	ct_irq_enter_irqson();
3053  	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3054  	ct_irq_exit_irqson();
3055  }
3056  
3057  /**
3058   * trace_dump_stack - record a stack back trace in the trace buffer
3059   * @skip: Number of functions to skip (helper handlers)
3060   */
3061  void trace_dump_stack(int skip)
3062  {
3063  	if (tracing_disabled || tracing_selftest_running)
3064  		return;
3065  
3066  #ifndef CONFIG_UNWINDER_ORC
3067  	/* Skip 1 to skip this function. */
3068  	skip++;
3069  #endif
3070  	__ftrace_trace_stack(printk_trace->array_buffer.buffer,
3071  			     tracing_gen_ctx(), skip, NULL);
3072  }
3073  EXPORT_SYMBOL_GPL(trace_dump_stack);
3074  
3075  #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3076  static DEFINE_PER_CPU(int, user_stack_count);
3077  
3078  static void
3079  ftrace_trace_userstack(struct trace_array *tr,
3080  		       struct trace_buffer *buffer, unsigned int trace_ctx)
3081  {
3082  	struct trace_event_call *call = &event_user_stack;
3083  	struct ring_buffer_event *event;
3084  	struct userstack_entry *entry;
3085  
3086  	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3087  		return;
3088  
3089  	/*
3090  	 * NMIs cannot handle page faults, even with fixups.
3091  	 * The save user stack can (and often does) fault.
3092  	 */
3093  	if (unlikely(in_nmi()))
3094  		return;
3095  
3096  	/*
3097  	 * prevent recursion, since the user stack tracing may
3098  	 * trigger other kernel events.
3099  	 */
3100  	preempt_disable();
3101  	if (__this_cpu_read(user_stack_count))
3102  		goto out;
3103  
3104  	__this_cpu_inc(user_stack_count);
3105  
3106  	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3107  					    sizeof(*entry), trace_ctx);
3108  	if (!event)
3109  		goto out_drop_count;
3110  	entry	= ring_buffer_event_data(event);
3111  
3112  	entry->tgid		= current->tgid;
3113  	memset(&entry->caller, 0, sizeof(entry->caller));
3114  
3115  	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3116  	if (!call_filter_check_discard(call, entry, buffer, event))
3117  		__buffer_unlock_commit(buffer, event);
3118  
3119   out_drop_count:
3120  	__this_cpu_dec(user_stack_count);
3121   out:
3122  	preempt_enable();
3123  }
3124  #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3125  static void ftrace_trace_userstack(struct trace_array *tr,
3126  				   struct trace_buffer *buffer,
3127  				   unsigned int trace_ctx)
3128  {
3129  }
3130  #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3131  
3132  #endif /* CONFIG_STACKTRACE */
3133  
3134  static inline void
3135  func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3136  			  unsigned long long delta)
3137  {
3138  	entry->bottom_delta_ts = delta & U32_MAX;
3139  	entry->top_delta_ts = (delta >> 32);
3140  }
3141  
3142  void trace_last_func_repeats(struct trace_array *tr,
3143  			     struct trace_func_repeats *last_info,
3144  			     unsigned int trace_ctx)
3145  {
3146  	struct trace_buffer *buffer = tr->array_buffer.buffer;
3147  	struct func_repeats_entry *entry;
3148  	struct ring_buffer_event *event;
3149  	u64 delta;
3150  
3151  	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3152  					    sizeof(*entry), trace_ctx);
3153  	if (!event)
3154  		return;
3155  
3156  	delta = ring_buffer_event_time_stamp(buffer, event) -
3157  		last_info->ts_last_call;
3158  
3159  	entry = ring_buffer_event_data(event);
3160  	entry->ip = last_info->ip;
3161  	entry->parent_ip = last_info->parent_ip;
3162  	entry->count = last_info->count;
3163  	func_repeats_set_delta_ts(entry, delta);
3164  
3165  	__buffer_unlock_commit(buffer, event);
3166  }
3167  
3168  /* created for use with alloc_percpu */
3169  struct trace_buffer_struct {
3170  	int nesting;
3171  	char buffer[4][TRACE_BUF_SIZE];
3172  };
3173  
3174  static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3175  
3176  /*
3177   * This allows for lockless recording.  If we're nested too deeply, then
3178   * this returns NULL.
3179   */
3180  static char *get_trace_buf(void)
3181  {
3182  	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3183  
3184  	if (!trace_percpu_buffer || buffer->nesting >= 4)
3185  		return NULL;
3186  
3187  	buffer->nesting++;
3188  
3189  	/* Interrupts must see nesting incremented before we use the buffer */
3190  	barrier();
3191  	return &buffer->buffer[buffer->nesting - 1][0];
3192  }
3193  
3194  static void put_trace_buf(void)
3195  {
3196  	/* Don't let the decrement of nesting leak before this */
3197  	barrier();
3198  	this_cpu_dec(trace_percpu_buffer->nesting);
3199  }
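
/*
 * Nesting example (editorial addition): a trace_printk() in process
 * context takes buffer[0]; if an interrupt fires before the matching
 * put_trace_buf() and also calls trace_printk(), it gets buffer[1], an
 * NMI on top of that buffer[2], and so on. A fifth level of nesting
 * makes get_trace_buf() return NULL and the message is dropped rather
 * than corrupting a buffer that is still in use.
 */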
3200  
3201  static int alloc_percpu_trace_buffer(void)
3202  {
3203  	struct trace_buffer_struct __percpu *buffers;
3204  
3205  	if (trace_percpu_buffer)
3206  		return 0;
3207  
3208  	buffers = alloc_percpu(struct trace_buffer_struct);
3209  	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3210  		return -ENOMEM;
3211  
3212  	trace_percpu_buffer = buffers;
3213  	return 0;
3214  }
3215  
3216  static int buffers_allocated;
3217  
3218  void trace_printk_init_buffers(void)
3219  {
3220  	if (buffers_allocated)
3221  		return;
3222  
3223  	if (alloc_percpu_trace_buffer())
3224  		return;
3225  
3226  	/* trace_printk() is for debug use only. Don't use it in production. */
3227  
3228  	pr_warn("\n");
3229  	pr_warn("**********************************************************\n");
3230  	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3231  	pr_warn("**                                                      **\n");
3232  	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3233  	pr_warn("**                                                      **\n");
3234  	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3235  	pr_warn("** unsafe for production use.                           **\n");
3236  	pr_warn("**                                                      **\n");
3237  	pr_warn("** If you see this message and you are not debugging    **\n");
3238  	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3239  	pr_warn("**                                                      **\n");
3240  	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3241  	pr_warn("**********************************************************\n");
3242  
3243  	/* Expand the buffers to set size */
3244  	tracing_update_buffers(&global_trace);
3245  
3246  	buffers_allocated = 1;
3247  
3248  	/*
3249  	 * trace_printk_init_buffers() can be called by modules.
3250  	 * If that happens, then we need to start cmdline recording
3251  	 * directly here. If the global_trace.buffer is already
3252  	 * allocated here, then this was called by module code.
3253  	 */
3254  	if (global_trace.array_buffer.buffer)
3255  		tracing_start_cmdline_record();
3256  }
3257  EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3258  
3259  void trace_printk_start_comm(void)
3260  {
3261  	/* Start tracing comms if trace printk is set */
3262  	if (!buffers_allocated)
3263  		return;
3264  	tracing_start_cmdline_record();
3265  }
3266  
3267  static void trace_printk_start_stop_comm(int enabled)
3268  {
3269  	if (!buffers_allocated)
3270  		return;
3271  
3272  	if (enabled)
3273  		tracing_start_cmdline_record();
3274  	else
3275  		tracing_stop_cmdline_record();
3276  }
3277  
3278  /**
3279   * trace_vbprintk - write binary msg to tracing buffer
3280   * @ip:    The address of the caller
3281   * @fmt:   The string format to write to the buffer
3282   * @args:  Arguments for @fmt
3283   */
3284  int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3285  {
3286  	struct trace_event_call *call = &event_bprint;
3287  	struct ring_buffer_event *event;
3288  	struct trace_buffer *buffer;
3289  	struct trace_array *tr = READ_ONCE(printk_trace);
3290  	struct bprint_entry *entry;
3291  	unsigned int trace_ctx;
3292  	char *tbuffer;
3293  	int len = 0, size;
3294  
3295  	if (!printk_binsafe(tr))
3296  		return trace_vprintk(ip, fmt, args);
3297  
3298  	if (unlikely(tracing_selftest_running || tracing_disabled))
3299  		return 0;
3300  
3301  	/* Don't pollute graph traces with trace_vprintk internals */
3302  	pause_graph_tracing();
3303  
3304  	trace_ctx = tracing_gen_ctx();
3305  	preempt_disable_notrace();
3306  
3307  	tbuffer = get_trace_buf();
3308  	if (!tbuffer) {
3309  		len = 0;
3310  		goto out_nobuffer;
3311  	}
3312  
3313  	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3314  
3315  	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3316  		goto out_put;
3317  
3318  	size = sizeof(*entry) + sizeof(u32) * len;
3319  	buffer = tr->array_buffer.buffer;
3320  	ring_buffer_nest_start(buffer);
3321  	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3322  					    trace_ctx);
3323  	if (!event)
3324  		goto out;
3325  	entry = ring_buffer_event_data(event);
3326  	entry->ip			= ip;
3327  	entry->fmt			= fmt;
3328  
3329  	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3330  	if (!call_filter_check_discard(call, entry, buffer, event)) {
3331  		__buffer_unlock_commit(buffer, event);
3332  		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3333  	}
3334  
3335  out:
3336  	ring_buffer_nest_end(buffer);
3337  out_put:
3338  	put_trace_buf();
3339  
3340  out_nobuffer:
3341  	preempt_enable_notrace();
3342  	unpause_graph_tracing();
3343  
3344  	return len;
3345  }
3346  EXPORT_SYMBOL_GPL(trace_vbprintk);
3347  
3348  __printf(3, 0)
3349  static int
3350  __trace_array_vprintk(struct trace_buffer *buffer,
3351  		      unsigned long ip, const char *fmt, va_list args)
3352  {
3353  	struct trace_event_call *call = &event_print;
3354  	struct ring_buffer_event *event;
3355  	int len = 0, size;
3356  	struct print_entry *entry;
3357  	unsigned int trace_ctx;
3358  	char *tbuffer;
3359  
3360  	if (tracing_disabled)
3361  		return 0;
3362  
3363  	/* Don't pollute graph traces with trace_vprintk internals */
3364  	pause_graph_tracing();
3365  
3366  	trace_ctx = tracing_gen_ctx();
3367  	preempt_disable_notrace();
3368  
3369  
3370  	tbuffer = get_trace_buf();
3371  	if (!tbuffer) {
3372  		len = 0;
3373  		goto out_nobuffer;
3374  	}
3375  
3376  	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3377  
3378  	size = sizeof(*entry) + len + 1;
3379  	ring_buffer_nest_start(buffer);
3380  	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3381  					    trace_ctx);
3382  	if (!event)
3383  		goto out;
3384  	entry = ring_buffer_event_data(event);
3385  	entry->ip = ip;
3386  
3387  	memcpy(&entry->buf, tbuffer, len + 1);
3388  	if (!call_filter_check_discard(call, entry, buffer, event)) {
3389  		__buffer_unlock_commit(buffer, event);
3390  		ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3391  	}
3392  
3393  out:
3394  	ring_buffer_nest_end(buffer);
3395  	put_trace_buf();
3396  
3397  out_nobuffer:
3398  	preempt_enable_notrace();
3399  	unpause_graph_tracing();
3400  
3401  	return len;
3402  }
3403  
3404  __printf(3, 0)
3405  int trace_array_vprintk(struct trace_array *tr,
3406  			unsigned long ip, const char *fmt, va_list args)
3407  {
3408  	if (tracing_selftest_running && tr == &global_trace)
3409  		return 0;
3410  
3411  	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3412  }
3413  
3414  /**
3415   * trace_array_printk - Print a message to a specific instance
3416   * @tr: The instance trace_array descriptor
3417   * @ip: The instruction pointer that this is called from.
3418   * @fmt: The format to print (printf format)
3419   *
3420   * If a subsystem sets up its own instance, it may printk strings
3421   * into its tracing instance buffer using this function. Note, this
3422   * function will not write into the top level buffer (use
3423   * trace_printk() for that), as the top level buffer should only
3424   * contain events that can be individually disabled.
3425   * trace_printk() is only meant for debugging a kernel, and should
3426   * never be incorporated into normal use.
3427   *
3428   * trace_array_printk() can be used, as it will not add noise to the
3429   * top level tracing buffer.
3430   *
3431   * Note, trace_array_init_printk() must be called on @tr before this
3432   * can be used.
3433   */
3434  __printf(3, 0)
3435  int trace_array_printk(struct trace_array *tr,
3436  		       unsigned long ip, const char *fmt, ...)
3437  {
3438  	int ret;
3439  	va_list ap;
3440  
3441  	if (!tr)
3442  		return -ENOENT;
3443  
3444  	/* This is only allowed for created instances */
3445  	if (tr == &global_trace)
3446  		return 0;
3447  
3448  	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3449  		return 0;
3450  
3451  	va_start(ap, fmt);
3452  	ret = trace_array_vprintk(tr, ip, fmt, ap);
3453  	va_end(ap);
3454  	return ret;
3455  }
3456  EXPORT_SYMBOL_GPL(trace_array_printk);
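
/*
 * An illustrative sketch, assuming a subsystem already holds a pointer to its
 * own instance in "my_tr" (obtained elsewhere, e.g. via
 * trace_array_get_by_name(), whose signature varies across kernel versions):
 *
 *	trace_array_printk(my_tr, _THIS_IP_,
 *			   "reset done, status=%d\n", status);
 *
 * "my_tr" and "status" are placeholders for this example only, and the
 * instance must have been prepared with trace_array_init_printk() first
 * (see below).
 */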
3457  
3458  /**
3459   * trace_array_init_printk - Initialize buffers for trace_array_printk()
3460   * @tr: The trace array to initialize the buffers for
3461   *
3462   * As trace_array_printk() only writes into instances, calls to it are
3463   * OK to have in the kernel (unlike trace_printk()). This needs to be called
3464   * before trace_array_printk() can be used on a trace_array.
3465   */
3466  int trace_array_init_printk(struct trace_array *tr)
3467  {
3468  	if (!tr)
3469  		return -ENOENT;
3470  
3471  	/* This is only allowed for created instances */
3472  	if (tr == &global_trace)
3473  		return -EINVAL;
3474  
3475  	return alloc_percpu_trace_buffer();
3476  }
3477  EXPORT_SYMBOL_GPL(trace_array_init_printk);
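
/*
 * An illustrative sketch of the required init step: the per-CPU printk
 * buffers must be allocated once before the first trace_array_printk()
 * call on an instance, for example:
 *
 *	if (trace_array_init_printk(my_tr))
 *		pr_warn("could not init trace_array_printk() buffers\n");
 *
 * "my_tr" is a placeholder for an instance created by the subsystem.
 */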
3478  
3479  __printf(3, 4)
3480  int trace_array_printk_buf(struct trace_buffer *buffer,
3481  			   unsigned long ip, const char *fmt, ...)
3482  {
3483  	int ret;
3484  	va_list ap;
3485  
3486  	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3487  		return 0;
3488  
3489  	va_start(ap, fmt);
3490  	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3491  	va_end(ap);
3492  	return ret;
3493  }
3494  
3495  __printf(2, 0)
3496  int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3497  {
3498  	return trace_array_vprintk(printk_trace, ip, fmt, args);
3499  }
3500  EXPORT_SYMBOL_GPL(trace_vprintk);
3501  
3502  static void trace_iterator_increment(struct trace_iterator *iter)
3503  {
3504  	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3505  
3506  	iter->idx++;
3507  	if (buf_iter)
3508  		ring_buffer_iter_advance(buf_iter);
3509  }
3510  
3511  static struct trace_entry *
3512  peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3513  		unsigned long *lost_events)
3514  {
3515  	struct ring_buffer_event *event;
3516  	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3517  
3518  	if (buf_iter) {
3519  		event = ring_buffer_iter_peek(buf_iter, ts);
3520  		if (lost_events)
3521  			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3522  				(unsigned long)-1 : 0;
3523  	} else {
3524  		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3525  					 lost_events);
3526  	}
3527  
3528  	if (event) {
3529  		iter->ent_size = ring_buffer_event_length(event);
3530  		return ring_buffer_event_data(event);
3531  	}
3532  	iter->ent_size = 0;
3533  	return NULL;
3534  }
3535  
3536  static struct trace_entry *
3537  __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3538  		  unsigned long *missing_events, u64 *ent_ts)
3539  {
3540  	struct trace_buffer *buffer = iter->array_buffer->buffer;
3541  	struct trace_entry *ent, *next = NULL;
3542  	unsigned long lost_events = 0, next_lost = 0;
3543  	int cpu_file = iter->cpu_file;
3544  	u64 next_ts = 0, ts;
3545  	int next_cpu = -1;
3546  	int next_size = 0;
3547  	int cpu;
3548  
3549  	/*
3550  	 * If we are in a per_cpu trace file, don't bother iterating over
3551  	 * all CPUs; peek at that CPU directly.
3552  	 */
3553  	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3554  		if (ring_buffer_empty_cpu(buffer, cpu_file))
3555  			return NULL;
3556  		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3557  		if (ent_cpu)
3558  			*ent_cpu = cpu_file;
3559  
3560  		return ent;
3561  	}
3562  
3563  	for_each_tracing_cpu(cpu) {
3564  
3565  		if (ring_buffer_empty_cpu(buffer, cpu))
3566  			continue;
3567  
3568  		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3569  
3570  		/*
3571  		 * Pick the entry with the smallest timestamp:
3572  		 */
3573  		if (ent && (!next || ts < next_ts)) {
3574  			next = ent;
3575  			next_cpu = cpu;
3576  			next_ts = ts;
3577  			next_lost = lost_events;
3578  			next_size = iter->ent_size;
3579  		}
3580  	}
3581  
3582  	iter->ent_size = next_size;
3583  
3584  	if (ent_cpu)
3585  		*ent_cpu = next_cpu;
3586  
3587  	if (ent_ts)
3588  		*ent_ts = next_ts;
3589  
3590  	if (missing_events)
3591  		*missing_events = next_lost;
3592  
3593  	return next;
3594  }
3595  
3596  #define STATIC_FMT_BUF_SIZE	128
3597  static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3598  
3599  char *trace_iter_expand_format(struct trace_iterator *iter)
3600  {
3601  	char *tmp;
3602  
3603  	/*
3604  	 * iter->tr is NULL when used with tp_printk, in which case this
3605  	 * can get called where it is not safe to call krealloc().
3606  	 */
3607  	if (!iter->tr || iter->fmt == static_fmt_buf)
3608  		return NULL;
3609  
3610  	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3611  		       GFP_KERNEL);
3612  	if (tmp) {
3613  		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3614  		iter->fmt = tmp;
3615  	}
3616  
3617  	return tmp;
3618  }
3619  
3620  /* Returns true if the string is safe to dereference from an event */
3621  static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3622  			   bool star, int len)
3623  {
3624  	unsigned long addr = (unsigned long)str;
3625  	struct trace_event *trace_event;
3626  	struct trace_event_call *event;
3627  
3628  	/* Ignore strings with no length */
3629  	if (star && !len)
3630  		return true;
3631  
3632  	/* OK if part of the event data */
3633  	if ((addr >= (unsigned long)iter->ent) &&
3634  	    (addr < (unsigned long)iter->ent + iter->ent_size))
3635  		return true;
3636  
3637  	/* OK if part of the temp seq buffer */
3638  	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3639  	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3640  		return true;
3641  
3642  	/* Core rodata cannot be freed */
3643  	if (is_kernel_rodata(addr))
3644  		return true;
3645  
3646  	if (trace_is_tracepoint_string(str))
3647  		return true;
3648  
3649  	/*
3650  	 * Now this could be a module event, referencing core module
3651  	 * data, which is OK.
3652  	 */
3653  	if (!iter->ent)
3654  		return false;
3655  
3656  	trace_event = ftrace_find_event(iter->ent->type);
3657  	if (!trace_event)
3658  		return false;
3659  
3660  	event = container_of(trace_event, struct trace_event_call, event);
3661  	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3662  		return false;
3663  
3664  	/* Would rather have rodata, but this will suffice */
3665  	if (within_module_core(addr, event->module))
3666  		return true;
3667  
3668  	return false;
3669  }
3670  
3671  static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3672  
3673  static int test_can_verify_check(const char *fmt, ...)
3674  {
3675  	char buf[16];
3676  	va_list ap;
3677  	int ret;
3678  
3679  	/*
3680  	 * The verifier depends on vsnprintf() modifying the va_list that
3681  	 * is passed to it, which is the case when it is passed by reference.
3682  	 * Some architectures (like x86_32) pass it by value, which means
3683  	 * that vsnprintf() does not modify the caller's va_list, and the
3684  	 * verifier would then need to understand every conversion that
3685  	 * vsnprintf can consume. If the va_list is passed by value, the
3686  	 * verifier is disabled.
3687  	 */
3688  	va_start(ap, fmt);
3689  	vsnprintf(buf, 16, "%d", ap);
3690  	ret = va_arg(ap, int);
3691  	va_end(ap);
3692  
3693  	return ret;
3694  }
3695  
3696  static void test_can_verify(void)
3697  {
3698  	if (!test_can_verify_check("%d %d", 0, 1)) {
3699  		pr_info("trace event string verifier disabled\n");
3700  		static_branch_inc(&trace_no_verify);
3701  	}
3702  }
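
/*
 * A worked reading of the check above: with "%d %d" and arguments 0 and 1,
 * vsnprintf() consumes the first argument. On architectures that pass
 * va_list by reference, the subsequent va_arg() then returns 1, the helper
 * returns non-zero and the verifier stays enabled. Where va_list is passed
 * by value, va_arg() re-reads the first argument (0), the helper returns 0
 * and the verifier is disabled.
 */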
3703  
3704  /**
3705   * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3706   * @iter: The iterator that holds the seq buffer and the event being printed
3707   * @fmt: The format used to print the event
3708   * @ap: The va_list holding the data to print from @fmt.
3709   *
3710   * This writes the data into the @iter->seq buffer using the data from
3711   * @fmt and @ap. If the format has a %s, then the source of the string
3712   * is examined to make sure it is safe to print, otherwise it will
3713   * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3714   * pointer.
3715   */
3716  void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3717  			 va_list ap)
3718  {
3719  	long text_delta = 0;
3720  	long data_delta = 0;
3721  	const char *p = fmt;
3722  	const char *str;
3723  	bool good;
3724  	int i, j;
3725  
3726  	if (WARN_ON_ONCE(!fmt))
3727  		return;
3728  
3729  	if (static_branch_unlikely(&trace_no_verify))
3730  		goto print;
3731  
3732  	/*
3733  	 * When the kernel is booted with the tp_printk command line
3734  	 * parameter, trace events go directly through to printk().
3735  	 * That path is also checked by this function, but it does not
3736  	 * have an associated trace_array (tr).
3737  	 */
3738  	if (iter->tr) {
3739  		text_delta = iter->tr->text_delta;
3740  		data_delta = iter->tr->data_delta;
3741  	}
3742  
3743  	/* Don't bother checking when doing an ftrace_dump() */
3744  	if (iter->fmt == static_fmt_buf)
3745  		goto print;
3746  
3747  	while (*p) {
3748  		bool star = false;
3749  		int len = 0;
3750  
3751  		j = 0;
3752  
3753  		/*
3754  		 * We only care about %s and variants
3755  		 * as well as %p[sS] if delta is non-zero
3756  		 */
3757  		for (i = 0; p[i]; i++) {
3758  			if (i + 1 >= iter->fmt_size) {
3759  				/*
3760  				 * If we can't expand the copy buffer,
3761  				 * just print it.
3762  				 */
3763  				if (!trace_iter_expand_format(iter))
3764  					goto print;
3765  			}
3766  
3767  			if (p[i] == '\\' && p[i+1]) {
3768  				i++;
3769  				continue;
3770  			}
3771  			if (p[i] == '%') {
3772  				/* Need to test cases like %08.*s */
3773  				for (j = 1; p[i+j]; j++) {
3774  					if (isdigit(p[i+j]) ||
3775  					    p[i+j] == '.')
3776  						continue;
3777  					if (p[i+j] == '*') {
3778  						star = true;
3779  						continue;
3780  					}
3781  					break;
3782  				}
3783  				if (p[i+j] == 's')
3784  					break;
3785  
3786  				if (text_delta && p[i+1] == 'p' &&
3787  				    ((p[i+2] == 's' || p[i+2] == 'S')))
3788  					break;
3789  
3790  				star = false;
3791  			}
3792  			j = 0;
3793  		}
3794  		/* If no %s found then just print normally */
3795  		if (!p[i])
3796  			break;
3797  
3798  		/* Copy up to the %s, and print that */
3799  		strncpy(iter->fmt, p, i);
3800  		iter->fmt[i] = '\0';
3801  		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3802  
3803  		/* Add delta to %pS pointers */
3804  		if (p[i+1] == 'p') {
3805  			unsigned long addr;
3806  			char fmt[4];
3807  
3808  			fmt[0] = '%';
3809  			fmt[1] = 'p';
3810  			fmt[2] = p[i+2]; /* Either %ps or %pS */
3811  			fmt[3] = '\0';
3812  
3813  			addr = va_arg(ap, unsigned long);
3814  			addr += text_delta;
3815  			trace_seq_printf(&iter->seq, fmt, (void *)addr);
3816  
3817  			p += i + 3;
3818  			continue;
3819  		}
3820  
3821  		/*
3822  		 * If iter->seq is full, the above call no longer guarantees
3823  		 * that ap is in sync with fmt processing, and further calls
3824  		 * to va_arg() can return wrong positional arguments.
3825  		 *
3826  		 * Ensure that ap is no longer used in this case.
3827  		 */
3828  		if (iter->seq.full) {
3829  			p = "";
3830  			break;
3831  		}
3832  
3833  		if (star)
3834  			len = va_arg(ap, int);
3835  
3836  		/* The ap now points to the string data of the %s */
3837  		str = va_arg(ap, const char *);
3838  
3839  		good = trace_safe_str(iter, str, star, len);
3840  
3841  		/* Could be from the last boot */
3842  		if (data_delta && !good) {
3843  			str += data_delta;
3844  			good = trace_safe_str(iter, str, star, len);
3845  		}
3846  
3847  		/*
3848  		 * If you hit this warning, it is likely that the
3849  		 * trace event in question used %s on a string that
3850  		 * was saved at the time of the event, but may not be
3851  		 * around when the trace is read. Use __string(),
3852  		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3853  		 * instead. See samples/trace_events/trace-events-sample.h
3854  		 * for reference.
3855  		 */
3856  		if (WARN_ONCE(!good, "fmt: '%s' current_buffer: '%s'",
3857  			      fmt, seq_buf_str(&iter->seq.seq))) {
3858  			int ret;
3859  
3860  			/* Try to safely read the string */
3861  			if (star) {
3862  				if (len + 1 > iter->fmt_size)
3863  					len = iter->fmt_size - 1;
3864  				if (len < 0)
3865  					len = 0;
3866  				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3867  				iter->fmt[len] = 0;
3868  				star = false;
3869  			} else {
3870  				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3871  								  iter->fmt_size);
3872  			}
3873  			if (ret < 0)
3874  				trace_seq_printf(&iter->seq, "(0x%px)", str);
3875  			else
3876  				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3877  						 str, iter->fmt);
3878  			str = "[UNSAFE-MEMORY]";
3879  			strcpy(iter->fmt, "%s");
3880  		} else {
3881  			strncpy(iter->fmt, p + i, j + 1);
3882  			iter->fmt[j+1] = '\0';
3883  		}
3884  		if (star)
3885  			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3886  		else
3887  			trace_seq_printf(&iter->seq, iter->fmt, str);
3888  
3889  		p += i + j + 1;
3890  	}
3891   print:
3892  	if (*p)
3893  		trace_seq_vprintf(&iter->seq, p, ap);
3894  }
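
/*
 * An illustrative sketch of the failure mode warned about above: an event
 * that records only a pointer, e.g.
 *
 *	TP_printk("comm=%s", __entry->comm_ptr)
 *
 * may dereference freed memory by the time the trace is read. The safe
 * pattern copies the string into the event itself:
 *
 *	TP_STRUCT__entry(__string(comm, src))
 *	TP_fast_assign(__assign_str(comm))
 *	TP_printk("comm=%s", __get_str(comm))
 *
 * (older kernels spell the assignment as __assign_str(comm, src))
 *
 * "comm_ptr" and "src" are placeholders; see
 * samples/trace_events/trace-events-sample.h for the real reference.
 */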
3895  
3896  const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3897  {
3898  	const char *p, *new_fmt;
3899  	char *q;
3900  
3901  	if (WARN_ON_ONCE(!fmt))
3902  		return fmt;
3903  
3904  	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3905  		return fmt;
3906  
3907  	p = fmt;
3908  	new_fmt = q = iter->fmt;
3909  	while (*p) {
3910  		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3911  			if (!trace_iter_expand_format(iter))
3912  				return fmt;
3913  
3914  			q += iter->fmt - new_fmt;
3915  			new_fmt = iter->fmt;
3916  		}
3917  
3918  		*q++ = *p++;
3919  
3920  		/* Replace %p with %px */
3921  		if (p[-1] == '%') {
3922  			if (p[0] == '%') {
3923  				*q++ = *p++;
3924  			} else if (p[0] == 'p' && !isalnum(p[1])) {
3925  				*q++ = *p++;
3926  				*q++ = 'x';
3927  			}
3928  		}
3929  	}
3930  	*q = '\0';
3931  
3932  	return new_fmt;
3933  }
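
/*
 * A small worked example of the rewrite above: with the hash-ptr option
 * cleared, a format such as "ptr=%p flags=%d" is emitted as
 * "ptr=%px flags=%d". A literal "%%p" is skipped by the "%%" handling, and
 * extended forms such as "%pS" are left untouched because the character
 * following 'p' is alphanumeric.
 */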
3934  
3935  #define STATIC_TEMP_BUF_SIZE	128
3936  static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3937  
3938  /* Find the next real entry, without updating the iterator itself */
3939  struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3940  					  int *ent_cpu, u64 *ent_ts)
3941  {
3942  	/* __find_next_entry will reset ent_size */
3943  	int ent_size = iter->ent_size;
3944  	struct trace_entry *entry;
3945  
3946  	/*
3947  	 * If called from ftrace_dump(), then the iter->temp buffer
3948  	 * will be the static_temp_buf and not created from kmalloc.
3949  	 * If the entry size is greater than the buffer, we cannot
3950  	 * save it. Just return NULL in that case. This is only
3951  	 * used to add markers when two consecutive events' time
3952  	 * stamps have a large delta. See trace_print_lat_context().
3953  	 */
3954  	if (iter->temp == static_temp_buf &&
3955  	    STATIC_TEMP_BUF_SIZE < ent_size)
3956  		return NULL;
3957  
3958  	/*
3959  	 * __find_next_entry() may call peek_next_entry(), which may call
3960  	 * ring_buffer_peek(), which can make the contents of iter->ent
3961  	 * undefined. Copy iter->ent now.
3962  	 */
3963  	if (iter->ent && iter->ent != iter->temp) {
3964  		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3965  		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3966  			void *temp;
3967  			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3968  			if (!temp)
3969  				return NULL;
3970  			kfree(iter->temp);
3971  			iter->temp = temp;
3972  			iter->temp_size = iter->ent_size;
3973  		}
3974  		memcpy(iter->temp, iter->ent, iter->ent_size);
3975  		iter->ent = iter->temp;
3976  	}
3977  	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3978  	/* Put back the original ent_size */
3979  	iter->ent_size = ent_size;
3980  
3981  	return entry;
3982  }
3983  
3984  /* Find the next real entry, and increment the iterator to the next entry */
3985  void *trace_find_next_entry_inc(struct trace_iterator *iter)
3986  {
3987  	iter->ent = __find_next_entry(iter, &iter->cpu,
3988  				      &iter->lost_events, &iter->ts);
3989  
3990  	if (iter->ent)
3991  		trace_iterator_increment(iter);
3992  
3993  	return iter->ent ? iter : NULL;
3994  }
3995  
3996  static void trace_consume(struct trace_iterator *iter)
3997  {
3998  	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3999  			    &iter->lost_events);
4000  }
4001  
4002  static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4003  {
4004  	struct trace_iterator *iter = m->private;
4005  	int i = (int)*pos;
4006  	void *ent;
4007  
4008  	WARN_ON_ONCE(iter->leftover);
4009  
4010  	(*pos)++;
4011  
4012  	/* can't go backwards */
4013  	if (iter->idx > i)
4014  		return NULL;
4015  
4016  	if (iter->idx < 0)
4017  		ent = trace_find_next_entry_inc(iter);
4018  	else
4019  		ent = iter;
4020  
4021  	while (ent && iter->idx < i)
4022  		ent = trace_find_next_entry_inc(iter);
4023  
4024  	iter->pos = *pos;
4025  
4026  	return ent;
4027  }
4028  
4029  void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4030  {
4031  	struct ring_buffer_iter *buf_iter;
4032  	unsigned long entries = 0;
4033  	u64 ts;
4034  
4035  	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4036  
4037  	buf_iter = trace_buffer_iter(iter, cpu);
4038  	if (!buf_iter)
4039  		return;
4040  
4041  	ring_buffer_iter_reset(buf_iter);
4042  
4043  	/*
4044  	 * With the max latency tracers, it can happen that a reset
4045  	 * never took place on a CPU. This is evident from the timestamp
4046  	 * being before the start of the buffer.
4047  	 */
4048  	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4049  		if (ts >= iter->array_buffer->time_start)
4050  			break;
4051  		entries++;
4052  		ring_buffer_iter_advance(buf_iter);
4053  		/* This could be a big loop */
4054  		cond_resched();
4055  	}
4056  
4057  	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4058  }
4059  
4060  /*
4061   * The current tracer is copied to avoid holding a global lock
4062   * all around.
4063   */
4064  static void *s_start(struct seq_file *m, loff_t *pos)
4065  {
4066  	struct trace_iterator *iter = m->private;
4067  	struct trace_array *tr = iter->tr;
4068  	int cpu_file = iter->cpu_file;
4069  	void *p = NULL;
4070  	loff_t l = 0;
4071  	int cpu;
4072  
4073  	mutex_lock(&trace_types_lock);
4074  	if (unlikely(tr->current_trace != iter->trace)) {
4075  		/* Close iter->trace before switching to the new current tracer */
4076  		if (iter->trace->close)
4077  			iter->trace->close(iter);
4078  		iter->trace = tr->current_trace;
4079  		/* Reopen the new current tracer */
4080  		if (iter->trace->open)
4081  			iter->trace->open(iter);
4082  	}
4083  	mutex_unlock(&trace_types_lock);
4084  
4085  #ifdef CONFIG_TRACER_MAX_TRACE
4086  	if (iter->snapshot && iter->trace->use_max_tr)
4087  		return ERR_PTR(-EBUSY);
4088  #endif
4089  
4090  	if (*pos != iter->pos) {
4091  		iter->ent = NULL;
4092  		iter->cpu = 0;
4093  		iter->idx = -1;
4094  
4095  		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4096  			for_each_tracing_cpu(cpu)
4097  				tracing_iter_reset(iter, cpu);
4098  		} else
4099  			tracing_iter_reset(iter, cpu_file);
4100  
4101  		iter->leftover = 0;
4102  		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4103  			;
4104  
4105  	} else {
4106  		/*
4107  		 * If we overflowed the seq_file before, then we want
4108  		 * to just reuse the trace_seq buffer again.
4109  		 */
4110  		if (iter->leftover)
4111  			p = iter;
4112  		else {
4113  			l = *pos - 1;
4114  			p = s_next(m, p, &l);
4115  		}
4116  	}
4117  
4118  	trace_event_read_lock();
4119  	trace_access_lock(cpu_file);
4120  	return p;
4121  }
4122  
4123  static void s_stop(struct seq_file *m, void *p)
4124  {
4125  	struct trace_iterator *iter = m->private;
4126  
4127  #ifdef CONFIG_TRACER_MAX_TRACE
4128  	if (iter->snapshot && iter->trace->use_max_tr)
4129  		return;
4130  #endif
4131  
4132  	trace_access_unlock(iter->cpu_file);
4133  	trace_event_read_unlock();
4134  }
4135  
4136  static void
4137  get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4138  		      unsigned long *entries, int cpu)
4139  {
4140  	unsigned long count;
4141  
4142  	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4143  	/*
4144  	 * If this buffer has skipped entries, then we hold all
4145  	 * entries for the trace and we need to ignore the
4146  	 * ones before the time stamp.
4147  	 */
4148  	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4149  		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4150  		/* total is the same as the entries */
4151  		*total = count;
4152  	} else
4153  		*total = count +
4154  			ring_buffer_overrun_cpu(buf->buffer, cpu);
4155  	*entries = count;
4156  }
4157  
4158  static void
4159  get_total_entries(struct array_buffer *buf,
4160  		  unsigned long *total, unsigned long *entries)
4161  {
4162  	unsigned long t, e;
4163  	int cpu;
4164  
4165  	*total = 0;
4166  	*entries = 0;
4167  
4168  	for_each_tracing_cpu(cpu) {
4169  		get_total_entries_cpu(buf, &t, &e, cpu);
4170  		*total += t;
4171  		*entries += e;
4172  	}
4173  }
4174  
4175  unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4176  {
4177  	unsigned long total, entries;
4178  
4179  	if (!tr)
4180  		tr = &global_trace;
4181  
4182  	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4183  
4184  	return entries;
4185  }
4186  
4187  unsigned long trace_total_entries(struct trace_array *tr)
4188  {
4189  	unsigned long total, entries;
4190  
4191  	if (!tr)
4192  		tr = &global_trace;
4193  
4194  	get_total_entries(&tr->array_buffer, &total, &entries);
4195  
4196  	return entries;
4197  }
4198  
4199  static void print_lat_help_header(struct seq_file *m)
4200  {
4201  	seq_puts(m, "#                    _------=> CPU#            \n"
4202  		    "#                   / _-----=> irqs-off/BH-disabled\n"
4203  		    "#                  | / _----=> need-resched    \n"
4204  		    "#                  || / _---=> hardirq/softirq \n"
4205  		    "#                  ||| / _--=> preempt-depth   \n"
4206  		    "#                  |||| / _-=> migrate-disable \n"
4207  		    "#                  ||||| /     delay           \n"
4208  		    "#  cmd     pid     |||||| time  |   caller     \n"
4209  		    "#     \\   /        ||||||  \\    |    /       \n");
4210  }
4211  
4212  static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4213  {
4214  	unsigned long total;
4215  	unsigned long entries;
4216  
4217  	get_total_entries(buf, &total, &entries);
4218  	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4219  		   entries, total, num_online_cpus());
4220  	seq_puts(m, "#\n");
4221  }
4222  
4223  static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4224  				   unsigned int flags)
4225  {
4226  	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4227  
4228  	print_event_info(buf, m);
4229  
4230  	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4231  	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4232  }
4233  
4234  static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4235  				       unsigned int flags)
4236  {
4237  	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4238  	static const char space[] = "            ";
4239  	int prec = tgid ? 12 : 2;
4240  
4241  	print_event_info(buf, m);
4242  
4243  	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4244  	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4245  	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4246  	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4247  	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4248  	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4249  	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4250  	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4251  }
4252  
4253  void
4254  print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4255  {
4256  	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4257  	struct array_buffer *buf = iter->array_buffer;
4258  	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4259  	struct tracer *type = iter->trace;
4260  	unsigned long entries;
4261  	unsigned long total;
4262  	const char *name = type->name;
4263  
4264  	get_total_entries(buf, &total, &entries);
4265  
4266  	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4267  		   name, init_utsname()->release);
4268  	seq_puts(m, "# -----------------------------------"
4269  		 "---------------------------------\n");
4270  	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4271  		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4272  		   nsecs_to_usecs(data->saved_latency),
4273  		   entries,
4274  		   total,
4275  		   buf->cpu,
4276  		   preempt_model_none()      ? "server" :
4277  		   preempt_model_voluntary() ? "desktop" :
4278  		   preempt_model_full()      ? "preempt" :
4279  		   preempt_model_rt()        ? "preempt_rt" :
4280  		   "unknown",
4281  		   /* These are reserved for later use */
4282  		   0, 0, 0, 0);
4283  #ifdef CONFIG_SMP
4284  	seq_printf(m, " #P:%d)\n", num_online_cpus());
4285  #else
4286  	seq_puts(m, ")\n");
4287  #endif
4288  	seq_puts(m, "#    -----------------\n");
4289  	seq_printf(m, "#    | task: %.16s-%d "
4290  		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4291  		   data->comm, data->pid,
4292  		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4293  		   data->policy, data->rt_priority);
4294  	seq_puts(m, "#    -----------------\n");
4295  
4296  	if (data->critical_start) {
4297  		seq_puts(m, "#  => started at: ");
4298  		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4299  		trace_print_seq(m, &iter->seq);
4300  		seq_puts(m, "\n#  => ended at:   ");
4301  		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4302  		trace_print_seq(m, &iter->seq);
4303  		seq_puts(m, "\n#\n");
4304  	}
4305  
4306  	seq_puts(m, "#\n");
4307  }
4308  
4309  static void test_cpu_buff_start(struct trace_iterator *iter)
4310  {
4311  	struct trace_seq *s = &iter->seq;
4312  	struct trace_array *tr = iter->tr;
4313  
4314  	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4315  		return;
4316  
4317  	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4318  		return;
4319  
4320  	if (cpumask_available(iter->started) &&
4321  	    cpumask_test_cpu(iter->cpu, iter->started))
4322  		return;
4323  
4324  	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4325  		return;
4326  
4327  	if (cpumask_available(iter->started))
4328  		cpumask_set_cpu(iter->cpu, iter->started);
4329  
4330  	/* Don't print started cpu buffer for the first entry of the trace */
4331  	if (iter->idx > 1)
4332  		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4333  				iter->cpu);
4334  }
4335  
4336  static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4337  {
4338  	struct trace_array *tr = iter->tr;
4339  	struct trace_seq *s = &iter->seq;
4340  	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4341  	struct trace_entry *entry;
4342  	struct trace_event *event;
4343  
4344  	entry = iter->ent;
4345  
4346  	test_cpu_buff_start(iter);
4347  
4348  	event = ftrace_find_event(entry->type);
4349  
4350  	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4351  		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4352  			trace_print_lat_context(iter);
4353  		else
4354  			trace_print_context(iter);
4355  	}
4356  
4357  	if (trace_seq_has_overflowed(s))
4358  		return TRACE_TYPE_PARTIAL_LINE;
4359  
4360  	if (event) {
4361  		if (tr->trace_flags & TRACE_ITER_FIELDS)
4362  			return print_event_fields(iter, event);
4363  		return event->funcs->trace(iter, sym_flags, event);
4364  	}
4365  
4366  	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4367  
4368  	return trace_handle_return(s);
4369  }
4370  
4371  static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4372  {
4373  	struct trace_array *tr = iter->tr;
4374  	struct trace_seq *s = &iter->seq;
4375  	struct trace_entry *entry;
4376  	struct trace_event *event;
4377  
4378  	entry = iter->ent;
4379  
4380  	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4381  		trace_seq_printf(s, "%d %d %llu ",
4382  				 entry->pid, iter->cpu, iter->ts);
4383  
4384  	if (trace_seq_has_overflowed(s))
4385  		return TRACE_TYPE_PARTIAL_LINE;
4386  
4387  	event = ftrace_find_event(entry->type);
4388  	if (event)
4389  		return event->funcs->raw(iter, 0, event);
4390  
4391  	trace_seq_printf(s, "%d ?\n", entry->type);
4392  
4393  	return trace_handle_return(s);
4394  }
4395  
4396  static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4397  {
4398  	struct trace_array *tr = iter->tr;
4399  	struct trace_seq *s = &iter->seq;
4400  	unsigned char newline = '\n';
4401  	struct trace_entry *entry;
4402  	struct trace_event *event;
4403  
4404  	entry = iter->ent;
4405  
4406  	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4407  		SEQ_PUT_HEX_FIELD(s, entry->pid);
4408  		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4409  		SEQ_PUT_HEX_FIELD(s, iter->ts);
4410  		if (trace_seq_has_overflowed(s))
4411  			return TRACE_TYPE_PARTIAL_LINE;
4412  	}
4413  
4414  	event = ftrace_find_event(entry->type);
4415  	if (event) {
4416  		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4417  		if (ret != TRACE_TYPE_HANDLED)
4418  			return ret;
4419  	}
4420  
4421  	SEQ_PUT_FIELD(s, newline);
4422  
4423  	return trace_handle_return(s);
4424  }
4425  
4426  static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4427  {
4428  	struct trace_array *tr = iter->tr;
4429  	struct trace_seq *s = &iter->seq;
4430  	struct trace_entry *entry;
4431  	struct trace_event *event;
4432  
4433  	entry = iter->ent;
4434  
4435  	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4436  		SEQ_PUT_FIELD(s, entry->pid);
4437  		SEQ_PUT_FIELD(s, iter->cpu);
4438  		SEQ_PUT_FIELD(s, iter->ts);
4439  		if (trace_seq_has_overflowed(s))
4440  			return TRACE_TYPE_PARTIAL_LINE;
4441  	}
4442  
4443  	event = ftrace_find_event(entry->type);
4444  	return event ? event->funcs->binary(iter, 0, event) :
4445  		TRACE_TYPE_HANDLED;
4446  }
4447  
4448  int trace_empty(struct trace_iterator *iter)
4449  {
4450  	struct ring_buffer_iter *buf_iter;
4451  	int cpu;
4452  
4453  	/* If we are looking at one CPU buffer, only check that one */
4454  	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4455  		cpu = iter->cpu_file;
4456  		buf_iter = trace_buffer_iter(iter, cpu);
4457  		if (buf_iter) {
4458  			if (!ring_buffer_iter_empty(buf_iter))
4459  				return 0;
4460  		} else {
4461  			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4462  				return 0;
4463  		}
4464  		return 1;
4465  	}
4466  
4467  	for_each_tracing_cpu(cpu) {
4468  		buf_iter = trace_buffer_iter(iter, cpu);
4469  		if (buf_iter) {
4470  			if (!ring_buffer_iter_empty(buf_iter))
4471  				return 0;
4472  		} else {
4473  			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4474  				return 0;
4475  		}
4476  	}
4477  
4478  	return 1;
4479  }
4480  
4481  /*  Called with trace_event_read_lock() held. */
4482  enum print_line_t print_trace_line(struct trace_iterator *iter)
4483  {
4484  	struct trace_array *tr = iter->tr;
4485  	unsigned long trace_flags = tr->trace_flags;
4486  	enum print_line_t ret;
4487  
4488  	if (iter->lost_events) {
4489  		if (iter->lost_events == (unsigned long)-1)
4490  			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4491  					 iter->cpu);
4492  		else
4493  			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4494  					 iter->cpu, iter->lost_events);
4495  		if (trace_seq_has_overflowed(&iter->seq))
4496  			return TRACE_TYPE_PARTIAL_LINE;
4497  	}
4498  
4499  	if (iter->trace && iter->trace->print_line) {
4500  		ret = iter->trace->print_line(iter);
4501  		if (ret != TRACE_TYPE_UNHANDLED)
4502  			return ret;
4503  	}
4504  
4505  	if (iter->ent->type == TRACE_BPUTS &&
4506  			trace_flags & TRACE_ITER_PRINTK &&
4507  			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4508  		return trace_print_bputs_msg_only(iter);
4509  
4510  	if (iter->ent->type == TRACE_BPRINT &&
4511  			trace_flags & TRACE_ITER_PRINTK &&
4512  			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4513  		return trace_print_bprintk_msg_only(iter);
4514  
4515  	if (iter->ent->type == TRACE_PRINT &&
4516  			trace_flags & TRACE_ITER_PRINTK &&
4517  			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4518  		return trace_print_printk_msg_only(iter);
4519  
4520  	if (trace_flags & TRACE_ITER_BIN)
4521  		return print_bin_fmt(iter);
4522  
4523  	if (trace_flags & TRACE_ITER_HEX)
4524  		return print_hex_fmt(iter);
4525  
4526  	if (trace_flags & TRACE_ITER_RAW)
4527  		return print_raw_fmt(iter);
4528  
4529  	return print_trace_fmt(iter);
4530  }
4531  
4532  void trace_latency_header(struct seq_file *m)
4533  {
4534  	struct trace_iterator *iter = m->private;
4535  	struct trace_array *tr = iter->tr;
4536  
4537  	/* print nothing if the buffers are empty */
4538  	if (trace_empty(iter))
4539  		return;
4540  
4541  	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4542  		print_trace_header(m, iter);
4543  
4544  	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4545  		print_lat_help_header(m);
4546  }
4547  
4548  void trace_default_header(struct seq_file *m)
4549  {
4550  	struct trace_iterator *iter = m->private;
4551  	struct trace_array *tr = iter->tr;
4552  	unsigned long trace_flags = tr->trace_flags;
4553  
4554  	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4555  		return;
4556  
4557  	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4558  		/* print nothing if the buffers are empty */
4559  		if (trace_empty(iter))
4560  			return;
4561  		print_trace_header(m, iter);
4562  		if (!(trace_flags & TRACE_ITER_VERBOSE))
4563  			print_lat_help_header(m);
4564  	} else {
4565  		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4566  			if (trace_flags & TRACE_ITER_IRQ_INFO)
4567  				print_func_help_header_irq(iter->array_buffer,
4568  							   m, trace_flags);
4569  			else
4570  				print_func_help_header(iter->array_buffer, m,
4571  						       trace_flags);
4572  		}
4573  	}
4574  }
4575  
4576  static void test_ftrace_alive(struct seq_file *m)
4577  {
4578  	if (!ftrace_is_dead())
4579  		return;
4580  	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4581  		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4582  }
4583  
4584  #ifdef CONFIG_TRACER_MAX_TRACE
4585  static void show_snapshot_main_help(struct seq_file *m)
4586  {
4587  	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4588  		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4589  		    "#                      Takes a snapshot of the main buffer.\n"
4590  		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4591  		    "#                      (Doesn't have to be '2' works with any number that\n"
4592  		    "#                       is not a '0' or '1')\n");
4593  }
4594  
4595  static void show_snapshot_percpu_help(struct seq_file *m)
4596  {
4597  	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4598  #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4599  	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4600  		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4601  #else
4602  	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4603  		    "#                     Must use main snapshot file to allocate.\n");
4604  #endif
4605  	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4606  		    "#                      (Doesn't have to be '2' works with any number that\n"
4607  		    "#                       is not a '0' or '1')\n");
4608  }
4609  
4610  static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4611  {
4612  	if (iter->tr->allocated_snapshot)
4613  		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4614  	else
4615  		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4616  
4617  	seq_puts(m, "# Snapshot commands:\n");
4618  	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4619  		show_snapshot_main_help(m);
4620  	else
4621  		show_snapshot_percpu_help(m);
4622  }
4623  #else
4624  /* Should never be called */
4625  static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4626  #endif
4627  
4628  static int s_show(struct seq_file *m, void *v)
4629  {
4630  	struct trace_iterator *iter = v;
4631  	int ret;
4632  
4633  	if (iter->ent == NULL) {
4634  		if (iter->tr) {
4635  			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4636  			seq_puts(m, "#\n");
4637  			test_ftrace_alive(m);
4638  		}
4639  		if (iter->snapshot && trace_empty(iter))
4640  			print_snapshot_help(m, iter);
4641  		else if (iter->trace && iter->trace->print_header)
4642  			iter->trace->print_header(m);
4643  		else
4644  			trace_default_header(m);
4645  
4646  	} else if (iter->leftover) {
4647  		/*
4648  		 * If we filled the seq_file buffer earlier, we
4649  		 * want to just show it now.
4650  		 */
4651  		ret = trace_print_seq(m, &iter->seq);
4652  
4653  		/* ret should this time be zero, but you never know */
4654  		iter->leftover = ret;
4655  
4656  	} else {
4657  		ret = print_trace_line(iter);
4658  		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4659  			iter->seq.full = 0;
4660  			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4661  		}
4662  		ret = trace_print_seq(m, &iter->seq);
4663  		/*
4664  		 * If we overflow the seq_file buffer, then it will
4665  		 * ask us for this data again at start up.
4666  		 * Use that instead.
4667  		 *  ret is 0 if seq_file write succeeded.
4668  		 *        -1 otherwise.
4669  		 */
4670  		iter->leftover = ret;
4671  	}
4672  
4673  	return 0;
4674  }
4675  
4676  /*
4677   * Should be used after trace_array_get(); trace_types_lock
4678   * ensures that i_cdev was already initialized.
4679   */
4680  static inline int tracing_get_cpu(struct inode *inode)
4681  {
4682  	if (inode->i_cdev) /* See trace_create_cpu_file() */
4683  		return (long)inode->i_cdev - 1;
4684  	return RING_BUFFER_ALL_CPUS;
4685  }
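
/*
 * For illustration: trace_create_cpu_file() is expected to store cpu + 1 in
 * i_cdev, so a value of 0 (no i_cdev set) means "all CPUs"; e.g. the
 * per_cpu/cpu2 trace file would carry i_cdev == (void *)3 and the helper
 * above returns 2. See trace_create_cpu_file() for the authoritative
 * encoding.
 */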
4686  
4687  static const struct seq_operations tracer_seq_ops = {
4688  	.start		= s_start,
4689  	.next		= s_next,
4690  	.stop		= s_stop,
4691  	.show		= s_show,
4692  };
4693  
4694  /*
4695   * Note, as iter itself can be allocated and freed in different
4696   * ways, this function is only used to free its content, and not
4697   * the iterator itself. The only requirement on all the allocations
4698   * is that they must zero all fields (kzalloc), as freeing works with
4699   * either allocated content or NULL.
4700   */
4701  static void free_trace_iter_content(struct trace_iterator *iter)
4702  {
4703  	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4704  	if (iter->fmt != static_fmt_buf)
4705  		kfree(iter->fmt);
4706  
4707  	kfree(iter->temp);
4708  	kfree(iter->buffer_iter);
4709  	mutex_destroy(&iter->mutex);
4710  	free_cpumask_var(iter->started);
4711  }
4712  
4713  static struct trace_iterator *
4714  __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4715  {
4716  	struct trace_array *tr = inode->i_private;
4717  	struct trace_iterator *iter;
4718  	int cpu;
4719  
4720  	if (tracing_disabled)
4721  		return ERR_PTR(-ENODEV);
4722  
4723  	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4724  	if (!iter)
4725  		return ERR_PTR(-ENOMEM);
4726  
4727  	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4728  				    GFP_KERNEL);
4729  	if (!iter->buffer_iter)
4730  		goto release;
4731  
4732  	/*
4733  	 * trace_find_next_entry() may need to save off iter->ent.
4734  	 * It will place it into the iter->temp buffer. As most
4735  	 * events are less than 128 bytes, allocate a buffer of that size.
4736  	 * If one is greater, then trace_find_next_entry() will
4737  	 * allocate a new buffer to adjust for the bigger iter->ent.
4738  	 * It's not critical if it fails to get allocated here.
4739  	 */
4740  	iter->temp = kmalloc(128, GFP_KERNEL);
4741  	if (iter->temp)
4742  		iter->temp_size = 128;
4743  
4744  	/*
4745  	 * trace_event_printf() may need to modify the given format
4746  	 * string to replace %p with %px so that it shows the real address
4747  	 * instead of a hashed value. However, that is only needed for
4748  	 * event tracing; other tracers may not need it. Defer the
4749  	 * allocation until it is needed.
4750  	 */
4751  	iter->fmt = NULL;
4752  	iter->fmt_size = 0;
4753  
4754  	mutex_lock(&trace_types_lock);
4755  	iter->trace = tr->current_trace;
4756  
4757  	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4758  		goto fail;
4759  
4760  	iter->tr = tr;
4761  
4762  #ifdef CONFIG_TRACER_MAX_TRACE
4763  	/* Currently only the top directory has a snapshot */
4764  	if (tr->current_trace->print_max || snapshot)
4765  		iter->array_buffer = &tr->max_buffer;
4766  	else
4767  #endif
4768  		iter->array_buffer = &tr->array_buffer;
4769  	iter->snapshot = snapshot;
4770  	iter->pos = -1;
4771  	iter->cpu_file = tracing_get_cpu(inode);
4772  	mutex_init(&iter->mutex);
4773  
4774  	/* Notify the tracer early; before we stop tracing. */
4775  	if (iter->trace->open)
4776  		iter->trace->open(iter);
4777  
4778  	/* Annotate start of buffers if we had overruns */
4779  	if (ring_buffer_overruns(iter->array_buffer->buffer))
4780  		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4781  
4782  	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4783  	if (trace_clocks[tr->clock_id].in_ns)
4784  		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4785  
4786  	/*
4787  	 * If pause-on-trace is enabled, then stop the trace while
4788  	 * dumping, unless this is the "snapshot" file
4789  	 */
4790  	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4791  		tracing_stop_tr(tr);
4792  
4793  	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4794  		for_each_tracing_cpu(cpu) {
4795  			iter->buffer_iter[cpu] =
4796  				ring_buffer_read_prepare(iter->array_buffer->buffer,
4797  							 cpu, GFP_KERNEL);
4798  		}
4799  		ring_buffer_read_prepare_sync();
4800  		for_each_tracing_cpu(cpu) {
4801  			ring_buffer_read_start(iter->buffer_iter[cpu]);
4802  			tracing_iter_reset(iter, cpu);
4803  		}
4804  	} else {
4805  		cpu = iter->cpu_file;
4806  		iter->buffer_iter[cpu] =
4807  			ring_buffer_read_prepare(iter->array_buffer->buffer,
4808  						 cpu, GFP_KERNEL);
4809  		ring_buffer_read_prepare_sync();
4810  		ring_buffer_read_start(iter->buffer_iter[cpu]);
4811  		tracing_iter_reset(iter, cpu);
4812  	}
4813  
4814  	mutex_unlock(&trace_types_lock);
4815  
4816  	return iter;
4817  
4818   fail:
4819  	mutex_unlock(&trace_types_lock);
4820  	free_trace_iter_content(iter);
4821  release:
4822  	seq_release_private(inode, file);
4823  	return ERR_PTR(-ENOMEM);
4824  }
4825  
4826  int tracing_open_generic(struct inode *inode, struct file *filp)
4827  {
4828  	int ret;
4829  
4830  	ret = tracing_check_open_get_tr(NULL);
4831  	if (ret)
4832  		return ret;
4833  
4834  	filp->private_data = inode->i_private;
4835  	return 0;
4836  }
4837  
4838  bool tracing_is_disabled(void)
4839  {
4840  	return tracing_disabled ? true : false;
4841  }
4842  
4843  /*
4844   * Open and update trace_array ref count.
4845   * Must have the current trace_array passed to it.
4846   */
4847  int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4848  {
4849  	struct trace_array *tr = inode->i_private;
4850  	int ret;
4851  
4852  	ret = tracing_check_open_get_tr(tr);
4853  	if (ret)
4854  		return ret;
4855  
4856  	filp->private_data = inode->i_private;
4857  
4858  	return 0;
4859  }
4860  
4861  /*
4862   * The private pointer of the inode is the trace_event_file.
4863   * Update the tr ref count associated with it.
4864   */
4865  int tracing_open_file_tr(struct inode *inode, struct file *filp)
4866  {
4867  	struct trace_event_file *file = inode->i_private;
4868  	int ret;
4869  
4870  	ret = tracing_check_open_get_tr(file->tr);
4871  	if (ret)
4872  		return ret;
4873  
4874  	mutex_lock(&event_mutex);
4875  
4876  	/* Fail if the file is marked for removal */
4877  	if (file->flags & EVENT_FILE_FL_FREED) {
4878  		trace_array_put(file->tr);
4879  		ret = -ENODEV;
4880  	} else {
4881  		event_file_get(file);
4882  	}
4883  
4884  	mutex_unlock(&event_mutex);
4885  	if (ret)
4886  		return ret;
4887  
4888  	filp->private_data = inode->i_private;
4889  
4890  	return 0;
4891  }
4892  
4893  int tracing_release_file_tr(struct inode *inode, struct file *filp)
4894  {
4895  	struct trace_event_file *file = inode->i_private;
4896  
4897  	trace_array_put(file->tr);
4898  	event_file_put(file);
4899  
4900  	return 0;
4901  }
4902  
4903  int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4904  {
4905  	tracing_release_file_tr(inode, filp);
4906  	return single_release(inode, filp);
4907  }
4908  
4909  static int tracing_mark_open(struct inode *inode, struct file *filp)
4910  {
4911  	stream_open(inode, filp);
4912  	return tracing_open_generic_tr(inode, filp);
4913  }
4914  
4915  static int tracing_release(struct inode *inode, struct file *file)
4916  {
4917  	struct trace_array *tr = inode->i_private;
4918  	struct seq_file *m = file->private_data;
4919  	struct trace_iterator *iter;
4920  	int cpu;
4921  
4922  	if (!(file->f_mode & FMODE_READ)) {
4923  		trace_array_put(tr);
4924  		return 0;
4925  	}
4926  
4927  	/* Writes do not use seq_file */
4928  	iter = m->private;
4929  	mutex_lock(&trace_types_lock);
4930  
4931  	for_each_tracing_cpu(cpu) {
4932  		if (iter->buffer_iter[cpu])
4933  			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4934  	}
4935  
4936  	if (iter->trace && iter->trace->close)
4937  		iter->trace->close(iter);
4938  
4939  	if (!iter->snapshot && tr->stop_count)
4940  		/* reenable tracing if it was previously enabled */
4941  		tracing_start_tr(tr);
4942  
4943  	__trace_array_put(tr);
4944  
4945  	mutex_unlock(&trace_types_lock);
4946  
4947  	free_trace_iter_content(iter);
4948  	seq_release_private(inode, file);
4949  
4950  	return 0;
4951  }
4952  
4953  int tracing_release_generic_tr(struct inode *inode, struct file *file)
4954  {
4955  	struct trace_array *tr = inode->i_private;
4956  
4957  	trace_array_put(tr);
4958  	return 0;
4959  }
4960  
4961  static int tracing_single_release_tr(struct inode *inode, struct file *file)
4962  {
4963  	struct trace_array *tr = inode->i_private;
4964  
4965  	trace_array_put(tr);
4966  
4967  	return single_release(inode, file);
4968  }
4969  
4970  static int tracing_open(struct inode *inode, struct file *file)
4971  {
4972  	struct trace_array *tr = inode->i_private;
4973  	struct trace_iterator *iter;
4974  	int ret;
4975  
4976  	ret = tracing_check_open_get_tr(tr);
4977  	if (ret)
4978  		return ret;
4979  
4980  	/* If this file was open for write, then erase contents */
4981  	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4982  		int cpu = tracing_get_cpu(inode);
4983  		struct array_buffer *trace_buf = &tr->array_buffer;
4984  
4985  #ifdef CONFIG_TRACER_MAX_TRACE
4986  		if (tr->current_trace->print_max)
4987  			trace_buf = &tr->max_buffer;
4988  #endif
4989  
4990  		if (cpu == RING_BUFFER_ALL_CPUS)
4991  			tracing_reset_online_cpus(trace_buf);
4992  		else
4993  			tracing_reset_cpu(trace_buf, cpu);
4994  	}
4995  
4996  	if (file->f_mode & FMODE_READ) {
4997  		iter = __tracing_open(inode, file, false);
4998  		if (IS_ERR(iter))
4999  			ret = PTR_ERR(iter);
5000  		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5001  			iter->iter_flags |= TRACE_FILE_LAT_FMT;
5002  	}
5003  
5004  	if (ret < 0)
5005  		trace_array_put(tr);
5006  
5007  	return ret;
5008  }
5009  
5010  /*
5011   * Some tracers are not suitable for instance buffers.
5012   * A tracer is always available for the global array (toplevel)
5013   * or if it explicitly states that it is.
5014   */
5015  static bool
5016  trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5017  {
5018  #ifdef CONFIG_TRACER_SNAPSHOT
5019  	/* arrays with mapped buffer range do not have snapshots */
5020  	if (tr->range_addr_start && t->use_max_tr)
5021  		return false;
5022  #endif
5023  	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5024  }
5025  
5026  /* Find the next tracer that this trace array may use */
5027  static struct tracer *
5028  get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5029  {
5030  	while (t && !trace_ok_for_array(t, tr))
5031  		t = t->next;
5032  
5033  	return t;
5034  }
5035  
5036  static void *
5037  t_next(struct seq_file *m, void *v, loff_t *pos)
5038  {
5039  	struct trace_array *tr = m->private;
5040  	struct tracer *t = v;
5041  
5042  	(*pos)++;
5043  
5044  	if (t)
5045  		t = get_tracer_for_array(tr, t->next);
5046  
5047  	return t;
5048  }
5049  
5050  static void *t_start(struct seq_file *m, loff_t *pos)
5051  {
5052  	struct trace_array *tr = m->private;
5053  	struct tracer *t;
5054  	loff_t l = 0;
5055  
5056  	mutex_lock(&trace_types_lock);
5057  
5058  	t = get_tracer_for_array(tr, trace_types);
5059  	for (; t && l < *pos; t = t_next(m, t, &l))
5060  			;
5061  
5062  	return t;
5063  }
5064  
5065  static void t_stop(struct seq_file *m, void *p)
5066  {
5067  	mutex_unlock(&trace_types_lock);
5068  }
5069  
5070  static int t_show(struct seq_file *m, void *v)
5071  {
5072  	struct tracer *t = v;
5073  
5074  	if (!t)
5075  		return 0;
5076  
5077  	seq_puts(m, t->name);
5078  	if (t->next)
5079  		seq_putc(m, ' ');
5080  	else
5081  		seq_putc(m, '\n');
5082  
5083  	return 0;
5084  }
5085  
5086  static const struct seq_operations show_traces_seq_ops = {
5087  	.start		= t_start,
5088  	.next		= t_next,
5089  	.stop		= t_stop,
5090  	.show		= t_show,
5091  };
5092  
5093  static int show_traces_open(struct inode *inode, struct file *file)
5094  {
5095  	struct trace_array *tr = inode->i_private;
5096  	struct seq_file *m;
5097  	int ret;
5098  
5099  	ret = tracing_check_open_get_tr(tr);
5100  	if (ret)
5101  		return ret;
5102  
5103  	ret = seq_open(file, &show_traces_seq_ops);
5104  	if (ret) {
5105  		trace_array_put(tr);
5106  		return ret;
5107  	}
5108  
5109  	m = file->private_data;
5110  	m->private = tr;
5111  
5112  	return 0;
5113  }
5114  
5115  static int tracing_seq_release(struct inode *inode, struct file *file)
5116  {
5117  	struct trace_array *tr = inode->i_private;
5118  
5119  	trace_array_put(tr);
5120  	return seq_release(inode, file);
5121  }
5122  
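/*
 * Writes to the "trace" file are accepted but discarded; the buffer is
 * actually cleared at open time when O_TRUNC is set (see tracing_open()
 * above), which is what makes "echo > trace" work.
 */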
5123  static ssize_t
5124  tracing_write_stub(struct file *filp, const char __user *ubuf,
5125  		   size_t count, loff_t *ppos)
5126  {
5127  	return count;
5128  }
5129  
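/*
 * Seek helper shared by several tracing files: files opened for reading
 * go through seq_lseek(), while write-only opens (used only to clear a
 * buffer) have nothing to seek over, so the position is reset to zero.
 */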
5130  loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5131  {
5132  	int ret;
5133  
5134  	if (file->f_mode & FMODE_READ)
5135  		ret = seq_lseek(file, offset, whence);
5136  	else
5137  		file->f_pos = ret = 0;
5138  
5139  	return ret;
5140  }
5141  
5142  static const struct file_operations tracing_fops = {
5143  	.open		= tracing_open,
5144  	.read		= seq_read,
5145  	.read_iter	= seq_read_iter,
5146  	.splice_read	= copy_splice_read,
5147  	.write		= tracing_write_stub,
5148  	.llseek		= tracing_lseek,
5149  	.release	= tracing_release,
5150  };
5151  
5152  static const struct file_operations show_traces_fops = {
5153  	.open		= show_traces_open,
5154  	.read		= seq_read,
5155  	.llseek		= seq_lseek,
5156  	.release	= tracing_seq_release,
5157  };
5158  
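/*
 * "tracing_cpumask" reads back and accepts a hex CPU mask (the format
 * parsed by cpumask_parse_user()), e.g. "echo 0f > tracing_cpumask" to
 * limit tracing to CPUs 0-3.
 */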
5159  static ssize_t
5160  tracing_cpumask_read(struct file *filp, char __user *ubuf,
5161  		     size_t count, loff_t *ppos)
5162  {
5163  	struct trace_array *tr = file_inode(filp)->i_private;
5164  	char *mask_str;
5165  	int len;
5166  
5167  	len = snprintf(NULL, 0, "%*pb\n",
5168  		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5169  	mask_str = kmalloc(len, GFP_KERNEL);
5170  	if (!mask_str)
5171  		return -ENOMEM;
5172  
5173  	len = snprintf(mask_str, len, "%*pb\n",
5174  		       cpumask_pr_args(tr->tracing_cpumask));
5175  	if (len >= count) {
5176  		count = -EINVAL;
5177  		goto out_err;
5178  	}
5179  	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5180  
5181  out_err:
5182  	kfree(mask_str);
5183  
5184  	return count;
5185  }
5186  
5187  int tracing_set_cpumask(struct trace_array *tr,
5188  			cpumask_var_t tracing_cpumask_new)
5189  {
5190  	int cpu;
5191  
5192  	if (!tr)
5193  		return -EINVAL;
5194  
5195  	local_irq_disable();
5196  	arch_spin_lock(&tr->max_lock);
5197  	for_each_tracing_cpu(cpu) {
5198  		/*
5199  		 * Increase/decrease the disabled counter if we are
5200  		 * about to flip a bit in the cpumask:
5201  		 */
5202  		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5203  				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5204  			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5205  			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5206  #ifdef CONFIG_TRACER_MAX_TRACE
5207  			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5208  #endif
5209  		}
5210  		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5211  				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5212  			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5213  			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5214  #ifdef CONFIG_TRACER_MAX_TRACE
5215  			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5216  #endif
5217  		}
5218  	}
5219  	arch_spin_unlock(&tr->max_lock);
5220  	local_irq_enable();
5221  
5222  	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5223  
5224  	return 0;
5225  }
5226  
5227  static ssize_t
5228  tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5229  		      size_t count, loff_t *ppos)
5230  {
5231  	struct trace_array *tr = file_inode(filp)->i_private;
5232  	cpumask_var_t tracing_cpumask_new;
5233  	int err;
5234  
5235  	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5236  		return -ENOMEM;
5237  
5238  	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5239  	if (err)
5240  		goto err_free;
5241  
5242  	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5243  	if (err)
5244  		goto err_free;
5245  
5246  	free_cpumask_var(tracing_cpumask_new);
5247  
5248  	return count;
5249  
5250  err_free:
5251  	free_cpumask_var(tracing_cpumask_new);
5252  
5253  	return err;
5254  }
5255  
5256  static const struct file_operations tracing_cpumask_fops = {
5257  	.open		= tracing_open_generic_tr,
5258  	.read		= tracing_cpumask_read,
5259  	.write		= tracing_cpumask_write,
5260  	.release	= tracing_release_generic_tr,
5261  	.llseek		= generic_file_llseek,
5262  };
5263  
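/*
 * "trace_options" shows every generic trace flag plus the current
 * tracer's private options, one per line, prefixed with "no" when the
 * option is off.  Writing "<option>" or "no<option>" toggles it.
 */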
5264  static int tracing_trace_options_show(struct seq_file *m, void *v)
5265  {
5266  	struct tracer_opt *trace_opts;
5267  	struct trace_array *tr = m->private;
5268  	u32 tracer_flags;
5269  	int i;
5270  
5271  	mutex_lock(&trace_types_lock);
5272  	tracer_flags = tr->current_trace->flags->val;
5273  	trace_opts = tr->current_trace->flags->opts;
5274  
5275  	for (i = 0; trace_options[i]; i++) {
5276  		if (tr->trace_flags & (1 << i))
5277  			seq_printf(m, "%s\n", trace_options[i]);
5278  		else
5279  			seq_printf(m, "no%s\n", trace_options[i]);
5280  	}
5281  
5282  	for (i = 0; trace_opts[i].name; i++) {
5283  		if (tracer_flags & trace_opts[i].bit)
5284  			seq_printf(m, "%s\n", trace_opts[i].name);
5285  		else
5286  			seq_printf(m, "no%s\n", trace_opts[i].name);
5287  	}
5288  	mutex_unlock(&trace_types_lock);
5289  
5290  	return 0;
5291  }
5292  
5293  static int __set_tracer_option(struct trace_array *tr,
5294  			       struct tracer_flags *tracer_flags,
5295  			       struct tracer_opt *opts, int neg)
5296  {
5297  	struct tracer *trace = tracer_flags->trace;
5298  	int ret;
5299  
5300  	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5301  	if (ret)
5302  		return ret;
5303  
5304  	if (neg)
5305  		tracer_flags->val &= ~opts->bit;
5306  	else
5307  		tracer_flags->val |= opts->bit;
5308  	return 0;
5309  }
5310  
5311  /* Try to assign a tracer specific option */
5312  static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5313  {
5314  	struct tracer *trace = tr->current_trace;
5315  	struct tracer_flags *tracer_flags = trace->flags;
5316  	struct tracer_opt *opts = NULL;
5317  	int i;
5318  
5319  	for (i = 0; tracer_flags->opts[i].name; i++) {
5320  		opts = &tracer_flags->opts[i];
5321  
5322  		if (strcmp(cmp, opts->name) == 0)
5323  			return __set_tracer_option(tr, trace->flags, opts, neg);
5324  	}
5325  
5326  	return -EINVAL;
5327  }
5328  
5329  /* Some tracers require overwrite to stay enabled */
5330  int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5331  {
5332  	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5333  		return -1;
5334  
5335  	return 0;
5336  }
5337  
5338  int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5339  {
5340  	if ((mask == TRACE_ITER_RECORD_TGID) ||
5341  	    (mask == TRACE_ITER_RECORD_CMD) ||
5342  	    (mask == TRACE_ITER_TRACE_PRINTK))
5343  		lockdep_assert_held(&event_mutex);
5344  
5345  	/* do nothing if flag is already set */
5346  	if (!!(tr->trace_flags & mask) == !!enabled)
5347  		return 0;
5348  
5349  	/* Give the tracer a chance to approve the change */
5350  	if (tr->current_trace->flag_changed)
5351  		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5352  			return -EINVAL;
5353  
5354  	if (mask == TRACE_ITER_TRACE_PRINTK) {
5355  		if (enabled) {
5356  			update_printk_trace(tr);
5357  		} else {
5358  			/*
5359  			 * The global_trace cannot clear this.
5360  			 * Its flag only gets cleared if another instance sets it.
5361  			 */
5362  			if (printk_trace == &global_trace)
5363  				return -EINVAL;
5364  			/*
5365  			 * An instance must always have it set;
5366  			 * by default, that's the global_trace instance.
5367  			 */
5368  			if (printk_trace == tr)
5369  				update_printk_trace(&global_trace);
5370  		}
5371  	}
5372  
5373  	if (enabled)
5374  		tr->trace_flags |= mask;
5375  	else
5376  		tr->trace_flags &= ~mask;
5377  
5378  	if (mask == TRACE_ITER_RECORD_CMD)
5379  		trace_event_enable_cmd_record(enabled);
5380  
5381  	if (mask == TRACE_ITER_RECORD_TGID) {
5382  
5383  		if (trace_alloc_tgid_map() < 0) {
5384  			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5385  			return -ENOMEM;
5386  		}
5387  
5388  		trace_event_enable_tgid_record(enabled);
5389  	}
5390  
5391  	if (mask == TRACE_ITER_EVENT_FORK)
5392  		trace_event_follow_fork(tr, enabled);
5393  
5394  	if (mask == TRACE_ITER_FUNC_FORK)
5395  		ftrace_pid_follow_fork(tr, enabled);
5396  
5397  	if (mask == TRACE_ITER_OVERWRITE) {
5398  		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5399  #ifdef CONFIG_TRACER_MAX_TRACE
5400  		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5401  #endif
5402  	}
5403  
5404  	if (mask == TRACE_ITER_PRINTK) {
5405  		trace_printk_start_stop_comm(enabled);
5406  		trace_printk_control(enabled);
5407  	}
5408  
5409  	return 0;
5410  }
5411  
5412  int trace_set_options(struct trace_array *tr, char *option)
5413  {
5414  	char *cmp;
5415  	int neg = 0;
5416  	int ret;
5417  	size_t orig_len = strlen(option);
5418  	int len;
5419  
5420  	cmp = strstrip(option);
5421  
5422  	len = str_has_prefix(cmp, "no");
5423  	if (len)
5424  		neg = 1;
5425  
5426  	cmp += len;
5427  
5428  	mutex_lock(&event_mutex);
5429  	mutex_lock(&trace_types_lock);
5430  
5431  	ret = match_string(trace_options, -1, cmp);
5432  	/* If no option could be set, test the specific tracer options */
5433  	if (ret < 0)
5434  		ret = set_tracer_option(tr, cmp, neg);
5435  	else
5436  		ret = set_tracer_flag(tr, 1 << ret, !neg);
5437  
5438  	mutex_unlock(&trace_types_lock);
5439  	mutex_unlock(&event_mutex);
5440  
5441  	/*
5442  	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5443  	 * turn it back into a space.
5444  	 */
5445  	if (orig_len > strlen(option))
5446  		option[strlen(option)] = ' ';
5447  
5448  	return ret;
5449  }
5450  
5451  static void __init apply_trace_boot_options(void)
5452  {
5453  	char *buf = trace_boot_options_buf;
5454  	char *option;
5455  
5456  	while (true) {
5457  		option = strsep(&buf, ",");
5458  
5459  		if (!option)
5460  			break;
5461  
5462  		if (*option)
5463  			trace_set_options(&global_trace, option);
5464  
5465  		/* Put back the comma to allow this to be called again */
5466  		if (buf)
5467  			*(buf - 1) = ',';
5468  	}
5469  }
5470  
5471  static ssize_t
5472  tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5473  			size_t cnt, loff_t *ppos)
5474  {
5475  	struct seq_file *m = filp->private_data;
5476  	struct trace_array *tr = m->private;
5477  	char buf[64];
5478  	int ret;
5479  
5480  	if (cnt >= sizeof(buf))
5481  		return -EINVAL;
5482  
5483  	if (copy_from_user(buf, ubuf, cnt))
5484  		return -EFAULT;
5485  
5486  	buf[cnt] = 0;
5487  
5488  	ret = trace_set_options(tr, buf);
5489  	if (ret < 0)
5490  		return ret;
5491  
5492  	*ppos += cnt;
5493  
5494  	return cnt;
5495  }
5496  
5497  static int tracing_trace_options_open(struct inode *inode, struct file *file)
5498  {
5499  	struct trace_array *tr = inode->i_private;
5500  	int ret;
5501  
5502  	ret = tracing_check_open_get_tr(tr);
5503  	if (ret)
5504  		return ret;
5505  
5506  	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5507  	if (ret < 0)
5508  		trace_array_put(tr);
5509  
5510  	return ret;
5511  }
5512  
5513  static const struct file_operations tracing_iter_fops = {
5514  	.open		= tracing_trace_options_open,
5515  	.read		= seq_read,
5516  	.llseek		= seq_lseek,
5517  	.release	= tracing_single_release_tr,
5518  	.write		= tracing_trace_options_write,
5519  };
5520  
5521  static const char readme_msg[] =
5522  	"tracing mini-HOWTO:\n\n"
5523  	"By default tracefs removes all OTH file permission bits.\n"
5524  	"When mounting tracefs an optional group id can be specified\n"
5525  	"which adds the group to every directory and file in tracefs:\n\n"
5526  	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5527  	"# echo 0 > tracing_on : quick way to disable tracing\n"
5528  	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5529  	" Important files:\n"
5530  	"  trace\t\t\t- The static contents of the buffer\n"
5531  	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5532  	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5533  	"  current_tracer\t- function and latency tracers\n"
5534  	"  available_tracers\t- list of configured tracers for current_tracer\n"
5535  	"  error_log\t- error log for failed commands (that support it)\n"
5536  	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5537  	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5538  	"  trace_clock\t\t- change the clock used to order events\n"
5539  	"       local:   Per cpu clock but may not be synced across CPUs\n"
5540  	"      global:   Synced across CPUs but slows tracing down.\n"
5541  	"     counter:   Not a clock, but just an increment\n"
5542  	"      uptime:   Jiffy counter from time of boot\n"
5543  	"        perf:   Same clock that perf events use\n"
5544  #ifdef CONFIG_X86_64
5545  	"     x86-tsc:   TSC cycle counter\n"
5546  #endif
5547  	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5548  	"       delta:   Delta difference against a buffer-wide timestamp\n"
5549  	"    absolute:   Absolute (standalone) timestamp\n"
5550  	"\n  trace_marker\t\t- Writes into this file write into the kernel buffer\n"
5551  	"\n  trace_marker_raw\t\t- Writes into this file write binary data into the kernel buffer\n"
5552  	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5553  	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5554  	"\t\t\t  Remove sub-buffer with rmdir\n"
5555  	"  trace_options\t\t- Set format or modify how tracing happens\n"
5556  	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5557  	"\t\t\t  option name\n"
5558  	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5559  #ifdef CONFIG_DYNAMIC_FTRACE
5560  	"\n  available_filter_functions - list of functions that can be filtered on\n"
5561  	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5562  	"\t\t\t  functions\n"
5563  	"\t     accepts: func_full_name or glob-matching-pattern\n"
5564  	"\t     modules: Can select a group via module\n"
5565  	"\t      Format: :mod:<module-name>\n"
5566  	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5567  	"\t    triggers: a command to perform when function is hit\n"
5568  	"\t      Format: <function>:<trigger>[:count]\n"
5569  	"\t     trigger: traceon, traceoff\n"
5570  	"\t\t      enable_event:<system>:<event>\n"
5571  	"\t\t      disable_event:<system>:<event>\n"
5572  #ifdef CONFIG_STACKTRACE
5573  	"\t\t      stacktrace\n"
5574  #endif
5575  #ifdef CONFIG_TRACER_SNAPSHOT
5576  	"\t\t      snapshot\n"
5577  #endif
5578  	"\t\t      dump\n"
5579  	"\t\t      cpudump\n"
5580  	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5581  	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5582  	"\t     The first one will disable tracing every time do_fault is hit\n"
5583  	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5584  	"\t       The first time do_trap is hit and it disables tracing, the\n"
5585  	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5586  	"\t       the counter will not decrement. It only decrements when the\n"
5587  	"\t       trigger did work\n"
5588  	"\t     To remove trigger without count:\n"
5589  	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5590  	"\t     To remove trigger with a count:\n"
5591  	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5592  	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5593  	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5594  	"\t    modules: Can select a group via module command :mod:\n"
5595  	"\t    Does not accept triggers\n"
5596  #endif /* CONFIG_DYNAMIC_FTRACE */
5597  #ifdef CONFIG_FUNCTION_TRACER
5598  	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5599  	"\t\t    (function)\n"
5600  	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5601  	"\t\t    (function)\n"
5602  #endif
5603  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5604  	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5605  	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5606  	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5607  #endif
5608  #ifdef CONFIG_TRACER_SNAPSHOT
5609  	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5610  	"\t\t\t  snapshot buffer. Read the contents for more\n"
5611  	"\t\t\t  information\n"
5612  #endif
5613  #ifdef CONFIG_STACK_TRACER
5614  	"  stack_trace\t\t- Shows the max stack trace when active\n"
5615  	"  stack_max_size\t- Shows current max stack size that was traced\n"
5616  	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5617  	"\t\t\t  new trace)\n"
5618  #ifdef CONFIG_DYNAMIC_FTRACE
5619  	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5620  	"\t\t\t  traces\n"
5621  #endif
5622  #endif /* CONFIG_STACK_TRACER */
5623  #ifdef CONFIG_DYNAMIC_EVENTS
5624  	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5625  	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5626  #endif
5627  #ifdef CONFIG_KPROBE_EVENTS
5628  	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5629  	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5630  #endif
5631  #ifdef CONFIG_UPROBE_EVENTS
5632  	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5633  	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5634  #endif
5635  #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5636      defined(CONFIG_FPROBE_EVENTS)
5637  	"\t  accepts: event-definitions (one definition per line)\n"
5638  #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5639  	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5640  	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5641  #endif
5642  #ifdef CONFIG_FPROBE_EVENTS
5643  	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5644  	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5645  #endif
5646  #ifdef CONFIG_HIST_TRIGGERS
5647  	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5648  #endif
5649  	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5650  	"\t           -:[<group>/][<event>]\n"
5651  #ifdef CONFIG_KPROBE_EVENTS
5652  	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5653  	"\t     place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5654  #endif
5655  #ifdef CONFIG_UPROBE_EVENTS
5656  	"\t     place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5657  #endif
5658  	"\t     args: <name>=fetcharg[:type]\n"
5659  	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5660  #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5661  	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5662  #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5663  	"\t           <argname>[->field[->field|.field...]],\n"
5664  #endif
5665  #else
5666  	"\t           $stack<index>, $stack, $retval, $comm,\n"
5667  #endif
5668  	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5669  	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5670  	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5671  	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5672  	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5673  #ifdef CONFIG_HIST_TRIGGERS
5674  	"\t    field: <stype> <name>;\n"
5675  	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5676  	"\t           [unsigned] char/int/long\n"
5677  #endif
5678  	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5679  	"\t            of the <attached-group>/<attached-event>.\n"
5680  #endif
5681  	"  events/\t\t- Directory containing all trace event subsystems:\n"
5682  	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5683  	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5684  	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5685  	"\t\t\t  events\n"
5686  	"      filter\t\t- If set, only events passing filter are traced\n"
5687  	"  events/<system>/<event>/\t- Directory containing control files for\n"
5688  	"\t\t\t  <event>:\n"
5689  	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5690  	"      filter\t\t- If set, only events passing filter are traced\n"
5691  	"      trigger\t\t- If set, a command to perform when event is hit\n"
5692  	"\t    Format: <trigger>[:count][if <filter>]\n"
5693  	"\t   trigger: traceon, traceoff\n"
5694  	"\t            enable_event:<system>:<event>\n"
5695  	"\t            disable_event:<system>:<event>\n"
5696  #ifdef CONFIG_HIST_TRIGGERS
5697  	"\t            enable_hist:<system>:<event>\n"
5698  	"\t            disable_hist:<system>:<event>\n"
5699  #endif
5700  #ifdef CONFIG_STACKTRACE
5701  	"\t\t    stacktrace\n"
5702  #endif
5703  #ifdef CONFIG_TRACER_SNAPSHOT
5704  	"\t\t    snapshot\n"
5705  #endif
5706  #ifdef CONFIG_HIST_TRIGGERS
5707  	"\t\t    hist (see below)\n"
5708  #endif
5709  	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5710  	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5711  	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5712  	"\t                  events/block/block_unplug/trigger\n"
5713  	"\t   The first disables tracing every time block_unplug is hit.\n"
5714  	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5715  	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5716  	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5717  	"\t   Like function triggers, the counter is only decremented if it\n"
5718  	"\t    enabled or disabled tracing.\n"
5719  	"\t   To remove a trigger without a count:\n"
5720  	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5721  	"\t   To remove a trigger with a count:\n"
5722  	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5723  	"\t   Filters can be ignored when removing a trigger.\n"
5724  #ifdef CONFIG_HIST_TRIGGERS
5725  	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5726  	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5727  	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5728  	"\t            [:values=<field1[,field2,...]>]\n"
5729  	"\t            [:sort=<field1[,field2,...]>]\n"
5730  	"\t            [:size=#entries]\n"
5731  	"\t            [:pause][:continue][:clear]\n"
5732  	"\t            [:name=histname1]\n"
5733  	"\t            [:nohitcount]\n"
5734  	"\t            [:<handler>.<action>]\n"
5735  	"\t            [if <filter>]\n\n"
5736  	"\t    Note, special fields can be used as well:\n"
5737  	"\t            common_timestamp - to record current timestamp\n"
5738  	"\t            common_cpu - to record the CPU the event happened on\n"
5739  	"\n"
5740  	"\t    A hist trigger variable can be:\n"
5741  	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5742  	"\t        - a reference to another variable e.g. y=$x,\n"
5743  	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5744  	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5745  	"\n"
5746  	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5747  	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5748  	"\t    variable reference, field or numeric literal.\n"
5749  	"\n"
5750  	"\t    When a matching event is hit, an entry is added to a hash\n"
5751  	"\t    table using the key(s) and value(s) named, and the value of a\n"
5752  	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5753  	"\t    correspond to fields in the event's format description.  Keys\n"
5754  	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5755  	"\t    Compound keys consisting of up to two fields can be specified\n"
5756  	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5757  	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5758  	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5759  	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5760  	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5761  	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5762  	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5763  	"\t    its histogram data will be shared with other triggers of the\n"
5764  	"\t    same name, and trigger hits will update this common data.\n\n"
5765  	"\t    Reading the 'hist' file for the event will dump the hash\n"
5766  	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5767  	"\t    triggers attached to an event, there will be a table for each\n"
5768  	"\t    trigger in the output.  The table displayed for a named\n"
5769  	"\t    trigger will be the same as any other instance having the\n"
5770  	"\t    same name.  The default format used to display a given field\n"
5771  	"\t    can be modified by appending any of the following modifiers\n"
5772  	"\t    to the field name, as applicable:\n\n"
5773  	"\t            .hex        display a number as a hex value\n"
5774  	"\t            .sym        display an address as a symbol\n"
5775  	"\t            .sym-offset display an address as a symbol and offset\n"
5776  	"\t            .execname   display a common_pid as a program name\n"
5777  	"\t            .syscall    display a syscall id as a syscall name\n"
5778  	"\t            .log2       display log2 value rather than raw number\n"
5779  	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5780  	"\t            .usecs      display a common_timestamp in microseconds\n"
5781  	"\t            .percent    display a number as a percentage value\n"
5782  	"\t            .graph      display a bar-graph of a value\n\n"
5783  	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5784  	"\t    trigger or to start a hist trigger but not log any events\n"
5785  	"\t    until told to do so.  'continue' can be used to start or\n"
5786  	"\t    restart a paused hist trigger.\n\n"
5787  	"\t    The 'clear' parameter will clear the contents of a running\n"
5788  	"\t    hist trigger and leave its current paused/active state\n"
5789  	"\t    unchanged.\n\n"
5790  	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5791  	"\t    raw hitcount in the histogram.\n\n"
5792  	"\t    The enable_hist and disable_hist triggers can be used to\n"
5793  	"\t    have one event conditionally start and stop another event's\n"
5794  	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5795  	"\t    the enable_event and disable_event triggers.\n\n"
5796  	"\t    Hist trigger handlers and actions are executed whenever a\n"
5797  	"\t    histogram entry is added or updated.  They take the form:\n\n"
5798  	"\t        <handler>.<action>\n\n"
5799  	"\t    The available handlers are:\n\n"
5800  	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5801  	"\t        onmax(var)               - invoke if var exceeds current max\n"
5802  	"\t        onchange(var)            - invoke action if var changes\n\n"
5803  	"\t    The available actions are:\n\n"
5804  	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5805  	"\t        save(field,...)                      - save current event fields\n"
5806  #ifdef CONFIG_TRACER_SNAPSHOT
5807  	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5808  #endif
5809  #ifdef CONFIG_SYNTH_EVENTS
5810  	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5811  	"\t  Write into this file to define/undefine new synthetic events.\n"
5812  	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5813  #endif
5814  #endif
5815  ;
5816  
5817  static ssize_t
5818  tracing_readme_read(struct file *filp, char __user *ubuf,
5819  		       size_t cnt, loff_t *ppos)
5820  {
5821  	return simple_read_from_buffer(ubuf, cnt, ppos,
5822  					readme_msg, strlen(readme_msg));
5823  }
5824  
5825  static const struct file_operations tracing_readme_fops = {
5826  	.open		= tracing_open_generic,
5827  	.read		= tracing_readme_read,
5828  	.llseek		= generic_file_llseek,
5829  };
5830  
5831  #ifdef CONFIG_TRACE_EVAL_MAP_FILE
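/*
 * trace_eval_maps is a chain of arrays, each bracketed by a head item
 * (module + length) and a zeroed tail item whose ->tail.next points to
 * the next array.  When the iterator lands on an item with no
 * eval_string it is sitting on a tail: follow the link and step over
 * the next array's head.
 */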
5832  static union trace_eval_map_item *
5833  update_eval_map(union trace_eval_map_item *ptr)
5834  {
5835  	if (!ptr->map.eval_string) {
5836  		if (ptr->tail.next) {
5837  			ptr = ptr->tail.next;
5838  			/* Set ptr to the next real item (skip head) */
5839  			ptr++;
5840  		} else
5841  			return NULL;
5842  	}
5843  	return ptr;
5844  }
5845  
5846  static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5847  {
5848  	union trace_eval_map_item *ptr = v;
5849  
5850  	/*
5851  	 * Paranoid! If ptr points to end, we don't want to increment past it.
5852  	 * This really should never happen.
5853  	 */
5854  	(*pos)++;
5855  	ptr = update_eval_map(ptr);
5856  	if (WARN_ON_ONCE(!ptr))
5857  		return NULL;
5858  
5859  	ptr++;
5860  	ptr = update_eval_map(ptr);
5861  
5862  	return ptr;
5863  }
5864  
5865  static void *eval_map_start(struct seq_file *m, loff_t *pos)
5866  {
5867  	union trace_eval_map_item *v;
5868  	loff_t l = 0;
5869  
5870  	mutex_lock(&trace_eval_mutex);
5871  
5872  	v = trace_eval_maps;
5873  	if (v)
5874  		v++;
5875  
5876  	while (v && l < *pos) {
5877  		v = eval_map_next(m, v, &l);
5878  	}
5879  
5880  	return v;
5881  }
5882  
5883  static void eval_map_stop(struct seq_file *m, void *v)
5884  {
5885  	mutex_unlock(&trace_eval_mutex);
5886  }
5887  
5888  static int eval_map_show(struct seq_file *m, void *v)
5889  {
5890  	union trace_eval_map_item *ptr = v;
5891  
5892  	seq_printf(m, "%s %ld (%s)\n",
5893  		   ptr->map.eval_string, ptr->map.eval_value,
5894  		   ptr->map.system);
5895  
5896  	return 0;
5897  }
5898  
5899  static const struct seq_operations tracing_eval_map_seq_ops = {
5900  	.start		= eval_map_start,
5901  	.next		= eval_map_next,
5902  	.stop		= eval_map_stop,
5903  	.show		= eval_map_show,
5904  };
5905  
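/*
 * The eval_map file is global rather than per trace instance, so NULL
 * is passed to tracing_check_open_get_tr(): the security and
 * tracing_disabled checks still run, but no trace_array reference is
 * taken.
 */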
5906  static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5907  {
5908  	int ret;
5909  
5910  	ret = tracing_check_open_get_tr(NULL);
5911  	if (ret)
5912  		return ret;
5913  
5914  	return seq_open(filp, &tracing_eval_map_seq_ops);
5915  }
5916  
5917  static const struct file_operations tracing_eval_map_fops = {
5918  	.open		= tracing_eval_map_open,
5919  	.read		= seq_read,
5920  	.llseek		= seq_lseek,
5921  	.release	= seq_release,
5922  };
5923  
5924  static inline union trace_eval_map_item *
5925  trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5926  {
5927  	/* Return tail of array given the head */
5928  	return ptr + ptr->head.length + 1;
5929  }
5930  
5931  static void
5932  trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5933  			   int len)
5934  {
5935  	struct trace_eval_map **stop;
5936  	struct trace_eval_map **map;
5937  	union trace_eval_map_item *map_array;
5938  	union trace_eval_map_item *ptr;
5939  
5940  	stop = start + len;
5941  
5942  	/*
5943  	 * The trace_eval_maps contains the map plus a head and tail item,
5944  	 * where the head holds the module and length of array, and the
5945  	 * tail holds a pointer to the next list.
5946  	 */
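	/*
	 * Resulting layout of map_array:
	 *   [ head ][ map 0 ][ map 1 ] ... [ map len-1 ][ tail ]
	 * The tail entry is the zeroed terminator written below; its
	 * tail.next is filled in later if another module appends a list.
	 */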
5947  	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5948  	if (!map_array) {
5949  		pr_warn("Unable to allocate trace eval mapping\n");
5950  		return;
5951  	}
5952  
5953  	mutex_lock(&trace_eval_mutex);
5954  
5955  	if (!trace_eval_maps)
5956  		trace_eval_maps = map_array;
5957  	else {
5958  		ptr = trace_eval_maps;
5959  		for (;;) {
5960  			ptr = trace_eval_jmp_to_tail(ptr);
5961  			if (!ptr->tail.next)
5962  				break;
5963  			ptr = ptr->tail.next;
5964  
5965  		}
5966  		ptr->tail.next = map_array;
5967  	}
5968  	map_array->head.mod = mod;
5969  	map_array->head.length = len;
5970  	map_array++;
5971  
5972  	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5973  		map_array->map = **map;
5974  		map_array++;
5975  	}
5976  	memset(map_array, 0, sizeof(*map_array));
5977  
5978  	mutex_unlock(&trace_eval_mutex);
5979  }
5980  
5981  static void trace_create_eval_file(struct dentry *d_tracer)
5982  {
5983  	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5984  			  NULL, &tracing_eval_map_fops);
5985  }
5986  
5987  #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5988  static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5989  static inline void trace_insert_eval_map_file(struct module *mod,
5990  			      struct trace_eval_map **start, int len) { }
5991  #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5992  
5993  static void trace_insert_eval_map(struct module *mod,
5994  				  struct trace_eval_map **start, int len)
5995  {
5996  	struct trace_eval_map **map;
5997  
5998  	if (len <= 0)
5999  		return;
6000  
6001  	map = start;
6002  
6003  	trace_event_eval_update(map, len);
6004  
6005  	trace_insert_eval_map_file(mod, start, len);
6006  }
6007  
6008  static ssize_t
6009  tracing_set_trace_read(struct file *filp, char __user *ubuf,
6010  		       size_t cnt, loff_t *ppos)
6011  {
6012  	struct trace_array *tr = filp->private_data;
6013  	char buf[MAX_TRACER_SIZE+2];
6014  	int r;
6015  
6016  	mutex_lock(&trace_types_lock);
6017  	r = sprintf(buf, "%s\n", tr->current_trace->name);
6018  	mutex_unlock(&trace_types_lock);
6019  
6020  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6021  }
6022  
6023  int tracer_init(struct tracer *t, struct trace_array *tr)
6024  {
6025  	tracing_reset_online_cpus(&tr->array_buffer);
6026  	return t->init(tr);
6027  }
6028  
6029  static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6030  {
6031  	int cpu;
6032  
6033  	for_each_tracing_cpu(cpu)
6034  		per_cpu_ptr(buf->data, cpu)->entries = val;
6035  }
6036  
6037  static void update_buffer_entries(struct array_buffer *buf, int cpu)
6038  {
6039  	if (cpu == RING_BUFFER_ALL_CPUS) {
6040  		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6041  	} else {
6042  		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6043  	}
6044  }
6045  
6046  #ifdef CONFIG_TRACER_MAX_TRACE
6047  /* resize @trace_buf's buffer to the size of @size_buf's entries */
6048  static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6049  					struct array_buffer *size_buf, int cpu_id)
6050  {
6051  	int cpu, ret = 0;
6052  
6053  	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6054  		for_each_tracing_cpu(cpu) {
6055  			ret = ring_buffer_resize(trace_buf->buffer,
6056  				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6057  			if (ret < 0)
6058  				break;
6059  			per_cpu_ptr(trace_buf->data, cpu)->entries =
6060  				per_cpu_ptr(size_buf->data, cpu)->entries;
6061  		}
6062  	} else {
6063  		ret = ring_buffer_resize(trace_buf->buffer,
6064  				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6065  		if (ret == 0)
6066  			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6067  				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6068  	}
6069  
6070  	return ret;
6071  }
6072  #endif /* CONFIG_TRACER_MAX_TRACE */
6073  
6074  static int __tracing_resize_ring_buffer(struct trace_array *tr,
6075  					unsigned long size, int cpu)
6076  {
6077  	int ret;
6078  
6079  	/*
6080  	 * If kernel or user changes the size of the ring buffer
6081  	 * we use the size that was given, and we can forget about
6082  	 * expanding it later.
6083  	 */
6084  	trace_set_ring_buffer_expanded(tr);
6085  
6086  	/* May be called before buffers are initialized */
6087  	if (!tr->array_buffer.buffer)
6088  		return 0;
6089  
6090  	/* Do not allow tracing while resizing ring buffer */
6091  	tracing_stop_tr(tr);
6092  
6093  	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6094  	if (ret < 0)
6095  		goto out_start;
6096  
6097  #ifdef CONFIG_TRACER_MAX_TRACE
6098  	if (!tr->allocated_snapshot)
6099  		goto out;
6100  
6101  	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6102  	if (ret < 0) {
6103  		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6104  						     &tr->array_buffer, cpu);
6105  		if (r < 0) {
6106  			/*
6107  			 * AARGH! We are left with different
6108  			 * size max buffer!!!!
6109  			 * The max buffer is our "snapshot" buffer.
6110  			 * When a tracer needs a snapshot (one of the
6111  			 * latency tracers), it swaps the max buffer
6112  			 * with the saved snapshot. We succeeded in
6113  			 * updating the size of the main buffer, but failed to
6114  			 * update the size of the max buffer. But when we tried
6115  			 * to reset the main buffer to the original size, we
6116  			 * failed there too. This is very unlikely to
6117  			 * happen, but if it does, warn and kill all
6118  			 * tracing.
6119  			 */
6120  			WARN_ON(1);
6121  			tracing_disabled = 1;
6122  		}
6123  		goto out_start;
6124  	}
6125  
6126  	update_buffer_entries(&tr->max_buffer, cpu);
6127  
6128   out:
6129  #endif /* CONFIG_TRACER_MAX_TRACE */
6130  
6131  	update_buffer_entries(&tr->array_buffer, cpu);
6132   out_start:
6133  	tracing_start_tr(tr);
6134  	return ret;
6135  }
6136  
6137  ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6138  				  unsigned long size, int cpu_id)
6139  {
6140  	int ret;
6141  
6142  	mutex_lock(&trace_types_lock);
6143  
6144  	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6145  		/* make sure, this cpu is enabled in the mask */
6146  		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6147  			ret = -EINVAL;
6148  			goto out;
6149  		}
6150  	}
6151  
6152  	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6153  	if (ret < 0)
6154  		ret = -ENOMEM;
6155  
6156  out:
6157  	mutex_unlock(&trace_types_lock);
6158  
6159  	return ret;
6160  }
6161  
6162  static void update_last_data(struct trace_array *tr)
6163  {
6164  	if (!tr->text_delta && !tr->data_delta)
6165  		return;
6166  
6167  	/*
6168  	 * Need to clear all CPU buffers as there cannot be events
6169  	 * from the previous boot mixed with events with this boot
6170  	 * as that will cause a confusing trace. Need to clear all
6171  	 * CPU buffers, even for those that may currently be offline.
6172  	 */
6173  	tracing_reset_all_cpus(&tr->array_buffer);
6174  
6175  	/* Using current data now */
6176  	tr->text_delta = 0;
6177  	tr->data_delta = 0;
6178  }
6179  
6180  /**
6181   * tracing_update_buffers - used by tracing facility to expand ring buffers
6182   * @tr: The tracing instance
6183   *
6184   * To save memory when tracing is never used on a system that has it
6185   * configured in, the ring buffers are set to a minimum size. But once
6186   * a user starts to use the tracing facility, they need to grow
6187   * to their default size.
6188   *
6189   * This function is to be called when a tracer is about to be used.
6190   */
6191  int tracing_update_buffers(struct trace_array *tr)
6192  {
6193  	int ret = 0;
6194  
6195  	mutex_lock(&trace_types_lock);
6196  
6197  	update_last_data(tr);
6198  
6199  	if (!tr->ring_buffer_expanded)
6200  		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6201  						RING_BUFFER_ALL_CPUS);
6202  	mutex_unlock(&trace_types_lock);
6203  
6204  	return ret;
6205  }
6206  
6207  struct trace_option_dentry;
6208  
6209  static void
6210  create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6211  
6212  /*
6213   * Used to clear out the tracer before deletion of an instance.
6214   * Must have trace_types_lock held.
6215   */
6216  static void tracing_set_nop(struct trace_array *tr)
6217  {
6218  	if (tr->current_trace == &nop_trace)
6219  		return;
6220  
6221  	tr->current_trace->enabled--;
6222  
6223  	if (tr->current_trace->reset)
6224  		tr->current_trace->reset(tr);
6225  
6226  	tr->current_trace = &nop_trace;
6227  }
6228  
6229  static bool tracer_options_updated;
6230  
6231  static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6232  {
6233  	/* Only enable if the directory has been created already. */
6234  	if (!tr->dir)
6235  		return;
6236  
6237  	/* Only create trace option files after update_tracer_options finish */
6238  	if (!tracer_options_updated)
6239  		return;
6240  
6241  	create_trace_option_files(tr, t);
6242  }
6243  
6244  int tracing_set_tracer(struct trace_array *tr, const char *buf)
6245  {
6246  	struct tracer *t;
6247  #ifdef CONFIG_TRACER_MAX_TRACE
6248  	bool had_max_tr;
6249  #endif
6250  	int ret = 0;
6251  
6252  	mutex_lock(&trace_types_lock);
6253  
6254  	update_last_data(tr);
6255  
6256  	if (!tr->ring_buffer_expanded) {
6257  		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6258  						RING_BUFFER_ALL_CPUS);
6259  		if (ret < 0)
6260  			goto out;
6261  		ret = 0;
6262  	}
6263  
6264  	for (t = trace_types; t; t = t->next) {
6265  		if (strcmp(t->name, buf) == 0)
6266  			break;
6267  	}
6268  	if (!t) {
6269  		ret = -EINVAL;
6270  		goto out;
6271  	}
6272  	if (t == tr->current_trace)
6273  		goto out;
6274  
6275  #ifdef CONFIG_TRACER_SNAPSHOT
6276  	if (t->use_max_tr) {
6277  		local_irq_disable();
6278  		arch_spin_lock(&tr->max_lock);
6279  		if (tr->cond_snapshot)
6280  			ret = -EBUSY;
6281  		arch_spin_unlock(&tr->max_lock);
6282  		local_irq_enable();
6283  		if (ret)
6284  			goto out;
6285  	}
6286  #endif
6287  	/* Some tracers won't work on kernel command line */
6288  	if (system_state < SYSTEM_RUNNING && t->noboot) {
6289  		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6290  			t->name);
6291  		goto out;
6292  	}
6293  
6294  	/* Some tracers are only allowed for the top level buffer */
6295  	if (!trace_ok_for_array(t, tr)) {
6296  		ret = -EINVAL;
6297  		goto out;
6298  	}
6299  
6300  	/* If trace pipe files are being read, we can't change the tracer */
6301  	if (tr->trace_ref) {
6302  		ret = -EBUSY;
6303  		goto out;
6304  	}
6305  
6306  	trace_branch_disable();
6307  
6308  	tr->current_trace->enabled--;
6309  
6310  	if (tr->current_trace->reset)
6311  		tr->current_trace->reset(tr);
6312  
6313  #ifdef CONFIG_TRACER_MAX_TRACE
6314  	had_max_tr = tr->current_trace->use_max_tr;
6315  
6316  	/* Current trace needs to be nop_trace before synchronize_rcu */
6317  	tr->current_trace = &nop_trace;
6318  
6319  	if (had_max_tr && !t->use_max_tr) {
6320  		/*
6321  		 * We need to make sure that the update_max_tr sees that
6322  		 * current_trace changed to nop_trace to keep it from
6323  		 * swapping the buffers after we resize it.
6324  		 * The update_max_tr is called from interrupts disabled
6325  		 * so a synchronize_rcu() is sufficient.
6326  		 */
6327  		synchronize_rcu();
6328  		free_snapshot(tr);
6329  		tracing_disarm_snapshot(tr);
6330  	}
6331  
6332  	if (!had_max_tr && t->use_max_tr) {
6333  		ret = tracing_arm_snapshot_locked(tr);
6334  		if (ret)
6335  			goto out;
6336  	}
6337  #else
6338  	tr->current_trace = &nop_trace;
6339  #endif
6340  
6341  	if (t->init) {
6342  		ret = tracer_init(t, tr);
6343  		if (ret) {
6344  #ifdef CONFIG_TRACER_MAX_TRACE
6345  			if (t->use_max_tr)
6346  				tracing_disarm_snapshot(tr);
6347  #endif
6348  			goto out;
6349  		}
6350  	}
6351  
6352  	tr->current_trace = t;
6353  	tr->current_trace->enabled++;
6354  	trace_branch_enable(tr);
6355   out:
6356  	mutex_unlock(&trace_types_lock);
6357  
6358  	return ret;
6359  }
6360  
6361  static ssize_t
6362  tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6363  			size_t cnt, loff_t *ppos)
6364  {
6365  	struct trace_array *tr = filp->private_data;
6366  	char buf[MAX_TRACER_SIZE+1];
6367  	char *name;
6368  	size_t ret;
6369  	int err;
6370  
6371  	ret = cnt;
6372  
6373  	if (cnt > MAX_TRACER_SIZE)
6374  		cnt = MAX_TRACER_SIZE;
6375  
6376  	if (copy_from_user(buf, ubuf, cnt))
6377  		return -EFAULT;
6378  
6379  	buf[cnt] = 0;
6380  
6381  	name = strim(buf);
6382  
6383  	err = tracing_set_tracer(tr, name);
6384  	if (err)
6385  		return err;
6386  
6387  	*ppos += ret;
6388  
6389  	return ret;
6390  }
6391  
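/*
 * Helpers for files that store a latency in nanoseconds internally but
 * expose it in microseconds: reads convert with nsecs_to_usecs() (-1
 * means "unset"), writes multiply the user value by 1000.
 */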
6392  static ssize_t
6393  tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6394  		   size_t cnt, loff_t *ppos)
6395  {
6396  	char buf[64];
6397  	int r;
6398  
6399  	r = snprintf(buf, sizeof(buf), "%ld\n",
6400  		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6401  	if (r > sizeof(buf))
6402  		r = sizeof(buf);
6403  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6404  }
6405  
6406  static ssize_t
6407  tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6408  		    size_t cnt, loff_t *ppos)
6409  {
6410  	unsigned long val;
6411  	int ret;
6412  
6413  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6414  	if (ret)
6415  		return ret;
6416  
6417  	*ptr = val * 1000;
6418  
6419  	return cnt;
6420  }
6421  
6422  static ssize_t
6423  tracing_thresh_read(struct file *filp, char __user *ubuf,
6424  		    size_t cnt, loff_t *ppos)
6425  {
6426  	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6427  }
6428  
6429  static ssize_t
6430  tracing_thresh_write(struct file *filp, const char __user *ubuf,
6431  		     size_t cnt, loff_t *ppos)
6432  {
6433  	struct trace_array *tr = filp->private_data;
6434  	int ret;
6435  
6436  	mutex_lock(&trace_types_lock);
6437  	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6438  	if (ret < 0)
6439  		goto out;
6440  
6441  	if (tr->current_trace->update_thresh) {
6442  		ret = tr->current_trace->update_thresh(tr);
6443  		if (ret < 0)
6444  			goto out;
6445  	}
6446  
6447  	ret = cnt;
6448  out:
6449  	mutex_unlock(&trace_types_lock);
6450  
6451  	return ret;
6452  }
6453  
6454  #ifdef CONFIG_TRACER_MAX_TRACE
6455  
6456  static ssize_t
6457  tracing_max_lat_read(struct file *filp, char __user *ubuf,
6458  		     size_t cnt, loff_t *ppos)
6459  {
6460  	struct trace_array *tr = filp->private_data;
6461  
6462  	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6463  }
6464  
6465  static ssize_t
6466  tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6467  		      size_t cnt, loff_t *ppos)
6468  {
6469  	struct trace_array *tr = filp->private_data;
6470  
6471  	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6472  }
6473  
6474  #endif
6475  
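/*
 * pipe_cpumask tracks which trace_pipe readers are open: the all-CPU
 * pipe can only be opened when no per-CPU pipe is open (it then claims
 * every bit), and each per-CPU pipe can be opened at most once.
 */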
6476  static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6477  {
6478  	if (cpu == RING_BUFFER_ALL_CPUS) {
6479  		if (cpumask_empty(tr->pipe_cpumask)) {
6480  			cpumask_setall(tr->pipe_cpumask);
6481  			return 0;
6482  		}
6483  	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6484  		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6485  		return 0;
6486  	}
6487  	return -EBUSY;
6488  }
6489  
6490  static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6491  {
6492  	if (cpu == RING_BUFFER_ALL_CPUS) {
6493  		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6494  		cpumask_clear(tr->pipe_cpumask);
6495  	} else {
6496  		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6497  		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6498  	}
6499  }
6500  
6501  static int tracing_open_pipe(struct inode *inode, struct file *filp)
6502  {
6503  	struct trace_array *tr = inode->i_private;
6504  	struct trace_iterator *iter;
6505  	int cpu;
6506  	int ret;
6507  
6508  	ret = tracing_check_open_get_tr(tr);
6509  	if (ret)
6510  		return ret;
6511  
6512  	mutex_lock(&trace_types_lock);
6513  	cpu = tracing_get_cpu(inode);
6514  	ret = open_pipe_on_cpu(tr, cpu);
6515  	if (ret)
6516  		goto fail_pipe_on_cpu;
6517  
6518  	/* create a buffer to store the information to pass to userspace */
6519  	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6520  	if (!iter) {
6521  		ret = -ENOMEM;
6522  		goto fail_alloc_iter;
6523  	}
6524  
6525  	trace_seq_init(&iter->seq);
6526  	iter->trace = tr->current_trace;
6527  
6528  	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6529  		ret = -ENOMEM;
6530  		goto fail;
6531  	}
6532  
6533  	/* trace pipe does not show start of buffer */
6534  	cpumask_setall(iter->started);
6535  
6536  	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6537  		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6538  
6539  	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6540  	if (trace_clocks[tr->clock_id].in_ns)
6541  		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6542  
6543  	iter->tr = tr;
6544  	iter->array_buffer = &tr->array_buffer;
6545  	iter->cpu_file = cpu;
6546  	mutex_init(&iter->mutex);
6547  	filp->private_data = iter;
6548  
6549  	if (iter->trace->pipe_open)
6550  		iter->trace->pipe_open(iter);
6551  
6552  	nonseekable_open(inode, filp);
6553  
6554  	tr->trace_ref++;
6555  
6556  	mutex_unlock(&trace_types_lock);
6557  	return ret;
6558  
6559  fail:
6560  	kfree(iter);
6561  fail_alloc_iter:
6562  	close_pipe_on_cpu(tr, cpu);
6563  fail_pipe_on_cpu:
6564  	__trace_array_put(tr);
6565  	mutex_unlock(&trace_types_lock);
6566  	return ret;
6567  }
6568  
6569  static int tracing_release_pipe(struct inode *inode, struct file *file)
6570  {
6571  	struct trace_iterator *iter = file->private_data;
6572  	struct trace_array *tr = inode->i_private;
6573  
6574  	mutex_lock(&trace_types_lock);
6575  
6576  	tr->trace_ref--;
6577  
6578  	if (iter->trace->pipe_close)
6579  		iter->trace->pipe_close(iter);
6580  	close_pipe_on_cpu(tr, iter->cpu_file);
6581  	mutex_unlock(&trace_types_lock);
6582  
6583  	free_trace_iter_content(iter);
6584  	kfree(iter);
6585  
6586  	trace_array_put(tr);
6587  
6588  	return 0;
6589  }
6590  
6591  static __poll_t
6592  trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6593  {
6594  	struct trace_array *tr = iter->tr;
6595  
6596  	/* Iterators are static; they should be either filled or empty */
6597  	if (trace_buffer_iter(iter, iter->cpu_file))
6598  		return EPOLLIN | EPOLLRDNORM;
6599  
6600  	if (tr->trace_flags & TRACE_ITER_BLOCK)
6601  		/*
6602  		 * Always select as readable when in blocking mode
6603  		 */
6604  		return EPOLLIN | EPOLLRDNORM;
6605  	else
6606  		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6607  					     filp, poll_table, iter->tr->buffer_percent);
6608  }
6609  
6610  static __poll_t
6611  tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6612  {
6613  	struct trace_iterator *iter = filp->private_data;
6614  
6615  	return trace_poll(iter, filp, poll_table);
6616  }
6617  
6618  /* Must be called with iter->mutex held. */
6619  static int tracing_wait_pipe(struct file *filp)
6620  {
6621  	struct trace_iterator *iter = filp->private_data;
6622  	int ret;
6623  
6624  	while (trace_empty(iter)) {
6625  
6626  		if ((filp->f_flags & O_NONBLOCK)) {
6627  			return -EAGAIN;
6628  		}
6629  
6630  		/*
6631  		 * We block until we read something and tracing is disabled.
6632  		 * We still block if tracing is disabled, but we have never
6633  		 * read anything. This allows a user to cat this file, and
6634  		 * then enable tracing. But after we have read something,
6635  		 * we give an EOF when tracing is again disabled.
6636  		 *
6637  		 * iter->pos will be 0 if we haven't read anything.
6638  		 */
6639  		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6640  			break;
6641  
6642  		mutex_unlock(&iter->mutex);
6643  
6644  		ret = wait_on_pipe(iter, 0);
6645  
6646  		mutex_lock(&iter->mutex);
6647  
6648  		if (ret)
6649  			return ret;
6650  	}
6651  
6652  	return 1;
6653  }
6654  
6655  /*
6656   * Consumer reader.
6657   */
6658  static ssize_t
6659  tracing_read_pipe(struct file *filp, char __user *ubuf,
6660  		  size_t cnt, loff_t *ppos)
6661  {
6662  	struct trace_iterator *iter = filp->private_data;
6663  	ssize_t sret;
6664  
6665  	/*
6666  	 * Avoid more than one consumer on a single file descriptor.
6667  	 * This is just a matter of trace output coherency; the ring buffer
6668  	 * itself is protected.
6669  	 */
6670  	mutex_lock(&iter->mutex);
6671  
6672  	/* return any leftover data */
6673  	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6674  	if (sret != -EBUSY)
6675  		goto out;
6676  
6677  	trace_seq_init(&iter->seq);
6678  
6679  	if (iter->trace->read) {
6680  		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6681  		if (sret)
6682  			goto out;
6683  	}
6684  
6685  waitagain:
6686  	sret = tracing_wait_pipe(filp);
6687  	if (sret <= 0)
6688  		goto out;
6689  
6690  	/* stop when tracing is finished */
6691  	if (trace_empty(iter)) {
6692  		sret = 0;
6693  		goto out;
6694  	}
6695  
6696  	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6697  		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6698  
6699  	/* reset all but tr, trace, and overruns */
6700  	trace_iterator_reset(iter);
6701  	cpumask_clear(iter->started);
6702  	trace_seq_init(&iter->seq);
6703  
6704  	trace_event_read_lock();
6705  	trace_access_lock(iter->cpu_file);
6706  	while (trace_find_next_entry_inc(iter) != NULL) {
6707  		enum print_line_t ret;
6708  		int save_len = iter->seq.seq.len;
6709  
6710  		ret = print_trace_line(iter);
6711  		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6712  			/*
6713  			 * If one print_trace_line() fills the entire trace_seq in
6714  			 * one shot, trace_seq_to_user() will return -EBUSY because
6715  			 * save_len == 0.  In this case we need to consume it;
6716  			 * otherwise the loop will peek at this event again, resulting in an infinite loop.
6717  			 */
6718  			if (save_len == 0) {
6719  				iter->seq.full = 0;
6720  				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6721  				trace_consume(iter);
6722  				break;
6723  			}
6724  
6725  			/* In other cases, don't print partial lines */
6726  			iter->seq.seq.len = save_len;
6727  			break;
6728  		}
6729  		if (ret != TRACE_TYPE_NO_CONSUME)
6730  			trace_consume(iter);
6731  
6732  		if (trace_seq_used(&iter->seq) >= cnt)
6733  			break;
6734  
6735  		/*
6736  		 * Setting the full flag means we reached the trace_seq buffer
6737  		 * size and we should have left via the partial output condition
6738  		 * above.  One of the trace_seq_* functions was not used properly.
6739  		 */
6740  		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6741  			  iter->ent->type);
6742  	}
6743  	trace_access_unlock(iter->cpu_file);
6744  	trace_event_read_unlock();
6745  
6746  	/* Now copy what we have to the user */
6747  	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6748  	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6749  		trace_seq_init(&iter->seq);
6750  
6751  	/*
6752  	 * If there was nothing to send to user, in spite of consuming trace
6753  	 * entries, go back to wait for more entries.
6754  	 */
6755  	if (sret == -EBUSY)
6756  		goto waitagain;
6757  
6758  out:
6759  	mutex_unlock(&iter->mutex);
6760  
6761  	return sret;
6762  }
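
/*
 * Purely illustrative: a hypothetical user-space consumer of trace_pipe
 * (path assumes tracefs at /sys/kernel/tracing).  read() blocks until
 * entries are available (see tracing_wait_pipe()) and whatever is
 * returned has been consumed from the ring buffer.
 *
 *	char buf[4096];
 *	ssize_t n;
 *	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *	while ((n = read(fd, buf, sizeof(buf))) > 0)
 *		write(STDOUT_FILENO, buf, n);
 */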
6763  
6764  static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6765  				     unsigned int idx)
6766  {
6767  	__free_page(spd->pages[idx]);
6768  }
6769  
6770  static size_t
6771  tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6772  {
6773  	size_t count;
6774  	int save_len;
6775  	int ret;
6776  
6777  	/* Seq buffer is page-sized, exactly what we need. */
6778  	for (;;) {
6779  		save_len = iter->seq.seq.len;
6780  		ret = print_trace_line(iter);
6781  
6782  		if (trace_seq_has_overflowed(&iter->seq)) {
6783  			iter->seq.seq.len = save_len;
6784  			break;
6785  		}
6786  
6787  		/*
6788  		 * This should not be hit, because it should only
6789  		 * be set if the iter->seq overflowed. But check it
6790  		 * anyway to be safe.
6791  		 */
6792  		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6793  			iter->seq.seq.len = save_len;
6794  			break;
6795  		}
6796  
6797  		count = trace_seq_used(&iter->seq) - save_len;
6798  		if (rem < count) {
6799  			rem = 0;
6800  			iter->seq.seq.len = save_len;
6801  			break;
6802  		}
6803  
6804  		if (ret != TRACE_TYPE_NO_CONSUME)
6805  			trace_consume(iter);
6806  		rem -= count;
6807  		if (!trace_find_next_entry_inc(iter))	{
6808  			rem = 0;
6809  			iter->ent = NULL;
6810  			break;
6811  		}
6812  	}
6813  
6814  	return rem;
6815  }
6816  
6817  static ssize_t tracing_splice_read_pipe(struct file *filp,
6818  					loff_t *ppos,
6819  					struct pipe_inode_info *pipe,
6820  					size_t len,
6821  					unsigned int flags)
6822  {
6823  	struct page *pages_def[PIPE_DEF_BUFFERS];
6824  	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6825  	struct trace_iterator *iter = filp->private_data;
6826  	struct splice_pipe_desc spd = {
6827  		.pages		= pages_def,
6828  		.partial	= partial_def,
6829  		.nr_pages	= 0, /* This gets updated below. */
6830  		.nr_pages_max	= PIPE_DEF_BUFFERS,
6831  		.ops		= &default_pipe_buf_ops,
6832  		.spd_release	= tracing_spd_release_pipe,
6833  	};
6834  	ssize_t ret;
6835  	size_t rem;
6836  	unsigned int i;
6837  
6838  	if (splice_grow_spd(pipe, &spd))
6839  		return -ENOMEM;
6840  
6841  	mutex_lock(&iter->mutex);
6842  
6843  	if (iter->trace->splice_read) {
6844  		ret = iter->trace->splice_read(iter, filp,
6845  					       ppos, pipe, len, flags);
6846  		if (ret)
6847  			goto out_err;
6848  	}
6849  
6850  	ret = tracing_wait_pipe(filp);
6851  	if (ret <= 0)
6852  		goto out_err;
6853  
6854  	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6855  		ret = -EFAULT;
6856  		goto out_err;
6857  	}
6858  
6859  	trace_event_read_lock();
6860  	trace_access_lock(iter->cpu_file);
6861  
6862  	/* Fill as many pages as possible. */
6863  	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6864  		spd.pages[i] = alloc_page(GFP_KERNEL);
6865  		if (!spd.pages[i])
6866  			break;
6867  
6868  		rem = tracing_fill_pipe_page(rem, iter);
6869  
6870  		/* Copy the data into the page, so we can start over. */
6871  		ret = trace_seq_to_buffer(&iter->seq,
6872  					  page_address(spd.pages[i]),
6873  					  trace_seq_used(&iter->seq));
6874  		if (ret < 0) {
6875  			__free_page(spd.pages[i]);
6876  			break;
6877  		}
6878  		spd.partial[i].offset = 0;
6879  		spd.partial[i].len = trace_seq_used(&iter->seq);
6880  
6881  		trace_seq_init(&iter->seq);
6882  	}
6883  
6884  	trace_access_unlock(iter->cpu_file);
6885  	trace_event_read_unlock();
6886  	mutex_unlock(&iter->mutex);
6887  
6888  	spd.nr_pages = i;
6889  
6890  	if (i)
6891  		ret = splice_to_pipe(pipe, &spd);
6892  	else
6893  		ret = 0;
6894  out:
6895  	splice_shrink_spd(&spd);
6896  	return ret;
6897  
6898  out_err:
6899  	mutex_unlock(&iter->mutex);
6900  	goto out;
6901  }
6902  
6903  static ssize_t
6904  tracing_entries_read(struct file *filp, char __user *ubuf,
6905  		     size_t cnt, loff_t *ppos)
6906  {
6907  	struct inode *inode = file_inode(filp);
6908  	struct trace_array *tr = inode->i_private;
6909  	int cpu = tracing_get_cpu(inode);
6910  	char buf[64];
6911  	int r = 0;
6912  	ssize_t ret;
6913  
6914  	mutex_lock(&trace_types_lock);
6915  
6916  	if (cpu == RING_BUFFER_ALL_CPUS) {
6917  		int cpu, buf_size_same;
6918  		unsigned long size;
6919  
6920  		size = 0;
6921  		buf_size_same = 1;
6922  		/* check if all cpu sizes are same */
6923  		for_each_tracing_cpu(cpu) {
6924  			/* fill in the size from first enabled cpu */
6925  			if (size == 0)
6926  				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6927  			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6928  				buf_size_same = 0;
6929  				break;
6930  			}
6931  		}
6932  
6933  		if (buf_size_same) {
6934  			if (!tr->ring_buffer_expanded)
6935  				r = sprintf(buf, "%lu (expanded: %lu)\n",
6936  					    size >> 10,
6937  					    trace_buf_size >> 10);
6938  			else
6939  				r = sprintf(buf, "%lu\n", size >> 10);
6940  		} else
6941  			r = sprintf(buf, "X\n");
6942  	} else
6943  		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6944  
6945  	mutex_unlock(&trace_types_lock);
6946  
6947  	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6948  	return ret;
6949  }
6950  
6951  static ssize_t
6952  tracing_entries_write(struct file *filp, const char __user *ubuf,
6953  		      size_t cnt, loff_t *ppos)
6954  {
6955  	struct inode *inode = file_inode(filp);
6956  	struct trace_array *tr = inode->i_private;
6957  	unsigned long val;
6958  	int ret;
6959  
6960  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6961  	if (ret)
6962  		return ret;
6963  
6964  	/* must have at least 1 entry */
6965  	if (!val)
6966  		return -EINVAL;
6967  
6968  	/* value is in KB */
6969  	val <<= 10;
6970  	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6971  	if (ret < 0)
6972  		return ret;
6973  
6974  	*ppos += cnt;
6975  
6976  	return cnt;
6977  }
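
/*
 * Purely illustrative (path assumes tracefs at /sys/kernel/tracing):
 * the value written to buffer_size_kb is taken in kilobytes (see the
 * "val <<= 10" above) and resizes either one CPU's buffer
 * (per_cpu/cpuN/buffer_size_kb) or all of them.
 *
 *	int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *
 *	write(fd, "1408", 4);	resize every per-CPU buffer to 1408 KB
 *	close(fd);
 */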
6978  
6979  static ssize_t
6980  tracing_total_entries_read(struct file *filp, char __user *ubuf,
6981  				size_t cnt, loff_t *ppos)
6982  {
6983  	struct trace_array *tr = filp->private_data;
6984  	char buf[64];
6985  	int r, cpu;
6986  	unsigned long size = 0, expanded_size = 0;
6987  
6988  	mutex_lock(&trace_types_lock);
6989  	for_each_tracing_cpu(cpu) {
6990  		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6991  		if (!tr->ring_buffer_expanded)
6992  			expanded_size += trace_buf_size >> 10;
6993  	}
6994  	if (tr->ring_buffer_expanded)
6995  		r = sprintf(buf, "%lu\n", size);
6996  	else
6997  		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6998  	mutex_unlock(&trace_types_lock);
6999  
7000  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7001  }
7002  
7003  static ssize_t
7004  tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7005  {
7006  	struct trace_array *tr = filp->private_data;
7007  	struct seq_buf seq;
7008  	char buf[64];
7009  
7010  	seq_buf_init(&seq, buf, 64);
7011  
7012  	seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta);
7013  	seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta);
7014  
7015  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq));
7016  }
7017  
7018  static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7019  {
7020  	struct trace_array *tr = inode->i_private;
7021  	int cpu = tracing_get_cpu(inode);
7022  	int ret;
7023  
7024  	ret = tracing_check_open_get_tr(tr);
7025  	if (ret)
7026  		return ret;
7027  
7028  	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7029  	if (ret < 0)
7030  		__trace_array_put(tr);
7031  	return ret;
7032  }
7033  
7034  static ssize_t
7035  tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7036  			  size_t cnt, loff_t *ppos)
7037  {
7038  	/*
7039  	 * There is no need to read what the user has written; this function
7040  	 * only exists so that using "echo" on this file does not return an error.
7041  	 */
7042  
7043  	*ppos += cnt;
7044  
7045  	return cnt;
7046  }
7047  
7048  static int
7049  tracing_free_buffer_release(struct inode *inode, struct file *filp)
7050  {
7051  	struct trace_array *tr = inode->i_private;
7052  
7053  	/* disable tracing? */
7054  	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7055  		tracer_tracing_off(tr);
7056  	/* resize the ring buffer to 0 */
7057  	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7058  
7059  	trace_array_put(tr);
7060  
7061  	return 0;
7062  }
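
/*
 * Purely illustrative (path assumes tracefs at /sys/kernel/tracing):
 * writes to free_buffer are ignored; the work is done on release, which
 * shrinks the ring buffer to zero (and turns tracing off first if the
 * stop-on-free option is set).
 *
 *	int fd = open("/sys/kernel/tracing/free_buffer", O_WRONLY);
 *
 *	close(fd);	the buffers are freed here
 */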
7063  
7064  #define TRACE_MARKER_MAX_SIZE		4096
7065  
7066  static ssize_t
7067  tracing_mark_write(struct file *filp, const char __user *ubuf,
7068  					size_t cnt, loff_t *fpos)
7069  {
7070  	struct trace_array *tr = filp->private_data;
7071  	struct ring_buffer_event *event;
7072  	enum event_trigger_type tt = ETT_NONE;
7073  	struct trace_buffer *buffer;
7074  	struct print_entry *entry;
7075  	int meta_size;
7076  	ssize_t written;
7077  	size_t size;
7078  	int len;
7079  
7080  /* Used in tracing_mark_raw_write() as well */
7081  #define FAULTED_STR "<faulted>"
7082  #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7083  
7084  	if (tracing_disabled)
7085  		return -EINVAL;
7086  
7087  	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7088  		return -EINVAL;
7089  
7090  	if ((ssize_t)cnt < 0)
7091  		return -EINVAL;
7092  
7093  	if (cnt > TRACE_MARKER_MAX_SIZE)
7094  		cnt = TRACE_MARKER_MAX_SIZE;
7095  
7096  	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7097   again:
7098  	size = cnt + meta_size;
7099  
7100  	/* If less than "<faulted>", then make sure we can still add that */
7101  	if (cnt < FAULTED_SIZE)
7102  		size += FAULTED_SIZE - cnt;
7103  
7104  	buffer = tr->array_buffer.buffer;
7105  	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7106  					    tracing_gen_ctx());
7107  	if (unlikely(!event)) {
7108  		/*
7109  		 * If the size was greater than what was allowed, then
7110  		 * make it smaller and try again.
7111  		 */
7112  		if (size > ring_buffer_max_event_size(buffer)) {
7113  			/* A size padded up to FAULTED_SIZE should never exceed the max */
7114  			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7115  				return -EBADF;
7116  			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7117  			/* The above should only happen once */
7118  			if (WARN_ON_ONCE(cnt + meta_size == size))
7119  				return -EBADF;
7120  			goto again;
7121  		}
7122  
7123  		/* Ring buffer disabled, return as if not open for write */
7124  		return -EBADF;
7125  	}
7126  
7127  	entry = ring_buffer_event_data(event);
7128  	entry->ip = _THIS_IP_;
7129  
7130  	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7131  	if (len) {
7132  		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7133  		cnt = FAULTED_SIZE;
7134  		written = -EFAULT;
7135  	} else
7136  		written = cnt;
7137  
7138  	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7139  		/* do not add \n before testing triggers, but add \0 */
7140  		entry->buf[cnt] = '\0';
7141  		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7142  	}
7143  
7144  	if (entry->buf[cnt - 1] != '\n') {
7145  		entry->buf[cnt] = '\n';
7146  		entry->buf[cnt + 1] = '\0';
7147  	} else
7148  		entry->buf[cnt] = '\0';
7149  
7150  	if (static_branch_unlikely(&trace_marker_exports_enabled))
7151  		ftrace_exports(event, TRACE_EXPORT_MARKER);
7152  	__buffer_unlock_commit(buffer, event);
7153  
7154  	if (tt)
7155  		event_triggers_post_call(tr->trace_marker_file, tt);
7156  
7157  	return written;
7158  }
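
/*
 * Purely illustrative (path assumes tracefs at /sys/kernel/tracing):
 * user space annotates a trace by writing a string (at most
 * TRACE_MARKER_MAX_SIZE bytes) to trace_marker; it shows up in the
 * trace as a TRACE_PRINT event.
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *	write(fd, "hello from user space", 21);
 */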
7159  
7160  static ssize_t
7161  tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7162  					size_t cnt, loff_t *fpos)
7163  {
7164  	struct trace_array *tr = filp->private_data;
7165  	struct ring_buffer_event *event;
7166  	struct trace_buffer *buffer;
7167  	struct raw_data_entry *entry;
7168  	ssize_t written;
7169  	int size;
7170  	int len;
7171  
7172  #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7173  
7174  	if (tracing_disabled)
7175  		return -EINVAL;
7176  
7177  	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7178  		return -EINVAL;
7179  
7180  	/* The marker must at least have a tag id */
7181  	if (cnt < sizeof(unsigned int))
7182  		return -EINVAL;
7183  
7184  	size = sizeof(*entry) + cnt;
7185  	if (cnt < FAULT_SIZE_ID)
7186  		size += FAULT_SIZE_ID - cnt;
7187  
7188  	buffer = tr->array_buffer.buffer;
7189  
7190  	if (size > ring_buffer_max_event_size(buffer))
7191  		return -EINVAL;
7192  
7193  	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7194  					    tracing_gen_ctx());
7195  	if (!event)
7196  		/* Ring buffer disabled, return as if not open for write */
7197  		return -EBADF;
7198  
7199  	entry = ring_buffer_event_data(event);
7200  
7201  	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7202  	if (len) {
7203  		entry->id = -1;
7204  		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7205  		written = -EFAULT;
7206  	} else
7207  		written = cnt;
7208  
7209  	__buffer_unlock_commit(buffer, event);
7210  
7211  	return written;
7212  }
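
/*
 * Purely illustrative (path assumes tracefs at /sys/kernel/tracing):
 * trace_marker_raw expects a binary payload that starts with a 4-byte
 * tag id (hence the sizeof(unsigned int) check above), followed by
 * arbitrary data.
 *
 *	struct { unsigned int id; char data[8]; } rec = { 42, "payload" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	write(fd, &rec, sizeof(rec));
 */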
7213  
7214  static int tracing_clock_show(struct seq_file *m, void *v)
7215  {
7216  	struct trace_array *tr = m->private;
7217  	int i;
7218  
7219  	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7220  		seq_printf(m,
7221  			"%s%s%s%s", i ? " " : "",
7222  			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7223  			i == tr->clock_id ? "]" : "");
7224  	seq_putc(m, '\n');
7225  
7226  	return 0;
7227  }
7228  
7229  int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7230  {
7231  	int i;
7232  
7233  	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7234  		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7235  			break;
7236  	}
7237  	if (i == ARRAY_SIZE(trace_clocks))
7238  		return -EINVAL;
7239  
7240  	mutex_lock(&trace_types_lock);
7241  
7242  	tr->clock_id = i;
7243  
7244  	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7245  
7246  	/*
7247  	 * New clock may not be consistent with the previous clock.
7248  	 * Reset the buffer so that it doesn't have incomparable timestamps.
7249  	 */
7250  	tracing_reset_online_cpus(&tr->array_buffer);
7251  
7252  #ifdef CONFIG_TRACER_MAX_TRACE
7253  	if (tr->max_buffer.buffer)
7254  		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7255  	tracing_reset_online_cpus(&tr->max_buffer);
7256  #endif
7257  
7258  	mutex_unlock(&trace_types_lock);
7259  
7260  	return 0;
7261  }
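
/*
 * Purely illustrative (path assumes tracefs at /sys/kernel/tracing):
 * selecting a different trace clock from user space.  Writing a name
 * listed in trace_clock (e.g. "mono") resets the buffers, as noted
 * above, because old and new timestamps cannot be compared.
 *
 *	int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *	write(fd, "mono", 4);
 */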
7262  
7263  static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7264  				   size_t cnt, loff_t *fpos)
7265  {
7266  	struct seq_file *m = filp->private_data;
7267  	struct trace_array *tr = m->private;
7268  	char buf[64];
7269  	const char *clockstr;
7270  	int ret;
7271  
7272  	if (cnt >= sizeof(buf))
7273  		return -EINVAL;
7274  
7275  	if (copy_from_user(buf, ubuf, cnt))
7276  		return -EFAULT;
7277  
7278  	buf[cnt] = 0;
7279  
7280  	clockstr = strstrip(buf);
7281  
7282  	ret = tracing_set_clock(tr, clockstr);
7283  	if (ret)
7284  		return ret;
7285  
7286  	*fpos += cnt;
7287  
7288  	return cnt;
7289  }
7290  
7291  static int tracing_clock_open(struct inode *inode, struct file *file)
7292  {
7293  	struct trace_array *tr = inode->i_private;
7294  	int ret;
7295  
7296  	ret = tracing_check_open_get_tr(tr);
7297  	if (ret)
7298  		return ret;
7299  
7300  	ret = single_open(file, tracing_clock_show, inode->i_private);
7301  	if (ret < 0)
7302  		trace_array_put(tr);
7303  
7304  	return ret;
7305  }
7306  
7307  static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7308  {
7309  	struct trace_array *tr = m->private;
7310  
7311  	mutex_lock(&trace_types_lock);
7312  
7313  	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7314  		seq_puts(m, "delta [absolute]\n");
7315  	else
7316  		seq_puts(m, "[delta] absolute\n");
7317  
7318  	mutex_unlock(&trace_types_lock);
7319  
7320  	return 0;
7321  }
7322  
7323  static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7324  {
7325  	struct trace_array *tr = inode->i_private;
7326  	int ret;
7327  
7328  	ret = tracing_check_open_get_tr(tr);
7329  	if (ret)
7330  		return ret;
7331  
7332  	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7333  	if (ret < 0)
7334  		trace_array_put(tr);
7335  
7336  	return ret;
7337  }
7338  
7339  u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7340  {
7341  	if (rbe == this_cpu_read(trace_buffered_event))
7342  		return ring_buffer_time_stamp(buffer);
7343  
7344  	return ring_buffer_event_time_stamp(buffer, rbe);
7345  }
7346  
7347  /*
7348   * Set or disable using the per CPU trace_buffered_event when possible.
7349   */
7350  int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7351  {
7352  	int ret = 0;
7353  
7354  	mutex_lock(&trace_types_lock);
7355  
7356  	if (set && tr->no_filter_buffering_ref++)
7357  		goto out;
7358  
7359  	if (!set) {
7360  		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7361  			ret = -EINVAL;
7362  			goto out;
7363  		}
7364  
7365  		--tr->no_filter_buffering_ref;
7366  	}
7367   out:
7368  	mutex_unlock(&trace_types_lock);
7369  
7370  	return ret;
7371  }
7372  
7373  struct ftrace_buffer_info {
7374  	struct trace_iterator	iter;
7375  	void			*spare;
7376  	unsigned int		spare_cpu;
7377  	unsigned int		spare_size;
7378  	unsigned int		read;
7379  };
7380  
7381  #ifdef CONFIG_TRACER_SNAPSHOT
7382  static int tracing_snapshot_open(struct inode *inode, struct file *file)
7383  {
7384  	struct trace_array *tr = inode->i_private;
7385  	struct trace_iterator *iter;
7386  	struct seq_file *m;
7387  	int ret;
7388  
7389  	ret = tracing_check_open_get_tr(tr);
7390  	if (ret)
7391  		return ret;
7392  
7393  	if (file->f_mode & FMODE_READ) {
7394  		iter = __tracing_open(inode, file, true);
7395  		if (IS_ERR(iter))
7396  			ret = PTR_ERR(iter);
7397  	} else {
7398  		/* Writes still need the seq_file to hold the private data */
7399  		ret = -ENOMEM;
7400  		m = kzalloc(sizeof(*m), GFP_KERNEL);
7401  		if (!m)
7402  			goto out;
7403  		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7404  		if (!iter) {
7405  			kfree(m);
7406  			goto out;
7407  		}
7408  		ret = 0;
7409  
7410  		iter->tr = tr;
7411  		iter->array_buffer = &tr->max_buffer;
7412  		iter->cpu_file = tracing_get_cpu(inode);
7413  		m->private = iter;
7414  		file->private_data = m;
7415  	}
7416  out:
7417  	if (ret < 0)
7418  		trace_array_put(tr);
7419  
7420  	return ret;
7421  }
7422  
7423  static void tracing_swap_cpu_buffer(void *tr)
7424  {
7425  	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7426  }
7427  
7428  static ssize_t
7429  tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7430  		       loff_t *ppos)
7431  {
7432  	struct seq_file *m = filp->private_data;
7433  	struct trace_iterator *iter = m->private;
7434  	struct trace_array *tr = iter->tr;
7435  	unsigned long val;
7436  	int ret;
7437  
7438  	ret = tracing_update_buffers(tr);
7439  	if (ret < 0)
7440  		return ret;
7441  
7442  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7443  	if (ret)
7444  		return ret;
7445  
7446  	mutex_lock(&trace_types_lock);
7447  
7448  	if (tr->current_trace->use_max_tr) {
7449  		ret = -EBUSY;
7450  		goto out;
7451  	}
7452  
7453  	local_irq_disable();
7454  	arch_spin_lock(&tr->max_lock);
7455  	if (tr->cond_snapshot)
7456  		ret = -EBUSY;
7457  	arch_spin_unlock(&tr->max_lock);
7458  	local_irq_enable();
7459  	if (ret)
7460  		goto out;
7461  
7462  	switch (val) {
7463  	case 0:
7464  		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7465  			ret = -EINVAL;
7466  			break;
7467  		}
7468  		if (tr->allocated_snapshot)
7469  			free_snapshot(tr);
7470  		break;
7471  	case 1:
7472  /* Only allow per-cpu swap if the ring buffer supports it */
7473  #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7474  		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7475  			ret = -EINVAL;
7476  			break;
7477  		}
7478  #endif
7479  		if (tr->allocated_snapshot)
7480  			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7481  					&tr->array_buffer, iter->cpu_file);
7482  
7483  		ret = tracing_arm_snapshot_locked(tr);
7484  		if (ret)
7485  			break;
7486  
7487  		/* Now, we're going to swap */
7488  		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7489  			local_irq_disable();
7490  			update_max_tr(tr, current, smp_processor_id(), NULL);
7491  			local_irq_enable();
7492  		} else {
7493  			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7494  						 (void *)tr, 1);
7495  		}
7496  		tracing_disarm_snapshot(tr);
7497  		break;
7498  	default:
7499  		if (tr->allocated_snapshot) {
7500  			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7501  				tracing_reset_online_cpus(&tr->max_buffer);
7502  			else
7503  				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7504  		}
7505  		break;
7506  	}
7507  
7508  	if (ret >= 0) {
7509  		*ppos += cnt;
7510  		ret = cnt;
7511  	}
7512  out:
7513  	mutex_unlock(&trace_types_lock);
7514  	return ret;
7515  }
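
/*
 * Purely illustrative summary of the snapshot file semantics handled
 * above (path assumes tracefs at /sys/kernel/tracing): writing 1
 * allocates the snapshot buffer if needed and swaps it with the live
 * buffer, writing 0 frees it, and any other value just clears it.
 *
 *	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *	write(fd, "1", 1);	take a snapshot now
 *	write(fd, "0", 1);	free the snapshot buffer
 */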
7516  
7517  static int tracing_snapshot_release(struct inode *inode, struct file *file)
7518  {
7519  	struct seq_file *m = file->private_data;
7520  	int ret;
7521  
7522  	ret = tracing_release(inode, file);
7523  
7524  	if (file->f_mode & FMODE_READ)
7525  		return ret;
7526  
7527  	/* If write only, the seq_file is just a stub */
7528  	if (m)
7529  		kfree(m->private);
7530  	kfree(m);
7531  
7532  	return 0;
7533  }
7534  
7535  static int tracing_buffers_open(struct inode *inode, struct file *filp);
7536  static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7537  				    size_t count, loff_t *ppos);
7538  static int tracing_buffers_release(struct inode *inode, struct file *file);
7539  static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7540  		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7541  
7542  static int snapshot_raw_open(struct inode *inode, struct file *filp)
7543  {
7544  	struct ftrace_buffer_info *info;
7545  	int ret;
7546  
7547  	/* The following checks for tracefs lockdown */
7548  	ret = tracing_buffers_open(inode, filp);
7549  	if (ret < 0)
7550  		return ret;
7551  
7552  	info = filp->private_data;
7553  
7554  	if (info->iter.trace->use_max_tr) {
7555  		tracing_buffers_release(inode, filp);
7556  		return -EBUSY;
7557  	}
7558  
7559  	info->iter.snapshot = true;
7560  	info->iter.array_buffer = &info->iter.tr->max_buffer;
7561  
7562  	return ret;
7563  }
7564  
7565  #endif /* CONFIG_TRACER_SNAPSHOT */
7566  
7567  
7568  static const struct file_operations tracing_thresh_fops = {
7569  	.open		= tracing_open_generic,
7570  	.read		= tracing_thresh_read,
7571  	.write		= tracing_thresh_write,
7572  	.llseek		= generic_file_llseek,
7573  };
7574  
7575  #ifdef CONFIG_TRACER_MAX_TRACE
7576  static const struct file_operations tracing_max_lat_fops = {
7577  	.open		= tracing_open_generic_tr,
7578  	.read		= tracing_max_lat_read,
7579  	.write		= tracing_max_lat_write,
7580  	.llseek		= generic_file_llseek,
7581  	.release	= tracing_release_generic_tr,
7582  };
7583  #endif
7584  
7585  static const struct file_operations set_tracer_fops = {
7586  	.open		= tracing_open_generic_tr,
7587  	.read		= tracing_set_trace_read,
7588  	.write		= tracing_set_trace_write,
7589  	.llseek		= generic_file_llseek,
7590  	.release	= tracing_release_generic_tr,
7591  };
7592  
7593  static const struct file_operations tracing_pipe_fops = {
7594  	.open		= tracing_open_pipe,
7595  	.poll		= tracing_poll_pipe,
7596  	.read		= tracing_read_pipe,
7597  	.splice_read	= tracing_splice_read_pipe,
7598  	.release	= tracing_release_pipe,
7599  };
7600  
7601  static const struct file_operations tracing_entries_fops = {
7602  	.open		= tracing_open_generic_tr,
7603  	.read		= tracing_entries_read,
7604  	.write		= tracing_entries_write,
7605  	.llseek		= generic_file_llseek,
7606  	.release	= tracing_release_generic_tr,
7607  };
7608  
7609  static const struct file_operations tracing_buffer_meta_fops = {
7610  	.open		= tracing_buffer_meta_open,
7611  	.read		= seq_read,
7612  	.llseek		= seq_lseek,
7613  	.release	= tracing_seq_release,
7614  };
7615  
7616  static const struct file_operations tracing_total_entries_fops = {
7617  	.open		= tracing_open_generic_tr,
7618  	.read		= tracing_total_entries_read,
7619  	.llseek		= generic_file_llseek,
7620  	.release	= tracing_release_generic_tr,
7621  };
7622  
7623  static const struct file_operations tracing_free_buffer_fops = {
7624  	.open		= tracing_open_generic_tr,
7625  	.write		= tracing_free_buffer_write,
7626  	.release	= tracing_free_buffer_release,
7627  };
7628  
7629  static const struct file_operations tracing_mark_fops = {
7630  	.open		= tracing_mark_open,
7631  	.write		= tracing_mark_write,
7632  	.release	= tracing_release_generic_tr,
7633  };
7634  
7635  static const struct file_operations tracing_mark_raw_fops = {
7636  	.open		= tracing_mark_open,
7637  	.write		= tracing_mark_raw_write,
7638  	.release	= tracing_release_generic_tr,
7639  };
7640  
7641  static const struct file_operations trace_clock_fops = {
7642  	.open		= tracing_clock_open,
7643  	.read		= seq_read,
7644  	.llseek		= seq_lseek,
7645  	.release	= tracing_single_release_tr,
7646  	.write		= tracing_clock_write,
7647  };
7648  
7649  static const struct file_operations trace_time_stamp_mode_fops = {
7650  	.open		= tracing_time_stamp_mode_open,
7651  	.read		= seq_read,
7652  	.llseek		= seq_lseek,
7653  	.release	= tracing_single_release_tr,
7654  };
7655  
7656  static const struct file_operations last_boot_fops = {
7657  	.open		= tracing_open_generic_tr,
7658  	.read		= tracing_last_boot_read,
7659  	.llseek		= generic_file_llseek,
7660  	.release	= tracing_release_generic_tr,
7661  };
7662  
7663  #ifdef CONFIG_TRACER_SNAPSHOT
7664  static const struct file_operations snapshot_fops = {
7665  	.open		= tracing_snapshot_open,
7666  	.read		= seq_read,
7667  	.write		= tracing_snapshot_write,
7668  	.llseek		= tracing_lseek,
7669  	.release	= tracing_snapshot_release,
7670  };
7671  
7672  static const struct file_operations snapshot_raw_fops = {
7673  	.open		= snapshot_raw_open,
7674  	.read		= tracing_buffers_read,
7675  	.release	= tracing_buffers_release,
7676  	.splice_read	= tracing_buffers_splice_read,
7677  };
7678  
7679  #endif /* CONFIG_TRACER_SNAPSHOT */
7680  
7681  /*
7682   * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7683   * @filp: The active open file structure
7684   * @ubuf: The userspace provided buffer to read the value from
7685   * @cnt: The maximum number of bytes to read
7686   * @ppos: The current "file" position
7687   *
7688   * This function implements the write interface for a struct trace_min_max_param.
7689   * The filp->private_data must point to a trace_min_max_param structure that
7690   * defines where to write the value, the min and the max acceptable values,
7691   * and a lock to protect the write.
7692   */
7693  static ssize_t
7694  trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7695  {
7696  	struct trace_min_max_param *param = filp->private_data;
7697  	u64 val;
7698  	int err;
7699  
7700  	if (!param)
7701  		return -EFAULT;
7702  
7703  	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7704  	if (err)
7705  		return err;
7706  
7707  	if (param->lock)
7708  		mutex_lock(param->lock);
7709  
7710  	if (param->min && val < *param->min)
7711  		err = -EINVAL;
7712  
7713  	if (param->max && val > *param->max)
7714  		err = -EINVAL;
7715  
7716  	if (!err)
7717  		*param->val = val;
7718  
7719  	if (param->lock)
7720  		mutex_unlock(param->lock);
7721  
7722  	if (err)
7723  		return err;
7724  
7725  	return cnt;
7726  }
7727  
7728  /*
7729   * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7730   * @filp: The active open file structure
7731   * @ubuf: The userspace provided buffer to read value into
7732   * @cnt: The maximum number of bytes to read
7733   * @ppos: The current "file" position
7734   *
7735   * This function implements the read interface for a struct trace_min_max_param.
7736   * The filp->private_data must point to a trace_min_max_param struct with valid
7737   * data.
7738   */
7739  static ssize_t
7740  trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7741  {
7742  	struct trace_min_max_param *param = filp->private_data;
7743  	char buf[U64_STR_SIZE];
7744  	int len;
7745  	u64 val;
7746  
7747  	if (!param)
7748  		return -EFAULT;
7749  
7750  	val = *param->val;
7751  
7752  	if (cnt > sizeof(buf))
7753  		cnt = sizeof(buf);
7754  
7755  	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7756  
7757  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7758  }
7759  
7760  const struct file_operations trace_min_max_fops = {
7761  	.open		= tracing_open_generic,
7762  	.read		= trace_min_max_read,
7763  	.write		= trace_min_max_write,
7764  };
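
/*
 * A hypothetical in-kernel sketch (all names below are invented for
 * illustration) of wiring a u64 knob through trace_min_max_fops: the
 * tracefs file's private data points at a trace_min_max_param that
 * describes the value, its bounds and an optional lock.
 *
 *	static u64 my_val = 50, my_min = 0, my_max = 100;
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_mutex,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	tracefs_create_file("my_knob", TRACE_MODE_WRITE, parent_dentry,
 *			    &my_param, &trace_min_max_fops);
 */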
7765  
7766  #define TRACING_LOG_ERRS_MAX	8
7767  #define TRACING_LOG_LOC_MAX	128
7768  
7769  #define CMD_PREFIX "  Command: "
7770  
7771  struct err_info {
7772  	const char	**errs;	/* ptr to loc-specific array of err strings */
7773  	u8		type;	/* index into errs -> specific err string */
7774  	u16		pos;	/* caret position */
7775  	u64		ts;
7776  };
7777  
7778  struct tracing_log_err {
7779  	struct list_head	list;
7780  	struct err_info		info;
7781  	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7782  	char			*cmd;                     /* what caused err */
7783  };
7784  
7785  static DEFINE_MUTEX(tracing_err_log_lock);
7786  
7787  static struct tracing_log_err *alloc_tracing_log_err(int len)
7788  {
7789  	struct tracing_log_err *err;
7790  
7791  	err = kzalloc(sizeof(*err), GFP_KERNEL);
7792  	if (!err)
7793  		return ERR_PTR(-ENOMEM);
7794  
7795  	err->cmd = kzalloc(len, GFP_KERNEL);
7796  	if (!err->cmd) {
7797  		kfree(err);
7798  		return ERR_PTR(-ENOMEM);
7799  	}
7800  
7801  	return err;
7802  }
7803  
7804  static void free_tracing_log_err(struct tracing_log_err *err)
7805  {
7806  	kfree(err->cmd);
7807  	kfree(err);
7808  }
7809  
7810  static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7811  						   int len)
7812  {
7813  	struct tracing_log_err *err;
7814  	char *cmd;
7815  
7816  	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7817  		err = alloc_tracing_log_err(len);
7818  		if (PTR_ERR(err) != -ENOMEM)
7819  			tr->n_err_log_entries++;
7820  
7821  		return err;
7822  	}
7823  	cmd = kzalloc(len, GFP_KERNEL);
7824  	if (!cmd)
7825  		return ERR_PTR(-ENOMEM);
7826  	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7827  	kfree(err->cmd);
7828  	err->cmd = cmd;
7829  	list_del(&err->list);
7830  
7831  	return err;
7832  }
7833  
7834  /**
7835   * err_pos - find the position of a string within a command for error careting
7836   * @cmd: The tracing command that caused the error
7837   * @str: The string to position the caret at within @cmd
7838   *
7839   * Finds the position of the first occurrence of @str within @cmd.  The
7840   * return value can be passed to tracing_log_err() for caret placement
7841   * within @cmd.
7842   *
7843   * Returns the index within @cmd of the first occurrence of @str or 0
7844   * if @str was not found.
7845   */
7846  unsigned int err_pos(char *cmd, const char *str)
7847  {
7848  	char *found;
7849  
7850  	if (WARN_ON(!strlen(cmd)))
7851  		return 0;
7852  
7853  	found = strstr(cmd, str);
7854  	if (found)
7855  		return found - cmd;
7856  
7857  	return 0;
7858  }
7859  
7860  /**
7861   * tracing_log_err - write an error to the tracing error log
7862   * @tr: The associated trace array for the error (NULL for top level array)
7863   * @loc: A string describing where the error occurred
7864   * @cmd: The tracing command that caused the error
7865   * @errs: The array of loc-specific static error strings
7866   * @type: The index into errs[], which produces the specific static err string
7867   * @pos: The position the caret should be placed in the cmd
7868   *
7869   * Writes an error into tracing/error_log of the form:
7870   *
7871   * <loc>: error: <text>
7872   *   Command: <cmd>
7873   *              ^
7874   *
7875   * tracing/error_log is a small log file containing the last
7876   * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7877   * unless there has been a tracing error, and the error log can be
7878   * cleared and have its memory freed by writing the empty string in
7879   * truncation mode to it, i.e. echo > tracing/error_log.
7880   *
7881   * NOTE: the @errs array along with the @type param are used to
7882   * produce a static error string - this string is not copied and saved
7883   * when the error is logged - only a pointer to it is saved.  See
7884   * existing callers for examples of how static strings are typically
7885   * defined for use with tracing_log_err().
7886   */
7887  void tracing_log_err(struct trace_array *tr,
7888  		     const char *loc, const char *cmd,
7889  		     const char **errs, u8 type, u16 pos)
7890  {
7891  	struct tracing_log_err *err;
7892  	int len = 0;
7893  
7894  	if (!tr)
7895  		tr = &global_trace;
7896  
7897  	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7898  
7899  	mutex_lock(&tracing_err_log_lock);
7900  	err = get_tracing_log_err(tr, len);
7901  	if (PTR_ERR(err) == -ENOMEM) {
7902  		mutex_unlock(&tracing_err_log_lock);
7903  		return;
7904  	}
7905  
7906  	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7907  	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7908  
7909  	err->info.errs = errs;
7910  	err->info.type = type;
7911  	err->info.pos = pos;
7912  	err->info.ts = local_clock();
7913  
7914  	list_add_tail(&err->list, &tr->err_log);
7915  	mutex_unlock(&tracing_err_log_lock);
7916  }
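
/*
 * A hypothetical caller-side sketch (the error strings are invented for
 * illustration): a tracing command parser reports a parse failure and
 * places the caret under the offending token via err_pos().
 *
 *	static const char *my_errs[] = { "Invalid field", "Missing value" };
 *
 *	tracing_log_err(tr, "my_cmd", cmd_str, my_errs, 0,
 *			err_pos(cmd_str, bad_token));
 */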
7917  
7918  static void clear_tracing_err_log(struct trace_array *tr)
7919  {
7920  	struct tracing_log_err *err, *next;
7921  
7922  	mutex_lock(&tracing_err_log_lock);
7923  	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7924  		list_del(&err->list);
7925  		free_tracing_log_err(err);
7926  	}
7927  
7928  	tr->n_err_log_entries = 0;
7929  	mutex_unlock(&tracing_err_log_lock);
7930  }
7931  
7932  static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7933  {
7934  	struct trace_array *tr = m->private;
7935  
7936  	mutex_lock(&tracing_err_log_lock);
7937  
7938  	return seq_list_start(&tr->err_log, *pos);
7939  }
7940  
7941  static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7942  {
7943  	struct trace_array *tr = m->private;
7944  
7945  	return seq_list_next(v, &tr->err_log, pos);
7946  }
7947  
7948  static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7949  {
7950  	mutex_unlock(&tracing_err_log_lock);
7951  }
7952  
7953  static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7954  {
7955  	u16 i;
7956  
7957  	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7958  		seq_putc(m, ' ');
7959  	for (i = 0; i < pos; i++)
7960  		seq_putc(m, ' ');
7961  	seq_puts(m, "^\n");
7962  }
7963  
7964  static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7965  {
7966  	struct tracing_log_err *err = v;
7967  
7968  	if (err) {
7969  		const char *err_text = err->info.errs[err->info.type];
7970  		u64 sec = err->info.ts;
7971  		u32 nsec;
7972  
7973  		nsec = do_div(sec, NSEC_PER_SEC);
7974  		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7975  			   err->loc, err_text);
7976  		seq_printf(m, "%s", err->cmd);
7977  		tracing_err_log_show_pos(m, err->info.pos);
7978  	}
7979  
7980  	return 0;
7981  }
7982  
7983  static const struct seq_operations tracing_err_log_seq_ops = {
7984  	.start  = tracing_err_log_seq_start,
7985  	.next   = tracing_err_log_seq_next,
7986  	.stop   = tracing_err_log_seq_stop,
7987  	.show   = tracing_err_log_seq_show
7988  };
7989  
7990  static int tracing_err_log_open(struct inode *inode, struct file *file)
7991  {
7992  	struct trace_array *tr = inode->i_private;
7993  	int ret = 0;
7994  
7995  	ret = tracing_check_open_get_tr(tr);
7996  	if (ret)
7997  		return ret;
7998  
7999  	/* If this file was opened for write, then erase contents */
8000  	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8001  		clear_tracing_err_log(tr);
8002  
8003  	if (file->f_mode & FMODE_READ) {
8004  		ret = seq_open(file, &tracing_err_log_seq_ops);
8005  		if (!ret) {
8006  			struct seq_file *m = file->private_data;
8007  			m->private = tr;
8008  		} else {
8009  			trace_array_put(tr);
8010  		}
8011  	}
8012  	return ret;
8013  }
8014  
8015  static ssize_t tracing_err_log_write(struct file *file,
8016  				     const char __user *buffer,
8017  				     size_t count, loff_t *ppos)
8018  {
8019  	return count;
8020  }
8021  
8022  static int tracing_err_log_release(struct inode *inode, struct file *file)
8023  {
8024  	struct trace_array *tr = inode->i_private;
8025  
8026  	trace_array_put(tr);
8027  
8028  	if (file->f_mode & FMODE_READ)
8029  		seq_release(inode, file);
8030  
8031  	return 0;
8032  }
8033  
8034  static const struct file_operations tracing_err_log_fops = {
8035  	.open           = tracing_err_log_open,
8036  	.write		= tracing_err_log_write,
8037  	.read           = seq_read,
8038  	.llseek         = tracing_lseek,
8039  	.release        = tracing_err_log_release,
8040  };
8041  
8042  static int tracing_buffers_open(struct inode *inode, struct file *filp)
8043  {
8044  	struct trace_array *tr = inode->i_private;
8045  	struct ftrace_buffer_info *info;
8046  	int ret;
8047  
8048  	ret = tracing_check_open_get_tr(tr);
8049  	if (ret)
8050  		return ret;
8051  
8052  	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8053  	if (!info) {
8054  		trace_array_put(tr);
8055  		return -ENOMEM;
8056  	}
8057  
8058  	mutex_lock(&trace_types_lock);
8059  
8060  	info->iter.tr		= tr;
8061  	info->iter.cpu_file	= tracing_get_cpu(inode);
8062  	info->iter.trace	= tr->current_trace;
8063  	info->iter.array_buffer = &tr->array_buffer;
8064  	info->spare		= NULL;
8065  	/* Force reading ring buffer for first read */
8066  	info->read		= (unsigned int)-1;
8067  
8068  	filp->private_data = info;
8069  
8070  	tr->trace_ref++;
8071  
8072  	mutex_unlock(&trace_types_lock);
8073  
8074  	ret = nonseekable_open(inode, filp);
8075  	if (ret < 0)
8076  		trace_array_put(tr);
8077  
8078  	return ret;
8079  }
8080  
8081  static __poll_t
8082  tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8083  {
8084  	struct ftrace_buffer_info *info = filp->private_data;
8085  	struct trace_iterator *iter = &info->iter;
8086  
8087  	return trace_poll(iter, filp, poll_table);
8088  }
8089  
8090  static ssize_t
8091  tracing_buffers_read(struct file *filp, char __user *ubuf,
8092  		     size_t count, loff_t *ppos)
8093  {
8094  	struct ftrace_buffer_info *info = filp->private_data;
8095  	struct trace_iterator *iter = &info->iter;
8096  	void *trace_data;
8097  	int page_size;
8098  	ssize_t ret = 0;
8099  	ssize_t size;
8100  
8101  	if (!count)
8102  		return 0;
8103  
8104  #ifdef CONFIG_TRACER_MAX_TRACE
8105  	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8106  		return -EBUSY;
8107  #endif
8108  
8109  	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8110  
8111  	/* Make sure the spare matches the current sub buffer size */
8112  	if (info->spare) {
8113  		if (page_size != info->spare_size) {
8114  			ring_buffer_free_read_page(iter->array_buffer->buffer,
8115  						   info->spare_cpu, info->spare);
8116  			info->spare = NULL;
8117  		}
8118  	}
8119  
8120  	if (!info->spare) {
8121  		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8122  							  iter->cpu_file);
8123  		if (IS_ERR(info->spare)) {
8124  			ret = PTR_ERR(info->spare);
8125  			info->spare = NULL;
8126  		} else {
8127  			info->spare_cpu = iter->cpu_file;
8128  			info->spare_size = page_size;
8129  		}
8130  	}
8131  	if (!info->spare)
8132  		return ret;
8133  
8134  	/* Do we have previous read data to read? */
8135  	if (info->read < page_size)
8136  		goto read;
8137  
8138   again:
8139  	trace_access_lock(iter->cpu_file);
8140  	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8141  				    info->spare,
8142  				    count,
8143  				    iter->cpu_file, 0);
8144  	trace_access_unlock(iter->cpu_file);
8145  
8146  	if (ret < 0) {
8147  		if (trace_empty(iter) && !iter->closed) {
8148  			if ((filp->f_flags & O_NONBLOCK))
8149  				return -EAGAIN;
8150  
8151  			ret = wait_on_pipe(iter, 0);
8152  			if (ret)
8153  				return ret;
8154  
8155  			goto again;
8156  		}
8157  		return 0;
8158  	}
8159  
8160  	info->read = 0;
8161   read:
8162  	size = page_size - info->read;
8163  	if (size > count)
8164  		size = count;
8165  	trace_data = ring_buffer_read_page_data(info->spare);
8166  	ret = copy_to_user(ubuf, trace_data + info->read, size);
8167  	if (ret == size)
8168  		return -EFAULT;
8169  
8170  	size -= ret;
8171  
8172  	*ppos += size;
8173  	info->read += size;
8174  
8175  	return size;
8176  }
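
/*
 * Purely illustrative (path assumes tracefs at /sys/kernel/tracing):
 * per_cpu/cpuN/trace_pipe_raw returns whole sub-buffer pages in the
 * ring buffer's binary format, meant for tools that parse it
 * themselves (e.g. trace-cmd).
 *
 *	char page[4096];	assumes the default 4K sub-buffer size
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *
 *	read(fd, page, sizeof(page));
 */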
8177  
8178  static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8179  {
8180  	struct ftrace_buffer_info *info = file->private_data;
8181  	struct trace_iterator *iter = &info->iter;
8182  
8183  	iter->closed = true;
8184  	/* Make sure the waiters see the new wait_index */
8185  	(void)atomic_fetch_inc_release(&iter->wait_index);
8186  
8187  	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8188  
8189  	return 0;
8190  }
8191  
8192  static int tracing_buffers_release(struct inode *inode, struct file *file)
8193  {
8194  	struct ftrace_buffer_info *info = file->private_data;
8195  	struct trace_iterator *iter = &info->iter;
8196  
8197  	mutex_lock(&trace_types_lock);
8198  
8199  	iter->tr->trace_ref--;
8200  
8201  	__trace_array_put(iter->tr);
8202  
8203  	if (info->spare)
8204  		ring_buffer_free_read_page(iter->array_buffer->buffer,
8205  					   info->spare_cpu, info->spare);
8206  	kvfree(info);
8207  
8208  	mutex_unlock(&trace_types_lock);
8209  
8210  	return 0;
8211  }
8212  
8213  struct buffer_ref {
8214  	struct trace_buffer	*buffer;
8215  	void			*page;
8216  	int			cpu;
8217  	refcount_t		refcount;
8218  };
8219  
8220  static void buffer_ref_release(struct buffer_ref *ref)
8221  {
8222  	if (!refcount_dec_and_test(&ref->refcount))
8223  		return;
8224  	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8225  	kfree(ref);
8226  }
8227  
8228  static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8229  				    struct pipe_buffer *buf)
8230  {
8231  	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8232  
8233  	buffer_ref_release(ref);
8234  	buf->private = 0;
8235  }
8236  
8237  static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8238  				struct pipe_buffer *buf)
8239  {
8240  	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8241  
8242  	if (refcount_read(&ref->refcount) > INT_MAX/2)
8243  		return false;
8244  
8245  	refcount_inc(&ref->refcount);
8246  	return true;
8247  }
8248  
8249  /* Pipe buffer operations for a buffer. */
8250  static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8251  	.release		= buffer_pipe_buf_release,
8252  	.get			= buffer_pipe_buf_get,
8253  };
8254  
8255  /*
8256   * Callback from splice_to_pipe(), if we need to release some pages
8257   * at the end of the spd in case we error'ed out in filling the pipe.
8258   */
8259  static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8260  {
8261  	struct buffer_ref *ref =
8262  		(struct buffer_ref *)spd->partial[i].private;
8263  
8264  	buffer_ref_release(ref);
8265  	spd->partial[i].private = 0;
8266  }
8267  
8268  static ssize_t
8269  tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8270  			    struct pipe_inode_info *pipe, size_t len,
8271  			    unsigned int flags)
8272  {
8273  	struct ftrace_buffer_info *info = file->private_data;
8274  	struct trace_iterator *iter = &info->iter;
8275  	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8276  	struct page *pages_def[PIPE_DEF_BUFFERS];
8277  	struct splice_pipe_desc spd = {
8278  		.pages		= pages_def,
8279  		.partial	= partial_def,
8280  		.nr_pages_max	= PIPE_DEF_BUFFERS,
8281  		.ops		= &buffer_pipe_buf_ops,
8282  		.spd_release	= buffer_spd_release,
8283  	};
8284  	struct buffer_ref *ref;
8285  	bool woken = false;
8286  	int page_size;
8287  	int entries, i;
8288  	ssize_t ret = 0;
8289  
8290  #ifdef CONFIG_TRACER_MAX_TRACE
8291  	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8292  		return -EBUSY;
8293  #endif
8294  
8295  	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8296  	if (*ppos & (page_size - 1))
8297  		return -EINVAL;
8298  
8299  	if (len & (page_size - 1)) {
8300  		if (len < page_size)
8301  			return -EINVAL;
8302  		len &= (~(page_size - 1));
8303  	}
8304  
8305  	if (splice_grow_spd(pipe, &spd))
8306  		return -ENOMEM;
8307  
8308   again:
8309  	trace_access_lock(iter->cpu_file);
8310  	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8311  
8312  	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8313  		struct page *page;
8314  		int r;
8315  
8316  		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8317  		if (!ref) {
8318  			ret = -ENOMEM;
8319  			break;
8320  		}
8321  
8322  		refcount_set(&ref->refcount, 1);
8323  		ref->buffer = iter->array_buffer->buffer;
8324  		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8325  		if (IS_ERR(ref->page)) {
8326  			ret = PTR_ERR(ref->page);
8327  			ref->page = NULL;
8328  			kfree(ref);
8329  			break;
8330  		}
8331  		ref->cpu = iter->cpu_file;
8332  
8333  		r = ring_buffer_read_page(ref->buffer, ref->page,
8334  					  len, iter->cpu_file, 1);
8335  		if (r < 0) {
8336  			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8337  						   ref->page);
8338  			kfree(ref);
8339  			break;
8340  		}
8341  
8342  		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8343  
8344  		spd.pages[i] = page;
8345  		spd.partial[i].len = page_size;
8346  		spd.partial[i].offset = 0;
8347  		spd.partial[i].private = (unsigned long)ref;
8348  		spd.nr_pages++;
8349  		*ppos += page_size;
8350  
8351  		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8352  	}
8353  
8354  	trace_access_unlock(iter->cpu_file);
8355  	spd.nr_pages = i;
8356  
8357  	/* did we read anything? */
8358  	if (!spd.nr_pages) {
8359  
8360  		if (ret)
8361  			goto out;
8362  
8363  		if (woken)
8364  			goto out;
8365  
8366  		ret = -EAGAIN;
8367  		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8368  			goto out;
8369  
8370  		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8371  		if (ret)
8372  			goto out;
8373  
8374  		/* No need to wait after waking up when tracing is off */
8375  		if (!tracer_tracing_is_on(iter->tr))
8376  			goto out;
8377  
8378  		/* Iterate one more time to collect any new data then exit */
8379  		woken = true;
8380  
8381  		goto again;
8382  	}
8383  
8384  	ret = splice_to_pipe(pipe, &spd);
8385  out:
8386  	splice_shrink_spd(&spd);
8387  
8388  	return ret;
8389  }
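
/*
 * Purely illustrative (path assumes tracefs at /sys/kernel/tracing):
 * because trace_pipe_raw implements splice_read(), user space can move
 * whole pages into a pipe without bouncing them through a user buffer.
 *
 *	int pfd[2];
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *
 *	pipe(pfd);
 *	splice(fd, NULL, pfd[1], NULL, 65536, 0);
 */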
8390  
8391  static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8392  {
8393  	struct ftrace_buffer_info *info = file->private_data;
8394  	struct trace_iterator *iter = &info->iter;
8395  	int err;
8396  
8397  	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8398  		if (!(file->f_flags & O_NONBLOCK)) {
8399  			err = ring_buffer_wait(iter->array_buffer->buffer,
8400  					       iter->cpu_file,
8401  					       iter->tr->buffer_percent,
8402  					       NULL, NULL);
8403  			if (err)
8404  				return err;
8405  		}
8406  
8407  		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8408  						  iter->cpu_file);
8409  	} else if (cmd) {
8410  		return -ENOTTY;
8411  	}
8412  
8413  	/*
8414  	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8415  	 * waiters.
8416  	 */
8417  	mutex_lock(&trace_types_lock);
8418  
8419  	/* Make sure the waiters see the new wait_index */
8420  	(void)atomic_fetch_inc_release(&iter->wait_index);
8421  
8422  	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8423  
8424  	mutex_unlock(&trace_types_lock);
8425  	return 0;
8426  }
8427  
8428  #ifdef CONFIG_TRACER_MAX_TRACE
8429  static int get_snapshot_map(struct trace_array *tr)
8430  {
8431  	int err = 0;
8432  
8433  	/*
8434  	 * Called with mmap_lock held. lockdep would be unhappy if we were to
8435  	 * take trace_types_lock here. Instead use the specific
8436  	 * snapshot_trigger_lock.
8437  	 */
8438  	spin_lock(&tr->snapshot_trigger_lock);
8439  
8440  	if (tr->snapshot || tr->mapped == UINT_MAX)
8441  		err = -EBUSY;
8442  	else
8443  		tr->mapped++;
8444  
8445  	spin_unlock(&tr->snapshot_trigger_lock);
8446  
8447  	/* Wait for update_max_tr() to observe iter->tr->mapped */
8448  	if (tr->mapped == 1)
8449  		synchronize_rcu();
8450  
8451  	return err;
8452  
8453  }
8454  static void put_snapshot_map(struct trace_array *tr)
8455  {
8456  	spin_lock(&tr->snapshot_trigger_lock);
8457  	if (!WARN_ON(!tr->mapped))
8458  		tr->mapped--;
8459  	spin_unlock(&tr->snapshot_trigger_lock);
8460  }
8461  #else
8462  static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8463  static inline void put_snapshot_map(struct trace_array *tr) { }
8464  #endif
8465  
8466  static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8467  {
8468  	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8469  	struct trace_iterator *iter = &info->iter;
8470  
8471  	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8472  	put_snapshot_map(iter->tr);
8473  }
8474  
8475  static const struct vm_operations_struct tracing_buffers_vmops = {
8476  	.close		= tracing_buffers_mmap_close,
8477  };
8478  
8479  static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8480  {
8481  	struct ftrace_buffer_info *info = filp->private_data;
8482  	struct trace_iterator *iter = &info->iter;
8483  	int ret = 0;
8484  
8485  	ret = get_snapshot_map(iter->tr);
8486  	if (ret)
8487  		return ret;
8488  
8489  	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8490  	if (ret)
8491  		put_snapshot_map(iter->tr);
8492  
8493  	vma->vm_ops = &tracing_buffers_vmops;
8494  
8495  	return ret;
8496  }
8497  
8498  static const struct file_operations tracing_buffers_fops = {
8499  	.open		= tracing_buffers_open,
8500  	.read		= tracing_buffers_read,
8501  	.poll		= tracing_buffers_poll,
8502  	.release	= tracing_buffers_release,
8503  	.flush		= tracing_buffers_flush,
8504  	.splice_read	= tracing_buffers_splice_read,
8505  	.unlocked_ioctl = tracing_buffers_ioctl,
8506  	.mmap		= tracing_buffers_mmap,
8507  };
8508  
8509  static ssize_t
8510  tracing_stats_read(struct file *filp, char __user *ubuf,
8511  		   size_t count, loff_t *ppos)
8512  {
8513  	struct inode *inode = file_inode(filp);
8514  	struct trace_array *tr = inode->i_private;
8515  	struct array_buffer *trace_buf = &tr->array_buffer;
8516  	int cpu = tracing_get_cpu(inode);
8517  	struct trace_seq *s;
8518  	unsigned long cnt;
8519  	unsigned long long t;
8520  	unsigned long usec_rem;
8521  
8522  	s = kmalloc(sizeof(*s), GFP_KERNEL);
8523  	if (!s)
8524  		return -ENOMEM;
8525  
8526  	trace_seq_init(s);
8527  
8528  	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8529  	trace_seq_printf(s, "entries: %ld\n", cnt);
8530  
8531  	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8532  	trace_seq_printf(s, "overrun: %ld\n", cnt);
8533  
8534  	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8535  	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8536  
8537  	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8538  	trace_seq_printf(s, "bytes: %ld\n", cnt);
8539  
8540  	if (trace_clocks[tr->clock_id].in_ns) {
8541  		/* local or global for trace_clock */
8542  		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8543  		usec_rem = do_div(t, USEC_PER_SEC);
8544  		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8545  								t, usec_rem);
8546  
8547  		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8548  		usec_rem = do_div(t, USEC_PER_SEC);
8549  		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8550  	} else {
8551  		/* counter or tsc mode for trace_clock */
8552  		trace_seq_printf(s, "oldest event ts: %llu\n",
8553  				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8554  
8555  		trace_seq_printf(s, "now ts: %llu\n",
8556  				ring_buffer_time_stamp(trace_buf->buffer));
8557  	}
8558  
8559  	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8560  	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8561  
8562  	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8563  	trace_seq_printf(s, "read events: %ld\n", cnt);
8564  
8565  	count = simple_read_from_buffer(ubuf, count, ppos,
8566  					s->buffer, trace_seq_used(s));
8567  
8568  	kfree(s);
8569  
8570  	return count;
8571  }
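/*
 * For reference, the per_cpu/cpuX/stats file generated above reads
 * roughly as follows (values are illustrative only):
 *
 *	entries: 129
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 7234
 *	oldest event ts:  2723.546990
 *	now ts:  2725.113356
 *	dropped events: 0
 *	read events: 42
 */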
8572  
8573  static const struct file_operations tracing_stats_fops = {
8574  	.open		= tracing_open_generic_tr,
8575  	.read		= tracing_stats_read,
8576  	.llseek		= generic_file_llseek,
8577  	.release	= tracing_release_generic_tr,
8578  };
8579  
8580  #ifdef CONFIG_DYNAMIC_FTRACE
8581  
8582  static ssize_t
8583  tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8584  		  size_t cnt, loff_t *ppos)
8585  {
8586  	ssize_t ret;
8587  	char *buf;
8588  	int r;
8589  
8590  	/* 256 should be plenty to hold the amount needed */
8591  	buf = kmalloc(256, GFP_KERNEL);
8592  	if (!buf)
8593  		return -ENOMEM;
8594  
8595  	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8596  		      ftrace_update_tot_cnt,
8597  		      ftrace_number_of_pages,
8598  		      ftrace_number_of_groups);
8599  
8600  	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8601  	kfree(buf);
8602  	return ret;
8603  }
8604  
8605  static const struct file_operations tracing_dyn_info_fops = {
8606  	.open		= tracing_open_generic,
8607  	.read		= tracing_read_dyn_info,
8608  	.llseek		= generic_file_llseek,
8609  };
8610  #endif /* CONFIG_DYNAMIC_FTRACE */
8611  
8612  #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8613  static void
8614  ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8615  		struct trace_array *tr, struct ftrace_probe_ops *ops,
8616  		void *data)
8617  {
8618  	tracing_snapshot_instance(tr);
8619  }
8620  
8621  static void
8622  ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8623  		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8624  		      void *data)
8625  {
8626  	struct ftrace_func_mapper *mapper = data;
8627  	long *count = NULL;
8628  
8629  	if (mapper)
8630  		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8631  
8632  	if (count) {
8633  
8634  		if (*count <= 0)
8635  			return;
8636  
8637  		(*count)--;
8638  	}
8639  
8640  	tracing_snapshot_instance(tr);
8641  }
8642  
8643  static int
8644  ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8645  		      struct ftrace_probe_ops *ops, void *data)
8646  {
8647  	struct ftrace_func_mapper *mapper = data;
8648  	long *count = NULL;
8649  
8650  	seq_printf(m, "%ps:", (void *)ip);
8651  
8652  	seq_puts(m, "snapshot");
8653  
8654  	if (mapper)
8655  		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8656  
8657  	if (count)
8658  		seq_printf(m, ":count=%ld\n", *count);
8659  	else
8660  		seq_puts(m, ":unlimited\n");
8661  
8662  	return 0;
8663  }
8664  
8665  static int
8666  ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8667  		     unsigned long ip, void *init_data, void **data)
8668  {
8669  	struct ftrace_func_mapper *mapper = *data;
8670  
8671  	if (!mapper) {
8672  		mapper = allocate_ftrace_func_mapper();
8673  		if (!mapper)
8674  			return -ENOMEM;
8675  		*data = mapper;
8676  	}
8677  
8678  	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8679  }
8680  
8681  static void
8682  ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8683  		     unsigned long ip, void *data)
8684  {
8685  	struct ftrace_func_mapper *mapper = data;
8686  
8687  	if (!ip) {
8688  		if (!mapper)
8689  			return;
8690  		free_ftrace_func_mapper(mapper, NULL);
8691  		return;
8692  	}
8693  
8694  	ftrace_func_mapper_remove_ip(mapper, ip);
8695  }
8696  
8697  static struct ftrace_probe_ops snapshot_probe_ops = {
8698  	.func			= ftrace_snapshot,
8699  	.print			= ftrace_snapshot_print,
8700  };
8701  
8702  static struct ftrace_probe_ops snapshot_count_probe_ops = {
8703  	.func			= ftrace_count_snapshot,
8704  	.print			= ftrace_snapshot_print,
8705  	.init			= ftrace_snapshot_init,
8706  	.free			= ftrace_snapshot_free,
8707  };
8708  
8709  static int
8710  ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8711  			       char *glob, char *cmd, char *param, int enable)
8712  {
8713  	struct ftrace_probe_ops *ops;
8714  	void *count = (void *)-1;
8715  	char *number;
8716  	int ret;
8717  
8718  	if (!tr)
8719  		return -ENODEV;
8720  
8721  	/* hash funcs only work with set_ftrace_filter */
8722  	if (!enable)
8723  		return -EINVAL;
8724  
8725  	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8726  
8727  	if (glob[0] == '!') {
8728  		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8729  		if (!ret)
8730  			tracing_disarm_snapshot(tr);
8731  
8732  		return ret;
8733  	}
8734  
8735  	if (!param)
8736  		goto out_reg;
8737  
8738  	number = strsep(&param, ":");
8739  
8740  	if (!strlen(number))
8741  		goto out_reg;
8742  
8743  	/*
8744  	 * We use the callback data field (which is a pointer)
8745  	 * as our counter.
8746  	 */
8747  	ret = kstrtoul(number, 0, (unsigned long *)&count);
8748  	if (ret)
8749  		return ret;
8750  
8751   out_reg:
8752  	ret = tracing_arm_snapshot(tr);
8753  	if (ret < 0)
8754  		goto out;
8755  
8756  	ret = register_ftrace_function_probe(glob, tr, ops, count);
8757  	if (ret < 0)
8758  		tracing_disarm_snapshot(tr);
8759   out:
8760  	return ret < 0 ? ret : 0;
8761  }
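/*
 * The callback above implements the "snapshot" command of
 * set_ftrace_filter. Typical usage from user space (do_fault is just an
 * illustrative function name):
 *
 *	echo 'do_fault:snapshot' > set_ftrace_filter	# snapshot on every hit
 *	echo 'do_fault:snapshot:5' > set_ftrace_filter	# only the first 5 hits
 *	echo '!do_fault:snapshot' > set_ftrace_filter	# remove the probe
 */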
8762  
8763  static struct ftrace_func_command ftrace_snapshot_cmd = {
8764  	.name			= "snapshot",
8765  	.func			= ftrace_trace_snapshot_callback,
8766  };
8767  
8768  static __init int register_snapshot_cmd(void)
8769  {
8770  	return register_ftrace_command(&ftrace_snapshot_cmd);
8771  }
8772  #else
8773  static inline __init int register_snapshot_cmd(void) { return 0; }
8774  #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8775  
8776  static struct dentry *tracing_get_dentry(struct trace_array *tr)
8777  {
8778  	if (WARN_ON(!tr->dir))
8779  		return ERR_PTR(-ENODEV);
8780  
8781  	/* Top directory uses NULL as the parent */
8782  	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8783  		return NULL;
8784  
8785  	/* All sub buffers have a descriptor */
8786  	return tr->dir;
8787  }
8788  
8789  static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8790  {
8791  	struct dentry *d_tracer;
8792  
8793  	if (tr->percpu_dir)
8794  		return tr->percpu_dir;
8795  
8796  	d_tracer = tracing_get_dentry(tr);
8797  	if (IS_ERR(d_tracer))
8798  		return NULL;
8799  
8800  	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8801  
8802  	MEM_FAIL(!tr->percpu_dir,
8803  		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8804  
8805  	return tr->percpu_dir;
8806  }
8807  
8808  static struct dentry *
8809  trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8810  		      void *data, long cpu, const struct file_operations *fops)
8811  {
8812  	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8813  
8814  	if (ret) /* See tracing_get_cpu() */
8815  		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8816  	return ret;
8817  }
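/*
 * Storing "cpu + 1" rather than "cpu" lets tracing_get_cpu() tell the
 * per-CPU files apart from the top-level ones: a NULL i_cdev means "all
 * CPUs", while a non-NULL value decodes back to the CPU number as
 * "(long)i_cdev - 1". The cpu2 files, for example, end up with
 * i_cdev == (void *)3.
 */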
8818  
8819  static void
8820  tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8821  {
8822  	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8823  	struct dentry *d_cpu;
8824  	char cpu_dir[30]; /* 30 characters should be more than enough */
8825  
8826  	if (!d_percpu)
8827  		return;
8828  
8829  	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8830  	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8831  	if (!d_cpu) {
8832  		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8833  		return;
8834  	}
8835  
8836  	/* per cpu trace_pipe */
8837  	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8838  				tr, cpu, &tracing_pipe_fops);
8839  
8840  	/* per cpu trace */
8841  	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8842  				tr, cpu, &tracing_fops);
8843  
8844  	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8845  				tr, cpu, &tracing_buffers_fops);
8846  
8847  	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8848  				tr, cpu, &tracing_stats_fops);
8849  
8850  	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8851  				tr, cpu, &tracing_entries_fops);
8852  
8853  	if (tr->range_addr_start)
8854  		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8855  				      tr, cpu, &tracing_buffer_meta_fops);
8856  #ifdef CONFIG_TRACER_SNAPSHOT
8857  	if (!tr->range_addr_start) {
8858  		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8859  				      tr, cpu, &snapshot_fops);
8860  
8861  		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8862  				      tr, cpu, &snapshot_raw_fops);
8863  	}
8864  #endif
8865  }
8866  
8867  #ifdef CONFIG_FTRACE_SELFTEST
8868  /* Let selftest have access to static functions in this file */
8869  #include "trace_selftest.c"
8870  #endif
8871  
8872  static ssize_t
8873  trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8874  			loff_t *ppos)
8875  {
8876  	struct trace_option_dentry *topt = filp->private_data;
8877  	char *buf;
8878  
8879  	if (topt->flags->val & topt->opt->bit)
8880  		buf = "1\n";
8881  	else
8882  		buf = "0\n";
8883  
8884  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8885  }
8886  
8887  static ssize_t
8888  trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8889  			 loff_t *ppos)
8890  {
8891  	struct trace_option_dentry *topt = filp->private_data;
8892  	unsigned long val;
8893  	int ret;
8894  
8895  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8896  	if (ret)
8897  		return ret;
8898  
8899  	if (val != 0 && val != 1)
8900  		return -EINVAL;
8901  
8902  	if (!!(topt->flags->val & topt->opt->bit) != val) {
8903  		mutex_lock(&trace_types_lock);
8904  		ret = __set_tracer_option(topt->tr, topt->flags,
8905  					  topt->opt, !val);
8906  		mutex_unlock(&trace_types_lock);
8907  		if (ret)
8908  			return ret;
8909  	}
8910  
8911  	*ppos += cnt;
8912  
8913  	return cnt;
8914  }
8915  
8916  static int tracing_open_options(struct inode *inode, struct file *filp)
8917  {
8918  	struct trace_option_dentry *topt = inode->i_private;
8919  	int ret;
8920  
8921  	ret = tracing_check_open_get_tr(topt->tr);
8922  	if (ret)
8923  		return ret;
8924  
8925  	filp->private_data = inode->i_private;
8926  	return 0;
8927  }
8928  
8929  static int tracing_release_options(struct inode *inode, struct file *file)
8930  {
8931  	struct trace_option_dentry *topt = file->private_data;
8932  
8933  	trace_array_put(topt->tr);
8934  	return 0;
8935  }
8936  
8937  static const struct file_operations trace_options_fops = {
8938  	.open = tracing_open_options,
8939  	.read = trace_options_read,
8940  	.write = trace_options_write,
8941  	.llseek	= generic_file_llseek,
8942  	.release = tracing_release_options,
8943  };
8944  
8945  /*
8946   * In order to pass in both the trace_array descriptor as well as the index
8947   * to the flag that the trace option file represents, the trace_array
8948   * has a character array of trace_flags_index[], which holds the index
8949   * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8950   * The address of this character array is passed to the flag option file
8951   * read/write callbacks.
8952   *
8953   * In order to extract both the index and the trace_array descriptor,
8954   * get_tr_index() uses the following algorithm.
8955   *
8956   *   idx = *ptr;
8957   *
8958   * This works because the pointer points at the index array element
8959   * whose value is its own index (remember index[1] == 1).
8960   *
8961   * Then, to get the trace_array descriptor, subtract that index from
8962   * the pointer to get back to the start of the index array.
8963   *
8964   *   ptr - idx == &index[0]
8965   *
8966   * Then a simple container_of() from that pointer gets us to the
8967   * trace_array descriptor.
8968   */
8969  static void get_tr_index(void *data, struct trace_array **ptr,
8970  			 unsigned int *pindex)
8971  {
8972  	*pindex = *(unsigned char *)data;
8973  
8974  	*ptr = container_of(data - *pindex, struct trace_array,
8975  			    trace_flags_index);
8976  }
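/*
 * Worked example: if data points at tr->trace_flags_index[3], then
 * *pindex becomes 3 (because index[3] == 3), data - 3 is
 * &tr->trace_flags_index[0], and container_of() on that address
 * recovers the enclosing trace_array.
 */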
8977  
8978  static ssize_t
8979  trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8980  			loff_t *ppos)
8981  {
8982  	void *tr_index = filp->private_data;
8983  	struct trace_array *tr;
8984  	unsigned int index;
8985  	char *buf;
8986  
8987  	get_tr_index(tr_index, &tr, &index);
8988  
8989  	if (tr->trace_flags & (1 << index))
8990  		buf = "1\n";
8991  	else
8992  		buf = "0\n";
8993  
8994  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8995  }
8996  
8997  static ssize_t
8998  trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8999  			 loff_t *ppos)
9000  {
9001  	void *tr_index = filp->private_data;
9002  	struct trace_array *tr;
9003  	unsigned int index;
9004  	unsigned long val;
9005  	int ret;
9006  
9007  	get_tr_index(tr_index, &tr, &index);
9008  
9009  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9010  	if (ret)
9011  		return ret;
9012  
9013  	if (val != 0 && val != 1)
9014  		return -EINVAL;
9015  
9016  	mutex_lock(&event_mutex);
9017  	mutex_lock(&trace_types_lock);
9018  	ret = set_tracer_flag(tr, 1 << index, val);
9019  	mutex_unlock(&trace_types_lock);
9020  	mutex_unlock(&event_mutex);
9021  
9022  	if (ret < 0)
9023  		return ret;
9024  
9025  	*ppos += cnt;
9026  
9027  	return cnt;
9028  }
9029  
9030  static const struct file_operations trace_options_core_fops = {
9031  	.open = tracing_open_generic,
9032  	.read = trace_options_core_read,
9033  	.write = trace_options_core_write,
9034  	.llseek = generic_file_llseek,
9035  };
9036  
9037  struct dentry *trace_create_file(const char *name,
9038  				 umode_t mode,
9039  				 struct dentry *parent,
9040  				 void *data,
9041  				 const struct file_operations *fops)
9042  {
9043  	struct dentry *ret;
9044  
9045  	ret = tracefs_create_file(name, mode, parent, data, fops);
9046  	if (!ret)
9047  		pr_warn("Could not create tracefs '%s' entry\n", name);
9048  
9049  	return ret;
9050  }
9051  
9052  
9053  static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9054  {
9055  	struct dentry *d_tracer;
9056  
9057  	if (tr->options)
9058  		return tr->options;
9059  
9060  	d_tracer = tracing_get_dentry(tr);
9061  	if (IS_ERR(d_tracer))
9062  		return NULL;
9063  
9064  	tr->options = tracefs_create_dir("options", d_tracer);
9065  	if (!tr->options) {
9066  		pr_warn("Could not create tracefs directory 'options'\n");
9067  		return NULL;
9068  	}
9069  
9070  	return tr->options;
9071  }
9072  
9073  static void
9074  create_trace_option_file(struct trace_array *tr,
9075  			 struct trace_option_dentry *topt,
9076  			 struct tracer_flags *flags,
9077  			 struct tracer_opt *opt)
9078  {
9079  	struct dentry *t_options;
9080  
9081  	t_options = trace_options_init_dentry(tr);
9082  	if (!t_options)
9083  		return;
9084  
9085  	topt->flags = flags;
9086  	topt->opt = opt;
9087  	topt->tr = tr;
9088  
9089  	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9090  					t_options, topt, &trace_options_fops);
9091  
9092  }
9093  
9094  static void
9095  create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9096  {
9097  	struct trace_option_dentry *topts;
9098  	struct trace_options *tr_topts;
9099  	struct tracer_flags *flags;
9100  	struct tracer_opt *opts;
9101  	int cnt;
9102  	int i;
9103  
9104  	if (!tracer)
9105  		return;
9106  
9107  	flags = tracer->flags;
9108  
9109  	if (!flags || !flags->opts)
9110  		return;
9111  
9112  	/*
9113  	 * If this is an instance, only create flags for tracers
9114  	 * the instance may have.
9115  	 */
9116  	if (!trace_ok_for_array(tracer, tr))
9117  		return;
9118  
9119  	for (i = 0; i < tr->nr_topts; i++) {
9120  		/* Make sure there are no duplicate flags. */
9121  		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9122  			return;
9123  	}
9124  
9125  	opts = flags->opts;
9126  
9127  	for (cnt = 0; opts[cnt].name; cnt++)
9128  		;
9129  
9130  	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9131  	if (!topts)
9132  		return;
9133  
9134  	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9135  			    GFP_KERNEL);
9136  	if (!tr_topts) {
9137  		kfree(topts);
9138  		return;
9139  	}
9140  
9141  	tr->topts = tr_topts;
9142  	tr->topts[tr->nr_topts].tracer = tracer;
9143  	tr->topts[tr->nr_topts].topts = topts;
9144  	tr->nr_topts++;
9145  
9146  	for (cnt = 0; opts[cnt].name; cnt++) {
9147  		create_trace_option_file(tr, &topts[cnt], flags,
9148  					 &opts[cnt]);
9149  		MEM_FAIL(topts[cnt].entry == NULL,
9150  			  "Failed to create trace option: %s",
9151  			  opts[cnt].name);
9152  	}
9153  }
9154  
9155  static struct dentry *
9156  create_trace_option_core_file(struct trace_array *tr,
9157  			      const char *option, long index)
9158  {
9159  	struct dentry *t_options;
9160  
9161  	t_options = trace_options_init_dentry(tr);
9162  	if (!t_options)
9163  		return NULL;
9164  
9165  	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9166  				 (void *)&tr->trace_flags_index[index],
9167  				 &trace_options_core_fops);
9168  }
9169  
9170  static void create_trace_options_dir(struct trace_array *tr)
9171  {
9172  	struct dentry *t_options;
9173  	bool top_level = tr == &global_trace;
9174  	int i;
9175  
9176  	t_options = trace_options_init_dentry(tr);
9177  	if (!t_options)
9178  		return;
9179  
9180  	for (i = 0; trace_options[i]; i++) {
9181  		if (top_level ||
9182  		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9183  			create_trace_option_core_file(tr, trace_options[i], i);
9184  	}
9185  }
9186  
9187  static ssize_t
9188  rb_simple_read(struct file *filp, char __user *ubuf,
9189  	       size_t cnt, loff_t *ppos)
9190  {
9191  	struct trace_array *tr = filp->private_data;
9192  	char buf[64];
9193  	int r;
9194  
9195  	r = tracer_tracing_is_on(tr);
9196  	r = sprintf(buf, "%d\n", r);
9197  
9198  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9199  }
9200  
9201  static ssize_t
9202  rb_simple_write(struct file *filp, const char __user *ubuf,
9203  		size_t cnt, loff_t *ppos)
9204  {
9205  	struct trace_array *tr = filp->private_data;
9206  	struct trace_buffer *buffer = tr->array_buffer.buffer;
9207  	unsigned long val;
9208  	int ret;
9209  
9210  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9211  	if (ret)
9212  		return ret;
9213  
9214  	if (buffer) {
9215  		mutex_lock(&trace_types_lock);
9216  		if (!!val == tracer_tracing_is_on(tr)) {
9217  			val = 0; /* do nothing */
9218  		} else if (val) {
9219  			tracer_tracing_on(tr);
9220  			if (tr->current_trace->start)
9221  				tr->current_trace->start(tr);
9222  		} else {
9223  			tracer_tracing_off(tr);
9224  			if (tr->current_trace->stop)
9225  				tr->current_trace->stop(tr);
9226  			/* Wake up any waiters */
9227  			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9228  		}
9229  		mutex_unlock(&trace_types_lock);
9230  	}
9231  
9232  	(*ppos)++;
9233  
9234  	return cnt;
9235  }
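/*
 * This pair of handlers backs the "tracing_on" file. Illustrative
 * usage:
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on		# stop recording
 *	echo 1 > /sys/kernel/tracing/tracing_on		# resume recording
 *
 * Writing the state the buffer is already in is a no-op, and turning
 * tracing off also wakes up any readers blocked on the buffer.
 */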
9236  
9237  static const struct file_operations rb_simple_fops = {
9238  	.open		= tracing_open_generic_tr,
9239  	.read		= rb_simple_read,
9240  	.write		= rb_simple_write,
9241  	.release	= tracing_release_generic_tr,
9242  	.llseek		= default_llseek,
9243  };
9244  
9245  static ssize_t
9246  buffer_percent_read(struct file *filp, char __user *ubuf,
9247  		    size_t cnt, loff_t *ppos)
9248  {
9249  	struct trace_array *tr = filp->private_data;
9250  	char buf[64];
9251  	int r;
9252  
9253  	r = tr->buffer_percent;
9254  	r = sprintf(buf, "%d\n", r);
9255  
9256  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9257  }
9258  
9259  static ssize_t
9260  buffer_percent_write(struct file *filp, const char __user *ubuf,
9261  		     size_t cnt, loff_t *ppos)
9262  {
9263  	struct trace_array *tr = filp->private_data;
9264  	unsigned long val;
9265  	int ret;
9266  
9267  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9268  	if (ret)
9269  		return ret;
9270  
9271  	if (val > 100)
9272  		return -EINVAL;
9273  
9274  	tr->buffer_percent = val;
9275  
9276  	(*ppos)++;
9277  
9278  	return cnt;
9279  }
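/*
 * buffer_percent controls when blocked readers are woken: 0 wakes them
 * as soon as there is any data, 50 (the default set in
 * init_tracer_tracefs()) waits until the buffer is half full, and 100
 * waits until it is full. Illustrative usage:
 *
 *	echo 0 > /sys/kernel/tracing/buffer_percent
 */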
9280  
9281  static const struct file_operations buffer_percent_fops = {
9282  	.open		= tracing_open_generic_tr,
9283  	.read		= buffer_percent_read,
9284  	.write		= buffer_percent_write,
9285  	.release	= tracing_release_generic_tr,
9286  	.llseek		= default_llseek,
9287  };
9288  
9289  static ssize_t
9290  buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9291  {
9292  	struct trace_array *tr = filp->private_data;
9293  	size_t size;
9294  	char buf[64];
9295  	int order;
9296  	int r;
9297  
9298  	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9299  	size = (PAGE_SIZE << order) / 1024;
9300  
9301  	r = sprintf(buf, "%zd\n", size);
9302  
9303  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9304  }
9305  
9306  static ssize_t
9307  buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9308  			 size_t cnt, loff_t *ppos)
9309  {
9310  	struct trace_array *tr = filp->private_data;
9311  	unsigned long val;
9312  	int old_order;
9313  	int order;
9314  	int pages;
9315  	int ret;
9316  
9317  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9318  	if (ret)
9319  		return ret;
9320  
9321  	val *= 1024; /* value passed in is in KB */
9322  
9323  	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9324  	order = fls(pages - 1);
9325  
9326  	/* limit between 1 and 128 system pages */
9327  	if (order < 0 || order > 7)
9328  		return -EINVAL;
9329  
9330  	/* Do not allow tracing while changing the order of the ring buffer */
9331  	tracing_stop_tr(tr);
9332  
9333  	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9334  	if (old_order == order)
9335  		goto out;
9336  
9337  	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9338  	if (ret)
9339  		goto out;
9340  
9341  #ifdef CONFIG_TRACER_MAX_TRACE
9342  
9343  	if (!tr->allocated_snapshot)
9344  		goto out_max;
9345  
9346  	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9347  	if (ret) {
9348  		/* Put back the old order */
9349  		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9350  		if (WARN_ON_ONCE(cnt)) {
9351  			/*
9352  			 * AARGH! We are left with different orders!
9353  			 * The max buffer is our "snapshot" buffer.
9354  			 * When a tracer needs a snapshot (one of the
9355  			 * latency tracers), it swaps the max buffer
9356  			 * with the saved snapshot. We succeeded in
9357  			 * updating the order of the main buffer, but failed
9358  			 * to update the order of the max buffer. Then, when
9359  			 * we tried to reset the main buffer back to its old
9360  			 * order, we failed there too. This is very unlikely to
9361  			 * happen, but if it does, warn and kill all
9362  			 * tracing.
9363  			 */
9364  			tracing_disabled = 1;
9365  		}
9366  		goto out;
9367  	}
9368   out_max:
9369  #endif
9370  	(*ppos)++;
9371   out:
9372  	if (ret)
9373  		cnt = ret;
9374  	tracing_start_tr(tr);
9375  	return cnt;
9376  }
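/*
 * The KB -> order conversion above, worked through with 4K pages:
 * writing "8" gives val = 8192, pages = DIV_ROUND_UP(8192, 4096) = 2
 * and order = fls(1) = 1, i.e. sub-buffers of two system pages. Orders
 * outside 0..7 (1 to 128 pages) are rejected with -EINVAL.
 */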
9377  
9378  static const struct file_operations buffer_subbuf_size_fops = {
9379  	.open		= tracing_open_generic_tr,
9380  	.read		= buffer_subbuf_size_read,
9381  	.write		= buffer_subbuf_size_write,
9382  	.release	= tracing_release_generic_tr,
9383  	.llseek		= default_llseek,
9384  };
9385  
9386  static struct dentry *trace_instance_dir;
9387  
9388  static void
9389  init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9390  
9391  static int
9392  allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9393  {
9394  	enum ring_buffer_flags rb_flags;
9395  
9396  	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9397  
9398  	buf->tr = tr;
9399  
9400  	if (tr->range_addr_start && tr->range_addr_size) {
9401  		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9402  						      tr->range_addr_start,
9403  						      tr->range_addr_size);
9404  
9405  		ring_buffer_last_boot_delta(buf->buffer,
9406  					    &tr->text_delta, &tr->data_delta);
9407  		/*
9408  		 * This is basically the same as a mapped buffer,
9409  		 * with the same restrictions.
9410  		 */
9411  		tr->mapped++;
9412  	} else {
9413  		buf->buffer = ring_buffer_alloc(size, rb_flags);
9414  	}
9415  	if (!buf->buffer)
9416  		return -ENOMEM;
9417  
9418  	buf->data = alloc_percpu(struct trace_array_cpu);
9419  	if (!buf->data) {
9420  		ring_buffer_free(buf->buffer);
9421  		buf->buffer = NULL;
9422  		return -ENOMEM;
9423  	}
9424  
9425  	/* Allocate the first page for all buffers */
9426  	set_buffer_entries(&tr->array_buffer,
9427  			   ring_buffer_size(tr->array_buffer.buffer, 0));
9428  
9429  	return 0;
9430  }
9431  
9432  static void free_trace_buffer(struct array_buffer *buf)
9433  {
9434  	if (buf->buffer) {
9435  		ring_buffer_free(buf->buffer);
9436  		buf->buffer = NULL;
9437  		free_percpu(buf->data);
9438  		buf->data = NULL;
9439  	}
9440  }
9441  
9442  static int allocate_trace_buffers(struct trace_array *tr, int size)
9443  {
9444  	int ret;
9445  
9446  	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9447  	if (ret)
9448  		return ret;
9449  
9450  #ifdef CONFIG_TRACER_MAX_TRACE
9451  	/* Fixed memory-mapped buffer trace arrays do not have snapshot buffers */
9452  	if (tr->range_addr_start)
9453  		return 0;
9454  
9455  	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9456  				    allocate_snapshot ? size : 1);
9457  	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9458  		free_trace_buffer(&tr->array_buffer);
9459  		return -ENOMEM;
9460  	}
9461  	tr->allocated_snapshot = allocate_snapshot;
9462  
9463  	allocate_snapshot = false;
9464  #endif
9465  
9466  	return 0;
9467  }
9468  
9469  static void free_trace_buffers(struct trace_array *tr)
9470  {
9471  	if (!tr)
9472  		return;
9473  
9474  	free_trace_buffer(&tr->array_buffer);
9475  
9476  #ifdef CONFIG_TRACER_MAX_TRACE
9477  	free_trace_buffer(&tr->max_buffer);
9478  #endif
9479  }
9480  
9481  static void init_trace_flags_index(struct trace_array *tr)
9482  {
9483  	int i;
9484  
9485  	/* Used by the trace options files */
9486  	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9487  		tr->trace_flags_index[i] = i;
9488  }
9489  
9490  static void __update_tracer_options(struct trace_array *tr)
9491  {
9492  	struct tracer *t;
9493  
9494  	for (t = trace_types; t; t = t->next)
9495  		add_tracer_options(tr, t);
9496  }
9497  
9498  static void update_tracer_options(struct trace_array *tr)
9499  {
9500  	mutex_lock(&trace_types_lock);
9501  	tracer_options_updated = true;
9502  	__update_tracer_options(tr);
9503  	mutex_unlock(&trace_types_lock);
9504  }
9505  
9506  /* Must have trace_types_lock held */
9507  struct trace_array *trace_array_find(const char *instance)
9508  {
9509  	struct trace_array *tr, *found = NULL;
9510  
9511  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9512  		if (tr->name && strcmp(tr->name, instance) == 0) {
9513  			found = tr;
9514  			break;
9515  		}
9516  	}
9517  
9518  	return found;
9519  }
9520  
9521  struct trace_array *trace_array_find_get(const char *instance)
9522  {
9523  	struct trace_array *tr;
9524  
9525  	mutex_lock(&trace_types_lock);
9526  	tr = trace_array_find(instance);
9527  	if (tr)
9528  		tr->ref++;
9529  	mutex_unlock(&trace_types_lock);
9530  
9531  	return tr;
9532  }
9533  
9534  static int trace_array_create_dir(struct trace_array *tr)
9535  {
9536  	int ret;
9537  
9538  	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9539  	if (!tr->dir)
9540  		return -EINVAL;
9541  
9542  	ret = event_trace_add_tracer(tr->dir, tr);
9543  	if (ret) {
9544  		tracefs_remove(tr->dir);
9545  		return ret;
9546  	}
9547  
9548  	init_tracer_tracefs(tr, tr->dir);
9549  	__update_tracer_options(tr);
9550  
9551  	return ret;
9552  }
9553  
9554  static struct trace_array *
9555  trace_array_create_systems(const char *name, const char *systems,
9556  			   unsigned long range_addr_start,
9557  			   unsigned long range_addr_size)
9558  {
9559  	struct trace_array *tr;
9560  	int ret;
9561  
9562  	ret = -ENOMEM;
9563  	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9564  	if (!tr)
9565  		return ERR_PTR(ret);
9566  
9567  	tr->name = kstrdup(name, GFP_KERNEL);
9568  	if (!tr->name)
9569  		goto out_free_tr;
9570  
9571  	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9572  		goto out_free_tr;
9573  
9574  	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9575  		goto out_free_tr;
9576  
9577  	if (systems) {
9578  		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9579  		if (!tr->system_names)
9580  			goto out_free_tr;
9581  	}
9582  
9583  	/* Only for boot up memory mapped ring buffers */
9584  	tr->range_addr_start = range_addr_start;
9585  	tr->range_addr_size = range_addr_size;
9586  
9587  	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9588  
9589  	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9590  
9591  	raw_spin_lock_init(&tr->start_lock);
9592  
9593  	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9594  #ifdef CONFIG_TRACER_MAX_TRACE
9595  	spin_lock_init(&tr->snapshot_trigger_lock);
9596  #endif
9597  	tr->current_trace = &nop_trace;
9598  
9599  	INIT_LIST_HEAD(&tr->systems);
9600  	INIT_LIST_HEAD(&tr->events);
9601  	INIT_LIST_HEAD(&tr->hist_vars);
9602  	INIT_LIST_HEAD(&tr->err_log);
9603  
9604  	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9605  		goto out_free_tr;
9606  
9607  	/* The ring buffer is expanded by default */
9608  	trace_set_ring_buffer_expanded(tr);
9609  
9610  	if (ftrace_allocate_ftrace_ops(tr) < 0)
9611  		goto out_free_tr;
9612  
9613  	ftrace_init_trace_array(tr);
9614  
9615  	init_trace_flags_index(tr);
9616  
9617  	if (trace_instance_dir) {
9618  		ret = trace_array_create_dir(tr);
9619  		if (ret)
9620  			goto out_free_tr;
9621  	} else
9622  		__trace_early_add_events(tr);
9623  
9624  	list_add(&tr->list, &ftrace_trace_arrays);
9625  
9626  	tr->ref++;
9627  
9628  	return tr;
9629  
9630   out_free_tr:
9631  	ftrace_free_ftrace_ops(tr);
9632  	free_trace_buffers(tr);
9633  	free_cpumask_var(tr->pipe_cpumask);
9634  	free_cpumask_var(tr->tracing_cpumask);
9635  	kfree_const(tr->system_names);
9636  	kfree(tr->name);
9637  	kfree(tr);
9638  
9639  	return ERR_PTR(ret);
9640  }
9641  
9642  static struct trace_array *trace_array_create(const char *name)
9643  {
9644  	return trace_array_create_systems(name, NULL, 0, 0);
9645  }
9646  
9647  static int instance_mkdir(const char *name)
9648  {
9649  	struct trace_array *tr;
9650  	int ret;
9651  
9652  	mutex_lock(&event_mutex);
9653  	mutex_lock(&trace_types_lock);
9654  
9655  	ret = -EEXIST;
9656  	if (trace_array_find(name))
9657  		goto out_unlock;
9658  
9659  	tr = trace_array_create(name);
9660  
9661  	ret = PTR_ERR_OR_ZERO(tr);
9662  
9663  out_unlock:
9664  	mutex_unlock(&trace_types_lock);
9665  	mutex_unlock(&event_mutex);
9666  	return ret;
9667  }
9668  
9669  static u64 map_pages(u64 start, u64 size)
9670  {
9671  	struct page **pages;
9672  	phys_addr_t page_start;
9673  	unsigned int page_count;
9674  	unsigned int i;
9675  	void *vaddr;
9676  
9677  	page_count = DIV_ROUND_UP(size, PAGE_SIZE);
9678  
9679  	page_start = start;
9680  	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
9681  	if (!pages)
9682  		return 0;
9683  
9684  	for (i = 0; i < page_count; i++) {
9685  		phys_addr_t addr = page_start + i * PAGE_SIZE;
9686  		pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
9687  	}
9688  	vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
9689  	kfree(pages);
9690  
9691  	return (u64)(unsigned long)vaddr;
9692  }
9693  
9694  /**
9695   * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9696   * @name: The name of the trace array to be looked up/created.
9697   * @systems: A list of systems to create event directories for (NULL for all)
9698   *
9699   * Returns a pointer to the trace array with the given name, or
9700   * NULL if it cannot be created.
9701   *
9702   * NOTE: This function increments the reference counter associated with the
9703   * trace array returned. This makes sure it cannot be freed while in use.
9704   * Use trace_array_put() once the trace array is no longer needed.
9705   * If the trace_array is to be freed, trace_array_destroy() needs to
9706   * be called after the trace_array_put(), or simply let user space delete
9707   * it from the tracefs instances directory. But until the
9708   * trace_array_put() is called, user space cannot delete it.
9709   *
9710   */
9711  struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9712  {
9713  	struct trace_array *tr;
9714  
9715  	mutex_lock(&event_mutex);
9716  	mutex_lock(&trace_types_lock);
9717  
9718  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9719  		if (tr->name && strcmp(tr->name, name) == 0)
9720  			goto out_unlock;
9721  	}
9722  
9723  	tr = trace_array_create_systems(name, systems, 0, 0);
9724  
9725  	if (IS_ERR(tr))
9726  		tr = NULL;
9727  out_unlock:
9728  	if (tr)
9729  		tr->ref++;
9730  
9731  	mutex_unlock(&trace_types_lock);
9732  	mutex_unlock(&event_mutex);
9733  	return tr;
9734  }
9735  EXPORT_SYMBOL_GPL(trace_array_get_by_name);
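/*
 * Minimal in-kernel usage of the API exported above (an illustrative
 * sketch; "sample" is an arbitrary instance name):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("sample", NULL);
 *	if (tr) {
 *		trace_array_printk(tr, _THIS_IP_, "hello\n");
 *		trace_array_put(tr);
 *	}
 *
 * The reference must be dropped with trace_array_put(); the instance
 * itself is only freed by trace_array_destroy() or by removing it from
 * the tracefs "instances" directory.
 */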
9736  
9737  static int __remove_instance(struct trace_array *tr)
9738  {
9739  	int i;
9740  
9741  	/* Reference counter for a newly created trace array = 1. */
9742  	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9743  		return -EBUSY;
9744  
9745  	list_del(&tr->list);
9746  
9747  	/* Disable all the flags that were enabled coming in */
9748  	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9749  		if ((1 << i) & ZEROED_TRACE_FLAGS)
9750  			set_tracer_flag(tr, 1 << i, 0);
9751  	}
9752  
9753  	if (printk_trace == tr)
9754  		update_printk_trace(&global_trace);
9755  
9756  	tracing_set_nop(tr);
9757  	clear_ftrace_function_probes(tr);
9758  	event_trace_del_tracer(tr);
9759  	ftrace_clear_pids(tr);
9760  	ftrace_destroy_function_files(tr);
9761  	tracefs_remove(tr->dir);
9762  	free_percpu(tr->last_func_repeats);
9763  	free_trace_buffers(tr);
9764  	clear_tracing_err_log(tr);
9765  
9766  	for (i = 0; i < tr->nr_topts; i++) {
9767  		kfree(tr->topts[i].topts);
9768  	}
9769  	kfree(tr->topts);
9770  
9771  	free_cpumask_var(tr->pipe_cpumask);
9772  	free_cpumask_var(tr->tracing_cpumask);
9773  	kfree_const(tr->system_names);
9774  	kfree(tr->name);
9775  	kfree(tr);
9776  
9777  	return 0;
9778  }
9779  
9780  int trace_array_destroy(struct trace_array *this_tr)
9781  {
9782  	struct trace_array *tr;
9783  	int ret;
9784  
9785  	if (!this_tr)
9786  		return -EINVAL;
9787  
9788  	mutex_lock(&event_mutex);
9789  	mutex_lock(&trace_types_lock);
9790  
9791  	ret = -ENODEV;
9792  
9793  	/* Make sure the trace array exists before destroying it. */
9794  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9795  		if (tr == this_tr) {
9796  			ret = __remove_instance(tr);
9797  			break;
9798  		}
9799  	}
9800  
9801  	mutex_unlock(&trace_types_lock);
9802  	mutex_unlock(&event_mutex);
9803  
9804  	return ret;
9805  }
9806  EXPORT_SYMBOL_GPL(trace_array_destroy);
9807  
9808  static int instance_rmdir(const char *name)
9809  {
9810  	struct trace_array *tr;
9811  	int ret;
9812  
9813  	mutex_lock(&event_mutex);
9814  	mutex_lock(&trace_types_lock);
9815  
9816  	ret = -ENODEV;
9817  	tr = trace_array_find(name);
9818  	if (tr)
9819  		ret = __remove_instance(tr);
9820  
9821  	mutex_unlock(&trace_types_lock);
9822  	mutex_unlock(&event_mutex);
9823  
9824  	return ret;
9825  }
9826  
9827  static __init void create_trace_instances(struct dentry *d_tracer)
9828  {
9829  	struct trace_array *tr;
9830  
9831  	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9832  							 instance_mkdir,
9833  							 instance_rmdir);
9834  	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9835  		return;
9836  
9837  	mutex_lock(&event_mutex);
9838  	mutex_lock(&trace_types_lock);
9839  
9840  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9841  		if (!tr->name)
9842  			continue;
9843  		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9844  			     "Failed to create instance directory\n"))
9845  			break;
9846  	}
9847  
9848  	mutex_unlock(&trace_types_lock);
9849  	mutex_unlock(&event_mutex);
9850  }
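/*
 * The directory created above is what user space manipulates to create
 * or remove instances, e.g. (illustrative):
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *	rmdir /sys/kernel/tracing/instances/foo
 *
 * which end up in instance_mkdir() and instance_rmdir() above; the
 * rmdir fails with -EBUSY while the instance still holds references.
 */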
9851  
9852  static void
9853  init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9854  {
9855  	int cpu;
9856  
9857  	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9858  			tr, &show_traces_fops);
9859  
9860  	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9861  			tr, &set_tracer_fops);
9862  
9863  	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9864  			  tr, &tracing_cpumask_fops);
9865  
9866  	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9867  			  tr, &tracing_iter_fops);
9868  
9869  	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9870  			  tr, &tracing_fops);
9871  
9872  	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9873  			  tr, &tracing_pipe_fops);
9874  
9875  	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9876  			  tr, &tracing_entries_fops);
9877  
9878  	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9879  			  tr, &tracing_total_entries_fops);
9880  
9881  	trace_create_file("free_buffer", 0200, d_tracer,
9882  			  tr, &tracing_free_buffer_fops);
9883  
9884  	trace_create_file("trace_marker", 0220, d_tracer,
9885  			  tr, &tracing_mark_fops);
9886  
9887  	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9888  
9889  	trace_create_file("trace_marker_raw", 0220, d_tracer,
9890  			  tr, &tracing_mark_raw_fops);
9891  
9892  	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9893  			  &trace_clock_fops);
9894  
9895  	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9896  			  tr, &rb_simple_fops);
9897  
9898  	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9899  			  &trace_time_stamp_mode_fops);
9900  
9901  	tr->buffer_percent = 50;
9902  
9903  	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9904  			tr, &buffer_percent_fops);
9905  
9906  	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9907  			  tr, &buffer_subbuf_size_fops);
9908  
9909  	create_trace_options_dir(tr);
9910  
9911  #ifdef CONFIG_TRACER_MAX_TRACE
9912  	trace_create_maxlat_file(tr, d_tracer);
9913  #endif
9914  
9915  	if (ftrace_create_function_files(tr, d_tracer))
9916  		MEM_FAIL(1, "Could not allocate function filter files");
9917  
9918  	if (tr->range_addr_start) {
9919  		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
9920  				  tr, &last_boot_fops);
9921  #ifdef CONFIG_TRACER_SNAPSHOT
9922  	} else {
9923  		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9924  				  tr, &snapshot_fops);
9925  #endif
9926  	}
9927  
9928  	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9929  			  tr, &tracing_err_log_fops);
9930  
9931  	for_each_tracing_cpu(cpu)
9932  		tracing_init_tracefs_percpu(tr, cpu);
9933  
9934  	ftrace_init_tracefs(tr, d_tracer);
9935  }
9936  
9937  static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9938  {
9939  	struct vfsmount *mnt;
9940  	struct file_system_type *type;
9941  
9942  	/*
9943  	 * To maintain backward compatibility for tools that mount
9944  	 * debugfs to get to the tracing facility, tracefs is automatically
9945  	 * mounted to the debugfs/tracing directory.
9946  	 */
9947  	type = get_fs_type("tracefs");
9948  	if (!type)
9949  		return NULL;
9950  	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9951  	put_filesystem(type);
9952  	if (IS_ERR(mnt))
9953  		return NULL;
9954  	mntget(mnt);
9955  
9956  	return mnt;
9957  }
9958  
9959  /**
9960   * tracing_init_dentry - initialize top level trace array
9961   *
9962   * This is called when creating files or directories in the tracing
9963   * directory. It is called via fs_initcall() by any of the boot up code
9964   * and returns 0 once the top level tracing directory has been set up.
9965   */
9966  int tracing_init_dentry(void)
9967  {
9968  	struct trace_array *tr = &global_trace;
9969  
9970  	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9971  		pr_warn("Tracing disabled due to lockdown\n");
9972  		return -EPERM;
9973  	}
9974  
9975  	/* The top level trace array uses NULL as parent */
9976  	if (tr->dir)
9977  		return 0;
9978  
9979  	if (WARN_ON(!tracefs_initialized()))
9980  		return -ENODEV;
9981  
9982  	/*
9983  	 * As there may still be users that expect the tracing
9984  	 * files to exist in debugfs/tracing, we must automount
9985  	 * the tracefs file system there, so older tools still
9986  	 * work with the newer kernel.
9987  	 */
9988  	tr->dir = debugfs_create_automount("tracing", NULL,
9989  					   trace_automount, NULL);
9990  
9991  	return 0;
9992  }
9993  
9994  extern struct trace_eval_map *__start_ftrace_eval_maps[];
9995  extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9996  
9997  static struct workqueue_struct *eval_map_wq __initdata;
9998  static struct work_struct eval_map_work __initdata;
9999  static struct work_struct tracerfs_init_work __initdata;
10000  
10001  static void __init eval_map_work_func(struct work_struct *work)
10002  {
10003  	int len;
10004  
10005  	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10006  	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10007  }
10008  
10009  static int __init trace_eval_init(void)
10010  {
10011  	INIT_WORK(&eval_map_work, eval_map_work_func);
10012  
10013  	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10014  	if (!eval_map_wq) {
10015  		pr_err("Unable to allocate eval_map_wq\n");
10016  		/* Do work here */
10017  		eval_map_work_func(&eval_map_work);
10018  		return -ENOMEM;
10019  	}
10020  
10021  	queue_work(eval_map_wq, &eval_map_work);
10022  	return 0;
10023  }
10024  
10025  subsys_initcall(trace_eval_init);
10026  
10027  static int __init trace_eval_sync(void)
10028  {
10029  	/* Make sure the eval map updates are finished */
10030  	if (eval_map_wq)
10031  		destroy_workqueue(eval_map_wq);
10032  	return 0;
10033  }
10034  
10035  late_initcall_sync(trace_eval_sync);
10036  
10037  
10038  #ifdef CONFIG_MODULES
10039  static void trace_module_add_evals(struct module *mod)
10040  {
10041  	if (!mod->num_trace_evals)
10042  		return;
10043  
10044  	/*
10045  	 * Modules with bad taint do not have events created, so do
10046  	 * not bother with their enums (eval maps) either.
10047  	 */
10048  	if (trace_module_has_bad_taint(mod))
10049  		return;
10050  
10051  	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10052  }
10053  
10054  #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10055  static void trace_module_remove_evals(struct module *mod)
10056  {
10057  	union trace_eval_map_item *map;
10058  	union trace_eval_map_item **last = &trace_eval_maps;
10059  
10060  	if (!mod->num_trace_evals)
10061  		return;
10062  
10063  	mutex_lock(&trace_eval_mutex);
10064  
10065  	map = trace_eval_maps;
10066  
10067  	while (map) {
10068  		if (map->head.mod == mod)
10069  			break;
10070  		map = trace_eval_jmp_to_tail(map);
10071  		last = &map->tail.next;
10072  		map = map->tail.next;
10073  	}
10074  	if (!map)
10075  		goto out;
10076  
10077  	*last = trace_eval_jmp_to_tail(map)->tail.next;
10078  	kfree(map);
10079   out:
10080  	mutex_unlock(&trace_eval_mutex);
10081  }
10082  #else
10083  static inline void trace_module_remove_evals(struct module *mod) { }
10084  #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10085  
10086  static int trace_module_notify(struct notifier_block *self,
10087  			       unsigned long val, void *data)
10088  {
10089  	struct module *mod = data;
10090  
10091  	switch (val) {
10092  	case MODULE_STATE_COMING:
10093  		trace_module_add_evals(mod);
10094  		break;
10095  	case MODULE_STATE_GOING:
10096  		trace_module_remove_evals(mod);
10097  		break;
10098  	}
10099  
10100  	return NOTIFY_OK;
10101  }
10102  
10103  static struct notifier_block trace_module_nb = {
10104  	.notifier_call = trace_module_notify,
10105  	.priority = 0,
10106  };
10107  #endif /* CONFIG_MODULES */
10108  
10109  static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10110  {
10111  
10112  	event_trace_init();
10113  
10114  	init_tracer_tracefs(&global_trace, NULL);
10115  	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10116  
10117  	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10118  			&global_trace, &tracing_thresh_fops);
10119  
10120  	trace_create_file("README", TRACE_MODE_READ, NULL,
10121  			NULL, &tracing_readme_fops);
10122  
10123  	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10124  			NULL, &tracing_saved_cmdlines_fops);
10125  
10126  	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10127  			  NULL, &tracing_saved_cmdlines_size_fops);
10128  
10129  	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10130  			NULL, &tracing_saved_tgids_fops);
10131  
10132  	trace_create_eval_file(NULL);
10133  
10134  #ifdef CONFIG_MODULES
10135  	register_module_notifier(&trace_module_nb);
10136  #endif
10137  
10138  #ifdef CONFIG_DYNAMIC_FTRACE
10139  	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10140  			NULL, &tracing_dyn_info_fops);
10141  #endif
10142  
10143  	create_trace_instances(NULL);
10144  
10145  	update_tracer_options(&global_trace);
10146  }
10147  
10148  static __init int tracer_init_tracefs(void)
10149  {
10150  	int ret;
10151  
10152  	trace_access_lock_init();
10153  
10154  	ret = tracing_init_dentry();
10155  	if (ret)
10156  		return 0;
10157  
10158  	if (eval_map_wq) {
10159  		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10160  		queue_work(eval_map_wq, &tracerfs_init_work);
10161  	} else {
10162  		tracer_init_tracefs_work_func(NULL);
10163  	}
10164  
10165  	rv_init_interface();
10166  
10167  	return 0;
10168  }
10169  
10170  fs_initcall(tracer_init_tracefs);
10171  
10172  static int trace_die_panic_handler(struct notifier_block *self,
10173  				unsigned long ev, void *unused);
10174  
10175  static struct notifier_block trace_panic_notifier = {
10176  	.notifier_call = trace_die_panic_handler,
10177  	.priority = INT_MAX - 1,
10178  };
10179  
10180  static struct notifier_block trace_die_notifier = {
10181  	.notifier_call = trace_die_panic_handler,
10182  	.priority = INT_MAX - 1,
10183  };
10184  
10185  /*
10186   * The idea is to execute the following die/panic callback early, in order
10187   * to avoid showing irrelevant information in the trace (like other panic
10188   * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10189   * warnings get disabled (to prevent potential log flooding).
10190   */
10191  static int trace_die_panic_handler(struct notifier_block *self,
10192  				unsigned long ev, void *unused)
10193  {
10194  	if (!ftrace_dump_on_oops_enabled())
10195  		return NOTIFY_DONE;
10196  
10197  	/* The die notifier requires DIE_OOPS to trigger */
10198  	if (self == &trace_die_notifier && ev != DIE_OOPS)
10199  		return NOTIFY_DONE;
10200  
10201  	ftrace_dump(DUMP_PARAM);
10202  
10203  	return NOTIFY_DONE;
10204  }
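/*
 * ftrace_dump_on_oops_enabled() reflects the "ftrace_dump_on_oops"
 * kernel command line parameter (and the matching sysctl), so booting
 * with, e.g.:
 *
 *	ftrace_dump_on_oops=orig_cpu
 *
 * makes this notifier dump only the buffer of the CPU that triggered
 * the oops (see the DUMP_ORIG handling in ftrace_dump_one() below).
 */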
10205  
10206  /*
10207   * printk is limited to a maximum of 1024 characters; we really don't need it that big.
10208   * Nothing should be printing 1000 characters anyway.
10209   */
10210  #define TRACE_MAX_PRINT		1000
10211  
10212  /*
10213   * Define here KERN_TRACE so that we have one place to modify
10214   * it if we decide to change what log level the ftrace dump
10215   * should be at.
10216   */
10217  #define KERN_TRACE		KERN_EMERG
10218  
10219  void
10220  trace_printk_seq(struct trace_seq *s)
10221  {
10222  	/* Probably should print a warning here. */
10223  	if (s->seq.len >= TRACE_MAX_PRINT)
10224  		s->seq.len = TRACE_MAX_PRINT;
10225  
10226  	/*
10227  	 * More paranoid code. Although the buffer size is set to
10228  	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10229  	 * an extra layer of protection.
10230  	 */
10231  	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10232  		s->seq.len = s->seq.size - 1;
10233  
10234  	/* Should be NUL terminated, but we are paranoid. */
10235  	s->buffer[s->seq.len] = 0;
10236  
10237  	printk(KERN_TRACE "%s", s->buffer);
10238  
10239  	trace_seq_init(s);
10240  }
10241  
10242  static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10243  {
10244  	iter->tr = tr;
10245  	iter->trace = iter->tr->current_trace;
10246  	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10247  	iter->array_buffer = &tr->array_buffer;
10248  
10249  	if (iter->trace && iter->trace->open)
10250  		iter->trace->open(iter);
10251  
10252  	/* Annotate start of buffers if we had overruns */
10253  	if (ring_buffer_overruns(iter->array_buffer->buffer))
10254  		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10255  
10256  	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10257  	if (trace_clocks[iter->tr->clock_id].in_ns)
10258  		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10259  
10260  	/* Can not use kmalloc for iter.temp and iter.fmt */
10261  	iter->temp = static_temp_buf;
10262  	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10263  	iter->fmt = static_fmt_buf;
10264  	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10265  }
10266  
10267  void trace_init_global_iter(struct trace_iterator *iter)
10268  {
10269  	trace_init_iter(iter, &global_trace);
10270  }
10271  
10272  static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10273  {
10274  	/* use static because iter can be a bit big for the stack */
10275  	static struct trace_iterator iter;
10276  	unsigned int old_userobj;
10277  	unsigned long flags;
10278  	int cnt = 0, cpu;
10279  
10280  	/*
10281  	 * Always turn off tracing when we dump.
10282  	 * We don't need to show trace output of what happens
10283  	 * between multiple crashes.
10284  	 *
10285  	 * If the user does a sysrq-z, then they can re-enable
10286  	 * tracing with echo 1 > tracing_on.
10287  	 */
10288  	tracer_tracing_off(tr);
10289  
10290  	local_irq_save(flags);
10291  
10292  	/* Simulate the iterator */
10293  	trace_init_iter(&iter, tr);
10294  
10295  	for_each_tracing_cpu(cpu) {
10296  		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10297  	}
10298  
10299  	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10300  
10301  	/* don't look at user memory in panic mode */
10302  	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10303  
10304  	if (dump_mode == DUMP_ORIG)
10305  		iter.cpu_file = raw_smp_processor_id();
10306  	else
10307  		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10308  
10309  	if (tr == &global_trace)
10310  		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10311  	else
10312  		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10313  
10314  	/* Did function tracer already get disabled? */
10315  	if (ftrace_is_dead()) {
10316  		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10317  		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10318  	}
10319  
10320  	/*
10321  	 * We need to stop all tracing on all CPUs to read
10322  	 * the next buffer. This is a bit expensive, but it is
10323  	 * not done often. We print everything we can read,
10324  	 * and then release the locks again.
10325  	 */
10326  
10327  	while (!trace_empty(&iter)) {
10328  
10329  		if (!cnt)
10330  			printk(KERN_TRACE "---------------------------------\n");
10331  
10332  		cnt++;
10333  
10334  		trace_iterator_reset(&iter);
10335  		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10336  
10337  		if (trace_find_next_entry_inc(&iter) != NULL) {
10338  			int ret;
10339  
10340  			ret = print_trace_line(&iter);
10341  			if (ret != TRACE_TYPE_NO_CONSUME)
10342  				trace_consume(&iter);
10343  		}
10344  		touch_nmi_watchdog();
10345  
10346  		trace_printk_seq(&iter.seq);
10347  	}
10348  
10349  	if (!cnt)
10350  		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10351  	else
10352  		printk(KERN_TRACE "---------------------------------\n");
10353  
10354  	tr->trace_flags |= old_userobj;
10355  
10356  	for_each_tracing_cpu(cpu) {
10357  		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10358  	}
10359  	local_irq_restore(flags);
10360  }
10361  
10362  static void ftrace_dump_by_param(void)
10363  {
10364  	bool first_param = true;
10365  	char dump_param[MAX_TRACER_SIZE];
10366  	char *buf, *token, *inst_name;
10367  	struct trace_array *tr;
10368  
10369  	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10370  	buf = dump_param;
10371  
10372  	while ((token = strsep(&buf, ",")) != NULL) {
10373  		if (first_param) {
10374  			first_param = false;
10375  			if (!strcmp("0", token))
10376  				continue;
10377  			else if (!strcmp("1", token)) {
10378  				ftrace_dump_one(&global_trace, DUMP_ALL);
10379  				continue;
10380  			}
10381  			else if (!strcmp("2", token) ||
10382  			  !strcmp("orig_cpu", token)) {
10383  				ftrace_dump_one(&global_trace, DUMP_ORIG);
10384  				continue;
10385  			}
10386  		}
10387  
10388  		inst_name = strsep(&token, "=");
10389  		tr = trace_array_find(inst_name);
10390  		if (!tr) {
10391  			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10392  			continue;
10393  		}
10394  
10395  		if (token && (!strcmp("2", token) ||
10396  			  !strcmp("orig_cpu", token)))
10397  			ftrace_dump_one(tr, DUMP_ORIG);
10398  		else
10399  			ftrace_dump_one(tr, DUMP_ALL);
10400  	}
10401  }
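/*
 * Illustrative examples of the ftrace_dump_on_oops strings handled by
 * ftrace_dump_by_param() above (a sketch derived from the parsing logic,
 * not an exhaustive specification):
 *
 *	ftrace_dump_on_oops=1			dump the global buffer, all CPUs
 *	ftrace_dump_on_oops=orig_cpu		dump only the originating CPU
 *	ftrace_dump_on_oops=1,foo,bar=2		also dump instances "foo" (all
 *						CPUs) and "bar" (originating CPU)
 *
 * The first comma-separated token controls the global buffer ("0" skips it,
 * "1" dumps all CPUs, "2"/"orig_cpu" dumps only the originating CPU); every
 * later "name" or "name=2" token selects a named trace instance.
 */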
10402  
10403  void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10404  {
10405  	static atomic_t dump_running;
10406  
10407  	/* Only allow one dump user at a time. */
10408  	if (atomic_inc_return(&dump_running) != 1) {
10409  		atomic_dec(&dump_running);
10410  		return;
10411  	}
10412  
10413  	switch (oops_dump_mode) {
10414  	case DUMP_ALL:
10415  		ftrace_dump_one(&global_trace, DUMP_ALL);
10416  		break;
10417  	case DUMP_ORIG:
10418  		ftrace_dump_one(&global_trace, DUMP_ORIG);
10419  		break;
10420  	case DUMP_PARAM:
10421  		ftrace_dump_by_param();
10422  		break;
10423  	case DUMP_NONE:
10424  		break;
10425  	default:
10426  		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10427  		ftrace_dump_one(&global_trace, DUMP_ALL);
10428  	}
10429  
10430  	atomic_dec(&dump_running);
10431  }
10432  EXPORT_SYMBOL_GPL(ftrace_dump);
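/*
 * Minimal usage sketch (hypothetical caller, shown only because the symbol
 * is exported): a debugging hook in a module could dump the global buffer
 * to the console on some fatal condition, e.g.:
 *
 *	if (something_went_badly_wrong)		// hypothetical condition
 *		ftrace_dump(DUMP_ALL);		// or DUMP_ORIG for this CPU only
 *
 * Note that ftrace_dump_one() above turns tracing off as part of the dump,
 * so tracing has to be re-enabled explicitly if further traces are wanted.
 */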
10433  
10434  #define WRITE_BUFSIZE  4096
10435  
10436  ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10437  				size_t count, loff_t *ppos,
10438  				int (*createfn)(const char *))
10439  {
10440  	char *kbuf, *buf, *tmp;
10441  	int ret = 0;
10442  	size_t done = 0;
10443  	size_t size;
10444  
10445  	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10446  	if (!kbuf)
10447  		return -ENOMEM;
10448  
10449  	while (done < count) {
10450  		size = count - done;
10451  
10452  		if (size >= WRITE_BUFSIZE)
10453  			size = WRITE_BUFSIZE - 1;
10454  
10455  		if (copy_from_user(kbuf, buffer + done, size)) {
10456  			ret = -EFAULT;
10457  			goto out;
10458  		}
10459  		kbuf[size] = '\0';
10460  		buf = kbuf;
10461  		do {
10462  			tmp = strchr(buf, '\n');
10463  			if (tmp) {
10464  				*tmp = '\0';
10465  				size = tmp - buf + 1;
10466  			} else {
10467  				size = strlen(buf);
10468  				if (done + size < count) {
10469  					if (buf != kbuf)
10470  						break;
10471  					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10472  					pr_warn("Line length is too long: Should be less than %d\n",
10473  						WRITE_BUFSIZE - 2);
10474  					ret = -EINVAL;
10475  					goto out;
10476  				}
10477  			}
10478  			done += size;
10479  
10480  			/* Remove comments */
10481  			tmp = strchr(buf, '#');
10482  
10483  			if (tmp)
10484  				*tmp = '\0';
10485  
10486  			ret = createfn(buf);
10487  			if (ret)
10488  				goto out;
10489  			buf += size;
10490  
10491  		} while (done < count);
10492  	}
10493  	ret = done;
10494  
10495  out:
10496  	kfree(kbuf);
10497  
10498  	return ret;
10499  }
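/*
 * Illustrative sketch of how a tracefs write handler typically uses
 * trace_parse_run_command() (hypothetical names, shown only to clarify the
 * contract: createfn() is called once per newline-terminated command, with
 * '#' comments already stripped):
 *
 *	static int create_foo_cmd(const char *raw_command)
 *	{
 *		// parse a single command and act on it
 *		return 0;
 *	}
 *
 *	static ssize_t foo_write(struct file *file, const char __user *buffer,
 *				 size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       create_foo_cmd);
 *	}
 */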
10500  
10501  #ifdef CONFIG_TRACER_MAX_TRACE
10502  __init static bool tr_needs_alloc_snapshot(const char *name)
10503  {
10504  	char *test;
10505  	int len = strlen(name);
10506  	bool ret;
10507  
10508  	if (!boot_snapshot_index)
10509  		return false;
10510  
10511  	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10512  	    boot_snapshot_info[len] == '\t')
10513  		return true;
10514  
10515  	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10516  	if (!test)
10517  		return false;
10518  
10519  	sprintf(test, "\t%s\t", name);
10520  	ret = strstr(boot_snapshot_info, test) != NULL;
10521  	kfree(test);
10522  	return ret;
10523  }
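/*
 * Sketch of the format assumed above: boot_snapshot_info is expected to be a
 * tab-terminated list of instance names (e.g. "foo\tbar\t"), so an instance
 * needs a snapshot buffer when its name appears either at the very start of
 * the list or as an embedded "\tname\t" entry.
 */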
10524  
10525  __init static void do_allocate_snapshot(const char *name)
10526  {
10527  	if (!tr_needs_alloc_snapshot(name))
10528  		return;
10529  
10530  	/*
10531  	 * When allocate_snapshot is set, the next call to
10532  	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10533  	 * will allocate the snapshot buffer. That will also clear
10534  	 * this flag.
10535  	 */
10536  	allocate_snapshot = true;
10537  }
10538  #else
10539  static inline void do_allocate_snapshot(const char *name) { }
10540  #endif
10541  
10542  __init static void enable_instances(void)
10543  {
10544  	struct trace_array *tr;
10545  	char *curr_str;
10546  	char *name;
10547  	char *str;
10548  	char *tok;
10549  
10550  	/* A tab is always appended */
10551  	boot_instance_info[boot_instance_index - 1] = '\0';
10552  	str = boot_instance_info;
10553  
10554  	while ((curr_str = strsep(&str, "\t"))) {
10555  		phys_addr_t start = 0;
10556  		phys_addr_t size = 0;
10557  		unsigned long addr = 0;
10558  		bool traceprintk = false;
10559  		bool traceoff = false;
10560  		char *flag_delim;
10561  		char *addr_delim;
10562  
10563  		tok = strsep(&curr_str, ",");
10564  
10565  		flag_delim = strchr(tok, '^');
10566  		addr_delim = strchr(tok, '@');
10567  
10568  		if (addr_delim)
10569  			*addr_delim++ = '\0';
10570  
10571  		if (flag_delim)
10572  			*flag_delim++ = '\0';
10573  
10574  		name = tok;
10575  
10576  		if (flag_delim) {
10577  			char *flag;
10578  
10579  			while ((flag = strsep(&flag_delim, "^"))) {
10580  				if (strcmp(flag, "traceoff") == 0) {
10581  					traceoff = true;
10582  				} else if ((strcmp(flag, "printk") == 0) ||
10583  					   (strcmp(flag, "traceprintk") == 0) ||
10584  					   (strcmp(flag, "trace_printk") == 0)) {
10585  					traceprintk = true;
10586  				} else {
10587  					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10588  						flag, name);
10589  				}
10590  			}
10591  		}
10592  
10593  		tok = addr_delim;
10594  		if (tok && isdigit(*tok)) {
10595  			start = memparse(tok, &tok);
10596  			if (!start) {
10597  				pr_warn("Tracing: Invalid boot instance address for %s\n",
10598  					name);
10599  				continue;
10600  			}
10601  			if (*tok != ':') {
10602  				pr_warn("Tracing: No size specified for instance %s\n", name);
10603  				continue;
10604  			}
10605  			tok++;
10606  			size = memparse(tok, &tok);
10607  			if (!size) {
10608  				pr_warn("Tracing: Invalid boot instance size for %s\n",
10609  					name);
10610  				continue;
10611  			}
10612  		} else if (tok) {
10613  			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10614  				start = 0;
10615  				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10616  				continue;
10617  			}
10618  		}
10619  
10620  		if (start) {
10621  			addr = map_pages(start, size);
10622  			if (addr) {
10623  				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10624  					name, &start, (unsigned long)size);
10625  			} else {
10626  				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10627  				continue;
10628  			}
10629  		} else {
10630  		/* Only non-mapped buffers have snapshot buffers */
10631  			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10632  				do_allocate_snapshot(name);
10633  		}
10634  
10635  		tr = trace_array_create_systems(name, NULL, addr, size);
10636  		if (IS_ERR(tr)) {
10637  			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10638  			continue;
10639  		}
10640  
10641  		if (traceoff)
10642  			tracer_tracing_off(tr);
10643  
10644  		if (traceprintk)
10645  			update_printk_trace(tr);
10646  
10647  		/*
10648  		 * If start is set, then this is a mapped buffer, and
10649  		 * cannot be deleted by user space, so keep the reference
10650  		 * to it.
10651  		 */
10652  		if (start) {
10653  			tr->flags |= TRACE_ARRAY_FL_BOOT;
10654  			tr->ref++;
10655  		}
10656  
10657  		while ((tok = strsep(&curr_str, ","))) {
10658  			early_enable_events(tr, tok, true);
10659  		}
10660  	}
10661  }
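/*
 * Illustrative examples of the boot-time instance descriptions parsed by
 * enable_instances() above (a sketch derived from the parsing logic; the
 * kernel parameter documentation remains the authoritative reference):
 *
 *	trace_instance=foo,sched:sched_switch	create instance "foo" and
 *						enable the listed events
 *	trace_instance=foo^traceoff^traceprintk	create "foo" with tracing off
 *						and trace_printk redirected to it
 *	trace_instance=boot_map@0x285400000:12M	back the buffer with the given
 *						physical address and size
 *	trace_instance=boot_map@myreserve	back the buffer with a reserve_mem
 *						region looked up by name
 *
 * Each such parameter is recorded with a trailing tab, which is why the loop
 * above splits boot_instance_info on '\t'.
 */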
10662  
10663  __init static int tracer_alloc_buffers(void)
10664  {
10665  	int ring_buf_size;
10666  	int ret = -ENOMEM;
10667  
10668  
10669  	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10670  		pr_warn("Tracing disabled due to lockdown\n");
10671  		return -EPERM;
10672  	}
10673  
10674  	/*
10675  	 * Make sure we don't accidentally add more trace options
10676  	 * than we have bits for.
10677  	 */
10678  	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10679  
10680  	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10681  		goto out;
10682  
10683  	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10684  		goto out_free_buffer_mask;
10685  
10686  	/* Only allocate trace_printk buffers if a trace_printk exists */
10687  	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10688  		/* Must be called before global_trace.buffer is allocated */
10689  		trace_printk_init_buffers();
10690  
10691  	/* To save memory, keep the ring buffer size to its minimum */
10692  	if (global_trace.ring_buffer_expanded)
10693  		ring_buf_size = trace_buf_size;
10694  	else
10695  		ring_buf_size = 1;
10696  
10697  	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10698  	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10699  
10700  	raw_spin_lock_init(&global_trace.start_lock);
10701  
10702  	/*
10703  	 * The prepare callback allocates some memory for the ring buffer. We
10704  	 * don't free the buffer if the CPU goes down. If we were to free
10705  	 * the buffer, then the user would lose any trace that was in the
10706  	 * buffer. The memory will be removed once the "instance" is removed.
10707  	 */
10708  	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10709  				      "trace/RB:prepare", trace_rb_cpu_prepare,
10710  				      NULL);
10711  	if (ret < 0)
10712  		goto out_free_cpumask;
10713  	/* Used for event triggers */
10714  	ret = -ENOMEM;
10715  	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10716  	if (!temp_buffer)
10717  		goto out_rm_hp_state;
10718  
10719  	if (trace_create_savedcmd() < 0)
10720  		goto out_free_temp_buffer;
10721  
10722  	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10723  		goto out_free_savedcmd;
10724  
10725  	/* TODO: make the number of buffers hot pluggable with CPUs */
10726  	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10727  		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10728  		goto out_free_pipe_cpumask;
10729  	}
10730  	if (global_trace.buffer_disabled)
10731  		tracing_off();
10732  
10733  	if (trace_boot_clock) {
10734  		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10735  		if (ret < 0)
10736  			pr_warn("Trace clock %s not defined, going back to default\n",
10737  				trace_boot_clock);
10738  	}
10739  
10740  	/*
10741  	 * register_tracer() might reference current_trace, so it
10742  	 * needs to be set before we register anything. This is
10743  	 * just a bootstrap of current_trace anyway.
10744  	 */
10745  	global_trace.current_trace = &nop_trace;
10746  
10747  	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10748  #ifdef CONFIG_TRACER_MAX_TRACE
10749  	spin_lock_init(&global_trace.snapshot_trigger_lock);
10750  #endif
10751  	ftrace_init_global_array_ops(&global_trace);
10752  
10753  	init_trace_flags_index(&global_trace);
10754  
10755  	register_tracer(&nop_trace);
10756  
10757  	/* Function tracing may start here (via kernel command line) */
10758  	init_function_trace();
10759  
10760  	/* All seems OK, enable tracing */
10761  	tracing_disabled = 0;
10762  
10763  	atomic_notifier_chain_register(&panic_notifier_list,
10764  				       &trace_panic_notifier);
10765  
10766  	register_die_notifier(&trace_die_notifier);
10767  
10768  	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10769  
10770  	INIT_LIST_HEAD(&global_trace.systems);
10771  	INIT_LIST_HEAD(&global_trace.events);
10772  	INIT_LIST_HEAD(&global_trace.hist_vars);
10773  	INIT_LIST_HEAD(&global_trace.err_log);
10774  	list_add(&global_trace.list, &ftrace_trace_arrays);
10775  
10776  	apply_trace_boot_options();
10777  
10778  	register_snapshot_cmd();
10779  
10780  	test_can_verify();
10781  
10782  	return 0;
10783  
10784  out_free_pipe_cpumask:
10785  	free_cpumask_var(global_trace.pipe_cpumask);
10786  out_free_savedcmd:
10787  	trace_free_saved_cmdlines_buffer();
10788  out_free_temp_buffer:
10789  	ring_buffer_free(temp_buffer);
10790  out_rm_hp_state:
10791  	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10792  out_free_cpumask:
10793  	free_cpumask_var(global_trace.tracing_cpumask);
10794  out_free_buffer_mask:
10795  	free_cpumask_var(tracing_buffer_mask);
10796  out:
10797  	return ret;
10798  }
10799  
10800  void __init ftrace_boot_snapshot(void)
10801  {
10802  #ifdef CONFIG_TRACER_MAX_TRACE
10803  	struct trace_array *tr;
10804  
10805  	if (!snapshot_at_boot)
10806  		return;
10807  
10808  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10809  		if (!tr->allocated_snapshot)
10810  			continue;
10811  
10812  		tracing_snapshot_instance(tr);
10813  		trace_array_puts(tr, "** Boot snapshot taken **\n");
10814  	}
10815  #endif
10816  }
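/*
 * snapshot_at_boot is normally set via the "ftrace_boot_snapshot" kernel
 * parameter (an assumption based on the surrounding boot-parameter naming):
 * when it is set, a snapshot is taken here of every instance that already
 * has a snapshot buffer allocated.
 */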
10817  
10818  void __init early_trace_init(void)
10819  {
10820  	if (tracepoint_printk) {
10821  		tracepoint_print_iter =
10822  			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10823  		if (MEM_FAIL(!tracepoint_print_iter,
10824  			     "Failed to allocate trace iterator\n"))
10825  			tracepoint_printk = 0;
10826  		else
10827  			static_key_enable(&tracepoint_printk_key.key);
10828  	}
10829  	tracer_alloc_buffers();
10830  
10831  	init_events();
10832  }
10833  
10834  void __init trace_init(void)
10835  {
10836  	trace_event_init();
10837  
10838  	if (boot_instance_index)
10839  		enable_instances();
10840  }
10841  
10842  __init static void clear_boot_tracer(void)
10843  {
10844  	/*
10845  	 * The default bootup tracer name lives in an init-section buffer.
10846  	 * This function is called from a late initcall. If the boot
10847  	 * tracer was never found, clear it out, to prevent a later
10848  	 * registration from accessing the buffer that is about to
10849  	 * be freed.
10850  	 */
10851  	if (!default_bootup_tracer)
10852  		return;
10853  
10854  	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10855  	       default_bootup_tracer);
10856  	default_bootup_tracer = NULL;
10857  }
10858  
10859  #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10860  __init static void tracing_set_default_clock(void)
10861  {
10862  	/* sched_clock_stable() is determined in late_initcall */
10863  	if (!trace_boot_clock && !sched_clock_stable()) {
10864  		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10865  			pr_warn("Can not set tracing clock due to lockdown\n");
10866  			return;
10867  		}
10868  
10869  		printk(KERN_WARNING
10870  		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10871  		       "If you want to keep using the local clock, then add:\n"
10872  		       "  \"trace_clock=local\"\n"
10873  		       "on the kernel command line\n");
10874  		tracing_set_clock(&global_trace, "global");
10875  	}
10876  }
10877  #else
10878  static inline void tracing_set_default_clock(void) { }
10879  #endif
10880  
10881  __init static int late_trace_init(void)
10882  {
10883  	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10884  		static_key_disable(&tracepoint_printk_key.key);
10885  		tracepoint_printk = 0;
10886  	}
10887  
10888  	tracing_set_default_clock();
10889  	clear_boot_tracer();
10890  	return 0;
10891  }
10892  
10893  late_initcall_sync(late_trace_init);
10894