xref: /openbmc/linux/kernel/trace/trace.c (revision 9144f784f852f9a125cabe9927b986d909bfa439)
1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * ring buffer based function tracer
4   *
5   * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6   * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7   *
8   * Originally taken from the RT patch by:
9   *    Arnaldo Carvalho de Melo <acme@redhat.com>
10   *
11   * Based on code from the latency_tracer, that is:
12   *  Copyright (C) 2004-2006 Ingo Molnar
13   *  Copyright (C) 2004 Nadia Yvette Chambers
14   */
15  #include <linux/ring_buffer.h>
16  #include <generated/utsrelease.h>
17  #include <linux/stacktrace.h>
18  #include <linux/writeback.h>
19  #include <linux/kallsyms.h>
20  #include <linux/security.h>
21  #include <linux/seq_file.h>
22  #include <linux/irqflags.h>
23  #include <linux/debugfs.h>
24  #include <linux/tracefs.h>
25  #include <linux/pagemap.h>
26  #include <linux/hardirq.h>
27  #include <linux/linkage.h>
28  #include <linux/uaccess.h>
29  #include <linux/vmalloc.h>
30  #include <linux/ftrace.h>
31  #include <linux/module.h>
32  #include <linux/percpu.h>
33  #include <linux/splice.h>
34  #include <linux/kdebug.h>
35  #include <linux/string.h>
36  #include <linux/mount.h>
37  #include <linux/rwsem.h>
38  #include <linux/slab.h>
39  #include <linux/ctype.h>
40  #include <linux/init.h>
41  #include <linux/panic_notifier.h>
42  #include <linux/kmemleak.h>
43  #include <linux/poll.h>
44  #include <linux/nmi.h>
45  #include <linux/fs.h>
46  #include <linux/trace.h>
47  #include <linux/sched/clock.h>
48  #include <linux/sched/rt.h>
49  #include <linux/fsnotify.h>
50  #include <linux/irq_work.h>
51  #include <linux/workqueue.h>
52  
53  #include <asm/setup.h> /* COMMAND_LINE_SIZE */
54  
55  #include "trace.h"
56  #include "trace_output.h"
57  
58  /*
59   * On boot up, the ring buffer is set to the minimum size, so that
60   * we do not waste memory on systems that are not using tracing.
61   */
62  bool ring_buffer_expanded;
63  
64  #ifdef CONFIG_FTRACE_STARTUP_TEST
65  /*
66   * We need to change this state when a selftest is running.
67   * A selftest will lurk into the ring-buffer to count the
68   * entries inserted during the selftest although some concurrent
69   * insertions into the ring-buffer such as trace_printk could occurred
70   * at the same time, giving false positive or negative results.
71   */
72  static bool __read_mostly tracing_selftest_running;
73  
74  /*
75   * If boot-time tracing including tracers/events via kernel cmdline
76   * is running, we do not want to run SELFTEST.
77   */
78  bool __read_mostly tracing_selftest_disabled;
79  
disable_tracing_selftest(const char * reason)80  void __init disable_tracing_selftest(const char *reason)
81  {
82  	if (!tracing_selftest_disabled) {
83  		tracing_selftest_disabled = true;
84  		pr_info("Ftrace startup test is disabled due to %s\n", reason);
85  	}
86  }
87  #else
88  #define tracing_selftest_running	0
89  #define tracing_selftest_disabled	0
90  #endif
91  
92  /* Pipe tracepoints to printk */
93  static struct trace_iterator *tracepoint_print_iter;
94  int tracepoint_printk;
95  static bool tracepoint_printk_stop_on_boot __initdata;
96  static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
97  
98  /* For tracers that don't implement custom flags */
99  static struct tracer_opt dummy_tracer_opt[] = {
100  	{ }
101  };
102  
103  static int
dummy_set_flag(struct trace_array * tr,u32 old_flags,u32 bit,int set)104  dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
105  {
106  	return 0;
107  }
108  
109  /*
110   * To prevent the comm cache from being overwritten when no
111   * tracing is active, only save the comm when a trace event
112   * occurred.
113   */
114  static DEFINE_PER_CPU(bool, trace_taskinfo_save);
115  
116  /*
117   * Kill all tracing for good (never come back).
118   * It is initialized to 1 but will turn to zero if the initialization
119   * of the tracer is successful. But that is the only place that sets
120   * this back to zero.
121   */
122  static int tracing_disabled = 1;
123  
124  cpumask_var_t __read_mostly	tracing_buffer_mask;
125  
126  /*
127   * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
128   *
129   * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
130   * is set, then ftrace_dump is called. This will output the contents
131   * of the ftrace buffers to the console.  This is very useful for
132   * capturing traces that lead to crashes and outputing it to a
133   * serial console.
134   *
135   * It is default off, but you can enable it with either specifying
136   * "ftrace_dump_on_oops" in the kernel command line, or setting
137   * /proc/sys/kernel/ftrace_dump_on_oops
138   * Set 1 if you want to dump buffers of all CPUs
139   * Set 2 if you want to dump the buffer of the CPU that triggered oops
140   */
141  
142  enum ftrace_dump_mode ftrace_dump_on_oops;
143  
144  /* When set, tracing will stop when a WARN*() is hit */
145  int __disable_trace_on_warning;
146  
147  #ifdef CONFIG_TRACE_EVAL_MAP_FILE
148  /* Map of enums to their values, for "eval_map" file */
149  struct trace_eval_map_head {
150  	struct module			*mod;
151  	unsigned long			length;
152  };
153  
154  union trace_eval_map_item;
155  
156  struct trace_eval_map_tail {
157  	/*
158  	 * "end" is first and points to NULL as it must be different
159  	 * than "mod" or "eval_string"
160  	 */
161  	union trace_eval_map_item	*next;
162  	const char			*end;	/* points to NULL */
163  };
164  
165  static DEFINE_MUTEX(trace_eval_mutex);
166  
167  /*
168   * The trace_eval_maps are saved in an array with two extra elements,
169   * one at the beginning, and one at the end. The beginning item contains
170   * the count of the saved maps (head.length), and the module they
171   * belong to if not built in (head.mod). The ending item contains a
172   * pointer to the next array of saved eval_map items.
173   */
174  union trace_eval_map_item {
175  	struct trace_eval_map		map;
176  	struct trace_eval_map_head	head;
177  	struct trace_eval_map_tail	tail;
178  };
179  
180  static union trace_eval_map_item *trace_eval_maps;
181  #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
182  
183  int tracing_set_tracer(struct trace_array *tr, const char *buf);
184  static void ftrace_trace_userstack(struct trace_array *tr,
185  				   struct trace_buffer *buffer,
186  				   unsigned int trace_ctx);
187  
188  #define MAX_TRACER_SIZE		100
189  static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
190  static char *default_bootup_tracer;
191  
192  static bool allocate_snapshot;
193  static bool snapshot_at_boot;
194  
195  static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
196  static int boot_instance_index;
197  
198  static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
199  static int boot_snapshot_index;
200  
set_cmdline_ftrace(char * str)201  static int __init set_cmdline_ftrace(char *str)
202  {
203  	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
204  	default_bootup_tracer = bootup_tracer_buf;
205  	/* We are using ftrace early, expand it */
206  	ring_buffer_expanded = true;
207  	return 1;
208  }
209  __setup("ftrace=", set_cmdline_ftrace);
210  
set_ftrace_dump_on_oops(char * str)211  static int __init set_ftrace_dump_on_oops(char *str)
212  {
213  	if (*str++ != '=' || !*str || !strcmp("1", str)) {
214  		ftrace_dump_on_oops = DUMP_ALL;
215  		return 1;
216  	}
217  
218  	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
219  		ftrace_dump_on_oops = DUMP_ORIG;
220                  return 1;
221          }
222  
223          return 0;
224  }
225  __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
226  
stop_trace_on_warning(char * str)227  static int __init stop_trace_on_warning(char *str)
228  {
229  	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
230  		__disable_trace_on_warning = 1;
231  	return 1;
232  }
233  __setup("traceoff_on_warning", stop_trace_on_warning);
234  
boot_alloc_snapshot(char * str)235  static int __init boot_alloc_snapshot(char *str)
236  {
237  	char *slot = boot_snapshot_info + boot_snapshot_index;
238  	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
239  	int ret;
240  
241  	if (str[0] == '=') {
242  		str++;
243  		if (strlen(str) >= left)
244  			return -1;
245  
246  		ret = snprintf(slot, left, "%s\t", str);
247  		boot_snapshot_index += ret;
248  	} else {
249  		allocate_snapshot = true;
250  		/* We also need the main ring buffer expanded */
251  		ring_buffer_expanded = true;
252  	}
253  	return 1;
254  }
255  __setup("alloc_snapshot", boot_alloc_snapshot);
256  
257  
boot_snapshot(char * str)258  static int __init boot_snapshot(char *str)
259  {
260  	snapshot_at_boot = true;
261  	boot_alloc_snapshot(str);
262  	return 1;
263  }
264  __setup("ftrace_boot_snapshot", boot_snapshot);
265  
266  
boot_instance(char * str)267  static int __init boot_instance(char *str)
268  {
269  	char *slot = boot_instance_info + boot_instance_index;
270  	int left = sizeof(boot_instance_info) - boot_instance_index;
271  	int ret;
272  
273  	if (strlen(str) >= left)
274  		return -1;
275  
276  	ret = snprintf(slot, left, "%s\t", str);
277  	boot_instance_index += ret;
278  
279  	return 1;
280  }
281  __setup("trace_instance=", boot_instance);
282  
283  
284  static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
285  
set_trace_boot_options(char * str)286  static int __init set_trace_boot_options(char *str)
287  {
288  	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
289  	return 1;
290  }
291  __setup("trace_options=", set_trace_boot_options);
292  
293  static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
294  static char *trace_boot_clock __initdata;
295  
set_trace_boot_clock(char * str)296  static int __init set_trace_boot_clock(char *str)
297  {
298  	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
299  	trace_boot_clock = trace_boot_clock_buf;
300  	return 1;
301  }
302  __setup("trace_clock=", set_trace_boot_clock);
303  
set_tracepoint_printk(char * str)304  static int __init set_tracepoint_printk(char *str)
305  {
306  	/* Ignore the "tp_printk_stop_on_boot" param */
307  	if (*str == '_')
308  		return 0;
309  
310  	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
311  		tracepoint_printk = 1;
312  	return 1;
313  }
314  __setup("tp_printk", set_tracepoint_printk);
315  
set_tracepoint_printk_stop(char * str)316  static int __init set_tracepoint_printk_stop(char *str)
317  {
318  	tracepoint_printk_stop_on_boot = true;
319  	return 1;
320  }
321  __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
322  
ns2usecs(u64 nsec)323  unsigned long long ns2usecs(u64 nsec)
324  {
325  	nsec += 500;
326  	do_div(nsec, 1000);
327  	return nsec;
328  }
329  
330  static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event,int flag)331  trace_process_export(struct trace_export *export,
332  	       struct ring_buffer_event *event, int flag)
333  {
334  	struct trace_entry *entry;
335  	unsigned int size = 0;
336  
337  	if (export->flags & flag) {
338  		entry = ring_buffer_event_data(event);
339  		size = ring_buffer_event_length(event);
340  		export->write(export, entry, size);
341  	}
342  }
343  
344  static DEFINE_MUTEX(ftrace_export_lock);
345  
346  static struct trace_export __rcu *ftrace_exports_list __read_mostly;
347  
348  static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
349  static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
350  static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
351  
ftrace_exports_enable(struct trace_export * export)352  static inline void ftrace_exports_enable(struct trace_export *export)
353  {
354  	if (export->flags & TRACE_EXPORT_FUNCTION)
355  		static_branch_inc(&trace_function_exports_enabled);
356  
357  	if (export->flags & TRACE_EXPORT_EVENT)
358  		static_branch_inc(&trace_event_exports_enabled);
359  
360  	if (export->flags & TRACE_EXPORT_MARKER)
361  		static_branch_inc(&trace_marker_exports_enabled);
362  }
363  
ftrace_exports_disable(struct trace_export * export)364  static inline void ftrace_exports_disable(struct trace_export *export)
365  {
366  	if (export->flags & TRACE_EXPORT_FUNCTION)
367  		static_branch_dec(&trace_function_exports_enabled);
368  
369  	if (export->flags & TRACE_EXPORT_EVENT)
370  		static_branch_dec(&trace_event_exports_enabled);
371  
372  	if (export->flags & TRACE_EXPORT_MARKER)
373  		static_branch_dec(&trace_marker_exports_enabled);
374  }
375  
ftrace_exports(struct ring_buffer_event * event,int flag)376  static void ftrace_exports(struct ring_buffer_event *event, int flag)
377  {
378  	struct trace_export *export;
379  
380  	preempt_disable_notrace();
381  
382  	export = rcu_dereference_raw_check(ftrace_exports_list);
383  	while (export) {
384  		trace_process_export(export, event, flag);
385  		export = rcu_dereference_raw_check(export->next);
386  	}
387  
388  	preempt_enable_notrace();
389  }
390  
391  static inline void
add_trace_export(struct trace_export ** list,struct trace_export * export)392  add_trace_export(struct trace_export **list, struct trace_export *export)
393  {
394  	rcu_assign_pointer(export->next, *list);
395  	/*
396  	 * We are entering export into the list but another
397  	 * CPU might be walking that list. We need to make sure
398  	 * the export->next pointer is valid before another CPU sees
399  	 * the export pointer included into the list.
400  	 */
401  	rcu_assign_pointer(*list, export);
402  }
403  
404  static inline int
rm_trace_export(struct trace_export ** list,struct trace_export * export)405  rm_trace_export(struct trace_export **list, struct trace_export *export)
406  {
407  	struct trace_export **p;
408  
409  	for (p = list; *p != NULL; p = &(*p)->next)
410  		if (*p == export)
411  			break;
412  
413  	if (*p != export)
414  		return -1;
415  
416  	rcu_assign_pointer(*p, (*p)->next);
417  
418  	return 0;
419  }
420  
421  static inline void
add_ftrace_export(struct trace_export ** list,struct trace_export * export)422  add_ftrace_export(struct trace_export **list, struct trace_export *export)
423  {
424  	ftrace_exports_enable(export);
425  
426  	add_trace_export(list, export);
427  }
428  
429  static inline int
rm_ftrace_export(struct trace_export ** list,struct trace_export * export)430  rm_ftrace_export(struct trace_export **list, struct trace_export *export)
431  {
432  	int ret;
433  
434  	ret = rm_trace_export(list, export);
435  	ftrace_exports_disable(export);
436  
437  	return ret;
438  }
439  
register_ftrace_export(struct trace_export * export)440  int register_ftrace_export(struct trace_export *export)
441  {
442  	if (WARN_ON_ONCE(!export->write))
443  		return -1;
444  
445  	mutex_lock(&ftrace_export_lock);
446  
447  	add_ftrace_export(&ftrace_exports_list, export);
448  
449  	mutex_unlock(&ftrace_export_lock);
450  
451  	return 0;
452  }
453  EXPORT_SYMBOL_GPL(register_ftrace_export);
454  
unregister_ftrace_export(struct trace_export * export)455  int unregister_ftrace_export(struct trace_export *export)
456  {
457  	int ret;
458  
459  	mutex_lock(&ftrace_export_lock);
460  
461  	ret = rm_ftrace_export(&ftrace_exports_list, export);
462  
463  	mutex_unlock(&ftrace_export_lock);
464  
465  	return ret;
466  }
467  EXPORT_SYMBOL_GPL(unregister_ftrace_export);
468  
469  /* trace_flags holds trace_options default values */
470  #define TRACE_DEFAULT_FLAGS						\
471  	(FUNCTION_DEFAULT_FLAGS |					\
472  	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
473  	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
474  	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
475  	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
476  	 TRACE_ITER_HASH_PTR)
477  
478  /* trace_options that are only supported by global_trace */
479  #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
480  	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
481  
482  /* trace_flags that are default zero for instances */
483  #define ZEROED_TRACE_FLAGS \
484  	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
485  
486  /*
487   * The global_trace is the descriptor that holds the top-level tracing
488   * buffers for the live tracing.
489   */
490  static struct trace_array global_trace = {
491  	.trace_flags = TRACE_DEFAULT_FLAGS,
492  };
493  
494  LIST_HEAD(ftrace_trace_arrays);
495  
trace_array_get(struct trace_array * this_tr)496  int trace_array_get(struct trace_array *this_tr)
497  {
498  	struct trace_array *tr;
499  	int ret = -ENODEV;
500  
501  	mutex_lock(&trace_types_lock);
502  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
503  		if (tr == this_tr) {
504  			tr->ref++;
505  			ret = 0;
506  			break;
507  		}
508  	}
509  	mutex_unlock(&trace_types_lock);
510  
511  	return ret;
512  }
513  
__trace_array_put(struct trace_array * this_tr)514  static void __trace_array_put(struct trace_array *this_tr)
515  {
516  	WARN_ON(!this_tr->ref);
517  	this_tr->ref--;
518  }
519  
520  /**
521   * trace_array_put - Decrement the reference counter for this trace array.
522   * @this_tr : pointer to the trace array
523   *
524   * NOTE: Use this when we no longer need the trace array returned by
525   * trace_array_get_by_name(). This ensures the trace array can be later
526   * destroyed.
527   *
528   */
trace_array_put(struct trace_array * this_tr)529  void trace_array_put(struct trace_array *this_tr)
530  {
531  	if (!this_tr)
532  		return;
533  
534  	mutex_lock(&trace_types_lock);
535  	__trace_array_put(this_tr);
536  	mutex_unlock(&trace_types_lock);
537  }
538  EXPORT_SYMBOL_GPL(trace_array_put);
539  
tracing_check_open_get_tr(struct trace_array * tr)540  int tracing_check_open_get_tr(struct trace_array *tr)
541  {
542  	int ret;
543  
544  	ret = security_locked_down(LOCKDOWN_TRACEFS);
545  	if (ret)
546  		return ret;
547  
548  	if (tracing_disabled)
549  		return -ENODEV;
550  
551  	if (tr && trace_array_get(tr) < 0)
552  		return -ENODEV;
553  
554  	return 0;
555  }
556  
call_filter_check_discard(struct trace_event_call * call,void * rec,struct trace_buffer * buffer,struct ring_buffer_event * event)557  int call_filter_check_discard(struct trace_event_call *call, void *rec,
558  			      struct trace_buffer *buffer,
559  			      struct ring_buffer_event *event)
560  {
561  	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
562  	    !filter_match_preds(call->filter, rec)) {
563  		__trace_event_discard_commit(buffer, event);
564  		return 1;
565  	}
566  
567  	return 0;
568  }
569  
570  /**
571   * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
572   * @filtered_pids: The list of pids to check
573   * @search_pid: The PID to find in @filtered_pids
574   *
575   * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
576   */
577  bool
trace_find_filtered_pid(struct trace_pid_list * filtered_pids,pid_t search_pid)578  trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
579  {
580  	return trace_pid_list_is_set(filtered_pids, search_pid);
581  }
582  
583  /**
584   * trace_ignore_this_task - should a task be ignored for tracing
585   * @filtered_pids: The list of pids to check
586   * @filtered_no_pids: The list of pids not to be traced
587   * @task: The task that should be ignored if not filtered
588   *
589   * Checks if @task should be traced or not from @filtered_pids.
590   * Returns true if @task should *NOT* be traced.
591   * Returns false if @task should be traced.
592   */
593  bool
trace_ignore_this_task(struct trace_pid_list * filtered_pids,struct trace_pid_list * filtered_no_pids,struct task_struct * task)594  trace_ignore_this_task(struct trace_pid_list *filtered_pids,
595  		       struct trace_pid_list *filtered_no_pids,
596  		       struct task_struct *task)
597  {
598  	/*
599  	 * If filtered_no_pids is not empty, and the task's pid is listed
600  	 * in filtered_no_pids, then return true.
601  	 * Otherwise, if filtered_pids is empty, that means we can
602  	 * trace all tasks. If it has content, then only trace pids
603  	 * within filtered_pids.
604  	 */
605  
606  	return (filtered_pids &&
607  		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
608  		(filtered_no_pids &&
609  		 trace_find_filtered_pid(filtered_no_pids, task->pid));
610  }
611  
612  /**
613   * trace_filter_add_remove_task - Add or remove a task from a pid_list
614   * @pid_list: The list to modify
615   * @self: The current task for fork or NULL for exit
616   * @task: The task to add or remove
617   *
618   * If adding a task, if @self is defined, the task is only added if @self
619   * is also included in @pid_list. This happens on fork and tasks should
620   * only be added when the parent is listed. If @self is NULL, then the
621   * @task pid will be removed from the list, which would happen on exit
622   * of a task.
623   */
trace_filter_add_remove_task(struct trace_pid_list * pid_list,struct task_struct * self,struct task_struct * task)624  void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
625  				  struct task_struct *self,
626  				  struct task_struct *task)
627  {
628  	if (!pid_list)
629  		return;
630  
631  	/* For forks, we only add if the forking task is listed */
632  	if (self) {
633  		if (!trace_find_filtered_pid(pid_list, self->pid))
634  			return;
635  	}
636  
637  	/* "self" is set for forks, and NULL for exits */
638  	if (self)
639  		trace_pid_list_set(pid_list, task->pid);
640  	else
641  		trace_pid_list_clear(pid_list, task->pid);
642  }
643  
644  /**
645   * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
646   * @pid_list: The pid list to show
647   * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
648   * @pos: The position of the file
649   *
650   * This is used by the seq_file "next" operation to iterate the pids
651   * listed in a trace_pid_list structure.
652   *
653   * Returns the pid+1 as we want to display pid of zero, but NULL would
654   * stop the iteration.
655   */
trace_pid_next(struct trace_pid_list * pid_list,void * v,loff_t * pos)656  void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
657  {
658  	long pid = (unsigned long)v;
659  	unsigned int next;
660  
661  	(*pos)++;
662  
663  	/* pid already is +1 of the actual previous bit */
664  	if (trace_pid_list_next(pid_list, pid, &next) < 0)
665  		return NULL;
666  
667  	pid = next;
668  
669  	/* Return pid + 1 to allow zero to be represented */
670  	return (void *)(pid + 1);
671  }
672  
673  /**
674   * trace_pid_start - Used for seq_file to start reading pid lists
675   * @pid_list: The pid list to show
676   * @pos: The position of the file
677   *
678   * This is used by seq_file "start" operation to start the iteration
679   * of listing pids.
680   *
681   * Returns the pid+1 as we want to display pid of zero, but NULL would
682   * stop the iteration.
683   */
trace_pid_start(struct trace_pid_list * pid_list,loff_t * pos)684  void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
685  {
686  	unsigned long pid;
687  	unsigned int first;
688  	loff_t l = 0;
689  
690  	if (trace_pid_list_first(pid_list, &first) < 0)
691  		return NULL;
692  
693  	pid = first;
694  
695  	/* Return pid + 1 so that zero can be the exit value */
696  	for (pid++; pid && l < *pos;
697  	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
698  		;
699  	return (void *)pid;
700  }
701  
702  /**
703   * trace_pid_show - show the current pid in seq_file processing
704   * @m: The seq_file structure to write into
705   * @v: A void pointer of the pid (+1) value to display
706   *
707   * Can be directly used by seq_file operations to display the current
708   * pid value.
709   */
trace_pid_show(struct seq_file * m,void * v)710  int trace_pid_show(struct seq_file *m, void *v)
711  {
712  	unsigned long pid = (unsigned long)v - 1;
713  
714  	seq_printf(m, "%lu\n", pid);
715  	return 0;
716  }
717  
718  /* 128 should be much more than enough */
719  #define PID_BUF_SIZE		127
720  
trace_pid_write(struct trace_pid_list * filtered_pids,struct trace_pid_list ** new_pid_list,const char __user * ubuf,size_t cnt)721  int trace_pid_write(struct trace_pid_list *filtered_pids,
722  		    struct trace_pid_list **new_pid_list,
723  		    const char __user *ubuf, size_t cnt)
724  {
725  	struct trace_pid_list *pid_list;
726  	struct trace_parser parser;
727  	unsigned long val;
728  	int nr_pids = 0;
729  	ssize_t read = 0;
730  	ssize_t ret;
731  	loff_t pos;
732  	pid_t pid;
733  
734  	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
735  		return -ENOMEM;
736  
737  	/*
738  	 * Always recreate a new array. The write is an all or nothing
739  	 * operation. Always create a new array when adding new pids by
740  	 * the user. If the operation fails, then the current list is
741  	 * not modified.
742  	 */
743  	pid_list = trace_pid_list_alloc();
744  	if (!pid_list) {
745  		trace_parser_put(&parser);
746  		return -ENOMEM;
747  	}
748  
749  	if (filtered_pids) {
750  		/* copy the current bits to the new max */
751  		ret = trace_pid_list_first(filtered_pids, &pid);
752  		while (!ret) {
753  			trace_pid_list_set(pid_list, pid);
754  			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
755  			nr_pids++;
756  		}
757  	}
758  
759  	ret = 0;
760  	while (cnt > 0) {
761  
762  		pos = 0;
763  
764  		ret = trace_get_user(&parser, ubuf, cnt, &pos);
765  		if (ret < 0)
766  			break;
767  
768  		read += ret;
769  		ubuf += ret;
770  		cnt -= ret;
771  
772  		if (!trace_parser_loaded(&parser))
773  			break;
774  
775  		ret = -EINVAL;
776  		if (kstrtoul(parser.buffer, 0, &val))
777  			break;
778  
779  		pid = (pid_t)val;
780  
781  		if (trace_pid_list_set(pid_list, pid) < 0) {
782  			ret = -1;
783  			break;
784  		}
785  		nr_pids++;
786  
787  		trace_parser_clear(&parser);
788  		ret = 0;
789  	}
790  	trace_parser_put(&parser);
791  
792  	if (ret < 0) {
793  		trace_pid_list_free(pid_list);
794  		return ret;
795  	}
796  
797  	if (!nr_pids) {
798  		/* Cleared the list of pids */
799  		trace_pid_list_free(pid_list);
800  		pid_list = NULL;
801  	}
802  
803  	*new_pid_list = pid_list;
804  
805  	return read;
806  }
807  
buffer_ftrace_now(struct array_buffer * buf,int cpu)808  static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
809  {
810  	u64 ts;
811  
812  	/* Early boot up does not have a buffer yet */
813  	if (!buf->buffer)
814  		return trace_clock_local();
815  
816  	ts = ring_buffer_time_stamp(buf->buffer);
817  	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
818  
819  	return ts;
820  }
821  
ftrace_now(int cpu)822  u64 ftrace_now(int cpu)
823  {
824  	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
825  }
826  
827  /**
828   * tracing_is_enabled - Show if global_trace has been enabled
829   *
830   * Shows if the global trace has been enabled or not. It uses the
831   * mirror flag "buffer_disabled" to be used in fast paths such as for
832   * the irqsoff tracer. But it may be inaccurate due to races. If you
833   * need to know the accurate state, use tracing_is_on() which is a little
834   * slower, but accurate.
835   */
tracing_is_enabled(void)836  int tracing_is_enabled(void)
837  {
838  	/*
839  	 * For quick access (irqsoff uses this in fast path), just
840  	 * return the mirror variable of the state of the ring buffer.
841  	 * It's a little racy, but we don't really care.
842  	 */
843  	smp_rmb();
844  	return !global_trace.buffer_disabled;
845  }
846  
847  /*
848   * trace_buf_size is the size in bytes that is allocated
849   * for a buffer. Note, the number of bytes is always rounded
850   * to page size.
851   *
852   * This number is purposely set to a low number of 16384.
853   * If the dump on oops happens, it will be much appreciated
854   * to not have to wait for all that output. Anyway this can be
855   * boot time and run time configurable.
856   */
857  #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
858  
859  static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
860  
861  /* trace_types holds a link list of available tracers. */
862  static struct tracer		*trace_types __read_mostly;
863  
864  /*
865   * trace_types_lock is used to protect the trace_types list.
866   */
867  DEFINE_MUTEX(trace_types_lock);
868  
869  /*
870   * serialize the access of the ring buffer
871   *
872   * ring buffer serializes readers, but it is low level protection.
873   * The validity of the events (which returns by ring_buffer_peek() ..etc)
874   * are not protected by ring buffer.
875   *
876   * The content of events may become garbage if we allow other process consumes
877   * these events concurrently:
878   *   A) the page of the consumed events may become a normal page
879   *      (not reader page) in ring buffer, and this page will be rewritten
880   *      by events producer.
881   *   B) The page of the consumed events may become a page for splice_read,
882   *      and this page will be returned to system.
883   *
884   * These primitives allow multi process access to different cpu ring buffer
885   * concurrently.
886   *
887   * These primitives don't distinguish read-only and read-consume access.
888   * Multi read-only access are also serialized.
889   */
890  
891  #ifdef CONFIG_SMP
892  static DECLARE_RWSEM(all_cpu_access_lock);
893  static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
894  
trace_access_lock(int cpu)895  static inline void trace_access_lock(int cpu)
896  {
897  	if (cpu == RING_BUFFER_ALL_CPUS) {
898  		/* gain it for accessing the whole ring buffer. */
899  		down_write(&all_cpu_access_lock);
900  	} else {
901  		/* gain it for accessing a cpu ring buffer. */
902  
903  		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
904  		down_read(&all_cpu_access_lock);
905  
906  		/* Secondly block other access to this @cpu ring buffer. */
907  		mutex_lock(&per_cpu(cpu_access_lock, cpu));
908  	}
909  }
910  
trace_access_unlock(int cpu)911  static inline void trace_access_unlock(int cpu)
912  {
913  	if (cpu == RING_BUFFER_ALL_CPUS) {
914  		up_write(&all_cpu_access_lock);
915  	} else {
916  		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
917  		up_read(&all_cpu_access_lock);
918  	}
919  }
920  
trace_access_lock_init(void)921  static inline void trace_access_lock_init(void)
922  {
923  	int cpu;
924  
925  	for_each_possible_cpu(cpu)
926  		mutex_init(&per_cpu(cpu_access_lock, cpu));
927  }
928  
929  #else
930  
931  static DEFINE_MUTEX(access_lock);
932  
trace_access_lock(int cpu)933  static inline void trace_access_lock(int cpu)
934  {
935  	(void)cpu;
936  	mutex_lock(&access_lock);
937  }
938  
trace_access_unlock(int cpu)939  static inline void trace_access_unlock(int cpu)
940  {
941  	(void)cpu;
942  	mutex_unlock(&access_lock);
943  }
944  
trace_access_lock_init(void)945  static inline void trace_access_lock_init(void)
946  {
947  }
948  
949  #endif
950  
951  #ifdef CONFIG_STACKTRACE
952  static void __ftrace_trace_stack(struct trace_buffer *buffer,
953  				 unsigned int trace_ctx,
954  				 int skip, struct pt_regs *regs);
955  static inline void ftrace_trace_stack(struct trace_array *tr,
956  				      struct trace_buffer *buffer,
957  				      unsigned int trace_ctx,
958  				      int skip, struct pt_regs *regs);
959  
960  #else
__ftrace_trace_stack(struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)961  static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
962  					unsigned int trace_ctx,
963  					int skip, struct pt_regs *regs)
964  {
965  }
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned long trace_ctx,int skip,struct pt_regs * regs)966  static inline void ftrace_trace_stack(struct trace_array *tr,
967  				      struct trace_buffer *buffer,
968  				      unsigned long trace_ctx,
969  				      int skip, struct pt_regs *regs)
970  {
971  }
972  
973  #endif
974  
975  static __always_inline void
trace_event_setup(struct ring_buffer_event * event,int type,unsigned int trace_ctx)976  trace_event_setup(struct ring_buffer_event *event,
977  		  int type, unsigned int trace_ctx)
978  {
979  	struct trace_entry *ent = ring_buffer_event_data(event);
980  
981  	tracing_generic_entry_update(ent, type, trace_ctx);
982  }
983  
984  static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)985  __trace_buffer_lock_reserve(struct trace_buffer *buffer,
986  			  int type,
987  			  unsigned long len,
988  			  unsigned int trace_ctx)
989  {
990  	struct ring_buffer_event *event;
991  
992  	event = ring_buffer_lock_reserve(buffer, len);
993  	if (event != NULL)
994  		trace_event_setup(event, type, trace_ctx);
995  
996  	return event;
997  }
998  
tracer_tracing_on(struct trace_array * tr)999  void tracer_tracing_on(struct trace_array *tr)
1000  {
1001  	if (tr->array_buffer.buffer)
1002  		ring_buffer_record_on(tr->array_buffer.buffer);
1003  	/*
1004  	 * This flag is looked at when buffers haven't been allocated
1005  	 * yet, or by some tracers (like irqsoff), that just want to
1006  	 * know if the ring buffer has been disabled, but it can handle
1007  	 * races of where it gets disabled but we still do a record.
1008  	 * As the check is in the fast path of the tracers, it is more
1009  	 * important to be fast than accurate.
1010  	 */
1011  	tr->buffer_disabled = 0;
1012  	/* Make the flag seen by readers */
1013  	smp_wmb();
1014  }
1015  
1016  /**
1017   * tracing_on - enable tracing buffers
1018   *
1019   * This function enables tracing buffers that may have been
1020   * disabled with tracing_off.
1021   */
tracing_on(void)1022  void tracing_on(void)
1023  {
1024  	tracer_tracing_on(&global_trace);
1025  }
1026  EXPORT_SYMBOL_GPL(tracing_on);
1027  
1028  
1029  static __always_inline void
__buffer_unlock_commit(struct trace_buffer * buffer,struct ring_buffer_event * event)1030  __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1031  {
1032  	__this_cpu_write(trace_taskinfo_save, true);
1033  
1034  	/* If this is the temp buffer, we need to commit fully */
1035  	if (this_cpu_read(trace_buffered_event) == event) {
1036  		/* Length is in event->array[0] */
1037  		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1038  		/* Release the temp buffer */
1039  		this_cpu_dec(trace_buffered_event_cnt);
1040  		/* ring_buffer_unlock_commit() enables preemption */
1041  		preempt_enable_notrace();
1042  	} else
1043  		ring_buffer_unlock_commit(buffer);
1044  }
1045  
__trace_array_puts(struct trace_array * tr,unsigned long ip,const char * str,int size)1046  int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1047  		       const char *str, int size)
1048  {
1049  	struct ring_buffer_event *event;
1050  	struct trace_buffer *buffer;
1051  	struct print_entry *entry;
1052  	unsigned int trace_ctx;
1053  	int alloc;
1054  
1055  	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1056  		return 0;
1057  
1058  	if (unlikely(tracing_selftest_running && tr == &global_trace))
1059  		return 0;
1060  
1061  	if (unlikely(tracing_disabled))
1062  		return 0;
1063  
1064  	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1065  
1066  	trace_ctx = tracing_gen_ctx();
1067  	buffer = tr->array_buffer.buffer;
1068  	ring_buffer_nest_start(buffer);
1069  	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1070  					    trace_ctx);
1071  	if (!event) {
1072  		size = 0;
1073  		goto out;
1074  	}
1075  
1076  	entry = ring_buffer_event_data(event);
1077  	entry->ip = ip;
1078  
1079  	memcpy(&entry->buf, str, size);
1080  
1081  	/* Add a newline if necessary */
1082  	if (entry->buf[size - 1] != '\n') {
1083  		entry->buf[size] = '\n';
1084  		entry->buf[size + 1] = '\0';
1085  	} else
1086  		entry->buf[size] = '\0';
1087  
1088  	__buffer_unlock_commit(buffer, event);
1089  	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1090   out:
1091  	ring_buffer_nest_end(buffer);
1092  	return size;
1093  }
1094  EXPORT_SYMBOL_GPL(__trace_array_puts);
1095  
1096  /**
1097   * __trace_puts - write a constant string into the trace buffer.
1098   * @ip:	   The address of the caller
1099   * @str:   The constant string to write
1100   * @size:  The size of the string.
1101   */
__trace_puts(unsigned long ip,const char * str,int size)1102  int __trace_puts(unsigned long ip, const char *str, int size)
1103  {
1104  	return __trace_array_puts(&global_trace, ip, str, size);
1105  }
1106  EXPORT_SYMBOL_GPL(__trace_puts);
1107  
1108  /**
1109   * __trace_bputs - write the pointer to a constant string into trace buffer
1110   * @ip:	   The address of the caller
1111   * @str:   The constant string to write to the buffer to
1112   */
__trace_bputs(unsigned long ip,const char * str)1113  int __trace_bputs(unsigned long ip, const char *str)
1114  {
1115  	struct ring_buffer_event *event;
1116  	struct trace_buffer *buffer;
1117  	struct bputs_entry *entry;
1118  	unsigned int trace_ctx;
1119  	int size = sizeof(struct bputs_entry);
1120  	int ret = 0;
1121  
1122  	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1123  		return 0;
1124  
1125  	if (unlikely(tracing_selftest_running || tracing_disabled))
1126  		return 0;
1127  
1128  	trace_ctx = tracing_gen_ctx();
1129  	buffer = global_trace.array_buffer.buffer;
1130  
1131  	ring_buffer_nest_start(buffer);
1132  	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1133  					    trace_ctx);
1134  	if (!event)
1135  		goto out;
1136  
1137  	entry = ring_buffer_event_data(event);
1138  	entry->ip			= ip;
1139  	entry->str			= str;
1140  
1141  	__buffer_unlock_commit(buffer, event);
1142  	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1143  
1144  	ret = 1;
1145   out:
1146  	ring_buffer_nest_end(buffer);
1147  	return ret;
1148  }
1149  EXPORT_SYMBOL_GPL(__trace_bputs);
1150  
1151  #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_instance_cond(struct trace_array * tr,void * cond_data)1152  static void tracing_snapshot_instance_cond(struct trace_array *tr,
1153  					   void *cond_data)
1154  {
1155  	struct tracer *tracer = tr->current_trace;
1156  	unsigned long flags;
1157  
1158  	if (in_nmi()) {
1159  		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1160  		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1161  		return;
1162  	}
1163  
1164  	if (!tr->allocated_snapshot) {
1165  		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1166  		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1167  		tracer_tracing_off(tr);
1168  		return;
1169  	}
1170  
1171  	/* Note, snapshot can not be used when the tracer uses it */
1172  	if (tracer->use_max_tr) {
1173  		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1174  		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1175  		return;
1176  	}
1177  
1178  	local_irq_save(flags);
1179  	update_max_tr(tr, current, smp_processor_id(), cond_data);
1180  	local_irq_restore(flags);
1181  }
1182  
tracing_snapshot_instance(struct trace_array * tr)1183  void tracing_snapshot_instance(struct trace_array *tr)
1184  {
1185  	tracing_snapshot_instance_cond(tr, NULL);
1186  }
1187  
1188  /**
1189   * tracing_snapshot - take a snapshot of the current buffer.
1190   *
1191   * This causes a swap between the snapshot buffer and the current live
1192   * tracing buffer. You can use this to take snapshots of the live
1193   * trace when some condition is triggered, but continue to trace.
1194   *
1195   * Note, make sure to allocate the snapshot with either
1196   * a tracing_snapshot_alloc(), or by doing it manually
1197   * with: echo 1 > /sys/kernel/tracing/snapshot
1198   *
1199   * If the snapshot buffer is not allocated, it will stop tracing.
1200   * Basically making a permanent snapshot.
1201   */
tracing_snapshot(void)1202  void tracing_snapshot(void)
1203  {
1204  	struct trace_array *tr = &global_trace;
1205  
1206  	tracing_snapshot_instance(tr);
1207  }
1208  EXPORT_SYMBOL_GPL(tracing_snapshot);
1209  
1210  /**
1211   * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1212   * @tr:		The tracing instance to snapshot
1213   * @cond_data:	The data to be tested conditionally, and possibly saved
1214   *
1215   * This is the same as tracing_snapshot() except that the snapshot is
1216   * conditional - the snapshot will only happen if the
1217   * cond_snapshot.update() implementation receiving the cond_data
1218   * returns true, which means that the trace array's cond_snapshot
1219   * update() operation used the cond_data to determine whether the
1220   * snapshot should be taken, and if it was, presumably saved it along
1221   * with the snapshot.
1222   */
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)1223  void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1224  {
1225  	tracing_snapshot_instance_cond(tr, cond_data);
1226  }
1227  EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1228  
1229  /**
1230   * tracing_cond_snapshot_data - get the user data associated with a snapshot
1231   * @tr:		The tracing instance
1232   *
1233   * When the user enables a conditional snapshot using
1234   * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1235   * with the snapshot.  This accessor is used to retrieve it.
1236   *
1237   * Should not be called from cond_snapshot.update(), since it takes
1238   * the tr->max_lock lock, which the code calling
1239   * cond_snapshot.update() has already done.
1240   *
1241   * Returns the cond_data associated with the trace array's snapshot.
1242   */
tracing_cond_snapshot_data(struct trace_array * tr)1243  void *tracing_cond_snapshot_data(struct trace_array *tr)
1244  {
1245  	void *cond_data = NULL;
1246  
1247  	local_irq_disable();
1248  	arch_spin_lock(&tr->max_lock);
1249  
1250  	if (tr->cond_snapshot)
1251  		cond_data = tr->cond_snapshot->cond_data;
1252  
1253  	arch_spin_unlock(&tr->max_lock);
1254  	local_irq_enable();
1255  
1256  	return cond_data;
1257  }
1258  EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1259  
1260  static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1261  					struct array_buffer *size_buf, int cpu_id);
1262  static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1263  
tracing_alloc_snapshot_instance(struct trace_array * tr)1264  int tracing_alloc_snapshot_instance(struct trace_array *tr)
1265  {
1266  	int ret;
1267  
1268  	if (!tr->allocated_snapshot) {
1269  
1270  		/* allocate spare buffer */
1271  		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1272  				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1273  		if (ret < 0)
1274  			return ret;
1275  
1276  		tr->allocated_snapshot = true;
1277  	}
1278  
1279  	return 0;
1280  }
1281  
free_snapshot(struct trace_array * tr)1282  static void free_snapshot(struct trace_array *tr)
1283  {
1284  	/*
1285  	 * We don't free the ring buffer. instead, resize it because
1286  	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1287  	 * we want preserve it.
1288  	 */
1289  	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1290  	set_buffer_entries(&tr->max_buffer, 1);
1291  	tracing_reset_online_cpus(&tr->max_buffer);
1292  	tr->allocated_snapshot = false;
1293  }
1294  
1295  /**
1296   * tracing_alloc_snapshot - allocate snapshot buffer.
1297   *
1298   * This only allocates the snapshot buffer if it isn't already
1299   * allocated - it doesn't also take a snapshot.
1300   *
1301   * This is meant to be used in cases where the snapshot buffer needs
1302   * to be set up for events that can't sleep but need to be able to
1303   * trigger a snapshot.
1304   */
tracing_alloc_snapshot(void)1305  int tracing_alloc_snapshot(void)
1306  {
1307  	struct trace_array *tr = &global_trace;
1308  	int ret;
1309  
1310  	ret = tracing_alloc_snapshot_instance(tr);
1311  	WARN_ON(ret < 0);
1312  
1313  	return ret;
1314  }
1315  EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1316  
1317  /**
1318   * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1319   *
1320   * This is similar to tracing_snapshot(), but it will allocate the
1321   * snapshot buffer if it isn't already allocated. Use this only
1322   * where it is safe to sleep, as the allocation may sleep.
1323   *
1324   * This causes a swap between the snapshot buffer and the current live
1325   * tracing buffer. You can use this to take snapshots of the live
1326   * trace when some condition is triggered, but continue to trace.
1327   */
tracing_snapshot_alloc(void)1328  void tracing_snapshot_alloc(void)
1329  {
1330  	int ret;
1331  
1332  	ret = tracing_alloc_snapshot();
1333  	if (ret < 0)
1334  		return;
1335  
1336  	tracing_snapshot();
1337  }
1338  EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1339  
1340  /**
1341   * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1342   * @tr:		The tracing instance
1343   * @cond_data:	User data to associate with the snapshot
1344   * @update:	Implementation of the cond_snapshot update function
1345   *
1346   * Check whether the conditional snapshot for the given instance has
1347   * already been enabled, or if the current tracer is already using a
1348   * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1349   * save the cond_data and update function inside.
1350   *
1351   * Returns 0 if successful, error otherwise.
1352   */
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1353  int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1354  				 cond_update_fn_t update)
1355  {
1356  	struct cond_snapshot *cond_snapshot;
1357  	int ret = 0;
1358  
1359  	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1360  	if (!cond_snapshot)
1361  		return -ENOMEM;
1362  
1363  	cond_snapshot->cond_data = cond_data;
1364  	cond_snapshot->update = update;
1365  
1366  	mutex_lock(&trace_types_lock);
1367  
1368  	ret = tracing_alloc_snapshot_instance(tr);
1369  	if (ret)
1370  		goto fail_unlock;
1371  
1372  	if (tr->current_trace->use_max_tr) {
1373  		ret = -EBUSY;
1374  		goto fail_unlock;
1375  	}
1376  
1377  	/*
1378  	 * The cond_snapshot can only change to NULL without the
1379  	 * trace_types_lock. We don't care if we race with it going
1380  	 * to NULL, but we want to make sure that it's not set to
1381  	 * something other than NULL when we get here, which we can
1382  	 * do safely with only holding the trace_types_lock and not
1383  	 * having to take the max_lock.
1384  	 */
1385  	if (tr->cond_snapshot) {
1386  		ret = -EBUSY;
1387  		goto fail_unlock;
1388  	}
1389  
1390  	local_irq_disable();
1391  	arch_spin_lock(&tr->max_lock);
1392  	tr->cond_snapshot = cond_snapshot;
1393  	arch_spin_unlock(&tr->max_lock);
1394  	local_irq_enable();
1395  
1396  	mutex_unlock(&trace_types_lock);
1397  
1398  	return ret;
1399  
1400   fail_unlock:
1401  	mutex_unlock(&trace_types_lock);
1402  	kfree(cond_snapshot);
1403  	return ret;
1404  }
1405  EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1406  
1407  /**
1408   * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1409   * @tr:		The tracing instance
1410   *
1411   * Check whether the conditional snapshot for the given instance is
1412   * enabled; if so, free the cond_snapshot associated with it,
1413   * otherwise return -EINVAL.
1414   *
1415   * Returns 0 if successful, error otherwise.
1416   */
tracing_snapshot_cond_disable(struct trace_array * tr)1417  int tracing_snapshot_cond_disable(struct trace_array *tr)
1418  {
1419  	int ret = 0;
1420  
1421  	local_irq_disable();
1422  	arch_spin_lock(&tr->max_lock);
1423  
1424  	if (!tr->cond_snapshot)
1425  		ret = -EINVAL;
1426  	else {
1427  		kfree(tr->cond_snapshot);
1428  		tr->cond_snapshot = NULL;
1429  	}
1430  
1431  	arch_spin_unlock(&tr->max_lock);
1432  	local_irq_enable();
1433  
1434  	return ret;
1435  }
1436  EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1437  #else
tracing_snapshot(void)1438  void tracing_snapshot(void)
1439  {
1440  	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1441  }
1442  EXPORT_SYMBOL_GPL(tracing_snapshot);
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)1443  void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1444  {
1445  	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1446  }
1447  EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
tracing_alloc_snapshot(void)1448  int tracing_alloc_snapshot(void)
1449  {
1450  	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1451  	return -ENODEV;
1452  }
1453  EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
tracing_snapshot_alloc(void)1454  void tracing_snapshot_alloc(void)
1455  {
1456  	/* Give warning */
1457  	tracing_snapshot();
1458  }
1459  EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
tracing_cond_snapshot_data(struct trace_array * tr)1460  void *tracing_cond_snapshot_data(struct trace_array *tr)
1461  {
1462  	return NULL;
1463  }
1464  EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1465  int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1466  {
1467  	return -ENODEV;
1468  }
1469  EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
tracing_snapshot_cond_disable(struct trace_array * tr)1470  int tracing_snapshot_cond_disable(struct trace_array *tr)
1471  {
1472  	return false;
1473  }
1474  EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1475  #define free_snapshot(tr)	do { } while (0)
1476  #endif /* CONFIG_TRACER_SNAPSHOT */
1477  
tracer_tracing_off(struct trace_array * tr)1478  void tracer_tracing_off(struct trace_array *tr)
1479  {
1480  	if (tr->array_buffer.buffer)
1481  		ring_buffer_record_off(tr->array_buffer.buffer);
1482  	/*
1483  	 * This flag is looked at when buffers haven't been allocated
1484  	 * yet, or by some tracers (like irqsoff), that just want to
1485  	 * know if the ring buffer has been disabled, but it can handle
1486  	 * races of where it gets disabled but we still do a record.
1487  	 * As the check is in the fast path of the tracers, it is more
1488  	 * important to be fast than accurate.
1489  	 */
1490  	tr->buffer_disabled = 1;
1491  	/* Make the flag seen by readers */
1492  	smp_wmb();
1493  }
1494  
1495  /**
1496   * tracing_off - turn off tracing buffers
1497   *
1498   * This function stops the tracing buffers from recording data.
1499   * It does not disable any overhead the tracers themselves may
1500   * be causing. This function simply causes all recording to
1501   * the ring buffers to fail.
1502   */
tracing_off(void)1503  void tracing_off(void)
1504  {
1505  	tracer_tracing_off(&global_trace);
1506  }
1507  EXPORT_SYMBOL_GPL(tracing_off);
1508  
disable_trace_on_warning(void)1509  void disable_trace_on_warning(void)
1510  {
1511  	if (__disable_trace_on_warning) {
1512  		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1513  			"Disabling tracing due to warning\n");
1514  		tracing_off();
1515  	}
1516  }
1517  
1518  /**
1519   * tracer_tracing_is_on - show real state of ring buffer enabled
1520   * @tr : the trace array to know if ring buffer is enabled
1521   *
1522   * Shows real state of the ring buffer if it is enabled or not.
1523   */
tracer_tracing_is_on(struct trace_array * tr)1524  bool tracer_tracing_is_on(struct trace_array *tr)
1525  {
1526  	if (tr->array_buffer.buffer)
1527  		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1528  	return !tr->buffer_disabled;
1529  }
1530  
1531  /**
1532   * tracing_is_on - show state of ring buffers enabled
1533   */
tracing_is_on(void)1534  int tracing_is_on(void)
1535  {
1536  	return tracer_tracing_is_on(&global_trace);
1537  }
1538  EXPORT_SYMBOL_GPL(tracing_is_on);
1539  
set_buf_size(char * str)1540  static int __init set_buf_size(char *str)
1541  {
1542  	unsigned long buf_size;
1543  
1544  	if (!str)
1545  		return 0;
1546  	buf_size = memparse(str, &str);
1547  	/*
1548  	 * nr_entries can not be zero and the startup
1549  	 * tests require some buffer space. Therefore
1550  	 * ensure we have at least 4096 bytes of buffer.
1551  	 */
1552  	trace_buf_size = max(4096UL, buf_size);
1553  	return 1;
1554  }
1555  __setup("trace_buf_size=", set_buf_size);
1556  
set_tracing_thresh(char * str)1557  static int __init set_tracing_thresh(char *str)
1558  {
1559  	unsigned long threshold;
1560  	int ret;
1561  
1562  	if (!str)
1563  		return 0;
1564  	ret = kstrtoul(str, 0, &threshold);
1565  	if (ret < 0)
1566  		return 0;
1567  	tracing_thresh = threshold * 1000;
1568  	return 1;
1569  }
1570  __setup("tracing_thresh=", set_tracing_thresh);
1571  
nsecs_to_usecs(unsigned long nsecs)1572  unsigned long nsecs_to_usecs(unsigned long nsecs)
1573  {
1574  	return nsecs / 1000;
1575  }
1576  
1577  /*
1578   * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1579   * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1580   * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1581   * of strings in the order that the evals (enum) were defined.
1582   */
1583  #undef C
1584  #define C(a, b) b
1585  
1586  /* These must match the bit positions in trace_iterator_flags */
1587  static const char *trace_options[] = {
1588  	TRACE_FLAGS
1589  	NULL
1590  };
1591  
1592  static struct {
1593  	u64 (*func)(void);
1594  	const char *name;
1595  	int in_ns;		/* is this clock in nanoseconds? */
1596  } trace_clocks[] = {
1597  	{ trace_clock_local,		"local",	1 },
1598  	{ trace_clock_global,		"global",	1 },
1599  	{ trace_clock_counter,		"counter",	0 },
1600  	{ trace_clock_jiffies,		"uptime",	0 },
1601  	{ trace_clock,			"perf",		1 },
1602  	{ ktime_get_mono_fast_ns,	"mono",		1 },
1603  	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1604  	{ ktime_get_boot_fast_ns,	"boot",		1 },
1605  	{ ktime_get_tai_fast_ns,	"tai",		1 },
1606  	ARCH_TRACE_CLOCKS
1607  };
1608  
trace_clock_in_ns(struct trace_array * tr)1609  bool trace_clock_in_ns(struct trace_array *tr)
1610  {
1611  	if (trace_clocks[tr->clock_id].in_ns)
1612  		return true;
1613  
1614  	return false;
1615  }
1616  
1617  /*
1618   * trace_parser_get_init - gets the buffer for trace parser
1619   */
trace_parser_get_init(struct trace_parser * parser,int size)1620  int trace_parser_get_init(struct trace_parser *parser, int size)
1621  {
1622  	memset(parser, 0, sizeof(*parser));
1623  
1624  	parser->buffer = kmalloc(size, GFP_KERNEL);
1625  	if (!parser->buffer)
1626  		return 1;
1627  
1628  	parser->size = size;
1629  	return 0;
1630  }
1631  
1632  /*
1633   * trace_parser_put - frees the buffer for trace parser
1634   */
trace_parser_put(struct trace_parser * parser)1635  void trace_parser_put(struct trace_parser *parser)
1636  {
1637  	kfree(parser->buffer);
1638  	parser->buffer = NULL;
1639  }
1640  
1641  /*
1642   * trace_get_user - reads the user input string separated by  space
1643   * (matched by isspace(ch))
1644   *
1645   * For each string found the 'struct trace_parser' is updated,
1646   * and the function returns.
1647   *
1648   * Returns number of bytes read.
1649   *
1650   * See kernel/trace/trace.h for 'struct trace_parser' details.
1651   */
trace_get_user(struct trace_parser * parser,const char __user * ubuf,size_t cnt,loff_t * ppos)1652  int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1653  	size_t cnt, loff_t *ppos)
1654  {
1655  	char ch;
1656  	size_t read = 0;
1657  	ssize_t ret;
1658  
1659  	if (!*ppos)
1660  		trace_parser_clear(parser);
1661  
1662  	ret = get_user(ch, ubuf++);
1663  	if (ret)
1664  		goto out;
1665  
1666  	read++;
1667  	cnt--;
1668  
1669  	/*
1670  	 * The parser is not finished with the last write,
1671  	 * continue reading the user input without skipping spaces.
1672  	 */
1673  	if (!parser->cont) {
1674  		/* skip white space */
1675  		while (cnt && isspace(ch)) {
1676  			ret = get_user(ch, ubuf++);
1677  			if (ret)
1678  				goto out;
1679  			read++;
1680  			cnt--;
1681  		}
1682  
1683  		parser->idx = 0;
1684  
1685  		/* only spaces were written */
1686  		if (isspace(ch) || !ch) {
1687  			*ppos += read;
1688  			ret = read;
1689  			goto out;
1690  		}
1691  	}
1692  
1693  	/* read the non-space input */
1694  	while (cnt && !isspace(ch) && ch) {
1695  		if (parser->idx < parser->size - 1)
1696  			parser->buffer[parser->idx++] = ch;
1697  		else {
1698  			ret = -EINVAL;
1699  			goto out;
1700  		}
1701  		ret = get_user(ch, ubuf++);
1702  		if (ret)
1703  			goto out;
1704  		read++;
1705  		cnt--;
1706  	}
1707  
1708  	/* We either got finished input or we have to wait for another call. */
1709  	if (isspace(ch) || !ch) {
1710  		parser->buffer[parser->idx] = 0;
1711  		parser->cont = false;
1712  	} else if (parser->idx < parser->size - 1) {
1713  		parser->cont = true;
1714  		parser->buffer[parser->idx++] = ch;
1715  		/* Make sure the parsed string always terminates with '\0'. */
1716  		parser->buffer[parser->idx] = 0;
1717  	} else {
1718  		ret = -EINVAL;
1719  		goto out;
1720  	}
1721  
1722  	*ppos += read;
1723  	ret = read;
1724  
1725  out:
1726  	return ret;
1727  }
1728  
1729  /* TODO add a seq_buf_to_buffer() */
trace_seq_to_buffer(struct trace_seq * s,void * buf,size_t cnt)1730  static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1731  {
1732  	int len;
1733  
1734  	if (trace_seq_used(s) <= s->readpos)
1735  		return -EBUSY;
1736  
1737  	len = trace_seq_used(s) - s->readpos;
1738  	if (cnt > len)
1739  		cnt = len;
1740  	memcpy(buf, s->buffer + s->readpos, cnt);
1741  
1742  	s->readpos += cnt;
1743  	return cnt;
1744  }
1745  
1746  unsigned long __read_mostly	tracing_thresh;
1747  
1748  #ifdef CONFIG_TRACER_MAX_TRACE
1749  static const struct file_operations tracing_max_lat_fops;
1750  
1751  #ifdef LATENCY_FS_NOTIFY
1752  
1753  static struct workqueue_struct *fsnotify_wq;
1754  
latency_fsnotify_workfn(struct work_struct * work)1755  static void latency_fsnotify_workfn(struct work_struct *work)
1756  {
1757  	struct trace_array *tr = container_of(work, struct trace_array,
1758  					      fsnotify_work);
1759  	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1760  }
1761  
latency_fsnotify_workfn_irq(struct irq_work * iwork)1762  static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1763  {
1764  	struct trace_array *tr = container_of(iwork, struct trace_array,
1765  					      fsnotify_irqwork);
1766  	queue_work(fsnotify_wq, &tr->fsnotify_work);
1767  }
1768  
trace_create_maxlat_file(struct trace_array * tr,struct dentry * d_tracer)1769  static void trace_create_maxlat_file(struct trace_array *tr,
1770  				     struct dentry *d_tracer)
1771  {
1772  	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1773  	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1774  	tr->d_max_latency = trace_create_file("tracing_max_latency",
1775  					      TRACE_MODE_WRITE,
1776  					      d_tracer, tr,
1777  					      &tracing_max_lat_fops);
1778  }
1779  
latency_fsnotify_init(void)1780  __init static int latency_fsnotify_init(void)
1781  {
1782  	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1783  				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1784  	if (!fsnotify_wq) {
1785  		pr_err("Unable to allocate tr_max_lat_wq\n");
1786  		return -ENOMEM;
1787  	}
1788  	return 0;
1789  }
1790  
1791  late_initcall_sync(latency_fsnotify_init);
1792  
latency_fsnotify(struct trace_array * tr)1793  void latency_fsnotify(struct trace_array *tr)
1794  {
1795  	if (!fsnotify_wq)
1796  		return;
1797  	/*
1798  	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1799  	 * possible that we are called from __schedule() or do_idle(), which
1800  	 * could cause a deadlock.
1801  	 */
1802  	irq_work_queue(&tr->fsnotify_irqwork);
1803  }
1804  
1805  #else /* !LATENCY_FS_NOTIFY */
1806  
1807  #define trace_create_maxlat_file(tr, d_tracer)				\
1808  	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1809  			  d_tracer, tr, &tracing_max_lat_fops)
1810  
1811  #endif
1812  
1813  /*
1814   * Copy the new maximum trace into the separate maximum-trace
1815   * structure. (this way the maximum trace is permanently saved,
1816   * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1817   */
1818  static void
__update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu)1819  __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1820  {
1821  	struct array_buffer *trace_buf = &tr->array_buffer;
1822  	struct array_buffer *max_buf = &tr->max_buffer;
1823  	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1824  	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1825  
1826  	max_buf->cpu = cpu;
1827  	max_buf->time_start = data->preempt_timestamp;
1828  
1829  	max_data->saved_latency = tr->max_latency;
1830  	max_data->critical_start = data->critical_start;
1831  	max_data->critical_end = data->critical_end;
1832  
1833  	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1834  	max_data->pid = tsk->pid;
1835  	/*
1836  	 * If tsk == current, then use current_uid(), as that does not use
1837  	 * RCU. The irq tracer can be called out of RCU scope.
1838  	 */
1839  	if (tsk == current)
1840  		max_data->uid = current_uid();
1841  	else
1842  		max_data->uid = task_uid(tsk);
1843  
1844  	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1845  	max_data->policy = tsk->policy;
1846  	max_data->rt_priority = tsk->rt_priority;
1847  
1848  	/* record this tasks comm */
1849  	tracing_record_cmdline(tsk);
1850  	latency_fsnotify(tr);
1851  }
1852  
1853  /**
1854   * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1855   * @tr: tracer
1856   * @tsk: the task with the latency
1857   * @cpu: The cpu that initiated the trace.
1858   * @cond_data: User data associated with a conditional snapshot
1859   *
1860   * Flip the buffers between the @tr and the max_tr and record information
1861   * about which task was the cause of this latency.
1862   */
1863  void
update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu,void * cond_data)1864  update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1865  	      void *cond_data)
1866  {
1867  	if (tr->stop_count)
1868  		return;
1869  
1870  	WARN_ON_ONCE(!irqs_disabled());
1871  
1872  	if (!tr->allocated_snapshot) {
1873  		/* Only the nop tracer should hit this when disabling */
1874  		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1875  		return;
1876  	}
1877  
1878  	arch_spin_lock(&tr->max_lock);
1879  
1880  	/* Inherit the recordable setting from array_buffer */
1881  	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1882  		ring_buffer_record_on(tr->max_buffer.buffer);
1883  	else
1884  		ring_buffer_record_off(tr->max_buffer.buffer);
1885  
1886  #ifdef CONFIG_TRACER_SNAPSHOT
1887  	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1888  		arch_spin_unlock(&tr->max_lock);
1889  		return;
1890  	}
1891  #endif
1892  	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1893  
1894  	__update_max_tr(tr, tsk, cpu);
1895  
1896  	arch_spin_unlock(&tr->max_lock);
1897  
1898  	/* Any waiters on the old snapshot buffer need to wake up */
1899  	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1900  }
1901  
1902  /**
1903   * update_max_tr_single - only copy one trace over, and reset the rest
1904   * @tr: tracer
1905   * @tsk: task with the latency
1906   * @cpu: the cpu of the buffer to copy.
1907   *
1908   * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1909   */
1910  void
update_max_tr_single(struct trace_array * tr,struct task_struct * tsk,int cpu)1911  update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1912  {
1913  	int ret;
1914  
1915  	if (tr->stop_count)
1916  		return;
1917  
1918  	WARN_ON_ONCE(!irqs_disabled());
1919  	if (!tr->allocated_snapshot) {
1920  		/* Only the nop tracer should hit this when disabling */
1921  		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1922  		return;
1923  	}
1924  
1925  	arch_spin_lock(&tr->max_lock);
1926  
1927  	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1928  
1929  	if (ret == -EBUSY) {
1930  		/*
1931  		 * We failed to swap the buffer due to a commit taking
1932  		 * place on this CPU. We fail to record, but we reset
1933  		 * the max trace buffer (no one writes directly to it)
1934  		 * and flag that it failed.
1935  		 * Another reason is resize is in progress.
1936  		 */
1937  		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1938  			"Failed to swap buffers due to commit or resize in progress\n");
1939  	}
1940  
1941  	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1942  
1943  	__update_max_tr(tr, tsk, cpu);
1944  	arch_spin_unlock(&tr->max_lock);
1945  }
1946  
1947  #endif /* CONFIG_TRACER_MAX_TRACE */
1948  
wait_on_pipe(struct trace_iterator * iter,int full)1949  static int wait_on_pipe(struct trace_iterator *iter, int full)
1950  {
1951  	int ret;
1952  
1953  	/* Iterators are static, they should be filled or empty */
1954  	if (trace_buffer_iter(iter, iter->cpu_file))
1955  		return 0;
1956  
1957  	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1958  
1959  #ifdef CONFIG_TRACER_MAX_TRACE
1960  	/*
1961  	 * Make sure this is still the snapshot buffer, as if a snapshot were
1962  	 * to happen, this would now be the main buffer.
1963  	 */
1964  	if (iter->snapshot)
1965  		iter->array_buffer = &iter->tr->max_buffer;
1966  #endif
1967  	return ret;
1968  }
1969  
1970  #ifdef CONFIG_FTRACE_STARTUP_TEST
1971  static bool selftests_can_run;
1972  
1973  struct trace_selftests {
1974  	struct list_head		list;
1975  	struct tracer			*type;
1976  };
1977  
1978  static LIST_HEAD(postponed_selftests);
1979  
save_selftest(struct tracer * type)1980  static int save_selftest(struct tracer *type)
1981  {
1982  	struct trace_selftests *selftest;
1983  
1984  	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1985  	if (!selftest)
1986  		return -ENOMEM;
1987  
1988  	selftest->type = type;
1989  	list_add(&selftest->list, &postponed_selftests);
1990  	return 0;
1991  }
1992  
run_tracer_selftest(struct tracer * type)1993  static int run_tracer_selftest(struct tracer *type)
1994  {
1995  	struct trace_array *tr = &global_trace;
1996  	struct tracer *saved_tracer = tr->current_trace;
1997  	int ret;
1998  
1999  	if (!type->selftest || tracing_selftest_disabled)
2000  		return 0;
2001  
2002  	/*
2003  	 * If a tracer registers early in boot up (before scheduling is
2004  	 * initialized and such), then do not run its selftests yet.
2005  	 * Instead, run it a little later in the boot process.
2006  	 */
2007  	if (!selftests_can_run)
2008  		return save_selftest(type);
2009  
2010  	if (!tracing_is_on()) {
2011  		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2012  			type->name);
2013  		return 0;
2014  	}
2015  
2016  	/*
2017  	 * Run a selftest on this tracer.
2018  	 * Here we reset the trace buffer, and set the current
2019  	 * tracer to be this tracer. The tracer can then run some
2020  	 * internal tracing to verify that everything is in order.
2021  	 * If we fail, we do not register this tracer.
2022  	 */
2023  	tracing_reset_online_cpus(&tr->array_buffer);
2024  
2025  	tr->current_trace = type;
2026  
2027  #ifdef CONFIG_TRACER_MAX_TRACE
2028  	if (type->use_max_tr) {
2029  		/* If we expanded the buffers, make sure the max is expanded too */
2030  		if (ring_buffer_expanded)
2031  			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2032  					   RING_BUFFER_ALL_CPUS);
2033  		tr->allocated_snapshot = true;
2034  	}
2035  #endif
2036  
2037  	/* the test is responsible for initializing and enabling */
2038  	pr_info("Testing tracer %s: ", type->name);
2039  	ret = type->selftest(type, tr);
2040  	/* the test is responsible for resetting too */
2041  	tr->current_trace = saved_tracer;
2042  	if (ret) {
2043  		printk(KERN_CONT "FAILED!\n");
2044  		/* Add the warning after printing 'FAILED' */
2045  		WARN_ON(1);
2046  		return -1;
2047  	}
2048  	/* Only reset on passing, to avoid touching corrupted buffers */
2049  	tracing_reset_online_cpus(&tr->array_buffer);
2050  
2051  #ifdef CONFIG_TRACER_MAX_TRACE
2052  	if (type->use_max_tr) {
2053  		tr->allocated_snapshot = false;
2054  
2055  		/* Shrink the max buffer again */
2056  		if (ring_buffer_expanded)
2057  			ring_buffer_resize(tr->max_buffer.buffer, 1,
2058  					   RING_BUFFER_ALL_CPUS);
2059  	}
2060  #endif
2061  
2062  	printk(KERN_CONT "PASSED\n");
2063  	return 0;
2064  }
2065  
do_run_tracer_selftest(struct tracer * type)2066  static int do_run_tracer_selftest(struct tracer *type)
2067  {
2068  	int ret;
2069  
2070  	/*
2071  	 * Tests can take a long time, especially if they are run one after the
2072  	 * other, as does happen during bootup when all the tracers are
2073  	 * registered. This could cause the soft lockup watchdog to trigger.
2074  	 */
2075  	cond_resched();
2076  
2077  	tracing_selftest_running = true;
2078  	ret = run_tracer_selftest(type);
2079  	tracing_selftest_running = false;
2080  
2081  	return ret;
2082  }
2083  
init_trace_selftests(void)2084  static __init int init_trace_selftests(void)
2085  {
2086  	struct trace_selftests *p, *n;
2087  	struct tracer *t, **last;
2088  	int ret;
2089  
2090  	selftests_can_run = true;
2091  
2092  	mutex_lock(&trace_types_lock);
2093  
2094  	if (list_empty(&postponed_selftests))
2095  		goto out;
2096  
2097  	pr_info("Running postponed tracer tests:\n");
2098  
2099  	tracing_selftest_running = true;
2100  	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2101  		/* This loop can take minutes when sanitizers are enabled, so
2102  		 * lets make sure we allow RCU processing.
2103  		 */
2104  		cond_resched();
2105  		ret = run_tracer_selftest(p->type);
2106  		/* If the test fails, then warn and remove from available_tracers */
2107  		if (ret < 0) {
2108  			WARN(1, "tracer: %s failed selftest, disabling\n",
2109  			     p->type->name);
2110  			last = &trace_types;
2111  			for (t = trace_types; t; t = t->next) {
2112  				if (t == p->type) {
2113  					*last = t->next;
2114  					break;
2115  				}
2116  				last = &t->next;
2117  			}
2118  		}
2119  		list_del(&p->list);
2120  		kfree(p);
2121  	}
2122  	tracing_selftest_running = false;
2123  
2124   out:
2125  	mutex_unlock(&trace_types_lock);
2126  
2127  	return 0;
2128  }
2129  core_initcall(init_trace_selftests);
2130  #else
run_tracer_selftest(struct tracer * type)2131  static inline int run_tracer_selftest(struct tracer *type)
2132  {
2133  	return 0;
2134  }
do_run_tracer_selftest(struct tracer * type)2135  static inline int do_run_tracer_selftest(struct tracer *type)
2136  {
2137  	return 0;
2138  }
2139  #endif /* CONFIG_FTRACE_STARTUP_TEST */
2140  
2141  static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2142  
2143  static void __init apply_trace_boot_options(void);
2144  
2145  /**
2146   * register_tracer - register a tracer with the ftrace system.
2147   * @type: the plugin for the tracer
2148   *
2149   * Register a new plugin tracer.
2150   */
register_tracer(struct tracer * type)2151  int __init register_tracer(struct tracer *type)
2152  {
2153  	struct tracer *t;
2154  	int ret = 0;
2155  
2156  	if (!type->name) {
2157  		pr_info("Tracer must have a name\n");
2158  		return -1;
2159  	}
2160  
2161  	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2162  		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2163  		return -1;
2164  	}
2165  
2166  	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2167  		pr_warn("Can not register tracer %s due to lockdown\n",
2168  			   type->name);
2169  		return -EPERM;
2170  	}
2171  
2172  	mutex_lock(&trace_types_lock);
2173  
2174  	for (t = trace_types; t; t = t->next) {
2175  		if (strcmp(type->name, t->name) == 0) {
2176  			/* already found */
2177  			pr_info("Tracer %s already registered\n",
2178  				type->name);
2179  			ret = -1;
2180  			goto out;
2181  		}
2182  	}
2183  
2184  	if (!type->set_flag)
2185  		type->set_flag = &dummy_set_flag;
2186  	if (!type->flags) {
2187  		/*allocate a dummy tracer_flags*/
2188  		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2189  		if (!type->flags) {
2190  			ret = -ENOMEM;
2191  			goto out;
2192  		}
2193  		type->flags->val = 0;
2194  		type->flags->opts = dummy_tracer_opt;
2195  	} else
2196  		if (!type->flags->opts)
2197  			type->flags->opts = dummy_tracer_opt;
2198  
2199  	/* store the tracer for __set_tracer_option */
2200  	type->flags->trace = type;
2201  
2202  	ret = do_run_tracer_selftest(type);
2203  	if (ret < 0)
2204  		goto out;
2205  
2206  	type->next = trace_types;
2207  	trace_types = type;
2208  	add_tracer_options(&global_trace, type);
2209  
2210   out:
2211  	mutex_unlock(&trace_types_lock);
2212  
2213  	if (ret || !default_bootup_tracer)
2214  		goto out_unlock;
2215  
2216  	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2217  		goto out_unlock;
2218  
2219  	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2220  	/* Do we want this tracer to start on bootup? */
2221  	tracing_set_tracer(&global_trace, type->name);
2222  	default_bootup_tracer = NULL;
2223  
2224  	apply_trace_boot_options();
2225  
2226  	/* disable other selftests, since this will break it. */
2227  	disable_tracing_selftest("running a tracer");
2228  
2229   out_unlock:
2230  	return ret;
2231  }
2232  
tracing_reset_cpu(struct array_buffer * buf,int cpu)2233  static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2234  {
2235  	struct trace_buffer *buffer = buf->buffer;
2236  
2237  	if (!buffer)
2238  		return;
2239  
2240  	ring_buffer_record_disable(buffer);
2241  
2242  	/* Make sure all commits have finished */
2243  	synchronize_rcu();
2244  	ring_buffer_reset_cpu(buffer, cpu);
2245  
2246  	ring_buffer_record_enable(buffer);
2247  }
2248  
tracing_reset_online_cpus(struct array_buffer * buf)2249  void tracing_reset_online_cpus(struct array_buffer *buf)
2250  {
2251  	struct trace_buffer *buffer = buf->buffer;
2252  
2253  	if (!buffer)
2254  		return;
2255  
2256  	ring_buffer_record_disable(buffer);
2257  
2258  	/* Make sure all commits have finished */
2259  	synchronize_rcu();
2260  
2261  	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2262  
2263  	ring_buffer_reset_online_cpus(buffer);
2264  
2265  	ring_buffer_record_enable(buffer);
2266  }
2267  
2268  /* Must have trace_types_lock held */
tracing_reset_all_online_cpus_unlocked(void)2269  void tracing_reset_all_online_cpus_unlocked(void)
2270  {
2271  	struct trace_array *tr;
2272  
2273  	lockdep_assert_held(&trace_types_lock);
2274  
2275  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2276  		if (!tr->clear_trace)
2277  			continue;
2278  		tr->clear_trace = false;
2279  		tracing_reset_online_cpus(&tr->array_buffer);
2280  #ifdef CONFIG_TRACER_MAX_TRACE
2281  		tracing_reset_online_cpus(&tr->max_buffer);
2282  #endif
2283  	}
2284  }
2285  
tracing_reset_all_online_cpus(void)2286  void tracing_reset_all_online_cpus(void)
2287  {
2288  	mutex_lock(&trace_types_lock);
2289  	tracing_reset_all_online_cpus_unlocked();
2290  	mutex_unlock(&trace_types_lock);
2291  }
2292  
2293  /*
2294   * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2295   * is the tgid last observed corresponding to pid=i.
2296   */
2297  static int *tgid_map;
2298  
2299  /* The maximum valid index into tgid_map. */
2300  static size_t tgid_map_max;
2301  
2302  #define SAVED_CMDLINES_DEFAULT 128
2303  #define NO_CMDLINE_MAP UINT_MAX
2304  /*
2305   * Preemption must be disabled before acquiring trace_cmdline_lock.
2306   * The various trace_arrays' max_lock must be acquired in a context
2307   * where interrupt is disabled.
2308   */
2309  static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2310  struct saved_cmdlines_buffer {
2311  	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2312  	unsigned *map_cmdline_to_pid;
2313  	unsigned cmdline_num;
2314  	int cmdline_idx;
2315  	char saved_cmdlines[];
2316  };
2317  static struct saved_cmdlines_buffer *savedcmd;
2318  
2319  /* Holds the size of a cmdline and pid element */
2320  #define SAVED_CMDLINE_MAP_ELEMENT_SIZE(s)			\
2321  	(TASK_COMM_LEN + sizeof((s)->map_cmdline_to_pid[0]))
2322  
get_saved_cmdlines(int idx)2323  static inline char *get_saved_cmdlines(int idx)
2324  {
2325  	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2326  }
2327  
set_cmdline(int idx,const char * cmdline)2328  static inline void set_cmdline(int idx, const char *cmdline)
2329  {
2330  	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2331  }
2332  
free_saved_cmdlines_buffer(struct saved_cmdlines_buffer * s)2333  static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2334  {
2335  	int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2336  
2337  	kmemleak_free(s);
2338  	free_pages((unsigned long)s, order);
2339  }
2340  
allocate_cmdlines_buffer(unsigned int val)2341  static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2342  {
2343  	struct saved_cmdlines_buffer *s;
2344  	struct page *page;
2345  	int orig_size, size;
2346  	int order;
2347  
2348  	/* Figure out how much is needed to hold the given number of cmdlines */
2349  	orig_size = sizeof(*s) + val * SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
2350  	order = get_order(orig_size);
2351  	size = 1 << (order + PAGE_SHIFT);
2352  	page = alloc_pages(GFP_KERNEL, order);
2353  	if (!page)
2354  		return NULL;
2355  
2356  	s = page_address(page);
2357  	kmemleak_alloc(s, size, 1, GFP_KERNEL);
2358  	memset(s, 0, sizeof(*s));
2359  
2360  	/* Round up to actual allocation */
2361  	val = (size - sizeof(*s)) / SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
2362  	s->cmdline_num = val;
2363  
2364  	/* Place map_cmdline_to_pid array right after saved_cmdlines */
2365  	s->map_cmdline_to_pid = (unsigned *)&s->saved_cmdlines[val * TASK_COMM_LEN];
2366  
2367  	s->cmdline_idx = 0;
2368  	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2369  	       sizeof(s->map_pid_to_cmdline));
2370  	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2371  	       val * sizeof(*s->map_cmdline_to_pid));
2372  
2373  	return s;
2374  }
2375  
trace_create_savedcmd(void)2376  static int trace_create_savedcmd(void)
2377  {
2378  	savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2379  
2380  	return savedcmd ? 0 : -ENOMEM;
2381  }
2382  
is_tracing_stopped(void)2383  int is_tracing_stopped(void)
2384  {
2385  	return global_trace.stop_count;
2386  }
2387  
tracing_start_tr(struct trace_array * tr)2388  static void tracing_start_tr(struct trace_array *tr)
2389  {
2390  	struct trace_buffer *buffer;
2391  	unsigned long flags;
2392  
2393  	if (tracing_disabled)
2394  		return;
2395  
2396  	raw_spin_lock_irqsave(&tr->start_lock, flags);
2397  	if (--tr->stop_count) {
2398  		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2399  			/* Someone screwed up their debugging */
2400  			tr->stop_count = 0;
2401  		}
2402  		goto out;
2403  	}
2404  
2405  	/* Prevent the buffers from switching */
2406  	arch_spin_lock(&tr->max_lock);
2407  
2408  	buffer = tr->array_buffer.buffer;
2409  	if (buffer)
2410  		ring_buffer_record_enable(buffer);
2411  
2412  #ifdef CONFIG_TRACER_MAX_TRACE
2413  	buffer = tr->max_buffer.buffer;
2414  	if (buffer)
2415  		ring_buffer_record_enable(buffer);
2416  #endif
2417  
2418  	arch_spin_unlock(&tr->max_lock);
2419  
2420   out:
2421  	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2422  }
2423  
2424  /**
2425   * tracing_start - quick start of the tracer
2426   *
2427   * If tracing is enabled but was stopped by tracing_stop,
2428   * this will start the tracer back up.
2429   */
tracing_start(void)2430  void tracing_start(void)
2431  
2432  {
2433  	return tracing_start_tr(&global_trace);
2434  }
2435  
tracing_stop_tr(struct trace_array * tr)2436  static void tracing_stop_tr(struct trace_array *tr)
2437  {
2438  	struct trace_buffer *buffer;
2439  	unsigned long flags;
2440  
2441  	raw_spin_lock_irqsave(&tr->start_lock, flags);
2442  	if (tr->stop_count++)
2443  		goto out;
2444  
2445  	/* Prevent the buffers from switching */
2446  	arch_spin_lock(&tr->max_lock);
2447  
2448  	buffer = tr->array_buffer.buffer;
2449  	if (buffer)
2450  		ring_buffer_record_disable(buffer);
2451  
2452  #ifdef CONFIG_TRACER_MAX_TRACE
2453  	buffer = tr->max_buffer.buffer;
2454  	if (buffer)
2455  		ring_buffer_record_disable(buffer);
2456  #endif
2457  
2458  	arch_spin_unlock(&tr->max_lock);
2459  
2460   out:
2461  	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2462  }
2463  
2464  /**
2465   * tracing_stop - quick stop of the tracer
2466   *
2467   * Light weight way to stop tracing. Use in conjunction with
2468   * tracing_start.
2469   */
tracing_stop(void)2470  void tracing_stop(void)
2471  {
2472  	return tracing_stop_tr(&global_trace);
2473  }
2474  
trace_save_cmdline(struct task_struct * tsk)2475  static int trace_save_cmdline(struct task_struct *tsk)
2476  {
2477  	unsigned tpid, idx;
2478  
2479  	/* treat recording of idle task as a success */
2480  	if (!tsk->pid)
2481  		return 1;
2482  
2483  	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2484  
2485  	/*
2486  	 * It's not the end of the world if we don't get
2487  	 * the lock, but we also don't want to spin
2488  	 * nor do we want to disable interrupts,
2489  	 * so if we miss here, then better luck next time.
2490  	 *
2491  	 * This is called within the scheduler and wake up, so interrupts
2492  	 * had better been disabled and run queue lock been held.
2493  	 */
2494  	lockdep_assert_preemption_disabled();
2495  	if (!arch_spin_trylock(&trace_cmdline_lock))
2496  		return 0;
2497  
2498  	idx = savedcmd->map_pid_to_cmdline[tpid];
2499  	if (idx == NO_CMDLINE_MAP) {
2500  		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2501  
2502  		savedcmd->map_pid_to_cmdline[tpid] = idx;
2503  		savedcmd->cmdline_idx = idx;
2504  	}
2505  
2506  	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2507  	set_cmdline(idx, tsk->comm);
2508  
2509  	arch_spin_unlock(&trace_cmdline_lock);
2510  
2511  	return 1;
2512  }
2513  
__trace_find_cmdline(int pid,char comm[])2514  static void __trace_find_cmdline(int pid, char comm[])
2515  {
2516  	unsigned map;
2517  	int tpid;
2518  
2519  	if (!pid) {
2520  		strcpy(comm, "<idle>");
2521  		return;
2522  	}
2523  
2524  	if (WARN_ON_ONCE(pid < 0)) {
2525  		strcpy(comm, "<XXX>");
2526  		return;
2527  	}
2528  
2529  	tpid = pid & (PID_MAX_DEFAULT - 1);
2530  	map = savedcmd->map_pid_to_cmdline[tpid];
2531  	if (map != NO_CMDLINE_MAP) {
2532  		tpid = savedcmd->map_cmdline_to_pid[map];
2533  		if (tpid == pid) {
2534  			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2535  			return;
2536  		}
2537  	}
2538  	strcpy(comm, "<...>");
2539  }
2540  
trace_find_cmdline(int pid,char comm[])2541  void trace_find_cmdline(int pid, char comm[])
2542  {
2543  	preempt_disable();
2544  	arch_spin_lock(&trace_cmdline_lock);
2545  
2546  	__trace_find_cmdline(pid, comm);
2547  
2548  	arch_spin_unlock(&trace_cmdline_lock);
2549  	preempt_enable();
2550  }
2551  
trace_find_tgid_ptr(int pid)2552  static int *trace_find_tgid_ptr(int pid)
2553  {
2554  	/*
2555  	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2556  	 * if we observe a non-NULL tgid_map then we also observe the correct
2557  	 * tgid_map_max.
2558  	 */
2559  	int *map = smp_load_acquire(&tgid_map);
2560  
2561  	if (unlikely(!map || pid > tgid_map_max))
2562  		return NULL;
2563  
2564  	return &map[pid];
2565  }
2566  
trace_find_tgid(int pid)2567  int trace_find_tgid(int pid)
2568  {
2569  	int *ptr = trace_find_tgid_ptr(pid);
2570  
2571  	return ptr ? *ptr : 0;
2572  }
2573  
trace_save_tgid(struct task_struct * tsk)2574  static int trace_save_tgid(struct task_struct *tsk)
2575  {
2576  	int *ptr;
2577  
2578  	/* treat recording of idle task as a success */
2579  	if (!tsk->pid)
2580  		return 1;
2581  
2582  	ptr = trace_find_tgid_ptr(tsk->pid);
2583  	if (!ptr)
2584  		return 0;
2585  
2586  	*ptr = tsk->tgid;
2587  	return 1;
2588  }
2589  
tracing_record_taskinfo_skip(int flags)2590  static bool tracing_record_taskinfo_skip(int flags)
2591  {
2592  	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2593  		return true;
2594  	if (!__this_cpu_read(trace_taskinfo_save))
2595  		return true;
2596  	return false;
2597  }
2598  
2599  /**
2600   * tracing_record_taskinfo - record the task info of a task
2601   *
2602   * @task:  task to record
2603   * @flags: TRACE_RECORD_CMDLINE for recording comm
2604   *         TRACE_RECORD_TGID for recording tgid
2605   */
tracing_record_taskinfo(struct task_struct * task,int flags)2606  void tracing_record_taskinfo(struct task_struct *task, int flags)
2607  {
2608  	bool done;
2609  
2610  	if (tracing_record_taskinfo_skip(flags))
2611  		return;
2612  
2613  	/*
2614  	 * Record as much task information as possible. If some fail, continue
2615  	 * to try to record the others.
2616  	 */
2617  	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2618  	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2619  
2620  	/* If recording any information failed, retry again soon. */
2621  	if (!done)
2622  		return;
2623  
2624  	__this_cpu_write(trace_taskinfo_save, false);
2625  }
2626  
2627  /**
2628   * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2629   *
2630   * @prev: previous task during sched_switch
2631   * @next: next task during sched_switch
2632   * @flags: TRACE_RECORD_CMDLINE for recording comm
2633   *         TRACE_RECORD_TGID for recording tgid
2634   */
tracing_record_taskinfo_sched_switch(struct task_struct * prev,struct task_struct * next,int flags)2635  void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2636  					  struct task_struct *next, int flags)
2637  {
2638  	bool done;
2639  
2640  	if (tracing_record_taskinfo_skip(flags))
2641  		return;
2642  
2643  	/*
2644  	 * Record as much task information as possible. If some fail, continue
2645  	 * to try to record the others.
2646  	 */
2647  	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2648  	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2649  	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2650  	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2651  
2652  	/* If recording any information failed, retry again soon. */
2653  	if (!done)
2654  		return;
2655  
2656  	__this_cpu_write(trace_taskinfo_save, false);
2657  }
2658  
2659  /* Helpers to record a specific task information */
tracing_record_cmdline(struct task_struct * task)2660  void tracing_record_cmdline(struct task_struct *task)
2661  {
2662  	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2663  }
2664  
tracing_record_tgid(struct task_struct * task)2665  void tracing_record_tgid(struct task_struct *task)
2666  {
2667  	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2668  }
2669  
2670  /*
2671   * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2672   * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2673   * simplifies those functions and keeps them in sync.
2674   */
trace_handle_return(struct trace_seq * s)2675  enum print_line_t trace_handle_return(struct trace_seq *s)
2676  {
2677  	return trace_seq_has_overflowed(s) ?
2678  		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2679  }
2680  EXPORT_SYMBOL_GPL(trace_handle_return);
2681  
migration_disable_value(void)2682  static unsigned short migration_disable_value(void)
2683  {
2684  #if defined(CONFIG_SMP)
2685  	return current->migration_disabled;
2686  #else
2687  	return 0;
2688  #endif
2689  }
2690  
tracing_gen_ctx_irq_test(unsigned int irqs_status)2691  unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2692  {
2693  	unsigned int trace_flags = irqs_status;
2694  	unsigned int pc;
2695  
2696  	pc = preempt_count();
2697  
2698  	if (pc & NMI_MASK)
2699  		trace_flags |= TRACE_FLAG_NMI;
2700  	if (pc & HARDIRQ_MASK)
2701  		trace_flags |= TRACE_FLAG_HARDIRQ;
2702  	if (in_serving_softirq())
2703  		trace_flags |= TRACE_FLAG_SOFTIRQ;
2704  	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2705  		trace_flags |= TRACE_FLAG_BH_OFF;
2706  
2707  	if (tif_need_resched())
2708  		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2709  	if (test_preempt_need_resched())
2710  		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2711  	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2712  		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2713  }
2714  
2715  struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)2716  trace_buffer_lock_reserve(struct trace_buffer *buffer,
2717  			  int type,
2718  			  unsigned long len,
2719  			  unsigned int trace_ctx)
2720  {
2721  	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2722  }
2723  
2724  DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2725  DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2726  static int trace_buffered_event_ref;
2727  
2728  /**
2729   * trace_buffered_event_enable - enable buffering events
2730   *
2731   * When events are being filtered, it is quicker to use a temporary
2732   * buffer to write the event data into if there's a likely chance
2733   * that it will not be committed. The discard of the ring buffer
2734   * is not as fast as committing, and is much slower than copying
2735   * a commit.
2736   *
2737   * When an event is to be filtered, allocate per cpu buffers to
2738   * write the event data into, and if the event is filtered and discarded
2739   * it is simply dropped, otherwise, the entire data is to be committed
2740   * in one shot.
2741   */
trace_buffered_event_enable(void)2742  void trace_buffered_event_enable(void)
2743  {
2744  	struct ring_buffer_event *event;
2745  	struct page *page;
2746  	int cpu;
2747  
2748  	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2749  
2750  	if (trace_buffered_event_ref++)
2751  		return;
2752  
2753  	for_each_tracing_cpu(cpu) {
2754  		page = alloc_pages_node(cpu_to_node(cpu),
2755  					GFP_KERNEL | __GFP_NORETRY, 0);
2756  		/* This is just an optimization and can handle failures */
2757  		if (!page) {
2758  			pr_err("Failed to allocate event buffer\n");
2759  			break;
2760  		}
2761  
2762  		event = page_address(page);
2763  		memset(event, 0, sizeof(*event));
2764  
2765  		per_cpu(trace_buffered_event, cpu) = event;
2766  
2767  		preempt_disable();
2768  		if (cpu == smp_processor_id() &&
2769  		    __this_cpu_read(trace_buffered_event) !=
2770  		    per_cpu(trace_buffered_event, cpu))
2771  			WARN_ON_ONCE(1);
2772  		preempt_enable();
2773  	}
2774  }
2775  
enable_trace_buffered_event(void * data)2776  static void enable_trace_buffered_event(void *data)
2777  {
2778  	/* Probably not needed, but do it anyway */
2779  	smp_rmb();
2780  	this_cpu_dec(trace_buffered_event_cnt);
2781  }
2782  
disable_trace_buffered_event(void * data)2783  static void disable_trace_buffered_event(void *data)
2784  {
2785  	this_cpu_inc(trace_buffered_event_cnt);
2786  }
2787  
2788  /**
2789   * trace_buffered_event_disable - disable buffering events
2790   *
2791   * When a filter is removed, it is faster to not use the buffered
2792   * events, and to commit directly into the ring buffer. Free up
2793   * the temp buffers when there are no more users. This requires
2794   * special synchronization with current events.
2795   */
trace_buffered_event_disable(void)2796  void trace_buffered_event_disable(void)
2797  {
2798  	int cpu;
2799  
2800  	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2801  
2802  	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2803  		return;
2804  
2805  	if (--trace_buffered_event_ref)
2806  		return;
2807  
2808  	/* For each CPU, set the buffer as used. */
2809  	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2810  			 NULL, true);
2811  
2812  	/* Wait for all current users to finish */
2813  	synchronize_rcu();
2814  
2815  	for_each_tracing_cpu(cpu) {
2816  		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2817  		per_cpu(trace_buffered_event, cpu) = NULL;
2818  	}
2819  
2820  	/*
2821  	 * Wait for all CPUs that potentially started checking if they can use
2822  	 * their event buffer only after the previous synchronize_rcu() call and
2823  	 * they still read a valid pointer from trace_buffered_event. It must be
2824  	 * ensured they don't see cleared trace_buffered_event_cnt else they
2825  	 * could wrongly decide to use the pointed-to buffer which is now freed.
2826  	 */
2827  	synchronize_rcu();
2828  
2829  	/* For each CPU, relinquish the buffer */
2830  	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2831  			 true);
2832  }
2833  
2834  static struct trace_buffer *temp_buffer;
2835  
2836  struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer ** current_rb,struct trace_event_file * trace_file,int type,unsigned long len,unsigned int trace_ctx)2837  trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2838  			  struct trace_event_file *trace_file,
2839  			  int type, unsigned long len,
2840  			  unsigned int trace_ctx)
2841  {
2842  	struct ring_buffer_event *entry;
2843  	struct trace_array *tr = trace_file->tr;
2844  	int val;
2845  
2846  	*current_rb = tr->array_buffer.buffer;
2847  
2848  	if (!tr->no_filter_buffering_ref &&
2849  	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2850  		preempt_disable_notrace();
2851  		/*
2852  		 * Filtering is on, so try to use the per cpu buffer first.
2853  		 * This buffer will simulate a ring_buffer_event,
2854  		 * where the type_len is zero and the array[0] will
2855  		 * hold the full length.
2856  		 * (see include/linux/ring-buffer.h for details on
2857  		 *  how the ring_buffer_event is structured).
2858  		 *
2859  		 * Using a temp buffer during filtering and copying it
2860  		 * on a matched filter is quicker than writing directly
2861  		 * into the ring buffer and then discarding it when
2862  		 * it doesn't match. That is because the discard
2863  		 * requires several atomic operations to get right.
2864  		 * Copying on match and doing nothing on a failed match
2865  		 * is still quicker than no copy on match, but having
2866  		 * to discard out of the ring buffer on a failed match.
2867  		 */
2868  		if ((entry = __this_cpu_read(trace_buffered_event))) {
2869  			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2870  
2871  			val = this_cpu_inc_return(trace_buffered_event_cnt);
2872  
2873  			/*
2874  			 * Preemption is disabled, but interrupts and NMIs
2875  			 * can still come in now. If that happens after
2876  			 * the above increment, then it will have to go
2877  			 * back to the old method of allocating the event
2878  			 * on the ring buffer, and if the filter fails, it
2879  			 * will have to call ring_buffer_discard_commit()
2880  			 * to remove it.
2881  			 *
2882  			 * Need to also check the unlikely case that the
2883  			 * length is bigger than the temp buffer size.
2884  			 * If that happens, then the reserve is pretty much
2885  			 * guaranteed to fail, as the ring buffer currently
2886  			 * only allows events less than a page. But that may
2887  			 * change in the future, so let the ring buffer reserve
2888  			 * handle the failure in that case.
2889  			 */
2890  			if (val == 1 && likely(len <= max_len)) {
2891  				trace_event_setup(entry, type, trace_ctx);
2892  				entry->array[0] = len;
2893  				/* Return with preemption disabled */
2894  				return entry;
2895  			}
2896  			this_cpu_dec(trace_buffered_event_cnt);
2897  		}
2898  		/* __trace_buffer_lock_reserve() disables preemption */
2899  		preempt_enable_notrace();
2900  	}
2901  
2902  	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2903  					    trace_ctx);
2904  	/*
2905  	 * If tracing is off, but we have triggers enabled
2906  	 * we still need to look at the event data. Use the temp_buffer
2907  	 * to store the trace event for the trigger to use. It's recursive
2908  	 * safe and will not be recorded anywhere.
2909  	 */
2910  	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2911  		*current_rb = temp_buffer;
2912  		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2913  						    trace_ctx);
2914  	}
2915  	return entry;
2916  }
2917  EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2918  
2919  static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2920  static DEFINE_MUTEX(tracepoint_printk_mutex);
2921  
output_printk(struct trace_event_buffer * fbuffer)2922  static void output_printk(struct trace_event_buffer *fbuffer)
2923  {
2924  	struct trace_event_call *event_call;
2925  	struct trace_event_file *file;
2926  	struct trace_event *event;
2927  	unsigned long flags;
2928  	struct trace_iterator *iter = tracepoint_print_iter;
2929  
2930  	/* We should never get here if iter is NULL */
2931  	if (WARN_ON_ONCE(!iter))
2932  		return;
2933  
2934  	event_call = fbuffer->trace_file->event_call;
2935  	if (!event_call || !event_call->event.funcs ||
2936  	    !event_call->event.funcs->trace)
2937  		return;
2938  
2939  	file = fbuffer->trace_file;
2940  	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2941  	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2942  	     !filter_match_preds(file->filter, fbuffer->entry)))
2943  		return;
2944  
2945  	event = &fbuffer->trace_file->event_call->event;
2946  
2947  	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2948  	trace_seq_init(&iter->seq);
2949  	iter->ent = fbuffer->entry;
2950  	event_call->event.funcs->trace(iter, 0, event);
2951  	trace_seq_putc(&iter->seq, 0);
2952  	printk("%s", iter->seq.buffer);
2953  
2954  	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2955  }
2956  
tracepoint_printk_sysctl(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)2957  int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2958  			     void *buffer, size_t *lenp,
2959  			     loff_t *ppos)
2960  {
2961  	int save_tracepoint_printk;
2962  	int ret;
2963  
2964  	mutex_lock(&tracepoint_printk_mutex);
2965  	save_tracepoint_printk = tracepoint_printk;
2966  
2967  	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2968  
2969  	/*
2970  	 * This will force exiting early, as tracepoint_printk
2971  	 * is always zero when tracepoint_printk_iter is not allocated
2972  	 */
2973  	if (!tracepoint_print_iter)
2974  		tracepoint_printk = 0;
2975  
2976  	if (save_tracepoint_printk == tracepoint_printk)
2977  		goto out;
2978  
2979  	if (tracepoint_printk)
2980  		static_key_enable(&tracepoint_printk_key.key);
2981  	else
2982  		static_key_disable(&tracepoint_printk_key.key);
2983  
2984   out:
2985  	mutex_unlock(&tracepoint_printk_mutex);
2986  
2987  	return ret;
2988  }
2989  
trace_event_buffer_commit(struct trace_event_buffer * fbuffer)2990  void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2991  {
2992  	enum event_trigger_type tt = ETT_NONE;
2993  	struct trace_event_file *file = fbuffer->trace_file;
2994  
2995  	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2996  			fbuffer->entry, &tt))
2997  		goto discard;
2998  
2999  	if (static_key_false(&tracepoint_printk_key.key))
3000  		output_printk(fbuffer);
3001  
3002  	if (static_branch_unlikely(&trace_event_exports_enabled))
3003  		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3004  
3005  	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3006  			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3007  
3008  discard:
3009  	if (tt)
3010  		event_triggers_post_call(file, tt);
3011  
3012  }
3013  EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3014  
3015  /*
3016   * Skip 3:
3017   *
3018   *   trace_buffer_unlock_commit_regs()
3019   *   trace_event_buffer_commit()
3020   *   trace_event_raw_event_xxx()
3021   */
3022  # define STACK_SKIP 3
3023  
trace_buffer_unlock_commit_regs(struct trace_array * tr,struct trace_buffer * buffer,struct ring_buffer_event * event,unsigned int trace_ctx,struct pt_regs * regs)3024  void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3025  				     struct trace_buffer *buffer,
3026  				     struct ring_buffer_event *event,
3027  				     unsigned int trace_ctx,
3028  				     struct pt_regs *regs)
3029  {
3030  	__buffer_unlock_commit(buffer, event);
3031  
3032  	/*
3033  	 * If regs is not set, then skip the necessary functions.
3034  	 * Note, we can still get here via blktrace, wakeup tracer
3035  	 * and mmiotrace, but that's ok if they lose a function or
3036  	 * two. They are not that meaningful.
3037  	 */
3038  	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3039  	ftrace_trace_userstack(tr, buffer, trace_ctx);
3040  }
3041  
3042  /*
3043   * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3044   */
3045  void
trace_buffer_unlock_commit_nostack(struct trace_buffer * buffer,struct ring_buffer_event * event)3046  trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3047  				   struct ring_buffer_event *event)
3048  {
3049  	__buffer_unlock_commit(buffer, event);
3050  }
3051  
3052  void
trace_function(struct trace_array * tr,unsigned long ip,unsigned long parent_ip,unsigned int trace_ctx)3053  trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3054  	       parent_ip, unsigned int trace_ctx)
3055  {
3056  	struct trace_event_call *call = &event_function;
3057  	struct trace_buffer *buffer = tr->array_buffer.buffer;
3058  	struct ring_buffer_event *event;
3059  	struct ftrace_entry *entry;
3060  
3061  	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3062  					    trace_ctx);
3063  	if (!event)
3064  		return;
3065  	entry	= ring_buffer_event_data(event);
3066  	entry->ip			= ip;
3067  	entry->parent_ip		= parent_ip;
3068  
3069  	if (!call_filter_check_discard(call, entry, buffer, event)) {
3070  		if (static_branch_unlikely(&trace_function_exports_enabled))
3071  			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3072  		__buffer_unlock_commit(buffer, event);
3073  	}
3074  }
3075  
3076  #ifdef CONFIG_STACKTRACE
3077  
3078  /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3079  #define FTRACE_KSTACK_NESTING	4
3080  
3081  #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
3082  
3083  struct ftrace_stack {
3084  	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3085  };
3086  
3087  
3088  struct ftrace_stacks {
3089  	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3090  };
3091  
3092  static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3093  static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3094  
__ftrace_trace_stack(struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)3095  static void __ftrace_trace_stack(struct trace_buffer *buffer,
3096  				 unsigned int trace_ctx,
3097  				 int skip, struct pt_regs *regs)
3098  {
3099  	struct trace_event_call *call = &event_kernel_stack;
3100  	struct ring_buffer_event *event;
3101  	unsigned int size, nr_entries;
3102  	struct ftrace_stack *fstack;
3103  	struct stack_entry *entry;
3104  	int stackidx;
3105  
3106  	/*
3107  	 * Add one, for this function and the call to save_stack_trace()
3108  	 * If regs is set, then these functions will not be in the way.
3109  	 */
3110  #ifndef CONFIG_UNWINDER_ORC
3111  	if (!regs)
3112  		skip++;
3113  #endif
3114  
3115  	preempt_disable_notrace();
3116  
3117  	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3118  
3119  	/* This should never happen. If it does, yell once and skip */
3120  	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3121  		goto out;
3122  
3123  	/*
3124  	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3125  	 * interrupt will either see the value pre increment or post
3126  	 * increment. If the interrupt happens pre increment it will have
3127  	 * restored the counter when it returns.  We just need a barrier to
3128  	 * keep gcc from moving things around.
3129  	 */
3130  	barrier();
3131  
3132  	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3133  	size = ARRAY_SIZE(fstack->calls);
3134  
3135  	if (regs) {
3136  		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3137  						   size, skip);
3138  	} else {
3139  		nr_entries = stack_trace_save(fstack->calls, size, skip);
3140  	}
3141  
3142  	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3143  				    struct_size(entry, caller, nr_entries),
3144  				    trace_ctx);
3145  	if (!event)
3146  		goto out;
3147  	entry = ring_buffer_event_data(event);
3148  
3149  	entry->size = nr_entries;
3150  	memcpy(&entry->caller, fstack->calls,
3151  	       flex_array_size(entry, caller, nr_entries));
3152  
3153  	if (!call_filter_check_discard(call, entry, buffer, event))
3154  		__buffer_unlock_commit(buffer, event);
3155  
3156   out:
3157  	/* Again, don't let gcc optimize things here */
3158  	barrier();
3159  	__this_cpu_dec(ftrace_stack_reserve);
3160  	preempt_enable_notrace();
3161  
3162  }
3163  
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)3164  static inline void ftrace_trace_stack(struct trace_array *tr,
3165  				      struct trace_buffer *buffer,
3166  				      unsigned int trace_ctx,
3167  				      int skip, struct pt_regs *regs)
3168  {
3169  	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3170  		return;
3171  
3172  	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3173  }
3174  
__trace_stack(struct trace_array * tr,unsigned int trace_ctx,int skip)3175  void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3176  		   int skip)
3177  {
3178  	struct trace_buffer *buffer = tr->array_buffer.buffer;
3179  
3180  	if (rcu_is_watching()) {
3181  		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3182  		return;
3183  	}
3184  
3185  	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3186  		return;
3187  
3188  	/*
3189  	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3190  	 * but if the above rcu_is_watching() failed, then the NMI
3191  	 * triggered someplace critical, and ct_irq_enter() should
3192  	 * not be called from NMI.
3193  	 */
3194  	if (unlikely(in_nmi()))
3195  		return;
3196  
3197  	ct_irq_enter_irqson();
3198  	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3199  	ct_irq_exit_irqson();
3200  }
3201  
3202  /**
3203   * trace_dump_stack - record a stack back trace in the trace buffer
3204   * @skip: Number of functions to skip (helper handlers)
3205   */
trace_dump_stack(int skip)3206  void trace_dump_stack(int skip)
3207  {
3208  	if (tracing_disabled || tracing_selftest_running)
3209  		return;
3210  
3211  #ifndef CONFIG_UNWINDER_ORC
3212  	/* Skip 1 to skip this function. */
3213  	skip++;
3214  #endif
3215  	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3216  			     tracing_gen_ctx(), skip, NULL);
3217  }
3218  EXPORT_SYMBOL_GPL(trace_dump_stack);
3219  
3220  #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3221  static DEFINE_PER_CPU(int, user_stack_count);
3222  
3223  static void
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3224  ftrace_trace_userstack(struct trace_array *tr,
3225  		       struct trace_buffer *buffer, unsigned int trace_ctx)
3226  {
3227  	struct trace_event_call *call = &event_user_stack;
3228  	struct ring_buffer_event *event;
3229  	struct userstack_entry *entry;
3230  
3231  	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3232  		return;
3233  
3234  	/*
3235  	 * NMIs can not handle page faults, even with fix ups.
3236  	 * The save user stack can (and often does) fault.
3237  	 */
3238  	if (unlikely(in_nmi()))
3239  		return;
3240  
3241  	/*
3242  	 * prevent recursion, since the user stack tracing may
3243  	 * trigger other kernel events.
3244  	 */
3245  	preempt_disable();
3246  	if (__this_cpu_read(user_stack_count))
3247  		goto out;
3248  
3249  	__this_cpu_inc(user_stack_count);
3250  
3251  	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3252  					    sizeof(*entry), trace_ctx);
3253  	if (!event)
3254  		goto out_drop_count;
3255  	entry	= ring_buffer_event_data(event);
3256  
3257  	entry->tgid		= current->tgid;
3258  	memset(&entry->caller, 0, sizeof(entry->caller));
3259  
3260  	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3261  	if (!call_filter_check_discard(call, entry, buffer, event))
3262  		__buffer_unlock_commit(buffer, event);
3263  
3264   out_drop_count:
3265  	__this_cpu_dec(user_stack_count);
3266   out:
3267  	preempt_enable();
3268  }
3269  #else /* CONFIG_USER_STACKTRACE_SUPPORT */
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3270  static void ftrace_trace_userstack(struct trace_array *tr,
3271  				   struct trace_buffer *buffer,
3272  				   unsigned int trace_ctx)
3273  {
3274  }
3275  #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3276  
3277  #endif /* CONFIG_STACKTRACE */
3278  
3279  static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)3280  func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3281  			  unsigned long long delta)
3282  {
3283  	entry->bottom_delta_ts = delta & U32_MAX;
3284  	entry->top_delta_ts = (delta >> 32);
3285  }
3286  
trace_last_func_repeats(struct trace_array * tr,struct trace_func_repeats * last_info,unsigned int trace_ctx)3287  void trace_last_func_repeats(struct trace_array *tr,
3288  			     struct trace_func_repeats *last_info,
3289  			     unsigned int trace_ctx)
3290  {
3291  	struct trace_buffer *buffer = tr->array_buffer.buffer;
3292  	struct func_repeats_entry *entry;
3293  	struct ring_buffer_event *event;
3294  	u64 delta;
3295  
3296  	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3297  					    sizeof(*entry), trace_ctx);
3298  	if (!event)
3299  		return;
3300  
3301  	delta = ring_buffer_event_time_stamp(buffer, event) -
3302  		last_info->ts_last_call;
3303  
3304  	entry = ring_buffer_event_data(event);
3305  	entry->ip = last_info->ip;
3306  	entry->parent_ip = last_info->parent_ip;
3307  	entry->count = last_info->count;
3308  	func_repeats_set_delta_ts(entry, delta);
3309  
3310  	__buffer_unlock_commit(buffer, event);
3311  }
3312  
3313  /* created for use with alloc_percpu */
3314  struct trace_buffer_struct {
3315  	int nesting;
3316  	char buffer[4][TRACE_BUF_SIZE];
3317  };
3318  
3319  static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3320  
3321  /*
3322   * This allows for lockless recording.  If we're nested too deeply, then
3323   * this returns NULL.
3324   */
get_trace_buf(void)3325  static char *get_trace_buf(void)
3326  {
3327  	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3328  
3329  	if (!trace_percpu_buffer || buffer->nesting >= 4)
3330  		return NULL;
3331  
3332  	buffer->nesting++;
3333  
3334  	/* Interrupts must see nesting incremented before we use the buffer */
3335  	barrier();
3336  	return &buffer->buffer[buffer->nesting - 1][0];
3337  }
3338  
put_trace_buf(void)3339  static void put_trace_buf(void)
3340  {
3341  	/* Don't let the decrement of nesting leak before this */
3342  	barrier();
3343  	this_cpu_dec(trace_percpu_buffer->nesting);
3344  }
3345  
alloc_percpu_trace_buffer(void)3346  static int alloc_percpu_trace_buffer(void)
3347  {
3348  	struct trace_buffer_struct __percpu *buffers;
3349  
3350  	if (trace_percpu_buffer)
3351  		return 0;
3352  
3353  	buffers = alloc_percpu(struct trace_buffer_struct);
3354  	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3355  		return -ENOMEM;
3356  
3357  	trace_percpu_buffer = buffers;
3358  	return 0;
3359  }
3360  
3361  static int buffers_allocated;
3362  
trace_printk_init_buffers(void)3363  void trace_printk_init_buffers(void)
3364  {
3365  	if (buffers_allocated)
3366  		return;
3367  
3368  	if (alloc_percpu_trace_buffer())
3369  		return;
3370  
3371  	/* trace_printk() is for debug use only. Don't use it in production. */
3372  
3373  	pr_warn("\n");
3374  	pr_warn("**********************************************************\n");
3375  	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3376  	pr_warn("**                                                      **\n");
3377  	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3378  	pr_warn("**                                                      **\n");
3379  	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3380  	pr_warn("** unsafe for production use.                           **\n");
3381  	pr_warn("**                                                      **\n");
3382  	pr_warn("** If you see this message and you are not debugging    **\n");
3383  	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3384  	pr_warn("**                                                      **\n");
3385  	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3386  	pr_warn("**********************************************************\n");
3387  
3388  	/* Expand the buffers to set size */
3389  	tracing_update_buffers();
3390  
3391  	buffers_allocated = 1;
3392  
3393  	/*
3394  	 * trace_printk_init_buffers() can be called by modules.
3395  	 * If that happens, then we need to start cmdline recording
3396  	 * directly here. If the global_trace.buffer is already
3397  	 * allocated here, then this was called by module code.
3398  	 */
3399  	if (global_trace.array_buffer.buffer)
3400  		tracing_start_cmdline_record();
3401  }
3402  EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3403  
trace_printk_start_comm(void)3404  void trace_printk_start_comm(void)
3405  {
3406  	/* Start tracing comms if trace printk is set */
3407  	if (!buffers_allocated)
3408  		return;
3409  	tracing_start_cmdline_record();
3410  }
3411  
trace_printk_start_stop_comm(int enabled)3412  static void trace_printk_start_stop_comm(int enabled)
3413  {
3414  	if (!buffers_allocated)
3415  		return;
3416  
3417  	if (enabled)
3418  		tracing_start_cmdline_record();
3419  	else
3420  		tracing_stop_cmdline_record();
3421  }
3422  
3423  /**
3424   * trace_vbprintk - write binary msg to tracing buffer
3425   * @ip:    The address of the caller
3426   * @fmt:   The string format to write to the buffer
3427   * @args:  Arguments for @fmt
3428   */
trace_vbprintk(unsigned long ip,const char * fmt,va_list args)3429  int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3430  {
3431  	struct trace_event_call *call = &event_bprint;
3432  	struct ring_buffer_event *event;
3433  	struct trace_buffer *buffer;
3434  	struct trace_array *tr = &global_trace;
3435  	struct bprint_entry *entry;
3436  	unsigned int trace_ctx;
3437  	char *tbuffer;
3438  	int len = 0, size;
3439  
3440  	if (unlikely(tracing_selftest_running || tracing_disabled))
3441  		return 0;
3442  
3443  	/* Don't pollute graph traces with trace_vprintk internals */
3444  	pause_graph_tracing();
3445  
3446  	trace_ctx = tracing_gen_ctx();
3447  	preempt_disable_notrace();
3448  
3449  	tbuffer = get_trace_buf();
3450  	if (!tbuffer) {
3451  		len = 0;
3452  		goto out_nobuffer;
3453  	}
3454  
3455  	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3456  
3457  	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3458  		goto out_put;
3459  
3460  	size = sizeof(*entry) + sizeof(u32) * len;
3461  	buffer = tr->array_buffer.buffer;
3462  	ring_buffer_nest_start(buffer);
3463  	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3464  					    trace_ctx);
3465  	if (!event)
3466  		goto out;
3467  	entry = ring_buffer_event_data(event);
3468  	entry->ip			= ip;
3469  	entry->fmt			= fmt;
3470  
3471  	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3472  	if (!call_filter_check_discard(call, entry, buffer, event)) {
3473  		__buffer_unlock_commit(buffer, event);
3474  		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3475  	}
3476  
3477  out:
3478  	ring_buffer_nest_end(buffer);
3479  out_put:
3480  	put_trace_buf();
3481  
3482  out_nobuffer:
3483  	preempt_enable_notrace();
3484  	unpause_graph_tracing();
3485  
3486  	return len;
3487  }
3488  EXPORT_SYMBOL_GPL(trace_vbprintk);
3489  
3490  __printf(3, 0)
3491  static int
__trace_array_vprintk(struct trace_buffer * buffer,unsigned long ip,const char * fmt,va_list args)3492  __trace_array_vprintk(struct trace_buffer *buffer,
3493  		      unsigned long ip, const char *fmt, va_list args)
3494  {
3495  	struct trace_event_call *call = &event_print;
3496  	struct ring_buffer_event *event;
3497  	int len = 0, size;
3498  	struct print_entry *entry;
3499  	unsigned int trace_ctx;
3500  	char *tbuffer;
3501  
3502  	if (tracing_disabled)
3503  		return 0;
3504  
3505  	/* Don't pollute graph traces with trace_vprintk internals */
3506  	pause_graph_tracing();
3507  
3508  	trace_ctx = tracing_gen_ctx();
3509  	preempt_disable_notrace();
3510  
3511  
3512  	tbuffer = get_trace_buf();
3513  	if (!tbuffer) {
3514  		len = 0;
3515  		goto out_nobuffer;
3516  	}
3517  
3518  	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3519  
3520  	size = sizeof(*entry) + len + 1;
3521  	ring_buffer_nest_start(buffer);
3522  	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3523  					    trace_ctx);
3524  	if (!event)
3525  		goto out;
3526  	entry = ring_buffer_event_data(event);
3527  	entry->ip = ip;
3528  
3529  	memcpy(&entry->buf, tbuffer, len + 1);
3530  	if (!call_filter_check_discard(call, entry, buffer, event)) {
3531  		__buffer_unlock_commit(buffer, event);
3532  		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3533  	}
3534  
3535  out:
3536  	ring_buffer_nest_end(buffer);
3537  	put_trace_buf();
3538  
3539  out_nobuffer:
3540  	preempt_enable_notrace();
3541  	unpause_graph_tracing();
3542  
3543  	return len;
3544  }
3545  
3546  __printf(3, 0)
trace_array_vprintk(struct trace_array * tr,unsigned long ip,const char * fmt,va_list args)3547  int trace_array_vprintk(struct trace_array *tr,
3548  			unsigned long ip, const char *fmt, va_list args)
3549  {
3550  	if (tracing_selftest_running && tr == &global_trace)
3551  		return 0;
3552  
3553  	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3554  }
3555  
3556  /**
3557   * trace_array_printk - Print a message to a specific instance
3558   * @tr: The instance trace_array descriptor
3559   * @ip: The instruction pointer that this is called from.
3560   * @fmt: The format to print (printf format)
3561   *
3562   * If a subsystem sets up its own instance, they have the right to
3563   * printk strings into their tracing instance buffer using this
3564   * function. Note, this function will not write into the top level
3565   * buffer (use trace_printk() for that), as writing into the top level
3566   * buffer should only have events that can be individually disabled.
3567   * trace_printk() is only used for debugging a kernel, and should not
3568   * be ever incorporated in normal use.
3569   *
3570   * trace_array_printk() can be used, as it will not add noise to the
3571   * top level tracing buffer.
3572   *
3573   * Note, trace_array_init_printk() must be called on @tr before this
3574   * can be used.
3575   */
3576  __printf(3, 0)
trace_array_printk(struct trace_array * tr,unsigned long ip,const char * fmt,...)3577  int trace_array_printk(struct trace_array *tr,
3578  		       unsigned long ip, const char *fmt, ...)
3579  {
3580  	int ret;
3581  	va_list ap;
3582  
3583  	if (!tr)
3584  		return -ENOENT;
3585  
3586  	/* This is only allowed for created instances */
3587  	if (tr == &global_trace)
3588  		return 0;
3589  
3590  	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3591  		return 0;
3592  
3593  	va_start(ap, fmt);
3594  	ret = trace_array_vprintk(tr, ip, fmt, ap);
3595  	va_end(ap);
3596  	return ret;
3597  }
3598  EXPORT_SYMBOL_GPL(trace_array_printk);
3599  
3600  /**
3601   * trace_array_init_printk - Initialize buffers for trace_array_printk()
3602   * @tr: The trace array to initialize the buffers for
3603   *
3604   * As trace_array_printk() only writes into instances, they are OK to
3605   * have in the kernel (unlike trace_printk()). This needs to be called
3606   * before trace_array_printk() can be used on a trace_array.
3607   */
trace_array_init_printk(struct trace_array * tr)3608  int trace_array_init_printk(struct trace_array *tr)
3609  {
3610  	if (!tr)
3611  		return -ENOENT;
3612  
3613  	/* This is only allowed for created instances */
3614  	if (tr == &global_trace)
3615  		return -EINVAL;
3616  
3617  	return alloc_percpu_trace_buffer();
3618  }
3619  EXPORT_SYMBOL_GPL(trace_array_init_printk);
3620  
3621  __printf(3, 4)
trace_array_printk_buf(struct trace_buffer * buffer,unsigned long ip,const char * fmt,...)3622  int trace_array_printk_buf(struct trace_buffer *buffer,
3623  			   unsigned long ip, const char *fmt, ...)
3624  {
3625  	int ret;
3626  	va_list ap;
3627  
3628  	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3629  		return 0;
3630  
3631  	va_start(ap, fmt);
3632  	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3633  	va_end(ap);
3634  	return ret;
3635  }
3636  
3637  __printf(2, 0)
trace_vprintk(unsigned long ip,const char * fmt,va_list args)3638  int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3639  {
3640  	return trace_array_vprintk(&global_trace, ip, fmt, args);
3641  }
3642  EXPORT_SYMBOL_GPL(trace_vprintk);
3643  
trace_iterator_increment(struct trace_iterator * iter)3644  static void trace_iterator_increment(struct trace_iterator *iter)
3645  {
3646  	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3647  
3648  	iter->idx++;
3649  	if (buf_iter)
3650  		ring_buffer_iter_advance(buf_iter);
3651  }
3652  
3653  static struct trace_entry *
peek_next_entry(struct trace_iterator * iter,int cpu,u64 * ts,unsigned long * lost_events)3654  peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3655  		unsigned long *lost_events)
3656  {
3657  	struct ring_buffer_event *event;
3658  	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3659  
3660  	if (buf_iter) {
3661  		event = ring_buffer_iter_peek(buf_iter, ts);
3662  		if (lost_events)
3663  			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3664  				(unsigned long)-1 : 0;
3665  	} else {
3666  		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3667  					 lost_events);
3668  	}
3669  
3670  	if (event) {
3671  		iter->ent_size = ring_buffer_event_length(event);
3672  		return ring_buffer_event_data(event);
3673  	}
3674  	iter->ent_size = 0;
3675  	return NULL;
3676  }
3677  
3678  static struct trace_entry *
__find_next_entry(struct trace_iterator * iter,int * ent_cpu,unsigned long * missing_events,u64 * ent_ts)3679  __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3680  		  unsigned long *missing_events, u64 *ent_ts)
3681  {
3682  	struct trace_buffer *buffer = iter->array_buffer->buffer;
3683  	struct trace_entry *ent, *next = NULL;
3684  	unsigned long lost_events = 0, next_lost = 0;
3685  	int cpu_file = iter->cpu_file;
3686  	u64 next_ts = 0, ts;
3687  	int next_cpu = -1;
3688  	int next_size = 0;
3689  	int cpu;
3690  
3691  	/*
3692  	 * If we are in a per_cpu trace file, don't bother by iterating over
3693  	 * all cpu and peek directly.
3694  	 */
3695  	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3696  		if (ring_buffer_empty_cpu(buffer, cpu_file))
3697  			return NULL;
3698  		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3699  		if (ent_cpu)
3700  			*ent_cpu = cpu_file;
3701  
3702  		return ent;
3703  	}
3704  
3705  	for_each_tracing_cpu(cpu) {
3706  
3707  		if (ring_buffer_empty_cpu(buffer, cpu))
3708  			continue;
3709  
3710  		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3711  
3712  		/*
3713  		 * Pick the entry with the smallest timestamp:
3714  		 */
3715  		if (ent && (!next || ts < next_ts)) {
3716  			next = ent;
3717  			next_cpu = cpu;
3718  			next_ts = ts;
3719  			next_lost = lost_events;
3720  			next_size = iter->ent_size;
3721  		}
3722  	}
3723  
3724  	iter->ent_size = next_size;
3725  
3726  	if (ent_cpu)
3727  		*ent_cpu = next_cpu;
3728  
3729  	if (ent_ts)
3730  		*ent_ts = next_ts;
3731  
3732  	if (missing_events)
3733  		*missing_events = next_lost;
3734  
3735  	return next;
3736  }
3737  
3738  #define STATIC_FMT_BUF_SIZE	128
3739  static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3740  
trace_iter_expand_format(struct trace_iterator * iter)3741  char *trace_iter_expand_format(struct trace_iterator *iter)
3742  {
3743  	char *tmp;
3744  
3745  	/*
3746  	 * iter->tr is NULL when used with tp_printk, which makes
3747  	 * this get called where it is not safe to call krealloc().
3748  	 */
3749  	if (!iter->tr || iter->fmt == static_fmt_buf)
3750  		return NULL;
3751  
3752  	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3753  		       GFP_KERNEL);
3754  	if (tmp) {
3755  		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3756  		iter->fmt = tmp;
3757  	}
3758  
3759  	return tmp;
3760  }
3761  
3762  /* Returns true if the string is safe to dereference from an event */
trace_safe_str(struct trace_iterator * iter,const char * str)3763  static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3764  {
3765  	unsigned long addr = (unsigned long)str;
3766  	struct trace_event *trace_event;
3767  	struct trace_event_call *event;
3768  
3769  	/* OK if part of the event data */
3770  	if ((addr >= (unsigned long)iter->ent) &&
3771  	    (addr < (unsigned long)iter->ent + iter->ent_size))
3772  		return true;
3773  
3774  	/* OK if part of the temp seq buffer */
3775  	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3776  	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3777  		return true;
3778  
3779  	/* Core rodata can not be freed */
3780  	if (is_kernel_rodata(addr))
3781  		return true;
3782  
3783  	if (trace_is_tracepoint_string(str))
3784  		return true;
3785  
3786  	/*
3787  	 * Now this could be a module event, referencing core module
3788  	 * data, which is OK.
3789  	 */
3790  	if (!iter->ent)
3791  		return false;
3792  
3793  	trace_event = ftrace_find_event(iter->ent->type);
3794  	if (!trace_event)
3795  		return false;
3796  
3797  	event = container_of(trace_event, struct trace_event_call, event);
3798  	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3799  		return false;
3800  
3801  	/* Would rather have rodata, but this will suffice */
3802  	if (within_module_core(addr, event->module))
3803  		return true;
3804  
3805  	return false;
3806  }
3807  
3808  /**
3809   * ignore_event - Check dereferenced fields while writing to the seq buffer
3810   * @iter: The iterator that holds the seq buffer and the event being printed
3811   *
3812   * At boot up, test_event_printk() will flag any event that dereferences
3813   * a string with "%s" that does exist in the ring buffer. It may still
3814   * be valid, as the string may point to a static string in the kernel
3815   * rodata that never gets freed. But if the string pointer is pointing
3816   * to something that was allocated, there's a chance that it can be freed
3817   * by the time the user reads the trace. This would cause a bad memory
3818   * access by the kernel and possibly crash the system.
3819   *
3820   * This function will check if the event has any fields flagged as needing
3821   * to be checked at runtime and perform those checks.
3822   *
3823   * If it is found that a field is unsafe, it will write into the @iter->seq
3824   * a message stating what was found to be unsafe.
3825   *
3826   * @return: true if the event is unsafe and should be ignored,
3827   *          false otherwise.
3828   */
ignore_event(struct trace_iterator * iter)3829  bool ignore_event(struct trace_iterator *iter)
3830  {
3831  	struct ftrace_event_field *field;
3832  	struct trace_event *trace_event;
3833  	struct trace_event_call *event;
3834  	struct list_head *head;
3835  	struct trace_seq *seq;
3836  	const void *ptr;
3837  
3838  	trace_event = ftrace_find_event(iter->ent->type);
3839  
3840  	seq = &iter->seq;
3841  
3842  	if (!trace_event) {
3843  		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3844  		return true;
3845  	}
3846  
3847  	event = container_of(trace_event, struct trace_event_call, event);
3848  	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3849  		return false;
3850  
3851  	head = trace_get_fields(event);
3852  	if (!head) {
3853  		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3854  				 trace_event_name(event));
3855  		return true;
3856  	}
3857  
3858  	/* Offsets are from the iter->ent that points to the raw event */
3859  	ptr = iter->ent;
3860  
3861  	list_for_each_entry(field, head, link) {
3862  		const char *str;
3863  		bool good;
3864  
3865  		if (!field->needs_test)
3866  			continue;
3867  
3868  		str = *(const char **)(ptr + field->offset);
3869  
3870  		good = trace_safe_str(iter, str);
3871  
3872  		/*
3873  		 * If you hit this warning, it is likely that the
3874  		 * trace event in question used %s on a string that
3875  		 * was saved at the time of the event, but may not be
3876  		 * around when the trace is read. Use __string(),
3877  		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3878  		 * instead. See samples/trace_events/trace-events-sample.h
3879  		 * for reference.
3880  		 */
3881  		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3882  			      trace_event_name(event), field->name)) {
3883  			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3884  					 trace_event_name(event), field->name);
3885  			return true;
3886  		}
3887  	}
3888  	return false;
3889  }
3890  
trace_event_format(struct trace_iterator * iter,const char * fmt)3891  const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3892  {
3893  	const char *p, *new_fmt;
3894  	char *q;
3895  
3896  	if (WARN_ON_ONCE(!fmt))
3897  		return fmt;
3898  
3899  	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3900  		return fmt;
3901  
3902  	p = fmt;
3903  	new_fmt = q = iter->fmt;
3904  	while (*p) {
3905  		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3906  			if (!trace_iter_expand_format(iter))
3907  				return fmt;
3908  
3909  			q += iter->fmt - new_fmt;
3910  			new_fmt = iter->fmt;
3911  		}
3912  
3913  		*q++ = *p++;
3914  
3915  		/* Replace %p with %px */
3916  		if (p[-1] == '%') {
3917  			if (p[0] == '%') {
3918  				*q++ = *p++;
3919  			} else if (p[0] == 'p' && !isalnum(p[1])) {
3920  				*q++ = *p++;
3921  				*q++ = 'x';
3922  			}
3923  		}
3924  	}
3925  	*q = '\0';
3926  
3927  	return new_fmt;
3928  }
3929  
3930  #define STATIC_TEMP_BUF_SIZE	128
3931  static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3932  
3933  /* Find the next real entry, without updating the iterator itself */
trace_find_next_entry(struct trace_iterator * iter,int * ent_cpu,u64 * ent_ts)3934  struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3935  					  int *ent_cpu, u64 *ent_ts)
3936  {
3937  	/* __find_next_entry will reset ent_size */
3938  	int ent_size = iter->ent_size;
3939  	struct trace_entry *entry;
3940  
3941  	/*
3942  	 * If called from ftrace_dump(), then the iter->temp buffer
3943  	 * will be the static_temp_buf and not created from kmalloc.
3944  	 * If the entry size is greater than the buffer, we can
3945  	 * not save it. Just return NULL in that case. This is only
3946  	 * used to add markers when two consecutive events' time
3947  	 * stamps have a large delta. See trace_print_lat_context()
3948  	 */
3949  	if (iter->temp == static_temp_buf &&
3950  	    STATIC_TEMP_BUF_SIZE < ent_size)
3951  		return NULL;
3952  
3953  	/*
3954  	 * The __find_next_entry() may call peek_next_entry(), which may
3955  	 * call ring_buffer_peek() that may make the contents of iter->ent
3956  	 * undefined. Need to copy iter->ent now.
3957  	 */
3958  	if (iter->ent && iter->ent != iter->temp) {
3959  		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3960  		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3961  			void *temp;
3962  			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3963  			if (!temp)
3964  				return NULL;
3965  			kfree(iter->temp);
3966  			iter->temp = temp;
3967  			iter->temp_size = iter->ent_size;
3968  		}
3969  		memcpy(iter->temp, iter->ent, iter->ent_size);
3970  		iter->ent = iter->temp;
3971  	}
3972  	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3973  	/* Put back the original ent_size */
3974  	iter->ent_size = ent_size;
3975  
3976  	return entry;
3977  }
3978  
3979  /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)3980  void *trace_find_next_entry_inc(struct trace_iterator *iter)
3981  {
3982  	iter->ent = __find_next_entry(iter, &iter->cpu,
3983  				      &iter->lost_events, &iter->ts);
3984  
3985  	if (iter->ent)
3986  		trace_iterator_increment(iter);
3987  
3988  	return iter->ent ? iter : NULL;
3989  }
3990  
trace_consume(struct trace_iterator * iter)3991  static void trace_consume(struct trace_iterator *iter)
3992  {
3993  	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3994  			    &iter->lost_events);
3995  }
3996  
s_next(struct seq_file * m,void * v,loff_t * pos)3997  static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3998  {
3999  	struct trace_iterator *iter = m->private;
4000  	int i = (int)*pos;
4001  	void *ent;
4002  
4003  	WARN_ON_ONCE(iter->leftover);
4004  
4005  	(*pos)++;
4006  
4007  	/* can't go backwards */
4008  	if (iter->idx > i)
4009  		return NULL;
4010  
4011  	if (iter->idx < 0)
4012  		ent = trace_find_next_entry_inc(iter);
4013  	else
4014  		ent = iter;
4015  
4016  	while (ent && iter->idx < i)
4017  		ent = trace_find_next_entry_inc(iter);
4018  
4019  	iter->pos = *pos;
4020  
4021  	return ent;
4022  }
4023  
tracing_iter_reset(struct trace_iterator * iter,int cpu)4024  void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4025  {
4026  	struct ring_buffer_iter *buf_iter;
4027  	unsigned long entries = 0;
4028  	u64 ts;
4029  
4030  	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4031  
4032  	buf_iter = trace_buffer_iter(iter, cpu);
4033  	if (!buf_iter)
4034  		return;
4035  
4036  	ring_buffer_iter_reset(buf_iter);
4037  
4038  	/*
4039  	 * We could have the case with the max latency tracers
4040  	 * that a reset never took place on a cpu. This is evident
4041  	 * by the timestamp being before the start of the buffer.
4042  	 */
4043  	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4044  		if (ts >= iter->array_buffer->time_start)
4045  			break;
4046  		entries++;
4047  		ring_buffer_iter_advance(buf_iter);
4048  		/* This could be a big loop */
4049  		cond_resched();
4050  	}
4051  
4052  	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4053  }
4054  
4055  /*
4056   * The current tracer is copied to avoid a global locking
4057   * all around.
4058   */
s_start(struct seq_file * m,loff_t * pos)4059  static void *s_start(struct seq_file *m, loff_t *pos)
4060  {
4061  	struct trace_iterator *iter = m->private;
4062  	struct trace_array *tr = iter->tr;
4063  	int cpu_file = iter->cpu_file;
4064  	void *p = NULL;
4065  	loff_t l = 0;
4066  	int cpu;
4067  
4068  	mutex_lock(&trace_types_lock);
4069  	if (unlikely(tr->current_trace != iter->trace)) {
4070  		/* Close iter->trace before switching to the new current tracer */
4071  		if (iter->trace->close)
4072  			iter->trace->close(iter);
4073  		iter->trace = tr->current_trace;
4074  		/* Reopen the new current tracer */
4075  		if (iter->trace->open)
4076  			iter->trace->open(iter);
4077  	}
4078  	mutex_unlock(&trace_types_lock);
4079  
4080  #ifdef CONFIG_TRACER_MAX_TRACE
4081  	if (iter->snapshot && iter->trace->use_max_tr)
4082  		return ERR_PTR(-EBUSY);
4083  #endif
4084  
4085  	if (*pos != iter->pos) {
4086  		iter->ent = NULL;
4087  		iter->cpu = 0;
4088  		iter->idx = -1;
4089  
4090  		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4091  			for_each_tracing_cpu(cpu)
4092  				tracing_iter_reset(iter, cpu);
4093  		} else
4094  			tracing_iter_reset(iter, cpu_file);
4095  
4096  		iter->leftover = 0;
4097  		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4098  			;
4099  
4100  	} else {
4101  		/*
4102  		 * If we overflowed the seq_file before, then we want
4103  		 * to just reuse the trace_seq buffer again.
4104  		 */
4105  		if (iter->leftover)
4106  			p = iter;
4107  		else {
4108  			l = *pos - 1;
4109  			p = s_next(m, p, &l);
4110  		}
4111  	}
4112  
4113  	trace_event_read_lock();
4114  	trace_access_lock(cpu_file);
4115  	return p;
4116  }
4117  
s_stop(struct seq_file * m,void * p)4118  static void s_stop(struct seq_file *m, void *p)
4119  {
4120  	struct trace_iterator *iter = m->private;
4121  
4122  #ifdef CONFIG_TRACER_MAX_TRACE
4123  	if (iter->snapshot && iter->trace->use_max_tr)
4124  		return;
4125  #endif
4126  
4127  	trace_access_unlock(iter->cpu_file);
4128  	trace_event_read_unlock();
4129  }
4130  
4131  static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)4132  get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4133  		      unsigned long *entries, int cpu)
4134  {
4135  	unsigned long count;
4136  
4137  	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4138  	/*
4139  	 * If this buffer has skipped entries, then we hold all
4140  	 * entries for the trace and we need to ignore the
4141  	 * ones before the time stamp.
4142  	 */
4143  	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4144  		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4145  		/* total is the same as the entries */
4146  		*total = count;
4147  	} else
4148  		*total = count +
4149  			ring_buffer_overrun_cpu(buf->buffer, cpu);
4150  	*entries = count;
4151  }
4152  
4153  static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)4154  get_total_entries(struct array_buffer *buf,
4155  		  unsigned long *total, unsigned long *entries)
4156  {
4157  	unsigned long t, e;
4158  	int cpu;
4159  
4160  	*total = 0;
4161  	*entries = 0;
4162  
4163  	for_each_tracing_cpu(cpu) {
4164  		get_total_entries_cpu(buf, &t, &e, cpu);
4165  		*total += t;
4166  		*entries += e;
4167  	}
4168  }
4169  
trace_total_entries_cpu(struct trace_array * tr,int cpu)4170  unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4171  {
4172  	unsigned long total, entries;
4173  
4174  	if (!tr)
4175  		tr = &global_trace;
4176  
4177  	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4178  
4179  	return entries;
4180  }
4181  
trace_total_entries(struct trace_array * tr)4182  unsigned long trace_total_entries(struct trace_array *tr)
4183  {
4184  	unsigned long total, entries;
4185  
4186  	if (!tr)
4187  		tr = &global_trace;
4188  
4189  	get_total_entries(&tr->array_buffer, &total, &entries);
4190  
4191  	return entries;
4192  }
4193  
print_lat_help_header(struct seq_file * m)4194  static void print_lat_help_header(struct seq_file *m)
4195  {
4196  	seq_puts(m, "#                    _------=> CPU#            \n"
4197  		    "#                   / _-----=> irqs-off/BH-disabled\n"
4198  		    "#                  | / _----=> need-resched    \n"
4199  		    "#                  || / _---=> hardirq/softirq \n"
4200  		    "#                  ||| / _--=> preempt-depth   \n"
4201  		    "#                  |||| / _-=> migrate-disable \n"
4202  		    "#                  ||||| /     delay           \n"
4203  		    "#  cmd     pid     |||||| time  |   caller     \n"
4204  		    "#     \\   /        ||||||  \\    |    /       \n");
4205  }
4206  
print_event_info(struct array_buffer * buf,struct seq_file * m)4207  static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4208  {
4209  	unsigned long total;
4210  	unsigned long entries;
4211  
4212  	get_total_entries(buf, &total, &entries);
4213  	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4214  		   entries, total, num_online_cpus());
4215  	seq_puts(m, "#\n");
4216  }
4217  
print_func_help_header(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4218  static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4219  				   unsigned int flags)
4220  {
4221  	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4222  
4223  	print_event_info(buf, m);
4224  
4225  	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4226  	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4227  }
4228  
print_func_help_header_irq(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4229  static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4230  				       unsigned int flags)
4231  {
4232  	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4233  	static const char space[] = "            ";
4234  	int prec = tgid ? 12 : 2;
4235  
4236  	print_event_info(buf, m);
4237  
4238  	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4239  	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4240  	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4241  	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4242  	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4243  	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4244  	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4245  	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4246  }
4247  
4248  void
print_trace_header(struct seq_file * m,struct trace_iterator * iter)4249  print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4250  {
4251  	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4252  	struct array_buffer *buf = iter->array_buffer;
4253  	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4254  	struct tracer *type = iter->trace;
4255  	unsigned long entries;
4256  	unsigned long total;
4257  	const char *name = type->name;
4258  
4259  	get_total_entries(buf, &total, &entries);
4260  
4261  	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4262  		   name, UTS_RELEASE);
4263  	seq_puts(m, "# -----------------------------------"
4264  		 "---------------------------------\n");
4265  	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4266  		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4267  		   nsecs_to_usecs(data->saved_latency),
4268  		   entries,
4269  		   total,
4270  		   buf->cpu,
4271  		   preempt_model_none()      ? "server" :
4272  		   preempt_model_voluntary() ? "desktop" :
4273  		   preempt_model_full()      ? "preempt" :
4274  		   preempt_model_rt()        ? "preempt_rt" :
4275  		   "unknown",
4276  		   /* These are reserved for later use */
4277  		   0, 0, 0, 0);
4278  #ifdef CONFIG_SMP
4279  	seq_printf(m, " #P:%d)\n", num_online_cpus());
4280  #else
4281  	seq_puts(m, ")\n");
4282  #endif
4283  	seq_puts(m, "#    -----------------\n");
4284  	seq_printf(m, "#    | task: %.16s-%d "
4285  		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4286  		   data->comm, data->pid,
4287  		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4288  		   data->policy, data->rt_priority);
4289  	seq_puts(m, "#    -----------------\n");
4290  
4291  	if (data->critical_start) {
4292  		seq_puts(m, "#  => started at: ");
4293  		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4294  		trace_print_seq(m, &iter->seq);
4295  		seq_puts(m, "\n#  => ended at:   ");
4296  		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4297  		trace_print_seq(m, &iter->seq);
4298  		seq_puts(m, "\n#\n");
4299  	}
4300  
4301  	seq_puts(m, "#\n");
4302  }
4303  
test_cpu_buff_start(struct trace_iterator * iter)4304  static void test_cpu_buff_start(struct trace_iterator *iter)
4305  {
4306  	struct trace_seq *s = &iter->seq;
4307  	struct trace_array *tr = iter->tr;
4308  
4309  	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4310  		return;
4311  
4312  	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4313  		return;
4314  
4315  	if (cpumask_available(iter->started) &&
4316  	    cpumask_test_cpu(iter->cpu, iter->started))
4317  		return;
4318  
4319  	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4320  		return;
4321  
4322  	if (cpumask_available(iter->started))
4323  		cpumask_set_cpu(iter->cpu, iter->started);
4324  
4325  	/* Don't print started cpu buffer for the first entry of the trace */
4326  	if (iter->idx > 1)
4327  		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4328  				iter->cpu);
4329  }
4330  
print_trace_fmt(struct trace_iterator * iter)4331  static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4332  {
4333  	struct trace_array *tr = iter->tr;
4334  	struct trace_seq *s = &iter->seq;
4335  	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4336  	struct trace_entry *entry;
4337  	struct trace_event *event;
4338  
4339  	entry = iter->ent;
4340  
4341  	test_cpu_buff_start(iter);
4342  
4343  	event = ftrace_find_event(entry->type);
4344  
4345  	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4346  		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4347  			trace_print_lat_context(iter);
4348  		else
4349  			trace_print_context(iter);
4350  	}
4351  
4352  	if (trace_seq_has_overflowed(s))
4353  		return TRACE_TYPE_PARTIAL_LINE;
4354  
4355  	if (event) {
4356  		if (tr->trace_flags & TRACE_ITER_FIELDS)
4357  			return print_event_fields(iter, event);
4358  		return event->funcs->trace(iter, sym_flags, event);
4359  	}
4360  
4361  	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4362  
4363  	return trace_handle_return(s);
4364  }
4365  
print_raw_fmt(struct trace_iterator * iter)4366  static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4367  {
4368  	struct trace_array *tr = iter->tr;
4369  	struct trace_seq *s = &iter->seq;
4370  	struct trace_entry *entry;
4371  	struct trace_event *event;
4372  
4373  	entry = iter->ent;
4374  
4375  	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4376  		trace_seq_printf(s, "%d %d %llu ",
4377  				 entry->pid, iter->cpu, iter->ts);
4378  
4379  	if (trace_seq_has_overflowed(s))
4380  		return TRACE_TYPE_PARTIAL_LINE;
4381  
4382  	event = ftrace_find_event(entry->type);
4383  	if (event)
4384  		return event->funcs->raw(iter, 0, event);
4385  
4386  	trace_seq_printf(s, "%d ?\n", entry->type);
4387  
4388  	return trace_handle_return(s);
4389  }
4390  
print_hex_fmt(struct trace_iterator * iter)4391  static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4392  {
4393  	struct trace_array *tr = iter->tr;
4394  	struct trace_seq *s = &iter->seq;
4395  	unsigned char newline = '\n';
4396  	struct trace_entry *entry;
4397  	struct trace_event *event;
4398  
4399  	entry = iter->ent;
4400  
4401  	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4402  		SEQ_PUT_HEX_FIELD(s, entry->pid);
4403  		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4404  		SEQ_PUT_HEX_FIELD(s, iter->ts);
4405  		if (trace_seq_has_overflowed(s))
4406  			return TRACE_TYPE_PARTIAL_LINE;
4407  	}
4408  
4409  	event = ftrace_find_event(entry->type);
4410  	if (event) {
4411  		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4412  		if (ret != TRACE_TYPE_HANDLED)
4413  			return ret;
4414  	}
4415  
4416  	SEQ_PUT_FIELD(s, newline);
4417  
4418  	return trace_handle_return(s);
4419  }
4420  
print_bin_fmt(struct trace_iterator * iter)4421  static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4422  {
4423  	struct trace_array *tr = iter->tr;
4424  	struct trace_seq *s = &iter->seq;
4425  	struct trace_entry *entry;
4426  	struct trace_event *event;
4427  
4428  	entry = iter->ent;
4429  
4430  	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4431  		SEQ_PUT_FIELD(s, entry->pid);
4432  		SEQ_PUT_FIELD(s, iter->cpu);
4433  		SEQ_PUT_FIELD(s, iter->ts);
4434  		if (trace_seq_has_overflowed(s))
4435  			return TRACE_TYPE_PARTIAL_LINE;
4436  	}
4437  
4438  	event = ftrace_find_event(entry->type);
4439  	return event ? event->funcs->binary(iter, 0, event) :
4440  		TRACE_TYPE_HANDLED;
4441  }
4442  
trace_empty(struct trace_iterator * iter)4443  int trace_empty(struct trace_iterator *iter)
4444  {
4445  	struct ring_buffer_iter *buf_iter;
4446  	int cpu;
4447  
4448  	/* If we are looking at one CPU buffer, only check that one */
4449  	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4450  		cpu = iter->cpu_file;
4451  		buf_iter = trace_buffer_iter(iter, cpu);
4452  		if (buf_iter) {
4453  			if (!ring_buffer_iter_empty(buf_iter))
4454  				return 0;
4455  		} else {
4456  			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4457  				return 0;
4458  		}
4459  		return 1;
4460  	}
4461  
4462  	for_each_tracing_cpu(cpu) {
4463  		buf_iter = trace_buffer_iter(iter, cpu);
4464  		if (buf_iter) {
4465  			if (!ring_buffer_iter_empty(buf_iter))
4466  				return 0;
4467  		} else {
4468  			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4469  				return 0;
4470  		}
4471  	}
4472  
4473  	return 1;
4474  }
4475  
4476  /*  Called with trace_event_read_lock() held. */
print_trace_line(struct trace_iterator * iter)4477  enum print_line_t print_trace_line(struct trace_iterator *iter)
4478  {
4479  	struct trace_array *tr = iter->tr;
4480  	unsigned long trace_flags = tr->trace_flags;
4481  	enum print_line_t ret;
4482  
4483  	if (iter->lost_events) {
4484  		if (iter->lost_events == (unsigned long)-1)
4485  			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4486  					 iter->cpu);
4487  		else
4488  			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4489  					 iter->cpu, iter->lost_events);
4490  		if (trace_seq_has_overflowed(&iter->seq))
4491  			return TRACE_TYPE_PARTIAL_LINE;
4492  	}
4493  
4494  	if (iter->trace && iter->trace->print_line) {
4495  		ret = iter->trace->print_line(iter);
4496  		if (ret != TRACE_TYPE_UNHANDLED)
4497  			return ret;
4498  	}
4499  
4500  	if (iter->ent->type == TRACE_BPUTS &&
4501  			trace_flags & TRACE_ITER_PRINTK &&
4502  			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4503  		return trace_print_bputs_msg_only(iter);
4504  
4505  	if (iter->ent->type == TRACE_BPRINT &&
4506  			trace_flags & TRACE_ITER_PRINTK &&
4507  			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4508  		return trace_print_bprintk_msg_only(iter);
4509  
4510  	if (iter->ent->type == TRACE_PRINT &&
4511  			trace_flags & TRACE_ITER_PRINTK &&
4512  			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4513  		return trace_print_printk_msg_only(iter);
4514  
4515  	if (trace_flags & TRACE_ITER_BIN)
4516  		return print_bin_fmt(iter);
4517  
4518  	if (trace_flags & TRACE_ITER_HEX)
4519  		return print_hex_fmt(iter);
4520  
4521  	if (trace_flags & TRACE_ITER_RAW)
4522  		return print_raw_fmt(iter);
4523  
4524  	return print_trace_fmt(iter);
4525  }
4526  
trace_latency_header(struct seq_file * m)4527  void trace_latency_header(struct seq_file *m)
4528  {
4529  	struct trace_iterator *iter = m->private;
4530  	struct trace_array *tr = iter->tr;
4531  
4532  	/* print nothing if the buffers are empty */
4533  	if (trace_empty(iter))
4534  		return;
4535  
4536  	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4537  		print_trace_header(m, iter);
4538  
4539  	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4540  		print_lat_help_header(m);
4541  }
4542  
trace_default_header(struct seq_file * m)4543  void trace_default_header(struct seq_file *m)
4544  {
4545  	struct trace_iterator *iter = m->private;
4546  	struct trace_array *tr = iter->tr;
4547  	unsigned long trace_flags = tr->trace_flags;
4548  
4549  	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4550  		return;
4551  
4552  	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4553  		/* print nothing if the buffers are empty */
4554  		if (trace_empty(iter))
4555  			return;
4556  		print_trace_header(m, iter);
4557  		if (!(trace_flags & TRACE_ITER_VERBOSE))
4558  			print_lat_help_header(m);
4559  	} else {
4560  		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4561  			if (trace_flags & TRACE_ITER_IRQ_INFO)
4562  				print_func_help_header_irq(iter->array_buffer,
4563  							   m, trace_flags);
4564  			else
4565  				print_func_help_header(iter->array_buffer, m,
4566  						       trace_flags);
4567  		}
4568  	}
4569  }
4570  
test_ftrace_alive(struct seq_file * m)4571  static void test_ftrace_alive(struct seq_file *m)
4572  {
4573  	if (!ftrace_is_dead())
4574  		return;
4575  	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4576  		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4577  }
4578  
4579  #ifdef CONFIG_TRACER_MAX_TRACE
show_snapshot_main_help(struct seq_file * m)4580  static void show_snapshot_main_help(struct seq_file *m)
4581  {
4582  	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4583  		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4584  		    "#                      Takes a snapshot of the main buffer.\n"
4585  		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4586  		    "#                      (Doesn't have to be '2' works with any number that\n"
4587  		    "#                       is not a '0' or '1')\n");
4588  }
4589  
show_snapshot_percpu_help(struct seq_file * m)4590  static void show_snapshot_percpu_help(struct seq_file *m)
4591  {
4592  	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4593  #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4594  	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4595  		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4596  #else
4597  	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4598  		    "#                     Must use main snapshot file to allocate.\n");
4599  #endif
4600  	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4601  		    "#                      (Doesn't have to be '2' works with any number that\n"
4602  		    "#                       is not a '0' or '1')\n");
4603  }
4604  
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4605  static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4606  {
4607  	if (iter->tr->allocated_snapshot)
4608  		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4609  	else
4610  		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4611  
4612  	seq_puts(m, "# Snapshot commands:\n");
4613  	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4614  		show_snapshot_main_help(m);
4615  	else
4616  		show_snapshot_percpu_help(m);
4617  }
4618  #else
4619  /* Should never be called */
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4620  static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4621  #endif
4622  
s_show(struct seq_file * m,void * v)4623  static int s_show(struct seq_file *m, void *v)
4624  {
4625  	struct trace_iterator *iter = v;
4626  	int ret;
4627  
4628  	if (iter->ent == NULL) {
4629  		if (iter->tr) {
4630  			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4631  			seq_puts(m, "#\n");
4632  			test_ftrace_alive(m);
4633  		}
4634  		if (iter->snapshot && trace_empty(iter))
4635  			print_snapshot_help(m, iter);
4636  		else if (iter->trace && iter->trace->print_header)
4637  			iter->trace->print_header(m);
4638  		else
4639  			trace_default_header(m);
4640  
4641  	} else if (iter->leftover) {
4642  		/*
4643  		 * If we filled the seq_file buffer earlier, we
4644  		 * want to just show it now.
4645  		 */
4646  		ret = trace_print_seq(m, &iter->seq);
4647  
4648  		/* ret should this time be zero, but you never know */
4649  		iter->leftover = ret;
4650  
4651  	} else {
4652  		ret = print_trace_line(iter);
4653  		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4654  			iter->seq.full = 0;
4655  			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4656  		}
4657  		ret = trace_print_seq(m, &iter->seq);
4658  		/*
4659  		 * If we overflow the seq_file buffer, then it will
4660  		 * ask us for this data again at start up.
4661  		 * Use that instead.
4662  		 *  ret is 0 if seq_file write succeeded.
4663  		 *        -1 otherwise.
4664  		 */
4665  		iter->leftover = ret;
4666  	}
4667  
4668  	return 0;
4669  }
4670  
4671  /*
4672   * Should be used after trace_array_get(), trace_types_lock
4673   * ensures that i_cdev was already initialized.
4674   */
tracing_get_cpu(struct inode * inode)4675  static inline int tracing_get_cpu(struct inode *inode)
4676  {
4677  	if (inode->i_cdev) /* See trace_create_cpu_file() */
4678  		return (long)inode->i_cdev - 1;
4679  	return RING_BUFFER_ALL_CPUS;
4680  }
4681  
4682  static const struct seq_operations tracer_seq_ops = {
4683  	.start		= s_start,
4684  	.next		= s_next,
4685  	.stop		= s_stop,
4686  	.show		= s_show,
4687  };
4688  
4689  /*
4690   * Note, as iter itself can be allocated and freed in different
4691   * ways, this function is only used to free its content, and not
4692   * the iterator itself. The only requirement to all the allocations
4693   * is that it must zero all fields (kzalloc), as freeing works with
4694   * ethier allocated content or NULL.
4695   */
free_trace_iter_content(struct trace_iterator * iter)4696  static void free_trace_iter_content(struct trace_iterator *iter)
4697  {
4698  	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4699  	if (iter->fmt != static_fmt_buf)
4700  		kfree(iter->fmt);
4701  
4702  	kfree(iter->temp);
4703  	kfree(iter->buffer_iter);
4704  	mutex_destroy(&iter->mutex);
4705  	free_cpumask_var(iter->started);
4706  }
4707  
4708  static struct trace_iterator *
__tracing_open(struct inode * inode,struct file * file,bool snapshot)4709  __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4710  {
4711  	struct trace_array *tr = inode->i_private;
4712  	struct trace_iterator *iter;
4713  	int cpu;
4714  
4715  	if (tracing_disabled)
4716  		return ERR_PTR(-ENODEV);
4717  
4718  	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4719  	if (!iter)
4720  		return ERR_PTR(-ENOMEM);
4721  
4722  	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4723  				    GFP_KERNEL);
4724  	if (!iter->buffer_iter)
4725  		goto release;
4726  
4727  	/*
4728  	 * trace_find_next_entry() may need to save off iter->ent.
4729  	 * It will place it into the iter->temp buffer. As most
4730  	 * events are less than 128, allocate a buffer of that size.
4731  	 * If one is greater, then trace_find_next_entry() will
4732  	 * allocate a new buffer to adjust for the bigger iter->ent.
4733  	 * It's not critical if it fails to get allocated here.
4734  	 */
4735  	iter->temp = kmalloc(128, GFP_KERNEL);
4736  	if (iter->temp)
4737  		iter->temp_size = 128;
4738  
4739  	/*
4740  	 * trace_event_printf() may need to modify given format
4741  	 * string to replace %p with %px so that it shows real address
4742  	 * instead of hash value. However, that is only for the event
4743  	 * tracing, other tracer may not need. Defer the allocation
4744  	 * until it is needed.
4745  	 */
4746  	iter->fmt = NULL;
4747  	iter->fmt_size = 0;
4748  
4749  	mutex_lock(&trace_types_lock);
4750  	iter->trace = tr->current_trace;
4751  
4752  	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4753  		goto fail;
4754  
4755  	iter->tr = tr;
4756  
4757  #ifdef CONFIG_TRACER_MAX_TRACE
4758  	/* Currently only the top directory has a snapshot */
4759  	if (tr->current_trace->print_max || snapshot)
4760  		iter->array_buffer = &tr->max_buffer;
4761  	else
4762  #endif
4763  		iter->array_buffer = &tr->array_buffer;
4764  	iter->snapshot = snapshot;
4765  	iter->pos = -1;
4766  	iter->cpu_file = tracing_get_cpu(inode);
4767  	mutex_init(&iter->mutex);
4768  
4769  	/* Notify the tracer early; before we stop tracing. */
4770  	if (iter->trace->open)
4771  		iter->trace->open(iter);
4772  
4773  	/* Annotate start of buffers if we had overruns */
4774  	if (ring_buffer_overruns(iter->array_buffer->buffer))
4775  		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4776  
4777  	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4778  	if (trace_clocks[tr->clock_id].in_ns)
4779  		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4780  
4781  	/*
4782  	 * If pause-on-trace is enabled, then stop the trace while
4783  	 * dumping, unless this is the "snapshot" file
4784  	 */
4785  	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4786  		tracing_stop_tr(tr);
4787  
4788  	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4789  		for_each_tracing_cpu(cpu) {
4790  			iter->buffer_iter[cpu] =
4791  				ring_buffer_read_prepare(iter->array_buffer->buffer,
4792  							 cpu, GFP_KERNEL);
4793  		}
4794  		ring_buffer_read_prepare_sync();
4795  		for_each_tracing_cpu(cpu) {
4796  			ring_buffer_read_start(iter->buffer_iter[cpu]);
4797  			tracing_iter_reset(iter, cpu);
4798  		}
4799  	} else {
4800  		cpu = iter->cpu_file;
4801  		iter->buffer_iter[cpu] =
4802  			ring_buffer_read_prepare(iter->array_buffer->buffer,
4803  						 cpu, GFP_KERNEL);
4804  		ring_buffer_read_prepare_sync();
4805  		ring_buffer_read_start(iter->buffer_iter[cpu]);
4806  		tracing_iter_reset(iter, cpu);
4807  	}
4808  
4809  	mutex_unlock(&trace_types_lock);
4810  
4811  	return iter;
4812  
4813   fail:
4814  	mutex_unlock(&trace_types_lock);
4815  	free_trace_iter_content(iter);
4816  release:
4817  	seq_release_private(inode, file);
4818  	return ERR_PTR(-ENOMEM);
4819  }
4820  
tracing_open_generic(struct inode * inode,struct file * filp)4821  int tracing_open_generic(struct inode *inode, struct file *filp)
4822  {
4823  	int ret;
4824  
4825  	ret = tracing_check_open_get_tr(NULL);
4826  	if (ret)
4827  		return ret;
4828  
4829  	filp->private_data = inode->i_private;
4830  	return 0;
4831  }
4832  
tracing_is_disabled(void)4833  bool tracing_is_disabled(void)
4834  {
4835  	return (tracing_disabled) ? true: false;
4836  }
4837  
4838  /*
4839   * Open and update trace_array ref count.
4840   * Must have the current trace_array passed to it.
4841   */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4842  int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4843  {
4844  	struct trace_array *tr = inode->i_private;
4845  	int ret;
4846  
4847  	ret = tracing_check_open_get_tr(tr);
4848  	if (ret)
4849  		return ret;
4850  
4851  	filp->private_data = inode->i_private;
4852  
4853  	return 0;
4854  }
4855  
4856  /*
4857   * The private pointer of the inode is the trace_event_file.
4858   * Update the tr ref count associated to it.
4859   */
tracing_open_file_tr(struct inode * inode,struct file * filp)4860  int tracing_open_file_tr(struct inode *inode, struct file *filp)
4861  {
4862  	struct trace_event_file *file = inode->i_private;
4863  	int ret;
4864  
4865  	ret = tracing_check_open_get_tr(file->tr);
4866  	if (ret)
4867  		return ret;
4868  
4869  	mutex_lock(&event_mutex);
4870  
4871  	/* Fail if the file is marked for removal */
4872  	if (file->flags & EVENT_FILE_FL_FREED) {
4873  		trace_array_put(file->tr);
4874  		ret = -ENODEV;
4875  	} else {
4876  		event_file_get(file);
4877  	}
4878  
4879  	mutex_unlock(&event_mutex);
4880  	if (ret)
4881  		return ret;
4882  
4883  	filp->private_data = inode->i_private;
4884  
4885  	return 0;
4886  }
4887  
tracing_release_file_tr(struct inode * inode,struct file * filp)4888  int tracing_release_file_tr(struct inode *inode, struct file *filp)
4889  {
4890  	struct trace_event_file *file = inode->i_private;
4891  
4892  	trace_array_put(file->tr);
4893  	event_file_put(file);
4894  
4895  	return 0;
4896  }
4897  
tracing_single_release_file_tr(struct inode * inode,struct file * filp)4898  int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4899  {
4900  	tracing_release_file_tr(inode, filp);
4901  	return single_release(inode, filp);
4902  }
4903  
tracing_mark_open(struct inode * inode,struct file * filp)4904  static int tracing_mark_open(struct inode *inode, struct file *filp)
4905  {
4906  	stream_open(inode, filp);
4907  	return tracing_open_generic_tr(inode, filp);
4908  }
4909  
tracing_release(struct inode * inode,struct file * file)4910  static int tracing_release(struct inode *inode, struct file *file)
4911  {
4912  	struct trace_array *tr = inode->i_private;
4913  	struct seq_file *m = file->private_data;
4914  	struct trace_iterator *iter;
4915  	int cpu;
4916  
4917  	if (!(file->f_mode & FMODE_READ)) {
4918  		trace_array_put(tr);
4919  		return 0;
4920  	}
4921  
4922  	/* Writes do not use seq_file */
4923  	iter = m->private;
4924  	mutex_lock(&trace_types_lock);
4925  
4926  	for_each_tracing_cpu(cpu) {
4927  		if (iter->buffer_iter[cpu])
4928  			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4929  	}
4930  
4931  	if (iter->trace && iter->trace->close)
4932  		iter->trace->close(iter);
4933  
4934  	if (!iter->snapshot && tr->stop_count)
4935  		/* reenable tracing if it was previously enabled */
4936  		tracing_start_tr(tr);
4937  
4938  	__trace_array_put(tr);
4939  
4940  	mutex_unlock(&trace_types_lock);
4941  
4942  	free_trace_iter_content(iter);
4943  	seq_release_private(inode, file);
4944  
4945  	return 0;
4946  }
4947  
tracing_release_generic_tr(struct inode * inode,struct file * file)4948  static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4949  {
4950  	struct trace_array *tr = inode->i_private;
4951  
4952  	trace_array_put(tr);
4953  	return 0;
4954  }
4955  
tracing_single_release_tr(struct inode * inode,struct file * file)4956  static int tracing_single_release_tr(struct inode *inode, struct file *file)
4957  {
4958  	struct trace_array *tr = inode->i_private;
4959  
4960  	trace_array_put(tr);
4961  
4962  	return single_release(inode, file);
4963  }
4964  
tracing_open(struct inode * inode,struct file * file)4965  static int tracing_open(struct inode *inode, struct file *file)
4966  {
4967  	struct trace_array *tr = inode->i_private;
4968  	struct trace_iterator *iter;
4969  	int ret;
4970  
4971  	ret = tracing_check_open_get_tr(tr);
4972  	if (ret)
4973  		return ret;
4974  
4975  	/* If this file was open for write, then erase contents */
4976  	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4977  		int cpu = tracing_get_cpu(inode);
4978  		struct array_buffer *trace_buf = &tr->array_buffer;
4979  
4980  #ifdef CONFIG_TRACER_MAX_TRACE
4981  		if (tr->current_trace->print_max)
4982  			trace_buf = &tr->max_buffer;
4983  #endif
4984  
4985  		if (cpu == RING_BUFFER_ALL_CPUS)
4986  			tracing_reset_online_cpus(trace_buf);
4987  		else
4988  			tracing_reset_cpu(trace_buf, cpu);
4989  	}
4990  
4991  	if (file->f_mode & FMODE_READ) {
4992  		iter = __tracing_open(inode, file, false);
4993  		if (IS_ERR(iter))
4994  			ret = PTR_ERR(iter);
4995  		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4996  			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4997  	}
4998  
4999  	if (ret < 0)
5000  		trace_array_put(tr);
5001  
5002  	return ret;
5003  }
5004  
5005  /*
5006   * Some tracers are not suitable for instance buffers.
5007   * A tracer is always available for the global array (toplevel)
5008   * or if it explicitly states that it is.
5009   */
5010  static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)5011  trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5012  {
5013  	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5014  }
5015  
5016  /* Find the next tracer that this trace array may use */
5017  static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)5018  get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5019  {
5020  	while (t && !trace_ok_for_array(t, tr))
5021  		t = t->next;
5022  
5023  	return t;
5024  }
5025  
5026  static void *
t_next(struct seq_file * m,void * v,loff_t * pos)5027  t_next(struct seq_file *m, void *v, loff_t *pos)
5028  {
5029  	struct trace_array *tr = m->private;
5030  	struct tracer *t = v;
5031  
5032  	(*pos)++;
5033  
5034  	if (t)
5035  		t = get_tracer_for_array(tr, t->next);
5036  
5037  	return t;
5038  }
5039  
t_start(struct seq_file * m,loff_t * pos)5040  static void *t_start(struct seq_file *m, loff_t *pos)
5041  {
5042  	struct trace_array *tr = m->private;
5043  	struct tracer *t;
5044  	loff_t l = 0;
5045  
5046  	mutex_lock(&trace_types_lock);
5047  
5048  	t = get_tracer_for_array(tr, trace_types);
5049  	for (; t && l < *pos; t = t_next(m, t, &l))
5050  			;
5051  
5052  	return t;
5053  }
5054  
t_stop(struct seq_file * m,void * p)5055  static void t_stop(struct seq_file *m, void *p)
5056  {
5057  	mutex_unlock(&trace_types_lock);
5058  }
5059  
t_show(struct seq_file * m,void * v)5060  static int t_show(struct seq_file *m, void *v)
5061  {
5062  	struct tracer *t = v;
5063  
5064  	if (!t)
5065  		return 0;
5066  
5067  	seq_puts(m, t->name);
5068  	if (t->next)
5069  		seq_putc(m, ' ');
5070  	else
5071  		seq_putc(m, '\n');
5072  
5073  	return 0;
5074  }
5075  
5076  static const struct seq_operations show_traces_seq_ops = {
5077  	.start		= t_start,
5078  	.next		= t_next,
5079  	.stop		= t_stop,
5080  	.show		= t_show,
5081  };
5082  
show_traces_open(struct inode * inode,struct file * file)5083  static int show_traces_open(struct inode *inode, struct file *file)
5084  {
5085  	struct trace_array *tr = inode->i_private;
5086  	struct seq_file *m;
5087  	int ret;
5088  
5089  	ret = tracing_check_open_get_tr(tr);
5090  	if (ret)
5091  		return ret;
5092  
5093  	ret = seq_open(file, &show_traces_seq_ops);
5094  	if (ret) {
5095  		trace_array_put(tr);
5096  		return ret;
5097  	}
5098  
5099  	m = file->private_data;
5100  	m->private = tr;
5101  
5102  	return 0;
5103  }
5104  
show_traces_release(struct inode * inode,struct file * file)5105  static int show_traces_release(struct inode *inode, struct file *file)
5106  {
5107  	struct trace_array *tr = inode->i_private;
5108  
5109  	trace_array_put(tr);
5110  	return seq_release(inode, file);
5111  }
5112  
5113  static ssize_t
tracing_write_stub(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5114  tracing_write_stub(struct file *filp, const char __user *ubuf,
5115  		   size_t count, loff_t *ppos)
5116  {
5117  	return count;
5118  }
5119  
tracing_lseek(struct file * file,loff_t offset,int whence)5120  loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5121  {
5122  	int ret;
5123  
5124  	if (file->f_mode & FMODE_READ)
5125  		ret = seq_lseek(file, offset, whence);
5126  	else
5127  		file->f_pos = ret = 0;
5128  
5129  	return ret;
5130  }
5131  
5132  static const struct file_operations tracing_fops = {
5133  	.open		= tracing_open,
5134  	.read		= seq_read,
5135  	.read_iter	= seq_read_iter,
5136  	.splice_read	= copy_splice_read,
5137  	.write		= tracing_write_stub,
5138  	.llseek		= tracing_lseek,
5139  	.release	= tracing_release,
5140  };
5141  
5142  static const struct file_operations show_traces_fops = {
5143  	.open		= show_traces_open,
5144  	.read		= seq_read,
5145  	.llseek		= seq_lseek,
5146  	.release	= show_traces_release,
5147  };
5148  
5149  static ssize_t
tracing_cpumask_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)5150  tracing_cpumask_read(struct file *filp, char __user *ubuf,
5151  		     size_t count, loff_t *ppos)
5152  {
5153  	struct trace_array *tr = file_inode(filp)->i_private;
5154  	char *mask_str;
5155  	int len;
5156  
5157  	len = snprintf(NULL, 0, "%*pb\n",
5158  		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5159  	mask_str = kmalloc(len, GFP_KERNEL);
5160  	if (!mask_str)
5161  		return -ENOMEM;
5162  
5163  	len = snprintf(mask_str, len, "%*pb\n",
5164  		       cpumask_pr_args(tr->tracing_cpumask));
5165  	if (len >= count) {
5166  		count = -EINVAL;
5167  		goto out_err;
5168  	}
5169  	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5170  
5171  out_err:
5172  	kfree(mask_str);
5173  
5174  	return count;
5175  }
5176  
tracing_set_cpumask(struct trace_array * tr,cpumask_var_t tracing_cpumask_new)5177  int tracing_set_cpumask(struct trace_array *tr,
5178  			cpumask_var_t tracing_cpumask_new)
5179  {
5180  	int cpu;
5181  
5182  	if (!tr)
5183  		return -EINVAL;
5184  
5185  	local_irq_disable();
5186  	arch_spin_lock(&tr->max_lock);
5187  	for_each_tracing_cpu(cpu) {
5188  		/*
5189  		 * Increase/decrease the disabled counter if we are
5190  		 * about to flip a bit in the cpumask:
5191  		 */
5192  		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5193  				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5194  			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5195  			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5196  #ifdef CONFIG_TRACER_MAX_TRACE
5197  			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5198  #endif
5199  		}
5200  		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5201  				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5202  			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5203  			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5204  #ifdef CONFIG_TRACER_MAX_TRACE
5205  			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5206  #endif
5207  		}
5208  	}
5209  	arch_spin_unlock(&tr->max_lock);
5210  	local_irq_enable();
5211  
5212  	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5213  
5214  	return 0;
5215  }
5216  
5217  static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5218  tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5219  		      size_t count, loff_t *ppos)
5220  {
5221  	struct trace_array *tr = file_inode(filp)->i_private;
5222  	cpumask_var_t tracing_cpumask_new;
5223  	int err;
5224  
5225  	if (count == 0 || count > KMALLOC_MAX_SIZE)
5226  		return -EINVAL;
5227  
5228  	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5229  		return -ENOMEM;
5230  
5231  	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5232  	if (err)
5233  		goto err_free;
5234  
5235  	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5236  	if (err)
5237  		goto err_free;
5238  
5239  	free_cpumask_var(tracing_cpumask_new);
5240  
5241  	return count;
5242  
5243  err_free:
5244  	free_cpumask_var(tracing_cpumask_new);
5245  
5246  	return err;
5247  }
5248  
5249  static const struct file_operations tracing_cpumask_fops = {
5250  	.open		= tracing_open_generic_tr,
5251  	.read		= tracing_cpumask_read,
5252  	.write		= tracing_cpumask_write,
5253  	.release	= tracing_release_generic_tr,
5254  	.llseek		= generic_file_llseek,
5255  };
5256  
tracing_trace_options_show(struct seq_file * m,void * v)5257  static int tracing_trace_options_show(struct seq_file *m, void *v)
5258  {
5259  	struct tracer_opt *trace_opts;
5260  	struct trace_array *tr = m->private;
5261  	u32 tracer_flags;
5262  	int i;
5263  
5264  	mutex_lock(&trace_types_lock);
5265  	tracer_flags = tr->current_trace->flags->val;
5266  	trace_opts = tr->current_trace->flags->opts;
5267  
5268  	for (i = 0; trace_options[i]; i++) {
5269  		if (tr->trace_flags & (1 << i))
5270  			seq_printf(m, "%s\n", trace_options[i]);
5271  		else
5272  			seq_printf(m, "no%s\n", trace_options[i]);
5273  	}
5274  
5275  	for (i = 0; trace_opts[i].name; i++) {
5276  		if (tracer_flags & trace_opts[i].bit)
5277  			seq_printf(m, "%s\n", trace_opts[i].name);
5278  		else
5279  			seq_printf(m, "no%s\n", trace_opts[i].name);
5280  	}
5281  	mutex_unlock(&trace_types_lock);
5282  
5283  	return 0;
5284  }
5285  
__set_tracer_option(struct trace_array * tr,struct tracer_flags * tracer_flags,struct tracer_opt * opts,int neg)5286  static int __set_tracer_option(struct trace_array *tr,
5287  			       struct tracer_flags *tracer_flags,
5288  			       struct tracer_opt *opts, int neg)
5289  {
5290  	struct tracer *trace = tracer_flags->trace;
5291  	int ret;
5292  
5293  	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5294  	if (ret)
5295  		return ret;
5296  
5297  	if (neg)
5298  		tracer_flags->val &= ~opts->bit;
5299  	else
5300  		tracer_flags->val |= opts->bit;
5301  	return 0;
5302  }
5303  
5304  /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)5305  static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5306  {
5307  	struct tracer *trace = tr->current_trace;
5308  	struct tracer_flags *tracer_flags = trace->flags;
5309  	struct tracer_opt *opts = NULL;
5310  	int i;
5311  
5312  	for (i = 0; tracer_flags->opts[i].name; i++) {
5313  		opts = &tracer_flags->opts[i];
5314  
5315  		if (strcmp(cmp, opts->name) == 0)
5316  			return __set_tracer_option(tr, trace->flags, opts, neg);
5317  	}
5318  
5319  	return -EINVAL;
5320  }
5321  
5322  /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u32 mask,int set)5323  int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5324  {
5325  	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5326  		return -1;
5327  
5328  	return 0;
5329  }
5330  
set_tracer_flag(struct trace_array * tr,unsigned int mask,int enabled)5331  int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5332  {
5333  	int *map;
5334  
5335  	if ((mask == TRACE_ITER_RECORD_TGID) ||
5336  	    (mask == TRACE_ITER_RECORD_CMD))
5337  		lockdep_assert_held(&event_mutex);
5338  
5339  	/* do nothing if flag is already set */
5340  	if (!!(tr->trace_flags & mask) == !!enabled)
5341  		return 0;
5342  
5343  	/* Give the tracer a chance to approve the change */
5344  	if (tr->current_trace->flag_changed)
5345  		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5346  			return -EINVAL;
5347  
5348  	if (enabled)
5349  		tr->trace_flags |= mask;
5350  	else
5351  		tr->trace_flags &= ~mask;
5352  
5353  	if (mask == TRACE_ITER_RECORD_CMD)
5354  		trace_event_enable_cmd_record(enabled);
5355  
5356  	if (mask == TRACE_ITER_RECORD_TGID) {
5357  		if (!tgid_map) {
5358  			tgid_map_max = pid_max;
5359  			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5360  				       GFP_KERNEL);
5361  
5362  			/*
5363  			 * Pairs with smp_load_acquire() in
5364  			 * trace_find_tgid_ptr() to ensure that if it observes
5365  			 * the tgid_map we just allocated then it also observes
5366  			 * the corresponding tgid_map_max value.
5367  			 */
5368  			smp_store_release(&tgid_map, map);
5369  		}
5370  		if (!tgid_map) {
5371  			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5372  			return -ENOMEM;
5373  		}
5374  
5375  		trace_event_enable_tgid_record(enabled);
5376  	}
5377  
5378  	if (mask == TRACE_ITER_EVENT_FORK)
5379  		trace_event_follow_fork(tr, enabled);
5380  
5381  	if (mask == TRACE_ITER_FUNC_FORK)
5382  		ftrace_pid_follow_fork(tr, enabled);
5383  
5384  	if (mask == TRACE_ITER_OVERWRITE) {
5385  		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5386  #ifdef CONFIG_TRACER_MAX_TRACE
5387  		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5388  #endif
5389  	}
5390  
5391  	if (mask == TRACE_ITER_PRINTK) {
5392  		trace_printk_start_stop_comm(enabled);
5393  		trace_printk_control(enabled);
5394  	}
5395  
5396  	return 0;
5397  }
5398  
trace_set_options(struct trace_array * tr,char * option)5399  int trace_set_options(struct trace_array *tr, char *option)
5400  {
5401  	char *cmp;
5402  	int neg = 0;
5403  	int ret;
5404  	size_t orig_len = strlen(option);
5405  	int len;
5406  
5407  	cmp = strstrip(option);
5408  
5409  	len = str_has_prefix(cmp, "no");
5410  	if (len)
5411  		neg = 1;
5412  
5413  	cmp += len;
5414  
5415  	mutex_lock(&event_mutex);
5416  	mutex_lock(&trace_types_lock);
5417  
5418  	ret = match_string(trace_options, -1, cmp);
5419  	/* If no option could be set, test the specific tracer options */
5420  	if (ret < 0)
5421  		ret = set_tracer_option(tr, cmp, neg);
5422  	else
5423  		ret = set_tracer_flag(tr, 1 << ret, !neg);
5424  
5425  	mutex_unlock(&trace_types_lock);
5426  	mutex_unlock(&event_mutex);
5427  
5428  	/*
5429  	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5430  	 * turn it back into a space.
5431  	 */
5432  	if (orig_len > strlen(option))
5433  		option[strlen(option)] = ' ';
5434  
5435  	return ret;
5436  }
5437  
apply_trace_boot_options(void)5438  static void __init apply_trace_boot_options(void)
5439  {
5440  	char *buf = trace_boot_options_buf;
5441  	char *option;
5442  
5443  	while (true) {
5444  		option = strsep(&buf, ",");
5445  
5446  		if (!option)
5447  			break;
5448  
5449  		if (*option)
5450  			trace_set_options(&global_trace, option);
5451  
5452  		/* Put back the comma to allow this to be called again */
5453  		if (buf)
5454  			*(buf - 1) = ',';
5455  	}
5456  }
5457  
5458  static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5459  tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5460  			size_t cnt, loff_t *ppos)
5461  {
5462  	struct seq_file *m = filp->private_data;
5463  	struct trace_array *tr = m->private;
5464  	char buf[64];
5465  	int ret;
5466  
5467  	if (cnt >= sizeof(buf))
5468  		return -EINVAL;
5469  
5470  	if (copy_from_user(buf, ubuf, cnt))
5471  		return -EFAULT;
5472  
5473  	buf[cnt] = 0;
5474  
5475  	ret = trace_set_options(tr, buf);
5476  	if (ret < 0)
5477  		return ret;
5478  
5479  	*ppos += cnt;
5480  
5481  	return cnt;
5482  }
5483  
tracing_trace_options_open(struct inode * inode,struct file * file)5484  static int tracing_trace_options_open(struct inode *inode, struct file *file)
5485  {
5486  	struct trace_array *tr = inode->i_private;
5487  	int ret;
5488  
5489  	ret = tracing_check_open_get_tr(tr);
5490  	if (ret)
5491  		return ret;
5492  
5493  	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5494  	if (ret < 0)
5495  		trace_array_put(tr);
5496  
5497  	return ret;
5498  }
5499  
5500  static const struct file_operations tracing_iter_fops = {
5501  	.open		= tracing_trace_options_open,
5502  	.read		= seq_read,
5503  	.llseek		= seq_lseek,
5504  	.release	= tracing_single_release_tr,
5505  	.write		= tracing_trace_options_write,
5506  };
5507  
5508  static const char readme_msg[] =
5509  	"tracing mini-HOWTO:\n\n"
5510  	"# echo 0 > tracing_on : quick way to disable tracing\n"
5511  	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5512  	" Important files:\n"
5513  	"  trace\t\t\t- The static contents of the buffer\n"
5514  	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5515  	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5516  	"  current_tracer\t- function and latency tracers\n"
5517  	"  available_tracers\t- list of configured tracers for current_tracer\n"
5518  	"  error_log\t- error log for failed commands (that support it)\n"
5519  	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5520  	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5521  	"  trace_clock\t\t- change the clock used to order events\n"
5522  	"       local:   Per cpu clock but may not be synced across CPUs\n"
5523  	"      global:   Synced across CPUs but slows tracing down.\n"
5524  	"     counter:   Not a clock, but just an increment\n"
5525  	"      uptime:   Jiffy counter from time of boot\n"
5526  	"        perf:   Same clock that perf events use\n"
5527  #ifdef CONFIG_X86_64
5528  	"     x86-tsc:   TSC cycle counter\n"
5529  #endif
5530  	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5531  	"       delta:   Delta difference against a buffer-wide timestamp\n"
5532  	"    absolute:   Absolute (standalone) timestamp\n"
5533  	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5534  	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5535  	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5536  	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5537  	"\t\t\t  Remove sub-buffer with rmdir\n"
5538  	"  trace_options\t\t- Set format or modify how tracing happens\n"
5539  	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5540  	"\t\t\t  option name\n"
5541  	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5542  #ifdef CONFIG_DYNAMIC_FTRACE
5543  	"\n  available_filter_functions - list of functions that can be filtered on\n"
5544  	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5545  	"\t\t\t  functions\n"
5546  	"\t     accepts: func_full_name or glob-matching-pattern\n"
5547  	"\t     modules: Can select a group via module\n"
5548  	"\t      Format: :mod:<module-name>\n"
5549  	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5550  	"\t    triggers: a command to perform when function is hit\n"
5551  	"\t      Format: <function>:<trigger>[:count]\n"
5552  	"\t     trigger: traceon, traceoff\n"
5553  	"\t\t      enable_event:<system>:<event>\n"
5554  	"\t\t      disable_event:<system>:<event>\n"
5555  #ifdef CONFIG_STACKTRACE
5556  	"\t\t      stacktrace\n"
5557  #endif
5558  #ifdef CONFIG_TRACER_SNAPSHOT
5559  	"\t\t      snapshot\n"
5560  #endif
5561  	"\t\t      dump\n"
5562  	"\t\t      cpudump\n"
5563  	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5564  	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5565  	"\t     The first one will disable tracing every time do_fault is hit\n"
5566  	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5567  	"\t       The first time do trap is hit and it disables tracing, the\n"
5568  	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5569  	"\t       the counter will not decrement. It only decrements when the\n"
5570  	"\t       trigger did work\n"
5571  	"\t     To remove trigger without count:\n"
5572  	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5573  	"\t     To remove trigger with a count:\n"
5574  	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5575  	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5576  	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5577  	"\t    modules: Can select a group via module command :mod:\n"
5578  	"\t    Does not accept triggers\n"
5579  #endif /* CONFIG_DYNAMIC_FTRACE */
5580  #ifdef CONFIG_FUNCTION_TRACER
5581  	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5582  	"\t\t    (function)\n"
5583  	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5584  	"\t\t    (function)\n"
5585  #endif
5586  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5587  	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5588  	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5589  	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5590  #endif
5591  #ifdef CONFIG_TRACER_SNAPSHOT
5592  	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5593  	"\t\t\t  snapshot buffer. Read the contents for more\n"
5594  	"\t\t\t  information\n"
5595  #endif
5596  #ifdef CONFIG_STACK_TRACER
5597  	"  stack_trace\t\t- Shows the max stack trace when active\n"
5598  	"  stack_max_size\t- Shows current max stack size that was traced\n"
5599  	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5600  	"\t\t\t  new trace)\n"
5601  #ifdef CONFIG_DYNAMIC_FTRACE
5602  	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5603  	"\t\t\t  traces\n"
5604  #endif
5605  #endif /* CONFIG_STACK_TRACER */
5606  #ifdef CONFIG_DYNAMIC_EVENTS
5607  	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5608  	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5609  #endif
5610  #ifdef CONFIG_KPROBE_EVENTS
5611  	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5612  	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5613  #endif
5614  #ifdef CONFIG_UPROBE_EVENTS
5615  	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5616  	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5617  #endif
5618  #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5619      defined(CONFIG_FPROBE_EVENTS)
5620  	"\t  accepts: event-definitions (one definition per line)\n"
5621  #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5622  	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5623  	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5624  #endif
5625  #ifdef CONFIG_FPROBE_EVENTS
5626  	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5627  	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5628  #endif
5629  #ifdef CONFIG_HIST_TRIGGERS
5630  	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5631  #endif
5632  	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5633  	"\t           -:[<group>/][<event>]\n"
5634  #ifdef CONFIG_KPROBE_EVENTS
5635  	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5636    "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5637  #endif
5638  #ifdef CONFIG_UPROBE_EVENTS
5639    "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5640  #endif
5641  	"\t     args: <name>=fetcharg[:type]\n"
5642  	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5643  #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5644  #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5645  	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5646  	"\t           <argname>[->field[->field|.field...]],\n"
5647  #else
5648  	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5649  #endif
5650  #else
5651  	"\t           $stack<index>, $stack, $retval, $comm,\n"
5652  #endif
5653  	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5654  	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5655  	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5656  	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5657  	"\t           symstr, <type>\\[<array-size>\\]\n"
5658  #ifdef CONFIG_HIST_TRIGGERS
5659  	"\t    field: <stype> <name>;\n"
5660  	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5661  	"\t           [unsigned] char/int/long\n"
5662  #endif
5663  	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5664  	"\t            of the <attached-group>/<attached-event>.\n"
5665  #endif
5666  	"  events/\t\t- Directory containing all trace event subsystems:\n"
5667  	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5668  	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5669  	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5670  	"\t\t\t  events\n"
5671  	"      filter\t\t- If set, only events passing filter are traced\n"
5672  	"  events/<system>/<event>/\t- Directory containing control files for\n"
5673  	"\t\t\t  <event>:\n"
5674  	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5675  	"      filter\t\t- If set, only events passing filter are traced\n"
5676  	"      trigger\t\t- If set, a command to perform when event is hit\n"
5677  	"\t    Format: <trigger>[:count][if <filter>]\n"
5678  	"\t   trigger: traceon, traceoff\n"
5679  	"\t            enable_event:<system>:<event>\n"
5680  	"\t            disable_event:<system>:<event>\n"
5681  #ifdef CONFIG_HIST_TRIGGERS
5682  	"\t            enable_hist:<system>:<event>\n"
5683  	"\t            disable_hist:<system>:<event>\n"
5684  #endif
5685  #ifdef CONFIG_STACKTRACE
5686  	"\t\t    stacktrace\n"
5687  #endif
5688  #ifdef CONFIG_TRACER_SNAPSHOT
5689  	"\t\t    snapshot\n"
5690  #endif
5691  #ifdef CONFIG_HIST_TRIGGERS
5692  	"\t\t    hist (see below)\n"
5693  #endif
5694  	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5695  	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5696  	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5697  	"\t                  events/block/block_unplug/trigger\n"
5698  	"\t   The first disables tracing every time block_unplug is hit.\n"
5699  	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5700  	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5701  	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5702  	"\t   Like function triggers, the counter is only decremented if it\n"
5703  	"\t    enabled or disabled tracing.\n"
5704  	"\t   To remove a trigger without a count:\n"
5705  	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5706  	"\t   To remove a trigger with a count:\n"
5707  	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5708  	"\t   Filters can be ignored when removing a trigger.\n"
5709  #ifdef CONFIG_HIST_TRIGGERS
5710  	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5711  	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5712  	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5713  	"\t            [:values=<field1[,field2,...]>]\n"
5714  	"\t            [:sort=<field1[,field2,...]>]\n"
5715  	"\t            [:size=#entries]\n"
5716  	"\t            [:pause][:continue][:clear]\n"
5717  	"\t            [:name=histname1]\n"
5718  	"\t            [:nohitcount]\n"
5719  	"\t            [:<handler>.<action>]\n"
5720  	"\t            [if <filter>]\n\n"
5721  	"\t    Note, special fields can be used as well:\n"
5722  	"\t            common_timestamp - to record current timestamp\n"
5723  	"\t            common_cpu - to record the CPU the event happened on\n"
5724  	"\n"
5725  	"\t    A hist trigger variable can be:\n"
5726  	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5727  	"\t        - a reference to another variable e.g. y=$x,\n"
5728  	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5729  	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5730  	"\n"
5731  	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5732  	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5733  	"\t    variable reference, field or numeric literal.\n"
5734  	"\n"
5735  	"\t    When a matching event is hit, an entry is added to a hash\n"
5736  	"\t    table using the key(s) and value(s) named, and the value of a\n"
5737  	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5738  	"\t    correspond to fields in the event's format description.  Keys\n"
5739  	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5740  	"\t    Compound keys consisting of up to two fields can be specified\n"
5741  	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5742  	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5743  	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5744  	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5745  	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5746  	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5747  	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5748  	"\t    its histogram data will be shared with other triggers of the\n"
5749  	"\t    same name, and trigger hits will update this common data.\n\n"
5750  	"\t    Reading the 'hist' file for the event will dump the hash\n"
5751  	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5752  	"\t    triggers attached to an event, there will be a table for each\n"
5753  	"\t    trigger in the output.  The table displayed for a named\n"
5754  	"\t    trigger will be the same as any other instance having the\n"
5755  	"\t    same name.  The default format used to display a given field\n"
5756  	"\t    can be modified by appending any of the following modifiers\n"
5757  	"\t    to the field name, as applicable:\n\n"
5758  	"\t            .hex        display a number as a hex value\n"
5759  	"\t            .sym        display an address as a symbol\n"
5760  	"\t            .sym-offset display an address as a symbol and offset\n"
5761  	"\t            .execname   display a common_pid as a program name\n"
5762  	"\t            .syscall    display a syscall id as a syscall name\n"
5763  	"\t            .log2       display log2 value rather than raw number\n"
5764  	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5765  	"\t            .usecs      display a common_timestamp in microseconds\n"
5766  	"\t            .percent    display a number of percentage value\n"
5767  	"\t            .graph      display a bar-graph of a value\n\n"
5768  	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5769  	"\t    trigger or to start a hist trigger but not log any events\n"
5770  	"\t    until told to do so.  'continue' can be used to start or\n"
5771  	"\t    restart a paused hist trigger.\n\n"
5772  	"\t    The 'clear' parameter will clear the contents of a running\n"
5773  	"\t    hist trigger and leave its current paused/active state\n"
5774  	"\t    unchanged.\n\n"
5775  	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5776  	"\t    raw hitcount in the histogram.\n\n"
5777  	"\t    The enable_hist and disable_hist triggers can be used to\n"
5778  	"\t    have one event conditionally start and stop another event's\n"
5779  	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5780  	"\t    the enable_event and disable_event triggers.\n\n"
5781  	"\t    Hist trigger handlers and actions are executed whenever a\n"
5782  	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5783  	"\t        <handler>.<action>\n\n"
5784  	"\t    The available handlers are:\n\n"
5785  	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5786  	"\t        onmax(var)               - invoke if var exceeds current max\n"
5787  	"\t        onchange(var)            - invoke action if var changes\n\n"
5788  	"\t    The available actions are:\n\n"
5789  	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5790  	"\t        save(field,...)                      - save current event fields\n"
5791  #ifdef CONFIG_TRACER_SNAPSHOT
5792  	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5793  #endif
5794  #ifdef CONFIG_SYNTH_EVENTS
5795  	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5796  	"\t  Write into this file to define/undefine new synthetic events.\n"
5797  	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5798  #endif
5799  #endif
5800  ;
5801  
5802  static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5803  tracing_readme_read(struct file *filp, char __user *ubuf,
5804  		       size_t cnt, loff_t *ppos)
5805  {
5806  	return simple_read_from_buffer(ubuf, cnt, ppos,
5807  					readme_msg, strlen(readme_msg));
5808  }
5809  
5810  static const struct file_operations tracing_readme_fops = {
5811  	.open		= tracing_open_generic,
5812  	.read		= tracing_readme_read,
5813  	.llseek		= generic_file_llseek,
5814  };
5815  
saved_tgids_next(struct seq_file * m,void * v,loff_t * pos)5816  static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5817  {
5818  	int pid = ++(*pos);
5819  
5820  	return trace_find_tgid_ptr(pid);
5821  }
5822  
saved_tgids_start(struct seq_file * m,loff_t * pos)5823  static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5824  {
5825  	int pid = *pos;
5826  
5827  	return trace_find_tgid_ptr(pid);
5828  }
5829  
saved_tgids_stop(struct seq_file * m,void * v)5830  static void saved_tgids_stop(struct seq_file *m, void *v)
5831  {
5832  }
5833  
saved_tgids_show(struct seq_file * m,void * v)5834  static int saved_tgids_show(struct seq_file *m, void *v)
5835  {
5836  	int *entry = (int *)v;
5837  	int pid = entry - tgid_map;
5838  	int tgid = *entry;
5839  
5840  	if (tgid == 0)
5841  		return SEQ_SKIP;
5842  
5843  	seq_printf(m, "%d %d\n", pid, tgid);
5844  	return 0;
5845  }
5846  
5847  static const struct seq_operations tracing_saved_tgids_seq_ops = {
5848  	.start		= saved_tgids_start,
5849  	.stop		= saved_tgids_stop,
5850  	.next		= saved_tgids_next,
5851  	.show		= saved_tgids_show,
5852  };
5853  
tracing_saved_tgids_open(struct inode * inode,struct file * filp)5854  static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5855  {
5856  	int ret;
5857  
5858  	ret = tracing_check_open_get_tr(NULL);
5859  	if (ret)
5860  		return ret;
5861  
5862  	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5863  }
5864  
5865  
5866  static const struct file_operations tracing_saved_tgids_fops = {
5867  	.open		= tracing_saved_tgids_open,
5868  	.read		= seq_read,
5869  	.llseek		= seq_lseek,
5870  	.release	= seq_release,
5871  };
5872  
saved_cmdlines_next(struct seq_file * m,void * v,loff_t * pos)5873  static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5874  {
5875  	unsigned int *ptr = v;
5876  
5877  	if (*pos || m->count)
5878  		ptr++;
5879  
5880  	(*pos)++;
5881  
5882  	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5883  	     ptr++) {
5884  		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5885  			continue;
5886  
5887  		return ptr;
5888  	}
5889  
5890  	return NULL;
5891  }
5892  
saved_cmdlines_start(struct seq_file * m,loff_t * pos)5893  static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5894  {
5895  	void *v;
5896  	loff_t l = 0;
5897  
5898  	preempt_disable();
5899  	arch_spin_lock(&trace_cmdline_lock);
5900  
5901  	v = &savedcmd->map_cmdline_to_pid[0];
5902  	while (l <= *pos) {
5903  		v = saved_cmdlines_next(m, v, &l);
5904  		if (!v)
5905  			return NULL;
5906  	}
5907  
5908  	return v;
5909  }
5910  
saved_cmdlines_stop(struct seq_file * m,void * v)5911  static void saved_cmdlines_stop(struct seq_file *m, void *v)
5912  {
5913  	arch_spin_unlock(&trace_cmdline_lock);
5914  	preempt_enable();
5915  }
5916  
saved_cmdlines_show(struct seq_file * m,void * v)5917  static int saved_cmdlines_show(struct seq_file *m, void *v)
5918  {
5919  	char buf[TASK_COMM_LEN];
5920  	unsigned int *pid = v;
5921  
5922  	__trace_find_cmdline(*pid, buf);
5923  	seq_printf(m, "%d %s\n", *pid, buf);
5924  	return 0;
5925  }
5926  
5927  static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5928  	.start		= saved_cmdlines_start,
5929  	.next		= saved_cmdlines_next,
5930  	.stop		= saved_cmdlines_stop,
5931  	.show		= saved_cmdlines_show,
5932  };
5933  
tracing_saved_cmdlines_open(struct inode * inode,struct file * filp)5934  static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5935  {
5936  	int ret;
5937  
5938  	ret = tracing_check_open_get_tr(NULL);
5939  	if (ret)
5940  		return ret;
5941  
5942  	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5943  }
5944  
5945  static const struct file_operations tracing_saved_cmdlines_fops = {
5946  	.open		= tracing_saved_cmdlines_open,
5947  	.read		= seq_read,
5948  	.llseek		= seq_lseek,
5949  	.release	= seq_release,
5950  };
5951  
5952  static ssize_t
tracing_saved_cmdlines_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5953  tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5954  				 size_t cnt, loff_t *ppos)
5955  {
5956  	char buf[64];
5957  	int r;
5958  
5959  	preempt_disable();
5960  	arch_spin_lock(&trace_cmdline_lock);
5961  	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5962  	arch_spin_unlock(&trace_cmdline_lock);
5963  	preempt_enable();
5964  
5965  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5966  }
5967  
tracing_resize_saved_cmdlines(unsigned int val)5968  static int tracing_resize_saved_cmdlines(unsigned int val)
5969  {
5970  	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5971  
5972  	s = allocate_cmdlines_buffer(val);
5973  	if (!s)
5974  		return -ENOMEM;
5975  
5976  	preempt_disable();
5977  	arch_spin_lock(&trace_cmdline_lock);
5978  	savedcmd_temp = savedcmd;
5979  	savedcmd = s;
5980  	arch_spin_unlock(&trace_cmdline_lock);
5981  	preempt_enable();
5982  	free_saved_cmdlines_buffer(savedcmd_temp);
5983  
5984  	return 0;
5985  }
5986  
5987  static ssize_t
tracing_saved_cmdlines_size_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5988  tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5989  				  size_t cnt, loff_t *ppos)
5990  {
5991  	unsigned long val;
5992  	int ret;
5993  
5994  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5995  	if (ret)
5996  		return ret;
5997  
5998  	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
5999  	if (!val || val > PID_MAX_DEFAULT)
6000  		return -EINVAL;
6001  
6002  	ret = tracing_resize_saved_cmdlines((unsigned int)val);
6003  	if (ret < 0)
6004  		return ret;
6005  
6006  	*ppos += cnt;
6007  
6008  	return cnt;
6009  }
6010  
6011  static const struct file_operations tracing_saved_cmdlines_size_fops = {
6012  	.open		= tracing_open_generic,
6013  	.read		= tracing_saved_cmdlines_size_read,
6014  	.write		= tracing_saved_cmdlines_size_write,
6015  };
6016  
6017  #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6018  static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)6019  update_eval_map(union trace_eval_map_item *ptr)
6020  {
6021  	if (!ptr->map.eval_string) {
6022  		if (ptr->tail.next) {
6023  			ptr = ptr->tail.next;
6024  			/* Set ptr to the next real item (skip head) */
6025  			ptr++;
6026  		} else
6027  			return NULL;
6028  	}
6029  	return ptr;
6030  }
6031  
eval_map_next(struct seq_file * m,void * v,loff_t * pos)6032  static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6033  {
6034  	union trace_eval_map_item *ptr = v;
6035  
6036  	/*
6037  	 * Paranoid! If ptr points to end, we don't want to increment past it.
6038  	 * This really should never happen.
6039  	 */
6040  	(*pos)++;
6041  	ptr = update_eval_map(ptr);
6042  	if (WARN_ON_ONCE(!ptr))
6043  		return NULL;
6044  
6045  	ptr++;
6046  	ptr = update_eval_map(ptr);
6047  
6048  	return ptr;
6049  }
6050  
eval_map_start(struct seq_file * m,loff_t * pos)6051  static void *eval_map_start(struct seq_file *m, loff_t *pos)
6052  {
6053  	union trace_eval_map_item *v;
6054  	loff_t l = 0;
6055  
6056  	mutex_lock(&trace_eval_mutex);
6057  
6058  	v = trace_eval_maps;
6059  	if (v)
6060  		v++;
6061  
6062  	while (v && l < *pos) {
6063  		v = eval_map_next(m, v, &l);
6064  	}
6065  
6066  	return v;
6067  }
6068  
eval_map_stop(struct seq_file * m,void * v)6069  static void eval_map_stop(struct seq_file *m, void *v)
6070  {
6071  	mutex_unlock(&trace_eval_mutex);
6072  }
6073  
eval_map_show(struct seq_file * m,void * v)6074  static int eval_map_show(struct seq_file *m, void *v)
6075  {
6076  	union trace_eval_map_item *ptr = v;
6077  
6078  	seq_printf(m, "%s %ld (%s)\n",
6079  		   ptr->map.eval_string, ptr->map.eval_value,
6080  		   ptr->map.system);
6081  
6082  	return 0;
6083  }
6084  
6085  static const struct seq_operations tracing_eval_map_seq_ops = {
6086  	.start		= eval_map_start,
6087  	.next		= eval_map_next,
6088  	.stop		= eval_map_stop,
6089  	.show		= eval_map_show,
6090  };
6091  
tracing_eval_map_open(struct inode * inode,struct file * filp)6092  static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6093  {
6094  	int ret;
6095  
6096  	ret = tracing_check_open_get_tr(NULL);
6097  	if (ret)
6098  		return ret;
6099  
6100  	return seq_open(filp, &tracing_eval_map_seq_ops);
6101  }
6102  
6103  static const struct file_operations tracing_eval_map_fops = {
6104  	.open		= tracing_eval_map_open,
6105  	.read		= seq_read,
6106  	.llseek		= seq_lseek,
6107  	.release	= seq_release,
6108  };
6109  
6110  static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item * ptr)6111  trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6112  {
6113  	/* Return tail of array given the head */
6114  	return ptr + ptr->head.length + 1;
6115  }
6116  
6117  static void
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)6118  trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6119  			   int len)
6120  {
6121  	struct trace_eval_map **stop;
6122  	struct trace_eval_map **map;
6123  	union trace_eval_map_item *map_array;
6124  	union trace_eval_map_item *ptr;
6125  
6126  	stop = start + len;
6127  
6128  	/*
6129  	 * The trace_eval_maps contains the map plus a head and tail item,
6130  	 * where the head holds the module and length of array, and the
6131  	 * tail holds a pointer to the next list.
6132  	 */
6133  	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6134  	if (!map_array) {
6135  		pr_warn("Unable to allocate trace eval mapping\n");
6136  		return;
6137  	}
6138  
6139  	mutex_lock(&trace_eval_mutex);
6140  
6141  	if (!trace_eval_maps)
6142  		trace_eval_maps = map_array;
6143  	else {
6144  		ptr = trace_eval_maps;
6145  		for (;;) {
6146  			ptr = trace_eval_jmp_to_tail(ptr);
6147  			if (!ptr->tail.next)
6148  				break;
6149  			ptr = ptr->tail.next;
6150  
6151  		}
6152  		ptr->tail.next = map_array;
6153  	}
6154  	map_array->head.mod = mod;
6155  	map_array->head.length = len;
6156  	map_array++;
6157  
6158  	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6159  		map_array->map = **map;
6160  		map_array++;
6161  	}
6162  	memset(map_array, 0, sizeof(*map_array));
6163  
6164  	mutex_unlock(&trace_eval_mutex);
6165  }
6166  
trace_create_eval_file(struct dentry * d_tracer)6167  static void trace_create_eval_file(struct dentry *d_tracer)
6168  {
6169  	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6170  			  NULL, &tracing_eval_map_fops);
6171  }
6172  
6173  #else /* CONFIG_TRACE_EVAL_MAP_FILE */
trace_create_eval_file(struct dentry * d_tracer)6174  static inline void trace_create_eval_file(struct dentry *d_tracer) { }
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)6175  static inline void trace_insert_eval_map_file(struct module *mod,
6176  			      struct trace_eval_map **start, int len) { }
6177  #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6178  
trace_insert_eval_map(struct module * mod,struct trace_eval_map ** start,int len)6179  static void trace_insert_eval_map(struct module *mod,
6180  				  struct trace_eval_map **start, int len)
6181  {
6182  	struct trace_eval_map **map;
6183  
6184  	if (len <= 0)
6185  		return;
6186  
6187  	map = start;
6188  
6189  	trace_event_eval_update(map, len);
6190  
6191  	trace_insert_eval_map_file(mod, start, len);
6192  }
6193  
6194  static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6195  tracing_set_trace_read(struct file *filp, char __user *ubuf,
6196  		       size_t cnt, loff_t *ppos)
6197  {
6198  	struct trace_array *tr = filp->private_data;
6199  	char buf[MAX_TRACER_SIZE+2];
6200  	int r;
6201  
6202  	mutex_lock(&trace_types_lock);
6203  	r = sprintf(buf, "%s\n", tr->current_trace->name);
6204  	mutex_unlock(&trace_types_lock);
6205  
6206  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6207  }
6208  
tracer_init(struct tracer * t,struct trace_array * tr)6209  int tracer_init(struct tracer *t, struct trace_array *tr)
6210  {
6211  	tracing_reset_online_cpus(&tr->array_buffer);
6212  	return t->init(tr);
6213  }
6214  
set_buffer_entries(struct array_buffer * buf,unsigned long val)6215  static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6216  {
6217  	int cpu;
6218  
6219  	for_each_tracing_cpu(cpu)
6220  		per_cpu_ptr(buf->data, cpu)->entries = val;
6221  }
6222  
update_buffer_entries(struct array_buffer * buf,int cpu)6223  static void update_buffer_entries(struct array_buffer *buf, int cpu)
6224  {
6225  	if (cpu == RING_BUFFER_ALL_CPUS) {
6226  		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6227  	} else {
6228  		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6229  	}
6230  }
6231  
6232  #ifdef CONFIG_TRACER_MAX_TRACE
6233  /* resize @tr's buffer to the size of @size_tr's entries */
resize_buffer_duplicate_size(struct array_buffer * trace_buf,struct array_buffer * size_buf,int cpu_id)6234  static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6235  					struct array_buffer *size_buf, int cpu_id)
6236  {
6237  	int cpu, ret = 0;
6238  
6239  	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6240  		for_each_tracing_cpu(cpu) {
6241  			ret = ring_buffer_resize(trace_buf->buffer,
6242  				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6243  			if (ret < 0)
6244  				break;
6245  			per_cpu_ptr(trace_buf->data, cpu)->entries =
6246  				per_cpu_ptr(size_buf->data, cpu)->entries;
6247  		}
6248  	} else {
6249  		ret = ring_buffer_resize(trace_buf->buffer,
6250  				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6251  		if (ret == 0)
6252  			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6253  				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6254  	}
6255  
6256  	return ret;
6257  }
6258  #endif /* CONFIG_TRACER_MAX_TRACE */
6259  
__tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu)6260  static int __tracing_resize_ring_buffer(struct trace_array *tr,
6261  					unsigned long size, int cpu)
6262  {
6263  	int ret;
6264  
6265  	/*
6266  	 * If kernel or user changes the size of the ring buffer
6267  	 * we use the size that was given, and we can forget about
6268  	 * expanding it later.
6269  	 */
6270  	ring_buffer_expanded = true;
6271  
6272  	/* May be called before buffers are initialized */
6273  	if (!tr->array_buffer.buffer)
6274  		return 0;
6275  
6276  	/* Do not allow tracing while resizing ring buffer */
6277  	tracing_stop_tr(tr);
6278  
6279  	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6280  	if (ret < 0)
6281  		goto out_start;
6282  
6283  #ifdef CONFIG_TRACER_MAX_TRACE
6284  	if (!tr->allocated_snapshot)
6285  		goto out;
6286  
6287  	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6288  	if (ret < 0) {
6289  		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6290  						     &tr->array_buffer, cpu);
6291  		if (r < 0) {
6292  			/*
6293  			 * AARGH! We are left with different
6294  			 * size max buffer!!!!
6295  			 * The max buffer is our "snapshot" buffer.
6296  			 * When a tracer needs a snapshot (one of the
6297  			 * latency tracers), it swaps the max buffer
6298  			 * with the saved snap shot. We succeeded to
6299  			 * update the size of the main buffer, but failed to
6300  			 * update the size of the max buffer. But when we tried
6301  			 * to reset the main buffer to the original size, we
6302  			 * failed there too. This is very unlikely to
6303  			 * happen, but if it does, warn and kill all
6304  			 * tracing.
6305  			 */
6306  			WARN_ON(1);
6307  			tracing_disabled = 1;
6308  		}
6309  		goto out_start;
6310  	}
6311  
6312  	update_buffer_entries(&tr->max_buffer, cpu);
6313  
6314   out:
6315  #endif /* CONFIG_TRACER_MAX_TRACE */
6316  
6317  	update_buffer_entries(&tr->array_buffer, cpu);
6318   out_start:
6319  	tracing_start_tr(tr);
6320  	return ret;
6321  }
6322  
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)6323  ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6324  				  unsigned long size, int cpu_id)
6325  {
6326  	int ret;
6327  
6328  	mutex_lock(&trace_types_lock);
6329  
6330  	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6331  		/* make sure, this cpu is enabled in the mask */
6332  		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6333  			ret = -EINVAL;
6334  			goto out;
6335  		}
6336  	}
6337  
6338  	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6339  	if (ret < 0)
6340  		ret = -ENOMEM;
6341  
6342  out:
6343  	mutex_unlock(&trace_types_lock);
6344  
6345  	return ret;
6346  }
6347  
6348  
6349  /**
6350   * tracing_update_buffers - used by tracing facility to expand ring buffers
6351   *
6352   * To save on memory when the tracing is never used on a system with it
6353   * configured in. The ring buffers are set to a minimum size. But once
6354   * a user starts to use the tracing facility, then they need to grow
6355   * to their default size.
6356   *
6357   * This function is to be called when a tracer is about to be used.
6358   */
tracing_update_buffers(void)6359  int tracing_update_buffers(void)
6360  {
6361  	int ret = 0;
6362  
6363  	mutex_lock(&trace_types_lock);
6364  	if (!ring_buffer_expanded)
6365  		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6366  						RING_BUFFER_ALL_CPUS);
6367  	mutex_unlock(&trace_types_lock);
6368  
6369  	return ret;
6370  }
6371  
6372  struct trace_option_dentry;
6373  
6374  static void
6375  create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6376  
6377  /*
6378   * Used to clear out the tracer before deletion of an instance.
6379   * Must have trace_types_lock held.
6380   */
tracing_set_nop(struct trace_array * tr)6381  static void tracing_set_nop(struct trace_array *tr)
6382  {
6383  	if (tr->current_trace == &nop_trace)
6384  		return;
6385  
6386  	tr->current_trace->enabled--;
6387  
6388  	if (tr->current_trace->reset)
6389  		tr->current_trace->reset(tr);
6390  
6391  	tr->current_trace = &nop_trace;
6392  }
6393  
6394  static bool tracer_options_updated;
6395  
add_tracer_options(struct trace_array * tr,struct tracer * t)6396  static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6397  {
6398  	/* Only enable if the directory has been created already. */
6399  	if (!tr->dir)
6400  		return;
6401  
6402  	/* Only create trace option files after update_tracer_options finish */
6403  	if (!tracer_options_updated)
6404  		return;
6405  
6406  	create_trace_option_files(tr, t);
6407  }
6408  
tracing_set_tracer(struct trace_array * tr,const char * buf)6409  int tracing_set_tracer(struct trace_array *tr, const char *buf)
6410  {
6411  	struct tracer *t;
6412  #ifdef CONFIG_TRACER_MAX_TRACE
6413  	bool had_max_tr;
6414  #endif
6415  	int ret = 0;
6416  
6417  	mutex_lock(&trace_types_lock);
6418  
6419  	if (!ring_buffer_expanded) {
6420  		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6421  						RING_BUFFER_ALL_CPUS);
6422  		if (ret < 0)
6423  			goto out;
6424  		ret = 0;
6425  	}
6426  
6427  	for (t = trace_types; t; t = t->next) {
6428  		if (strcmp(t->name, buf) == 0)
6429  			break;
6430  	}
6431  	if (!t) {
6432  		ret = -EINVAL;
6433  		goto out;
6434  	}
6435  	if (t == tr->current_trace)
6436  		goto out;
6437  
6438  #ifdef CONFIG_TRACER_SNAPSHOT
6439  	if (t->use_max_tr) {
6440  		local_irq_disable();
6441  		arch_spin_lock(&tr->max_lock);
6442  		if (tr->cond_snapshot)
6443  			ret = -EBUSY;
6444  		arch_spin_unlock(&tr->max_lock);
6445  		local_irq_enable();
6446  		if (ret)
6447  			goto out;
6448  	}
6449  #endif
6450  	/* Some tracers won't work on kernel command line */
6451  	if (system_state < SYSTEM_RUNNING && t->noboot) {
6452  		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6453  			t->name);
6454  		goto out;
6455  	}
6456  
6457  	/* Some tracers are only allowed for the top level buffer */
6458  	if (!trace_ok_for_array(t, tr)) {
6459  		ret = -EINVAL;
6460  		goto out;
6461  	}
6462  
6463  	/* If trace pipe files are being read, we can't change the tracer */
6464  	if (tr->trace_ref) {
6465  		ret = -EBUSY;
6466  		goto out;
6467  	}
6468  
6469  	trace_branch_disable();
6470  
6471  	tr->current_trace->enabled--;
6472  
6473  	if (tr->current_trace->reset)
6474  		tr->current_trace->reset(tr);
6475  
6476  #ifdef CONFIG_TRACER_MAX_TRACE
6477  	had_max_tr = tr->current_trace->use_max_tr;
6478  
6479  	/* Current trace needs to be nop_trace before synchronize_rcu */
6480  	tr->current_trace = &nop_trace;
6481  
6482  	if (had_max_tr && !t->use_max_tr) {
6483  		/*
6484  		 * We need to make sure that the update_max_tr sees that
6485  		 * current_trace changed to nop_trace to keep it from
6486  		 * swapping the buffers after we resize it.
6487  		 * The update_max_tr is called from interrupts disabled
6488  		 * so a synchronized_sched() is sufficient.
6489  		 */
6490  		synchronize_rcu();
6491  		free_snapshot(tr);
6492  	}
6493  
6494  	if (t->use_max_tr && !tr->allocated_snapshot) {
6495  		ret = tracing_alloc_snapshot_instance(tr);
6496  		if (ret < 0)
6497  			goto out;
6498  	}
6499  #else
6500  	tr->current_trace = &nop_trace;
6501  #endif
6502  
6503  	if (t->init) {
6504  		ret = tracer_init(t, tr);
6505  		if (ret)
6506  			goto out;
6507  	}
6508  
6509  	tr->current_trace = t;
6510  	tr->current_trace->enabled++;
6511  	trace_branch_enable(tr);
6512   out:
6513  	mutex_unlock(&trace_types_lock);
6514  
6515  	return ret;
6516  }
6517  
6518  static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6519  tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6520  			size_t cnt, loff_t *ppos)
6521  {
6522  	struct trace_array *tr = filp->private_data;
6523  	char buf[MAX_TRACER_SIZE+1];
6524  	char *name;
6525  	size_t ret;
6526  	int err;
6527  
6528  	ret = cnt;
6529  
6530  	if (cnt > MAX_TRACER_SIZE)
6531  		cnt = MAX_TRACER_SIZE;
6532  
6533  	if (copy_from_user(buf, ubuf, cnt))
6534  		return -EFAULT;
6535  
6536  	buf[cnt] = 0;
6537  
6538  	name = strim(buf);
6539  
6540  	err = tracing_set_tracer(tr, name);
6541  	if (err)
6542  		return err;
6543  
6544  	*ppos += ret;
6545  
6546  	return ret;
6547  }
6548  
6549  static ssize_t
tracing_nsecs_read(unsigned long * ptr,char __user * ubuf,size_t cnt,loff_t * ppos)6550  tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6551  		   size_t cnt, loff_t *ppos)
6552  {
6553  	char buf[64];
6554  	int r;
6555  
6556  	r = snprintf(buf, sizeof(buf), "%ld\n",
6557  		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6558  	if (r > sizeof(buf))
6559  		r = sizeof(buf);
6560  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6561  }
6562  
6563  static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)6564  tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6565  		    size_t cnt, loff_t *ppos)
6566  {
6567  	unsigned long val;
6568  	int ret;
6569  
6570  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6571  	if (ret)
6572  		return ret;
6573  
6574  	*ptr = val * 1000;
6575  
6576  	return cnt;
6577  }
6578  
6579  static ssize_t
tracing_thresh_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6580  tracing_thresh_read(struct file *filp, char __user *ubuf,
6581  		    size_t cnt, loff_t *ppos)
6582  {
6583  	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6584  }
6585  
6586  static ssize_t
tracing_thresh_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6587  tracing_thresh_write(struct file *filp, const char __user *ubuf,
6588  		     size_t cnt, loff_t *ppos)
6589  {
6590  	struct trace_array *tr = filp->private_data;
6591  	int ret;
6592  
6593  	mutex_lock(&trace_types_lock);
6594  	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6595  	if (ret < 0)
6596  		goto out;
6597  
6598  	if (tr->current_trace->update_thresh) {
6599  		ret = tr->current_trace->update_thresh(tr);
6600  		if (ret < 0)
6601  			goto out;
6602  	}
6603  
6604  	ret = cnt;
6605  out:
6606  	mutex_unlock(&trace_types_lock);
6607  
6608  	return ret;
6609  }
6610  
6611  #ifdef CONFIG_TRACER_MAX_TRACE
6612  
6613  static ssize_t
tracing_max_lat_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6614  tracing_max_lat_read(struct file *filp, char __user *ubuf,
6615  		     size_t cnt, loff_t *ppos)
6616  {
6617  	struct trace_array *tr = filp->private_data;
6618  
6619  	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6620  }
6621  
6622  static ssize_t
tracing_max_lat_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6623  tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6624  		      size_t cnt, loff_t *ppos)
6625  {
6626  	struct trace_array *tr = filp->private_data;
6627  
6628  	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6629  }
6630  
6631  #endif
6632  
open_pipe_on_cpu(struct trace_array * tr,int cpu)6633  static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6634  {
6635  	if (cpu == RING_BUFFER_ALL_CPUS) {
6636  		if (cpumask_empty(tr->pipe_cpumask)) {
6637  			cpumask_setall(tr->pipe_cpumask);
6638  			return 0;
6639  		}
6640  	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6641  		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6642  		return 0;
6643  	}
6644  	return -EBUSY;
6645  }
6646  
close_pipe_on_cpu(struct trace_array * tr,int cpu)6647  static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6648  {
6649  	if (cpu == RING_BUFFER_ALL_CPUS) {
6650  		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6651  		cpumask_clear(tr->pipe_cpumask);
6652  	} else {
6653  		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6654  		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6655  	}
6656  }
6657  
tracing_open_pipe(struct inode * inode,struct file * filp)6658  static int tracing_open_pipe(struct inode *inode, struct file *filp)
6659  {
6660  	struct trace_array *tr = inode->i_private;
6661  	struct trace_iterator *iter;
6662  	int cpu;
6663  	int ret;
6664  
6665  	ret = tracing_check_open_get_tr(tr);
6666  	if (ret)
6667  		return ret;
6668  
6669  	mutex_lock(&trace_types_lock);
6670  	cpu = tracing_get_cpu(inode);
6671  	ret = open_pipe_on_cpu(tr, cpu);
6672  	if (ret)
6673  		goto fail_pipe_on_cpu;
6674  
6675  	/* create a buffer to store the information to pass to userspace */
6676  	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6677  	if (!iter) {
6678  		ret = -ENOMEM;
6679  		goto fail_alloc_iter;
6680  	}
6681  
6682  	trace_seq_init(&iter->seq);
6683  	iter->trace = tr->current_trace;
6684  
6685  	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6686  		ret = -ENOMEM;
6687  		goto fail;
6688  	}
6689  
6690  	/* trace pipe does not show start of buffer */
6691  	cpumask_setall(iter->started);
6692  
6693  	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6694  		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6695  
6696  	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6697  	if (trace_clocks[tr->clock_id].in_ns)
6698  		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6699  
6700  	iter->tr = tr;
6701  	iter->array_buffer = &tr->array_buffer;
6702  	iter->cpu_file = cpu;
6703  	mutex_init(&iter->mutex);
6704  	filp->private_data = iter;
6705  
6706  	if (iter->trace->pipe_open)
6707  		iter->trace->pipe_open(iter);
6708  
6709  	nonseekable_open(inode, filp);
6710  
6711  	tr->trace_ref++;
6712  
6713  	mutex_unlock(&trace_types_lock);
6714  	return ret;
6715  
6716  fail:
6717  	kfree(iter);
6718  fail_alloc_iter:
6719  	close_pipe_on_cpu(tr, cpu);
6720  fail_pipe_on_cpu:
6721  	__trace_array_put(tr);
6722  	mutex_unlock(&trace_types_lock);
6723  	return ret;
6724  }
6725  
tracing_release_pipe(struct inode * inode,struct file * file)6726  static int tracing_release_pipe(struct inode *inode, struct file *file)
6727  {
6728  	struct trace_iterator *iter = file->private_data;
6729  	struct trace_array *tr = inode->i_private;
6730  
6731  	mutex_lock(&trace_types_lock);
6732  
6733  	tr->trace_ref--;
6734  
6735  	if (iter->trace->pipe_close)
6736  		iter->trace->pipe_close(iter);
6737  	close_pipe_on_cpu(tr, iter->cpu_file);
6738  	mutex_unlock(&trace_types_lock);
6739  
6740  	free_trace_iter_content(iter);
6741  	kfree(iter);
6742  
6743  	trace_array_put(tr);
6744  
6745  	return 0;
6746  }
6747  
6748  static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)6749  trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6750  {
6751  	struct trace_array *tr = iter->tr;
6752  
6753  	/* Iterators are static, they should be filled or empty */
6754  	if (trace_buffer_iter(iter, iter->cpu_file))
6755  		return EPOLLIN | EPOLLRDNORM;
6756  
6757  	if (tr->trace_flags & TRACE_ITER_BLOCK)
6758  		/*
6759  		 * Always select as readable when in blocking mode
6760  		 */
6761  		return EPOLLIN | EPOLLRDNORM;
6762  	else
6763  		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6764  					     filp, poll_table, iter->tr->buffer_percent);
6765  }
6766  
6767  static __poll_t
tracing_poll_pipe(struct file * filp,poll_table * poll_table)6768  tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6769  {
6770  	struct trace_iterator *iter = filp->private_data;
6771  
6772  	return trace_poll(iter, filp, poll_table);
6773  }
6774  
6775  /* Must be called with iter->mutex held. */
tracing_wait_pipe(struct file * filp)6776  static int tracing_wait_pipe(struct file *filp)
6777  {
6778  	struct trace_iterator *iter = filp->private_data;
6779  	int ret;
6780  
6781  	while (trace_empty(iter)) {
6782  
6783  		if ((filp->f_flags & O_NONBLOCK)) {
6784  			return -EAGAIN;
6785  		}
6786  
6787  		/*
6788  		 * We block until we read something and tracing is disabled.
6789  		 * We still block if tracing is disabled, but we have never
6790  		 * read anything. This allows a user to cat this file, and
6791  		 * then enable tracing. But after we have read something,
6792  		 * we give an EOF when tracing is again disabled.
6793  		 *
6794  		 * iter->pos will be 0 if we haven't read anything.
6795  		 */
6796  		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6797  			break;
6798  
6799  		mutex_unlock(&iter->mutex);
6800  
6801  		ret = wait_on_pipe(iter, 0);
6802  
6803  		mutex_lock(&iter->mutex);
6804  
6805  		if (ret)
6806  			return ret;
6807  	}
6808  
6809  	return 1;
6810  }
6811  
6812  /*
6813   * Consumer reader.
6814   */
6815  static ssize_t
tracing_read_pipe(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6816  tracing_read_pipe(struct file *filp, char __user *ubuf,
6817  		  size_t cnt, loff_t *ppos)
6818  {
6819  	struct trace_iterator *iter = filp->private_data;
6820  	ssize_t sret;
6821  
6822  	/*
6823  	 * Avoid more than one consumer on a single file descriptor
6824  	 * This is just a matter of traces coherency, the ring buffer itself
6825  	 * is protected.
6826  	 */
6827  	mutex_lock(&iter->mutex);
6828  
6829  	/* return any leftover data */
6830  	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6831  	if (sret != -EBUSY)
6832  		goto out;
6833  
6834  	trace_seq_init(&iter->seq);
6835  
6836  	if (iter->trace->read) {
6837  		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6838  		if (sret)
6839  			goto out;
6840  	}
6841  
6842  waitagain:
6843  	sret = tracing_wait_pipe(filp);
6844  	if (sret <= 0)
6845  		goto out;
6846  
6847  	/* stop when tracing is finished */
6848  	if (trace_empty(iter)) {
6849  		sret = 0;
6850  		goto out;
6851  	}
6852  
6853  	if (cnt >= PAGE_SIZE)
6854  		cnt = PAGE_SIZE - 1;
6855  
6856  	/* reset all but tr, trace, and overruns */
6857  	trace_iterator_reset(iter);
6858  	cpumask_clear(iter->started);
6859  	trace_seq_init(&iter->seq);
6860  
6861  	trace_event_read_lock();
6862  	trace_access_lock(iter->cpu_file);
6863  	while (trace_find_next_entry_inc(iter) != NULL) {
6864  		enum print_line_t ret;
6865  		int save_len = iter->seq.seq.len;
6866  
6867  		ret = print_trace_line(iter);
6868  		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6869  			/*
6870  			 * If one print_trace_line() fills entire trace_seq in one shot,
6871  			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6872  			 * In this case, we need to consume it, otherwise, loop will peek
6873  			 * this event next time, resulting in an infinite loop.
6874  			 */
6875  			if (save_len == 0) {
6876  				iter->seq.full = 0;
6877  				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6878  				trace_consume(iter);
6879  				break;
6880  			}
6881  
6882  			/* In other cases, don't print partial lines */
6883  			iter->seq.seq.len = save_len;
6884  			break;
6885  		}
6886  		if (ret != TRACE_TYPE_NO_CONSUME)
6887  			trace_consume(iter);
6888  
6889  		if (trace_seq_used(&iter->seq) >= cnt)
6890  			break;
6891  
6892  		/*
6893  		 * Setting the full flag means we reached the trace_seq buffer
6894  		 * size and we should leave by partial output condition above.
6895  		 * One of the trace_seq_* functions is not used properly.
6896  		 */
6897  		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6898  			  iter->ent->type);
6899  	}
6900  	trace_access_unlock(iter->cpu_file);
6901  	trace_event_read_unlock();
6902  
6903  	/* Now copy what we have to the user */
6904  	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6905  	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6906  		trace_seq_init(&iter->seq);
6907  
6908  	/*
6909  	 * If there was nothing to send to user, in spite of consuming trace
6910  	 * entries, go back to wait for more entries.
6911  	 */
6912  	if (sret == -EBUSY)
6913  		goto waitagain;
6914  
6915  out:
6916  	mutex_unlock(&iter->mutex);
6917  
6918  	return sret;
6919  }
6920  
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)6921  static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6922  				     unsigned int idx)
6923  {
6924  	__free_page(spd->pages[idx]);
6925  }
6926  
6927  static size_t
tracing_fill_pipe_page(size_t rem,struct trace_iterator * iter)6928  tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6929  {
6930  	size_t count;
6931  	int save_len;
6932  	int ret;
6933  
6934  	/* Seq buffer is page-sized, exactly what we need. */
6935  	for (;;) {
6936  		save_len = iter->seq.seq.len;
6937  		ret = print_trace_line(iter);
6938  
6939  		if (trace_seq_has_overflowed(&iter->seq)) {
6940  			iter->seq.seq.len = save_len;
6941  			break;
6942  		}
6943  
6944  		/*
6945  		 * This should not be hit, because it should only
6946  		 * be set if the iter->seq overflowed. But check it
6947  		 * anyway to be safe.
6948  		 */
6949  		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6950  			iter->seq.seq.len = save_len;
6951  			break;
6952  		}
6953  
6954  		count = trace_seq_used(&iter->seq) - save_len;
6955  		if (rem < count) {
6956  			rem = 0;
6957  			iter->seq.seq.len = save_len;
6958  			break;
6959  		}
6960  
6961  		if (ret != TRACE_TYPE_NO_CONSUME)
6962  			trace_consume(iter);
6963  		rem -= count;
6964  		if (!trace_find_next_entry_inc(iter))	{
6965  			rem = 0;
6966  			iter->ent = NULL;
6967  			break;
6968  		}
6969  	}
6970  
6971  	return rem;
6972  }
6973  
tracing_splice_read_pipe(struct file * filp,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)6974  static ssize_t tracing_splice_read_pipe(struct file *filp,
6975  					loff_t *ppos,
6976  					struct pipe_inode_info *pipe,
6977  					size_t len,
6978  					unsigned int flags)
6979  {
6980  	struct page *pages_def[PIPE_DEF_BUFFERS];
6981  	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6982  	struct trace_iterator *iter = filp->private_data;
6983  	struct splice_pipe_desc spd = {
6984  		.pages		= pages_def,
6985  		.partial	= partial_def,
6986  		.nr_pages	= 0, /* This gets updated below. */
6987  		.nr_pages_max	= PIPE_DEF_BUFFERS,
6988  		.ops		= &default_pipe_buf_ops,
6989  		.spd_release	= tracing_spd_release_pipe,
6990  	};
6991  	ssize_t ret;
6992  	size_t rem;
6993  	unsigned int i;
6994  
6995  	if (splice_grow_spd(pipe, &spd))
6996  		return -ENOMEM;
6997  
6998  	mutex_lock(&iter->mutex);
6999  
7000  	if (iter->trace->splice_read) {
7001  		ret = iter->trace->splice_read(iter, filp,
7002  					       ppos, pipe, len, flags);
7003  		if (ret)
7004  			goto out_err;
7005  	}
7006  
7007  	ret = tracing_wait_pipe(filp);
7008  	if (ret <= 0)
7009  		goto out_err;
7010  
7011  	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7012  		ret = -EFAULT;
7013  		goto out_err;
7014  	}
7015  
7016  	trace_event_read_lock();
7017  	trace_access_lock(iter->cpu_file);
7018  
7019  	/* Fill as many pages as possible. */
7020  	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7021  		spd.pages[i] = alloc_page(GFP_KERNEL);
7022  		if (!spd.pages[i])
7023  			break;
7024  
7025  		rem = tracing_fill_pipe_page(rem, iter);
7026  
7027  		/* Copy the data into the page, so we can start over. */
7028  		ret = trace_seq_to_buffer(&iter->seq,
7029  					  page_address(spd.pages[i]),
7030  					  trace_seq_used(&iter->seq));
7031  		if (ret < 0) {
7032  			__free_page(spd.pages[i]);
7033  			break;
7034  		}
7035  		spd.partial[i].offset = 0;
7036  		spd.partial[i].len = trace_seq_used(&iter->seq);
7037  
7038  		trace_seq_init(&iter->seq);
7039  	}
7040  
7041  	trace_access_unlock(iter->cpu_file);
7042  	trace_event_read_unlock();
7043  	mutex_unlock(&iter->mutex);
7044  
7045  	spd.nr_pages = i;
7046  
7047  	if (i)
7048  		ret = splice_to_pipe(pipe, &spd);
7049  	else
7050  		ret = 0;
7051  out:
7052  	splice_shrink_spd(&spd);
7053  	return ret;
7054  
7055  out_err:
7056  	mutex_unlock(&iter->mutex);
7057  	goto out;
7058  }
7059  
7060  static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7061  tracing_entries_read(struct file *filp, char __user *ubuf,
7062  		     size_t cnt, loff_t *ppos)
7063  {
7064  	struct inode *inode = file_inode(filp);
7065  	struct trace_array *tr = inode->i_private;
7066  	int cpu = tracing_get_cpu(inode);
7067  	char buf[64];
7068  	int r = 0;
7069  	ssize_t ret;
7070  
7071  	mutex_lock(&trace_types_lock);
7072  
7073  	if (cpu == RING_BUFFER_ALL_CPUS) {
7074  		int cpu, buf_size_same;
7075  		unsigned long size;
7076  
7077  		size = 0;
7078  		buf_size_same = 1;
7079  		/* check if all cpu sizes are same */
7080  		for_each_tracing_cpu(cpu) {
7081  			/* fill in the size from first enabled cpu */
7082  			if (size == 0)
7083  				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7084  			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7085  				buf_size_same = 0;
7086  				break;
7087  			}
7088  		}
7089  
7090  		if (buf_size_same) {
7091  			if (!ring_buffer_expanded)
7092  				r = sprintf(buf, "%lu (expanded: %lu)\n",
7093  					    size >> 10,
7094  					    trace_buf_size >> 10);
7095  			else
7096  				r = sprintf(buf, "%lu\n", size >> 10);
7097  		} else
7098  			r = sprintf(buf, "X\n");
7099  	} else
7100  		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7101  
7102  	mutex_unlock(&trace_types_lock);
7103  
7104  	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7105  	return ret;
7106  }
7107  
7108  static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7109  tracing_entries_write(struct file *filp, const char __user *ubuf,
7110  		      size_t cnt, loff_t *ppos)
7111  {
7112  	struct inode *inode = file_inode(filp);
7113  	struct trace_array *tr = inode->i_private;
7114  	unsigned long val;
7115  	int ret;
7116  
7117  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7118  	if (ret)
7119  		return ret;
7120  
7121  	/* must have at least 1 entry */
7122  	if (!val)
7123  		return -EINVAL;
7124  
7125  	/* value is in KB */
7126  	val <<= 10;
7127  	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7128  	if (ret < 0)
7129  		return ret;
7130  
7131  	*ppos += cnt;
7132  
7133  	return cnt;
7134  }
7135  
7136  static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7137  tracing_total_entries_read(struct file *filp, char __user *ubuf,
7138  				size_t cnt, loff_t *ppos)
7139  {
7140  	struct trace_array *tr = filp->private_data;
7141  	char buf[64];
7142  	int r, cpu;
7143  	unsigned long size = 0, expanded_size = 0;
7144  
7145  	mutex_lock(&trace_types_lock);
7146  	for_each_tracing_cpu(cpu) {
7147  		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7148  		if (!ring_buffer_expanded)
7149  			expanded_size += trace_buf_size >> 10;
7150  	}
7151  	if (ring_buffer_expanded)
7152  		r = sprintf(buf, "%lu\n", size);
7153  	else
7154  		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7155  	mutex_unlock(&trace_types_lock);
7156  
7157  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7158  }
7159  
7160  static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7161  tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7162  			  size_t cnt, loff_t *ppos)
7163  {
7164  	/*
7165  	 * There is no need to read what the user has written, this function
7166  	 * is just to make sure that there is no error when "echo" is used
7167  	 */
7168  
7169  	*ppos += cnt;
7170  
7171  	return cnt;
7172  }
7173  
7174  static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)7175  tracing_free_buffer_release(struct inode *inode, struct file *filp)
7176  {
7177  	struct trace_array *tr = inode->i_private;
7178  
7179  	/* disable tracing ? */
7180  	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7181  		tracer_tracing_off(tr);
7182  	/* resize the ring buffer to 0 */
7183  	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7184  
7185  	trace_array_put(tr);
7186  
7187  	return 0;
7188  }
7189  
7190  static ssize_t
tracing_mark_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7191  tracing_mark_write(struct file *filp, const char __user *ubuf,
7192  					size_t cnt, loff_t *fpos)
7193  {
7194  	struct trace_array *tr = filp->private_data;
7195  	struct ring_buffer_event *event;
7196  	enum event_trigger_type tt = ETT_NONE;
7197  	struct trace_buffer *buffer;
7198  	struct print_entry *entry;
7199  	ssize_t written;
7200  	int size;
7201  	int len;
7202  
7203  /* Used in tracing_mark_raw_write() as well */
7204  #define FAULTED_STR "<faulted>"
7205  #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7206  
7207  	if (tracing_disabled)
7208  		return -EINVAL;
7209  
7210  	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7211  		return -EINVAL;
7212  
7213  	if (cnt > TRACE_BUF_SIZE)
7214  		cnt = TRACE_BUF_SIZE;
7215  
7216  	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7217  
7218  	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7219  
7220  	/* If less than "<faulted>", then make sure we can still add that */
7221  	if (cnt < FAULTED_SIZE)
7222  		size += FAULTED_SIZE - cnt;
7223  
7224  	buffer = tr->array_buffer.buffer;
7225  	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7226  					    tracing_gen_ctx());
7227  	if (unlikely(!event))
7228  		/* Ring buffer disabled, return as if not open for write */
7229  		return -EBADF;
7230  
7231  	entry = ring_buffer_event_data(event);
7232  	entry->ip = _THIS_IP_;
7233  
7234  	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7235  	if (len) {
7236  		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7237  		cnt = FAULTED_SIZE;
7238  		written = -EFAULT;
7239  	} else
7240  		written = cnt;
7241  
7242  	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7243  		/* do not add \n before testing triggers, but add \0 */
7244  		entry->buf[cnt] = '\0';
7245  		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7246  	}
7247  
7248  	if (entry->buf[cnt - 1] != '\n') {
7249  		entry->buf[cnt] = '\n';
7250  		entry->buf[cnt + 1] = '\0';
7251  	} else
7252  		entry->buf[cnt] = '\0';
7253  
7254  	if (static_branch_unlikely(&trace_marker_exports_enabled))
7255  		ftrace_exports(event, TRACE_EXPORT_MARKER);
7256  	__buffer_unlock_commit(buffer, event);
7257  
7258  	if (tt)
7259  		event_triggers_post_call(tr->trace_marker_file, tt);
7260  
7261  	return written;
7262  }
7263  
7264  /* Limit it for now to 3K (including tag) */
7265  #define RAW_DATA_MAX_SIZE (1024*3)
7266  
7267  static ssize_t
tracing_mark_raw_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7268  tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7269  					size_t cnt, loff_t *fpos)
7270  {
7271  	struct trace_array *tr = filp->private_data;
7272  	struct ring_buffer_event *event;
7273  	struct trace_buffer *buffer;
7274  	struct raw_data_entry *entry;
7275  	ssize_t written;
7276  	int size;
7277  	int len;
7278  
7279  #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7280  
7281  	if (tracing_disabled)
7282  		return -EINVAL;
7283  
7284  	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7285  		return -EINVAL;
7286  
7287  	/* The marker must at least have a tag id */
7288  	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7289  		return -EINVAL;
7290  
7291  	if (cnt > TRACE_BUF_SIZE)
7292  		cnt = TRACE_BUF_SIZE;
7293  
7294  	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7295  
7296  	size = sizeof(*entry) + cnt;
7297  	if (cnt < FAULT_SIZE_ID)
7298  		size += FAULT_SIZE_ID - cnt;
7299  
7300  	buffer = tr->array_buffer.buffer;
7301  	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7302  					    tracing_gen_ctx());
7303  	if (!event)
7304  		/* Ring buffer disabled, return as if not open for write */
7305  		return -EBADF;
7306  
7307  	entry = ring_buffer_event_data(event);
7308  
7309  	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7310  	if (len) {
7311  		entry->id = -1;
7312  		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7313  		written = -EFAULT;
7314  	} else
7315  		written = cnt;
7316  
7317  	__buffer_unlock_commit(buffer, event);
7318  
7319  	return written;
7320  }
7321  
tracing_clock_show(struct seq_file * m,void * v)7322  static int tracing_clock_show(struct seq_file *m, void *v)
7323  {
7324  	struct trace_array *tr = m->private;
7325  	int i;
7326  
7327  	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7328  		seq_printf(m,
7329  			"%s%s%s%s", i ? " " : "",
7330  			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7331  			i == tr->clock_id ? "]" : "");
7332  	seq_putc(m, '\n');
7333  
7334  	return 0;
7335  }
7336  
tracing_set_clock(struct trace_array * tr,const char * clockstr)7337  int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7338  {
7339  	int i;
7340  
7341  	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7342  		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7343  			break;
7344  	}
7345  	if (i == ARRAY_SIZE(trace_clocks))
7346  		return -EINVAL;
7347  
7348  	mutex_lock(&trace_types_lock);
7349  
7350  	tr->clock_id = i;
7351  
7352  	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7353  
7354  	/*
7355  	 * New clock may not be consistent with the previous clock.
7356  	 * Reset the buffer so that it doesn't have incomparable timestamps.
7357  	 */
7358  	tracing_reset_online_cpus(&tr->array_buffer);
7359  
7360  #ifdef CONFIG_TRACER_MAX_TRACE
7361  	if (tr->max_buffer.buffer)
7362  		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7363  	tracing_reset_online_cpus(&tr->max_buffer);
7364  #endif
7365  
7366  	mutex_unlock(&trace_types_lock);
7367  
7368  	return 0;
7369  }
7370  
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7371  static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7372  				   size_t cnt, loff_t *fpos)
7373  {
7374  	struct seq_file *m = filp->private_data;
7375  	struct trace_array *tr = m->private;
7376  	char buf[64];
7377  	const char *clockstr;
7378  	int ret;
7379  
7380  	if (cnt >= sizeof(buf))
7381  		return -EINVAL;
7382  
7383  	if (copy_from_user(buf, ubuf, cnt))
7384  		return -EFAULT;
7385  
7386  	buf[cnt] = 0;
7387  
7388  	clockstr = strstrip(buf);
7389  
7390  	ret = tracing_set_clock(tr, clockstr);
7391  	if (ret)
7392  		return ret;
7393  
7394  	*fpos += cnt;
7395  
7396  	return cnt;
7397  }
7398  
tracing_clock_open(struct inode * inode,struct file * file)7399  static int tracing_clock_open(struct inode *inode, struct file *file)
7400  {
7401  	struct trace_array *tr = inode->i_private;
7402  	int ret;
7403  
7404  	ret = tracing_check_open_get_tr(tr);
7405  	if (ret)
7406  		return ret;
7407  
7408  	ret = single_open(file, tracing_clock_show, inode->i_private);
7409  	if (ret < 0)
7410  		trace_array_put(tr);
7411  
7412  	return ret;
7413  }
7414  
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7415  static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7416  {
7417  	struct trace_array *tr = m->private;
7418  
7419  	mutex_lock(&trace_types_lock);
7420  
7421  	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7422  		seq_puts(m, "delta [absolute]\n");
7423  	else
7424  		seq_puts(m, "[delta] absolute\n");
7425  
7426  	mutex_unlock(&trace_types_lock);
7427  
7428  	return 0;
7429  }
7430  
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7431  static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7432  {
7433  	struct trace_array *tr = inode->i_private;
7434  	int ret;
7435  
7436  	ret = tracing_check_open_get_tr(tr);
7437  	if (ret)
7438  		return ret;
7439  
7440  	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7441  	if (ret < 0)
7442  		trace_array_put(tr);
7443  
7444  	return ret;
7445  }
7446  
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7447  u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7448  {
7449  	if (rbe == this_cpu_read(trace_buffered_event))
7450  		return ring_buffer_time_stamp(buffer);
7451  
7452  	return ring_buffer_event_time_stamp(buffer, rbe);
7453  }
7454  
7455  /*
7456   * Set or disable using the per CPU trace_buffer_event when possible.
7457   */
tracing_set_filter_buffering(struct trace_array * tr,bool set)7458  int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7459  {
7460  	int ret = 0;
7461  
7462  	mutex_lock(&trace_types_lock);
7463  
7464  	if (set && tr->no_filter_buffering_ref++)
7465  		goto out;
7466  
7467  	if (!set) {
7468  		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7469  			ret = -EINVAL;
7470  			goto out;
7471  		}
7472  
7473  		--tr->no_filter_buffering_ref;
7474  	}
7475   out:
7476  	mutex_unlock(&trace_types_lock);
7477  
7478  	return ret;
7479  }
7480  
7481  struct ftrace_buffer_info {
7482  	struct trace_iterator	iter;
7483  	void			*spare;
7484  	unsigned int		spare_cpu;
7485  	unsigned int		read;
7486  };
7487  
7488  #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_open(struct inode * inode,struct file * file)7489  static int tracing_snapshot_open(struct inode *inode, struct file *file)
7490  {
7491  	struct trace_array *tr = inode->i_private;
7492  	struct trace_iterator *iter;
7493  	struct seq_file *m;
7494  	int ret;
7495  
7496  	ret = tracing_check_open_get_tr(tr);
7497  	if (ret)
7498  		return ret;
7499  
7500  	if (file->f_mode & FMODE_READ) {
7501  		iter = __tracing_open(inode, file, true);
7502  		if (IS_ERR(iter))
7503  			ret = PTR_ERR(iter);
7504  	} else {
7505  		/* Writes still need the seq_file to hold the private data */
7506  		ret = -ENOMEM;
7507  		m = kzalloc(sizeof(*m), GFP_KERNEL);
7508  		if (!m)
7509  			goto out;
7510  		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7511  		if (!iter) {
7512  			kfree(m);
7513  			goto out;
7514  		}
7515  		ret = 0;
7516  
7517  		iter->tr = tr;
7518  		iter->array_buffer = &tr->max_buffer;
7519  		iter->cpu_file = tracing_get_cpu(inode);
7520  		m->private = iter;
7521  		file->private_data = m;
7522  	}
7523  out:
7524  	if (ret < 0)
7525  		trace_array_put(tr);
7526  
7527  	return ret;
7528  }
7529  
tracing_swap_cpu_buffer(void * tr)7530  static void tracing_swap_cpu_buffer(void *tr)
7531  {
7532  	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7533  }
7534  
7535  static ssize_t
tracing_snapshot_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7536  tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7537  		       loff_t *ppos)
7538  {
7539  	struct seq_file *m = filp->private_data;
7540  	struct trace_iterator *iter = m->private;
7541  	struct trace_array *tr = iter->tr;
7542  	unsigned long val;
7543  	int ret;
7544  
7545  	ret = tracing_update_buffers();
7546  	if (ret < 0)
7547  		return ret;
7548  
7549  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7550  	if (ret)
7551  		return ret;
7552  
7553  	mutex_lock(&trace_types_lock);
7554  
7555  	if (tr->current_trace->use_max_tr) {
7556  		ret = -EBUSY;
7557  		goto out;
7558  	}
7559  
7560  	local_irq_disable();
7561  	arch_spin_lock(&tr->max_lock);
7562  	if (tr->cond_snapshot)
7563  		ret = -EBUSY;
7564  	arch_spin_unlock(&tr->max_lock);
7565  	local_irq_enable();
7566  	if (ret)
7567  		goto out;
7568  
7569  	switch (val) {
7570  	case 0:
7571  		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7572  			ret = -EINVAL;
7573  			break;
7574  		}
7575  		if (tr->allocated_snapshot)
7576  			free_snapshot(tr);
7577  		break;
7578  	case 1:
7579  /* Only allow per-cpu swap if the ring buffer supports it */
7580  #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7581  		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7582  			ret = -EINVAL;
7583  			break;
7584  		}
7585  #endif
7586  		if (tr->allocated_snapshot)
7587  			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7588  					&tr->array_buffer, iter->cpu_file);
7589  		else
7590  			ret = tracing_alloc_snapshot_instance(tr);
7591  		if (ret < 0)
7592  			break;
7593  		/* Now, we're going to swap */
7594  		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7595  			local_irq_disable();
7596  			update_max_tr(tr, current, smp_processor_id(), NULL);
7597  			local_irq_enable();
7598  		} else {
7599  			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7600  						 (void *)tr, 1);
7601  		}
7602  		break;
7603  	default:
7604  		if (tr->allocated_snapshot) {
7605  			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7606  				tracing_reset_online_cpus(&tr->max_buffer);
7607  			else
7608  				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7609  		}
7610  		break;
7611  	}
7612  
7613  	if (ret >= 0) {
7614  		*ppos += cnt;
7615  		ret = cnt;
7616  	}
7617  out:
7618  	mutex_unlock(&trace_types_lock);
7619  	return ret;
7620  }
7621  
tracing_snapshot_release(struct inode * inode,struct file * file)7622  static int tracing_snapshot_release(struct inode *inode, struct file *file)
7623  {
7624  	struct seq_file *m = file->private_data;
7625  	int ret;
7626  
7627  	ret = tracing_release(inode, file);
7628  
7629  	if (file->f_mode & FMODE_READ)
7630  		return ret;
7631  
7632  	/* If write only, the seq_file is just a stub */
7633  	if (m)
7634  		kfree(m->private);
7635  	kfree(m);
7636  
7637  	return 0;
7638  }
7639  
7640  static int tracing_buffers_open(struct inode *inode, struct file *filp);
7641  static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7642  				    size_t count, loff_t *ppos);
7643  static int tracing_buffers_release(struct inode *inode, struct file *file);
7644  static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7645  		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7646  
snapshot_raw_open(struct inode * inode,struct file * filp)7647  static int snapshot_raw_open(struct inode *inode, struct file *filp)
7648  {
7649  	struct ftrace_buffer_info *info;
7650  	int ret;
7651  
7652  	/* The following checks for tracefs lockdown */
7653  	ret = tracing_buffers_open(inode, filp);
7654  	if (ret < 0)
7655  		return ret;
7656  
7657  	info = filp->private_data;
7658  
7659  	if (info->iter.trace->use_max_tr) {
7660  		tracing_buffers_release(inode, filp);
7661  		return -EBUSY;
7662  	}
7663  
7664  	info->iter.snapshot = true;
7665  	info->iter.array_buffer = &info->iter.tr->max_buffer;
7666  
7667  	return ret;
7668  }
7669  
7670  #endif /* CONFIG_TRACER_SNAPSHOT */
7671  
7672  
7673  static const struct file_operations tracing_thresh_fops = {
7674  	.open		= tracing_open_generic,
7675  	.read		= tracing_thresh_read,
7676  	.write		= tracing_thresh_write,
7677  	.llseek		= generic_file_llseek,
7678  };
7679  
7680  #ifdef CONFIG_TRACER_MAX_TRACE
7681  static const struct file_operations tracing_max_lat_fops = {
7682  	.open		= tracing_open_generic_tr,
7683  	.read		= tracing_max_lat_read,
7684  	.write		= tracing_max_lat_write,
7685  	.llseek		= generic_file_llseek,
7686  	.release	= tracing_release_generic_tr,
7687  };
7688  #endif
7689  
7690  static const struct file_operations set_tracer_fops = {
7691  	.open		= tracing_open_generic_tr,
7692  	.read		= tracing_set_trace_read,
7693  	.write		= tracing_set_trace_write,
7694  	.llseek		= generic_file_llseek,
7695  	.release	= tracing_release_generic_tr,
7696  };
7697  
7698  static const struct file_operations tracing_pipe_fops = {
7699  	.open		= tracing_open_pipe,
7700  	.poll		= tracing_poll_pipe,
7701  	.read		= tracing_read_pipe,
7702  	.splice_read	= tracing_splice_read_pipe,
7703  	.release	= tracing_release_pipe,
7704  	.llseek		= no_llseek,
7705  };
7706  
7707  static const struct file_operations tracing_entries_fops = {
7708  	.open		= tracing_open_generic_tr,
7709  	.read		= tracing_entries_read,
7710  	.write		= tracing_entries_write,
7711  	.llseek		= generic_file_llseek,
7712  	.release	= tracing_release_generic_tr,
7713  };
7714  
7715  static const struct file_operations tracing_total_entries_fops = {
7716  	.open		= tracing_open_generic_tr,
7717  	.read		= tracing_total_entries_read,
7718  	.llseek		= generic_file_llseek,
7719  	.release	= tracing_release_generic_tr,
7720  };
7721  
7722  static const struct file_operations tracing_free_buffer_fops = {
7723  	.open		= tracing_open_generic_tr,
7724  	.write		= tracing_free_buffer_write,
7725  	.release	= tracing_free_buffer_release,
7726  };
7727  
7728  static const struct file_operations tracing_mark_fops = {
7729  	.open		= tracing_mark_open,
7730  	.write		= tracing_mark_write,
7731  	.release	= tracing_release_generic_tr,
7732  };
7733  
7734  static const struct file_operations tracing_mark_raw_fops = {
7735  	.open		= tracing_mark_open,
7736  	.write		= tracing_mark_raw_write,
7737  	.release	= tracing_release_generic_tr,
7738  };
7739  
7740  static const struct file_operations trace_clock_fops = {
7741  	.open		= tracing_clock_open,
7742  	.read		= seq_read,
7743  	.llseek		= seq_lseek,
7744  	.release	= tracing_single_release_tr,
7745  	.write		= tracing_clock_write,
7746  };
7747  
7748  static const struct file_operations trace_time_stamp_mode_fops = {
7749  	.open		= tracing_time_stamp_mode_open,
7750  	.read		= seq_read,
7751  	.llseek		= seq_lseek,
7752  	.release	= tracing_single_release_tr,
7753  };
7754  
7755  #ifdef CONFIG_TRACER_SNAPSHOT
7756  static const struct file_operations snapshot_fops = {
7757  	.open		= tracing_snapshot_open,
7758  	.read		= seq_read,
7759  	.write		= tracing_snapshot_write,
7760  	.llseek		= tracing_lseek,
7761  	.release	= tracing_snapshot_release,
7762  };
7763  
7764  static const struct file_operations snapshot_raw_fops = {
7765  	.open		= snapshot_raw_open,
7766  	.read		= tracing_buffers_read,
7767  	.release	= tracing_buffers_release,
7768  	.splice_read	= tracing_buffers_splice_read,
7769  	.llseek		= no_llseek,
7770  };
7771  
7772  #endif /* CONFIG_TRACER_SNAPSHOT */
7773  
7774  /*
7775   * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7776   * @filp: The active open file structure
7777   * @ubuf: The userspace provided buffer to read value into
7778   * @cnt: The maximum number of bytes to read
7779   * @ppos: The current "file" position
7780   *
7781   * This function implements the write interface for a struct trace_min_max_param.
7782   * The filp->private_data must point to a trace_min_max_param structure that
7783   * defines where to write the value, the min and the max acceptable values,
7784   * and a lock to protect the write.
7785   */
7786  static ssize_t
trace_min_max_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7787  trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7788  {
7789  	struct trace_min_max_param *param = filp->private_data;
7790  	u64 val;
7791  	int err;
7792  
7793  	if (!param)
7794  		return -EFAULT;
7795  
7796  	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7797  	if (err)
7798  		return err;
7799  
7800  	if (param->lock)
7801  		mutex_lock(param->lock);
7802  
7803  	if (param->min && val < *param->min)
7804  		err = -EINVAL;
7805  
7806  	if (param->max && val > *param->max)
7807  		err = -EINVAL;
7808  
7809  	if (!err)
7810  		*param->val = val;
7811  
7812  	if (param->lock)
7813  		mutex_unlock(param->lock);
7814  
7815  	if (err)
7816  		return err;
7817  
7818  	return cnt;
7819  }
7820  
7821  /*
7822   * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7823   * @filp: The active open file structure
7824   * @ubuf: The userspace provided buffer to read value into
7825   * @cnt: The maximum number of bytes to read
7826   * @ppos: The current "file" position
7827   *
7828   * This function implements the read interface for a struct trace_min_max_param.
7829   * The filp->private_data must point to a trace_min_max_param struct with valid
7830   * data.
7831   */
7832  static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7833  trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7834  {
7835  	struct trace_min_max_param *param = filp->private_data;
7836  	char buf[U64_STR_SIZE];
7837  	int len;
7838  	u64 val;
7839  
7840  	if (!param)
7841  		return -EFAULT;
7842  
7843  	val = *param->val;
7844  
7845  	if (cnt > sizeof(buf))
7846  		cnt = sizeof(buf);
7847  
7848  	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7849  
7850  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7851  }
7852  
7853  const struct file_operations trace_min_max_fops = {
7854  	.open		= tracing_open_generic,
7855  	.read		= trace_min_max_read,
7856  	.write		= trace_min_max_write,
7857  };
7858  
7859  #define TRACING_LOG_ERRS_MAX	8
7860  #define TRACING_LOG_LOC_MAX	128
7861  
7862  #define CMD_PREFIX "  Command: "
7863  
7864  struct err_info {
7865  	const char	**errs;	/* ptr to loc-specific array of err strings */
7866  	u8		type;	/* index into errs -> specific err string */
7867  	u16		pos;	/* caret position */
7868  	u64		ts;
7869  };
7870  
7871  struct tracing_log_err {
7872  	struct list_head	list;
7873  	struct err_info		info;
7874  	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7875  	char			*cmd;                     /* what caused err */
7876  };
7877  
7878  static DEFINE_MUTEX(tracing_err_log_lock);
7879  
alloc_tracing_log_err(int len)7880  static struct tracing_log_err *alloc_tracing_log_err(int len)
7881  {
7882  	struct tracing_log_err *err;
7883  
7884  	err = kzalloc(sizeof(*err), GFP_KERNEL);
7885  	if (!err)
7886  		return ERR_PTR(-ENOMEM);
7887  
7888  	err->cmd = kzalloc(len, GFP_KERNEL);
7889  	if (!err->cmd) {
7890  		kfree(err);
7891  		return ERR_PTR(-ENOMEM);
7892  	}
7893  
7894  	return err;
7895  }
7896  
free_tracing_log_err(struct tracing_log_err * err)7897  static void free_tracing_log_err(struct tracing_log_err *err)
7898  {
7899  	kfree(err->cmd);
7900  	kfree(err);
7901  }
7902  
get_tracing_log_err(struct trace_array * tr,int len)7903  static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7904  						   int len)
7905  {
7906  	struct tracing_log_err *err;
7907  	char *cmd;
7908  
7909  	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7910  		err = alloc_tracing_log_err(len);
7911  		if (PTR_ERR(err) != -ENOMEM)
7912  			tr->n_err_log_entries++;
7913  
7914  		return err;
7915  	}
7916  	cmd = kzalloc(len, GFP_KERNEL);
7917  	if (!cmd)
7918  		return ERR_PTR(-ENOMEM);
7919  	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7920  	kfree(err->cmd);
7921  	err->cmd = cmd;
7922  	list_del(&err->list);
7923  
7924  	return err;
7925  }
7926  
7927  /**
7928   * err_pos - find the position of a string within a command for error careting
7929   * @cmd: The tracing command that caused the error
7930   * @str: The string to position the caret at within @cmd
7931   *
7932   * Finds the position of the first occurrence of @str within @cmd.  The
7933   * return value can be passed to tracing_log_err() for caret placement
7934   * within @cmd.
7935   *
7936   * Returns the index within @cmd of the first occurrence of @str or 0
7937   * if @str was not found.
7938   */
err_pos(char * cmd,const char * str)7939  unsigned int err_pos(char *cmd, const char *str)
7940  {
7941  	char *found;
7942  
7943  	if (WARN_ON(!strlen(cmd)))
7944  		return 0;
7945  
7946  	found = strstr(cmd, str);
7947  	if (found)
7948  		return found - cmd;
7949  
7950  	return 0;
7951  }
7952  
7953  /**
7954   * tracing_log_err - write an error to the tracing error log
7955   * @tr: The associated trace array for the error (NULL for top level array)
7956   * @loc: A string describing where the error occurred
7957   * @cmd: The tracing command that caused the error
7958   * @errs: The array of loc-specific static error strings
7959   * @type: The index into errs[], which produces the specific static err string
7960   * @pos: The position the caret should be placed in the cmd
7961   *
7962   * Writes an error into tracing/error_log of the form:
7963   *
7964   * <loc>: error: <text>
7965   *   Command: <cmd>
7966   *              ^
7967   *
7968   * tracing/error_log is a small log file containing the last
7969   * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7970   * unless there has been a tracing error, and the error log can be
7971   * cleared and have its memory freed by writing the empty string in
7972   * truncation mode to it i.e. echo > tracing/error_log.
7973   *
7974   * NOTE: the @errs array along with the @type param are used to
7975   * produce a static error string - this string is not copied and saved
7976   * when the error is logged - only a pointer to it is saved.  See
7977   * existing callers for examples of how static strings are typically
7978   * defined for use with tracing_log_err().
7979   */
tracing_log_err(struct trace_array * tr,const char * loc,const char * cmd,const char ** errs,u8 type,u16 pos)7980  void tracing_log_err(struct trace_array *tr,
7981  		     const char *loc, const char *cmd,
7982  		     const char **errs, u8 type, u16 pos)
7983  {
7984  	struct tracing_log_err *err;
7985  	int len = 0;
7986  
7987  	if (!tr)
7988  		tr = &global_trace;
7989  
7990  	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7991  
7992  	mutex_lock(&tracing_err_log_lock);
7993  	err = get_tracing_log_err(tr, len);
7994  	if (PTR_ERR(err) == -ENOMEM) {
7995  		mutex_unlock(&tracing_err_log_lock);
7996  		return;
7997  	}
7998  
7999  	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8000  	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8001  
8002  	err->info.errs = errs;
8003  	err->info.type = type;
8004  	err->info.pos = pos;
8005  	err->info.ts = local_clock();
8006  
8007  	list_add_tail(&err->list, &tr->err_log);
8008  	mutex_unlock(&tracing_err_log_lock);
8009  }
8010  
clear_tracing_err_log(struct trace_array * tr)8011  static void clear_tracing_err_log(struct trace_array *tr)
8012  {
8013  	struct tracing_log_err *err, *next;
8014  
8015  	mutex_lock(&tracing_err_log_lock);
8016  	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8017  		list_del(&err->list);
8018  		free_tracing_log_err(err);
8019  	}
8020  
8021  	tr->n_err_log_entries = 0;
8022  	mutex_unlock(&tracing_err_log_lock);
8023  }
8024  
tracing_err_log_seq_start(struct seq_file * m,loff_t * pos)8025  static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8026  {
8027  	struct trace_array *tr = m->private;
8028  
8029  	mutex_lock(&tracing_err_log_lock);
8030  
8031  	return seq_list_start(&tr->err_log, *pos);
8032  }
8033  
tracing_err_log_seq_next(struct seq_file * m,void * v,loff_t * pos)8034  static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8035  {
8036  	struct trace_array *tr = m->private;
8037  
8038  	return seq_list_next(v, &tr->err_log, pos);
8039  }
8040  
tracing_err_log_seq_stop(struct seq_file * m,void * v)8041  static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8042  {
8043  	mutex_unlock(&tracing_err_log_lock);
8044  }
8045  
tracing_err_log_show_pos(struct seq_file * m,u16 pos)8046  static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8047  {
8048  	u16 i;
8049  
8050  	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8051  		seq_putc(m, ' ');
8052  	for (i = 0; i < pos; i++)
8053  		seq_putc(m, ' ');
8054  	seq_puts(m, "^\n");
8055  }
8056  
tracing_err_log_seq_show(struct seq_file * m,void * v)8057  static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8058  {
8059  	struct tracing_log_err *err = v;
8060  
8061  	if (err) {
8062  		const char *err_text = err->info.errs[err->info.type];
8063  		u64 sec = err->info.ts;
8064  		u32 nsec;
8065  
8066  		nsec = do_div(sec, NSEC_PER_SEC);
8067  		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8068  			   err->loc, err_text);
8069  		seq_printf(m, "%s", err->cmd);
8070  		tracing_err_log_show_pos(m, err->info.pos);
8071  	}
8072  
8073  	return 0;
8074  }
8075  
8076  static const struct seq_operations tracing_err_log_seq_ops = {
8077  	.start  = tracing_err_log_seq_start,
8078  	.next   = tracing_err_log_seq_next,
8079  	.stop   = tracing_err_log_seq_stop,
8080  	.show   = tracing_err_log_seq_show
8081  };
8082  
tracing_err_log_open(struct inode * inode,struct file * file)8083  static int tracing_err_log_open(struct inode *inode, struct file *file)
8084  {
8085  	struct trace_array *tr = inode->i_private;
8086  	int ret = 0;
8087  
8088  	ret = tracing_check_open_get_tr(tr);
8089  	if (ret)
8090  		return ret;
8091  
8092  	/* If this file was opened for write, then erase contents */
8093  	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8094  		clear_tracing_err_log(tr);
8095  
8096  	if (file->f_mode & FMODE_READ) {
8097  		ret = seq_open(file, &tracing_err_log_seq_ops);
8098  		if (!ret) {
8099  			struct seq_file *m = file->private_data;
8100  			m->private = tr;
8101  		} else {
8102  			trace_array_put(tr);
8103  		}
8104  	}
8105  	return ret;
8106  }
8107  
tracing_err_log_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)8108  static ssize_t tracing_err_log_write(struct file *file,
8109  				     const char __user *buffer,
8110  				     size_t count, loff_t *ppos)
8111  {
8112  	return count;
8113  }
8114  
tracing_err_log_release(struct inode * inode,struct file * file)8115  static int tracing_err_log_release(struct inode *inode, struct file *file)
8116  {
8117  	struct trace_array *tr = inode->i_private;
8118  
8119  	trace_array_put(tr);
8120  
8121  	if (file->f_mode & FMODE_READ)
8122  		seq_release(inode, file);
8123  
8124  	return 0;
8125  }
8126  
8127  static const struct file_operations tracing_err_log_fops = {
8128  	.open           = tracing_err_log_open,
8129  	.write		= tracing_err_log_write,
8130  	.read           = seq_read,
8131  	.llseek         = tracing_lseek,
8132  	.release        = tracing_err_log_release,
8133  };
8134  
tracing_buffers_open(struct inode * inode,struct file * filp)8135  static int tracing_buffers_open(struct inode *inode, struct file *filp)
8136  {
8137  	struct trace_array *tr = inode->i_private;
8138  	struct ftrace_buffer_info *info;
8139  	int ret;
8140  
8141  	ret = tracing_check_open_get_tr(tr);
8142  	if (ret)
8143  		return ret;
8144  
8145  	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8146  	if (!info) {
8147  		trace_array_put(tr);
8148  		return -ENOMEM;
8149  	}
8150  
8151  	mutex_lock(&trace_types_lock);
8152  
8153  	info->iter.tr		= tr;
8154  	info->iter.cpu_file	= tracing_get_cpu(inode);
8155  	info->iter.trace	= tr->current_trace;
8156  	info->iter.array_buffer = &tr->array_buffer;
8157  	info->spare		= NULL;
8158  	/* Force reading ring buffer for first read */
8159  	info->read		= (unsigned int)-1;
8160  
8161  	filp->private_data = info;
8162  
8163  	tr->trace_ref++;
8164  
8165  	mutex_unlock(&trace_types_lock);
8166  
8167  	ret = nonseekable_open(inode, filp);
8168  	if (ret < 0)
8169  		trace_array_put(tr);
8170  
8171  	return ret;
8172  }
8173  
8174  static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)8175  tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8176  {
8177  	struct ftrace_buffer_info *info = filp->private_data;
8178  	struct trace_iterator *iter = &info->iter;
8179  
8180  	return trace_poll(iter, filp, poll_table);
8181  }
8182  
8183  static ssize_t
tracing_buffers_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8184  tracing_buffers_read(struct file *filp, char __user *ubuf,
8185  		     size_t count, loff_t *ppos)
8186  {
8187  	struct ftrace_buffer_info *info = filp->private_data;
8188  	struct trace_iterator *iter = &info->iter;
8189  	ssize_t ret = 0;
8190  	ssize_t size;
8191  
8192  	if (!count)
8193  		return 0;
8194  
8195  #ifdef CONFIG_TRACER_MAX_TRACE
8196  	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8197  		return -EBUSY;
8198  #endif
8199  
8200  	if (!info->spare) {
8201  		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8202  							  iter->cpu_file);
8203  		if (IS_ERR(info->spare)) {
8204  			ret = PTR_ERR(info->spare);
8205  			info->spare = NULL;
8206  		} else {
8207  			info->spare_cpu = iter->cpu_file;
8208  		}
8209  	}
8210  	if (!info->spare)
8211  		return ret;
8212  
8213  	/* Do we have previous read data to read? */
8214  	if (info->read < PAGE_SIZE)
8215  		goto read;
8216  
8217   again:
8218  	trace_access_lock(iter->cpu_file);
8219  	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8220  				    &info->spare,
8221  				    count,
8222  				    iter->cpu_file, 0);
8223  	trace_access_unlock(iter->cpu_file);
8224  
8225  	if (ret < 0) {
8226  		if (trace_empty(iter)) {
8227  			if ((filp->f_flags & O_NONBLOCK))
8228  				return -EAGAIN;
8229  
8230  			ret = wait_on_pipe(iter, 0);
8231  			if (ret)
8232  				return ret;
8233  
8234  			goto again;
8235  		}
8236  		return 0;
8237  	}
8238  
8239  	info->read = 0;
8240   read:
8241  	size = PAGE_SIZE - info->read;
8242  	if (size > count)
8243  		size = count;
8244  
8245  	ret = copy_to_user(ubuf, info->spare + info->read, size);
8246  	if (ret == size)
8247  		return -EFAULT;
8248  
8249  	size -= ret;
8250  
8251  	*ppos += size;
8252  	info->read += size;
8253  
8254  	return size;
8255  }
8256  
tracing_buffers_flush(struct file * file,fl_owner_t id)8257  static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8258  {
8259  	struct ftrace_buffer_info *info = file->private_data;
8260  	struct trace_iterator *iter = &info->iter;
8261  
8262  	iter->wait_index++;
8263  	/* Make sure the waiters see the new wait_index */
8264  	smp_wmb();
8265  
8266  	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8267  
8268  	return 0;
8269  }
8270  
tracing_buffers_release(struct inode * inode,struct file * file)8271  static int tracing_buffers_release(struct inode *inode, struct file *file)
8272  {
8273  	struct ftrace_buffer_info *info = file->private_data;
8274  	struct trace_iterator *iter = &info->iter;
8275  
8276  	mutex_lock(&trace_types_lock);
8277  
8278  	iter->tr->trace_ref--;
8279  
8280  	__trace_array_put(iter->tr);
8281  
8282  	if (info->spare)
8283  		ring_buffer_free_read_page(iter->array_buffer->buffer,
8284  					   info->spare_cpu, info->spare);
8285  	kvfree(info);
8286  
8287  	mutex_unlock(&trace_types_lock);
8288  
8289  	return 0;
8290  }
8291  
8292  struct buffer_ref {
8293  	struct trace_buffer	*buffer;
8294  	void			*page;
8295  	int			cpu;
8296  	refcount_t		refcount;
8297  };
8298  
buffer_ref_release(struct buffer_ref * ref)8299  static void buffer_ref_release(struct buffer_ref *ref)
8300  {
8301  	if (!refcount_dec_and_test(&ref->refcount))
8302  		return;
8303  	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8304  	kfree(ref);
8305  }
8306  
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8307  static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8308  				    struct pipe_buffer *buf)
8309  {
8310  	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8311  
8312  	buffer_ref_release(ref);
8313  	buf->private = 0;
8314  }
8315  
buffer_pipe_buf_get(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8316  static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8317  				struct pipe_buffer *buf)
8318  {
8319  	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8320  
8321  	if (refcount_read(&ref->refcount) > INT_MAX/2)
8322  		return false;
8323  
8324  	refcount_inc(&ref->refcount);
8325  	return true;
8326  }
8327  
8328  /* Pipe buffer operations for a buffer. */
8329  static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8330  	.release		= buffer_pipe_buf_release,
8331  	.get			= buffer_pipe_buf_get,
8332  };
8333  
8334  /*
8335   * Callback from splice_to_pipe(), if we need to release some pages
8336   * at the end of the spd in case we error'ed out in filling the pipe.
8337   */
buffer_spd_release(struct splice_pipe_desc * spd,unsigned int i)8338  static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8339  {
8340  	struct buffer_ref *ref =
8341  		(struct buffer_ref *)spd->partial[i].private;
8342  
8343  	buffer_ref_release(ref);
8344  	spd->partial[i].private = 0;
8345  }
8346  
8347  static ssize_t
tracing_buffers_splice_read(struct file * file,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)8348  tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8349  			    struct pipe_inode_info *pipe, size_t len,
8350  			    unsigned int flags)
8351  {
8352  	struct ftrace_buffer_info *info = file->private_data;
8353  	struct trace_iterator *iter = &info->iter;
8354  	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8355  	struct page *pages_def[PIPE_DEF_BUFFERS];
8356  	struct splice_pipe_desc spd = {
8357  		.pages		= pages_def,
8358  		.partial	= partial_def,
8359  		.nr_pages_max	= PIPE_DEF_BUFFERS,
8360  		.ops		= &buffer_pipe_buf_ops,
8361  		.spd_release	= buffer_spd_release,
8362  	};
8363  	struct buffer_ref *ref;
8364  	int entries, i;
8365  	ssize_t ret = 0;
8366  
8367  #ifdef CONFIG_TRACER_MAX_TRACE
8368  	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8369  		return -EBUSY;
8370  #endif
8371  
8372  	if (*ppos & (PAGE_SIZE - 1))
8373  		return -EINVAL;
8374  
8375  	if (len & (PAGE_SIZE - 1)) {
8376  		if (len < PAGE_SIZE)
8377  			return -EINVAL;
8378  		len &= PAGE_MASK;
8379  	}
8380  
8381  	if (splice_grow_spd(pipe, &spd))
8382  		return -ENOMEM;
8383  
8384   again:
8385  	trace_access_lock(iter->cpu_file);
8386  	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8387  
8388  	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8389  		struct page *page;
8390  		int r;
8391  
8392  		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8393  		if (!ref) {
8394  			ret = -ENOMEM;
8395  			break;
8396  		}
8397  
8398  		refcount_set(&ref->refcount, 1);
8399  		ref->buffer = iter->array_buffer->buffer;
8400  		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8401  		if (IS_ERR(ref->page)) {
8402  			ret = PTR_ERR(ref->page);
8403  			ref->page = NULL;
8404  			kfree(ref);
8405  			break;
8406  		}
8407  		ref->cpu = iter->cpu_file;
8408  
8409  		r = ring_buffer_read_page(ref->buffer, &ref->page,
8410  					  len, iter->cpu_file, 1);
8411  		if (r < 0) {
8412  			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8413  						   ref->page);
8414  			kfree(ref);
8415  			break;
8416  		}
8417  
8418  		page = virt_to_page(ref->page);
8419  
8420  		spd.pages[i] = page;
8421  		spd.partial[i].len = PAGE_SIZE;
8422  		spd.partial[i].offset = 0;
8423  		spd.partial[i].private = (unsigned long)ref;
8424  		spd.nr_pages++;
8425  		*ppos += PAGE_SIZE;
8426  
8427  		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8428  	}
8429  
8430  	trace_access_unlock(iter->cpu_file);
8431  	spd.nr_pages = i;
8432  
8433  	/* did we read anything? */
8434  	if (!spd.nr_pages) {
8435  		long wait_index;
8436  
8437  		if (ret)
8438  			goto out;
8439  
8440  		ret = -EAGAIN;
8441  		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8442  			goto out;
8443  
8444  		wait_index = READ_ONCE(iter->wait_index);
8445  
8446  		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8447  		if (ret)
8448  			goto out;
8449  
8450  		/* No need to wait after waking up when tracing is off */
8451  		if (!tracer_tracing_is_on(iter->tr))
8452  			goto out;
8453  
8454  		/* Make sure we see the new wait_index */
8455  		smp_rmb();
8456  		if (wait_index != iter->wait_index)
8457  			goto out;
8458  
8459  		goto again;
8460  	}
8461  
8462  	ret = splice_to_pipe(pipe, &spd);
8463  out:
8464  	splice_shrink_spd(&spd);
8465  
8466  	return ret;
8467  }
8468  
8469  /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
tracing_buffers_ioctl(struct file * file,unsigned int cmd,unsigned long arg)8470  static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8471  {
8472  	struct ftrace_buffer_info *info = file->private_data;
8473  	struct trace_iterator *iter = &info->iter;
8474  
8475  	if (cmd)
8476  		return -ENOIOCTLCMD;
8477  
8478  	mutex_lock(&trace_types_lock);
8479  
8480  	iter->wait_index++;
8481  	/* Make sure the waiters see the new wait_index */
8482  	smp_wmb();
8483  
8484  	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8485  
8486  	mutex_unlock(&trace_types_lock);
8487  	return 0;
8488  }
8489  
8490  static const struct file_operations tracing_buffers_fops = {
8491  	.open		= tracing_buffers_open,
8492  	.read		= tracing_buffers_read,
8493  	.poll		= tracing_buffers_poll,
8494  	.release	= tracing_buffers_release,
8495  	.flush		= tracing_buffers_flush,
8496  	.splice_read	= tracing_buffers_splice_read,
8497  	.unlocked_ioctl = tracing_buffers_ioctl,
8498  	.llseek		= no_llseek,
8499  };
8500  
8501  static ssize_t
tracing_stats_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8502  tracing_stats_read(struct file *filp, char __user *ubuf,
8503  		   size_t count, loff_t *ppos)
8504  {
8505  	struct inode *inode = file_inode(filp);
8506  	struct trace_array *tr = inode->i_private;
8507  	struct array_buffer *trace_buf = &tr->array_buffer;
8508  	int cpu = tracing_get_cpu(inode);
8509  	struct trace_seq *s;
8510  	unsigned long cnt;
8511  	unsigned long long t;
8512  	unsigned long usec_rem;
8513  
8514  	s = kmalloc(sizeof(*s), GFP_KERNEL);
8515  	if (!s)
8516  		return -ENOMEM;
8517  
8518  	trace_seq_init(s);
8519  
8520  	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8521  	trace_seq_printf(s, "entries: %ld\n", cnt);
8522  
8523  	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8524  	trace_seq_printf(s, "overrun: %ld\n", cnt);
8525  
8526  	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8527  	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8528  
8529  	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8530  	trace_seq_printf(s, "bytes: %ld\n", cnt);
8531  
8532  	if (trace_clocks[tr->clock_id].in_ns) {
8533  		/* local or global for trace_clock */
8534  		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8535  		usec_rem = do_div(t, USEC_PER_SEC);
8536  		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8537  								t, usec_rem);
8538  
8539  		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8540  		usec_rem = do_div(t, USEC_PER_SEC);
8541  		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8542  	} else {
8543  		/* counter or tsc mode for trace_clock */
8544  		trace_seq_printf(s, "oldest event ts: %llu\n",
8545  				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8546  
8547  		trace_seq_printf(s, "now ts: %llu\n",
8548  				ring_buffer_time_stamp(trace_buf->buffer));
8549  	}
8550  
8551  	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8552  	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8553  
8554  	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8555  	trace_seq_printf(s, "read events: %ld\n", cnt);
8556  
8557  	count = simple_read_from_buffer(ubuf, count, ppos,
8558  					s->buffer, trace_seq_used(s));
8559  
8560  	kfree(s);
8561  
8562  	return count;
8563  }
8564  
8565  static const struct file_operations tracing_stats_fops = {
8566  	.open		= tracing_open_generic_tr,
8567  	.read		= tracing_stats_read,
8568  	.llseek		= generic_file_llseek,
8569  	.release	= tracing_release_generic_tr,
8570  };
8571  
8572  #ifdef CONFIG_DYNAMIC_FTRACE
8573  
8574  static ssize_t
tracing_read_dyn_info(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8575  tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8576  		  size_t cnt, loff_t *ppos)
8577  {
8578  	ssize_t ret;
8579  	char *buf;
8580  	int r;
8581  
8582  	/* 256 should be plenty to hold the amount needed */
8583  	buf = kmalloc(256, GFP_KERNEL);
8584  	if (!buf)
8585  		return -ENOMEM;
8586  
8587  	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8588  		      ftrace_update_tot_cnt,
8589  		      ftrace_number_of_pages,
8590  		      ftrace_number_of_groups);
8591  
8592  	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8593  	kfree(buf);
8594  	return ret;
8595  }
8596  
8597  static const struct file_operations tracing_dyn_info_fops = {
8598  	.open		= tracing_open_generic,
8599  	.read		= tracing_read_dyn_info,
8600  	.llseek		= generic_file_llseek,
8601  };
8602  #endif /* CONFIG_DYNAMIC_FTRACE */
8603  
8604  #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8605  static void
ftrace_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8606  ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8607  		struct trace_array *tr, struct ftrace_probe_ops *ops,
8608  		void *data)
8609  {
8610  	tracing_snapshot_instance(tr);
8611  }
8612  
8613  static void
ftrace_count_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8614  ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8615  		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8616  		      void *data)
8617  {
8618  	struct ftrace_func_mapper *mapper = data;
8619  	long *count = NULL;
8620  
8621  	if (mapper)
8622  		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8623  
8624  	if (count) {
8625  
8626  		if (*count <= 0)
8627  			return;
8628  
8629  		(*count)--;
8630  	}
8631  
8632  	tracing_snapshot_instance(tr);
8633  }
8634  
8635  static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)8636  ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8637  		      struct ftrace_probe_ops *ops, void *data)
8638  {
8639  	struct ftrace_func_mapper *mapper = data;
8640  	long *count = NULL;
8641  
8642  	seq_printf(m, "%ps:", (void *)ip);
8643  
8644  	seq_puts(m, "snapshot");
8645  
8646  	if (mapper)
8647  		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8648  
8649  	if (count)
8650  		seq_printf(m, ":count=%ld\n", *count);
8651  	else
8652  		seq_puts(m, ":unlimited\n");
8653  
8654  	return 0;
8655  }
8656  
8657  static int
ftrace_snapshot_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)8658  ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8659  		     unsigned long ip, void *init_data, void **data)
8660  {
8661  	struct ftrace_func_mapper *mapper = *data;
8662  
8663  	if (!mapper) {
8664  		mapper = allocate_ftrace_func_mapper();
8665  		if (!mapper)
8666  			return -ENOMEM;
8667  		*data = mapper;
8668  	}
8669  
8670  	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8671  }
8672  
8673  static void
ftrace_snapshot_free(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * data)8674  ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8675  		     unsigned long ip, void *data)
8676  {
8677  	struct ftrace_func_mapper *mapper = data;
8678  
8679  	if (!ip) {
8680  		if (!mapper)
8681  			return;
8682  		free_ftrace_func_mapper(mapper, NULL);
8683  		return;
8684  	}
8685  
8686  	ftrace_func_mapper_remove_ip(mapper, ip);
8687  }
8688  
8689  static struct ftrace_probe_ops snapshot_probe_ops = {
8690  	.func			= ftrace_snapshot,
8691  	.print			= ftrace_snapshot_print,
8692  };
8693  
8694  static struct ftrace_probe_ops snapshot_count_probe_ops = {
8695  	.func			= ftrace_count_snapshot,
8696  	.print			= ftrace_snapshot_print,
8697  	.init			= ftrace_snapshot_init,
8698  	.free			= ftrace_snapshot_free,
8699  };
8700  
8701  static int
ftrace_trace_snapshot_callback(struct trace_array * tr,struct ftrace_hash * hash,char * glob,char * cmd,char * param,int enable)8702  ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8703  			       char *glob, char *cmd, char *param, int enable)
8704  {
8705  	struct ftrace_probe_ops *ops;
8706  	void *count = (void *)-1;
8707  	char *number;
8708  	int ret;
8709  
8710  	if (!tr)
8711  		return -ENODEV;
8712  
8713  	/* hash funcs only work with set_ftrace_filter */
8714  	if (!enable)
8715  		return -EINVAL;
8716  
8717  	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8718  
8719  	if (glob[0] == '!')
8720  		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8721  
8722  	if (!param)
8723  		goto out_reg;
8724  
8725  	number = strsep(&param, ":");
8726  
8727  	if (!strlen(number))
8728  		goto out_reg;
8729  
8730  	/*
8731  	 * We use the callback data field (which is a pointer)
8732  	 * as our counter.
8733  	 */
8734  	ret = kstrtoul(number, 0, (unsigned long *)&count);
8735  	if (ret)
8736  		return ret;
8737  
8738   out_reg:
8739  	ret = tracing_alloc_snapshot_instance(tr);
8740  	if (ret < 0)
8741  		goto out;
8742  
8743  	ret = register_ftrace_function_probe(glob, tr, ops, count);
8744  
8745   out:
8746  	return ret < 0 ? ret : 0;
8747  }
8748  
8749  static struct ftrace_func_command ftrace_snapshot_cmd = {
8750  	.name			= "snapshot",
8751  	.func			= ftrace_trace_snapshot_callback,
8752  };
8753  
register_snapshot_cmd(void)8754  static __init int register_snapshot_cmd(void)
8755  {
8756  	return register_ftrace_command(&ftrace_snapshot_cmd);
8757  }
8758  #else
register_snapshot_cmd(void)8759  static inline __init int register_snapshot_cmd(void) { return 0; }
8760  #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8761  
tracing_get_dentry(struct trace_array * tr)8762  static struct dentry *tracing_get_dentry(struct trace_array *tr)
8763  {
8764  	if (WARN_ON(!tr->dir))
8765  		return ERR_PTR(-ENODEV);
8766  
8767  	/* Top directory uses NULL as the parent */
8768  	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8769  		return NULL;
8770  
8771  	/* All sub buffers have a descriptor */
8772  	return tr->dir;
8773  }
8774  
tracing_dentry_percpu(struct trace_array * tr,int cpu)8775  static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8776  {
8777  	struct dentry *d_tracer;
8778  
8779  	if (tr->percpu_dir)
8780  		return tr->percpu_dir;
8781  
8782  	d_tracer = tracing_get_dentry(tr);
8783  	if (IS_ERR(d_tracer))
8784  		return NULL;
8785  
8786  	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8787  
8788  	MEM_FAIL(!tr->percpu_dir,
8789  		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8790  
8791  	return tr->percpu_dir;
8792  }
8793  
8794  static struct dentry *
trace_create_cpu_file(const char * name,umode_t mode,struct dentry * parent,void * data,long cpu,const struct file_operations * fops)8795  trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8796  		      void *data, long cpu, const struct file_operations *fops)
8797  {
8798  	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8799  
8800  	if (ret) /* See tracing_get_cpu() */
8801  		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8802  	return ret;
8803  }
8804  
8805  static void
tracing_init_tracefs_percpu(struct trace_array * tr,long cpu)8806  tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8807  {
8808  	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8809  	struct dentry *d_cpu;
8810  	char cpu_dir[30]; /* 30 characters should be more than enough */
8811  
8812  	if (!d_percpu)
8813  		return;
8814  
8815  	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8816  	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8817  	if (!d_cpu) {
8818  		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8819  		return;
8820  	}
8821  
8822  	/* per cpu trace_pipe */
8823  	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8824  				tr, cpu, &tracing_pipe_fops);
8825  
8826  	/* per cpu trace */
8827  	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8828  				tr, cpu, &tracing_fops);
8829  
8830  	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8831  				tr, cpu, &tracing_buffers_fops);
8832  
8833  	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8834  				tr, cpu, &tracing_stats_fops);
8835  
8836  	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8837  				tr, cpu, &tracing_entries_fops);
8838  
8839  #ifdef CONFIG_TRACER_SNAPSHOT
8840  	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8841  				tr, cpu, &snapshot_fops);
8842  
8843  	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8844  				tr, cpu, &snapshot_raw_fops);
8845  #endif
8846  }
8847  
8848  #ifdef CONFIG_FTRACE_SELFTEST
8849  /* Let selftest have access to static functions in this file */
8850  #include "trace_selftest.c"
8851  #endif
8852  
8853  static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8854  trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8855  			loff_t *ppos)
8856  {
8857  	struct trace_option_dentry *topt = filp->private_data;
8858  	char *buf;
8859  
8860  	if (topt->flags->val & topt->opt->bit)
8861  		buf = "1\n";
8862  	else
8863  		buf = "0\n";
8864  
8865  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8866  }
8867  
8868  static ssize_t
trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8869  trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8870  			 loff_t *ppos)
8871  {
8872  	struct trace_option_dentry *topt = filp->private_data;
8873  	unsigned long val;
8874  	int ret;
8875  
8876  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8877  	if (ret)
8878  		return ret;
8879  
8880  	if (val != 0 && val != 1)
8881  		return -EINVAL;
8882  
8883  	if (!!(topt->flags->val & topt->opt->bit) != val) {
8884  		mutex_lock(&trace_types_lock);
8885  		ret = __set_tracer_option(topt->tr, topt->flags,
8886  					  topt->opt, !val);
8887  		mutex_unlock(&trace_types_lock);
8888  		if (ret)
8889  			return ret;
8890  	}
8891  
8892  	*ppos += cnt;
8893  
8894  	return cnt;
8895  }
8896  
tracing_open_options(struct inode * inode,struct file * filp)8897  static int tracing_open_options(struct inode *inode, struct file *filp)
8898  {
8899  	struct trace_option_dentry *topt = inode->i_private;
8900  	int ret;
8901  
8902  	ret = tracing_check_open_get_tr(topt->tr);
8903  	if (ret)
8904  		return ret;
8905  
8906  	filp->private_data = inode->i_private;
8907  	return 0;
8908  }
8909  
tracing_release_options(struct inode * inode,struct file * file)8910  static int tracing_release_options(struct inode *inode, struct file *file)
8911  {
8912  	struct trace_option_dentry *topt = file->private_data;
8913  
8914  	trace_array_put(topt->tr);
8915  	return 0;
8916  }
8917  
8918  static const struct file_operations trace_options_fops = {
8919  	.open = tracing_open_options,
8920  	.read = trace_options_read,
8921  	.write = trace_options_write,
8922  	.llseek	= generic_file_llseek,
8923  	.release = tracing_release_options,
8924  };
8925  
8926  /*
8927   * In order to pass in both the trace_array descriptor as well as the index
8928   * to the flag that the trace option file represents, the trace_array
8929   * has a character array of trace_flags_index[], which holds the index
8930   * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8931   * The address of this character array is passed to the flag option file
8932   * read/write callbacks.
8933   *
8934   * In order to extract both the index and the trace_array descriptor,
8935   * get_tr_index() uses the following algorithm.
8936   *
8937   *   idx = *ptr;
8938   *
8939   * As the pointer itself contains the address of the index (remember
8940   * index[1] == 1).
8941   *
8942   * Then to get the trace_array descriptor, by subtracting that index
8943   * from the ptr, we get to the start of the index itself.
8944   *
8945   *   ptr - idx == &index[0]
8946   *
8947   * Then a simple container_of() from that pointer gets us to the
8948   * trace_array descriptor.
8949   */
get_tr_index(void * data,struct trace_array ** ptr,unsigned int * pindex)8950  static void get_tr_index(void *data, struct trace_array **ptr,
8951  			 unsigned int *pindex)
8952  {
8953  	*pindex = *(unsigned char *)data;
8954  
8955  	*ptr = container_of(data - *pindex, struct trace_array,
8956  			    trace_flags_index);
8957  }
8958  
8959  static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8960  trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8961  			loff_t *ppos)
8962  {
8963  	void *tr_index = filp->private_data;
8964  	struct trace_array *tr;
8965  	unsigned int index;
8966  	char *buf;
8967  
8968  	get_tr_index(tr_index, &tr, &index);
8969  
8970  	if (tr->trace_flags & (1 << index))
8971  		buf = "1\n";
8972  	else
8973  		buf = "0\n";
8974  
8975  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8976  }
8977  
8978  static ssize_t
trace_options_core_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8979  trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8980  			 loff_t *ppos)
8981  {
8982  	void *tr_index = filp->private_data;
8983  	struct trace_array *tr;
8984  	unsigned int index;
8985  	unsigned long val;
8986  	int ret;
8987  
8988  	get_tr_index(tr_index, &tr, &index);
8989  
8990  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8991  	if (ret)
8992  		return ret;
8993  
8994  	if (val != 0 && val != 1)
8995  		return -EINVAL;
8996  
8997  	mutex_lock(&event_mutex);
8998  	mutex_lock(&trace_types_lock);
8999  	ret = set_tracer_flag(tr, 1 << index, val);
9000  	mutex_unlock(&trace_types_lock);
9001  	mutex_unlock(&event_mutex);
9002  
9003  	if (ret < 0)
9004  		return ret;
9005  
9006  	*ppos += cnt;
9007  
9008  	return cnt;
9009  }
9010  
9011  static const struct file_operations trace_options_core_fops = {
9012  	.open = tracing_open_generic,
9013  	.read = trace_options_core_read,
9014  	.write = trace_options_core_write,
9015  	.llseek = generic_file_llseek,
9016  };
9017  
trace_create_file(const char * name,umode_t mode,struct dentry * parent,void * data,const struct file_operations * fops)9018  struct dentry *trace_create_file(const char *name,
9019  				 umode_t mode,
9020  				 struct dentry *parent,
9021  				 void *data,
9022  				 const struct file_operations *fops)
9023  {
9024  	struct dentry *ret;
9025  
9026  	ret = tracefs_create_file(name, mode, parent, data, fops);
9027  	if (!ret)
9028  		pr_warn("Could not create tracefs '%s' entry\n", name);
9029  
9030  	return ret;
9031  }
9032  
9033  
trace_options_init_dentry(struct trace_array * tr)9034  static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9035  {
9036  	struct dentry *d_tracer;
9037  
9038  	if (tr->options)
9039  		return tr->options;
9040  
9041  	d_tracer = tracing_get_dentry(tr);
9042  	if (IS_ERR(d_tracer))
9043  		return NULL;
9044  
9045  	tr->options = tracefs_create_dir("options", d_tracer);
9046  	if (!tr->options) {
9047  		pr_warn("Could not create tracefs directory 'options'\n");
9048  		return NULL;
9049  	}
9050  
9051  	return tr->options;
9052  }
9053  
9054  static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)9055  create_trace_option_file(struct trace_array *tr,
9056  			 struct trace_option_dentry *topt,
9057  			 struct tracer_flags *flags,
9058  			 struct tracer_opt *opt)
9059  {
9060  	struct dentry *t_options;
9061  
9062  	t_options = trace_options_init_dentry(tr);
9063  	if (!t_options)
9064  		return;
9065  
9066  	topt->flags = flags;
9067  	topt->opt = opt;
9068  	topt->tr = tr;
9069  
9070  	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9071  					t_options, topt, &trace_options_fops);
9072  
9073  }
9074  
9075  static void
create_trace_option_files(struct trace_array * tr,struct tracer * tracer)9076  create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9077  {
9078  	struct trace_option_dentry *topts;
9079  	struct trace_options *tr_topts;
9080  	struct tracer_flags *flags;
9081  	struct tracer_opt *opts;
9082  	int cnt;
9083  	int i;
9084  
9085  	if (!tracer)
9086  		return;
9087  
9088  	flags = tracer->flags;
9089  
9090  	if (!flags || !flags->opts)
9091  		return;
9092  
9093  	/*
9094  	 * If this is an instance, only create flags for tracers
9095  	 * the instance may have.
9096  	 */
9097  	if (!trace_ok_for_array(tracer, tr))
9098  		return;
9099  
9100  	for (i = 0; i < tr->nr_topts; i++) {
9101  		/* Make sure there's no duplicate flags. */
9102  		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9103  			return;
9104  	}
9105  
9106  	opts = flags->opts;
9107  
9108  	for (cnt = 0; opts[cnt].name; cnt++)
9109  		;
9110  
9111  	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9112  	if (!topts)
9113  		return;
9114  
9115  	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9116  			    GFP_KERNEL);
9117  	if (!tr_topts) {
9118  		kfree(topts);
9119  		return;
9120  	}
9121  
9122  	tr->topts = tr_topts;
9123  	tr->topts[tr->nr_topts].tracer = tracer;
9124  	tr->topts[tr->nr_topts].topts = topts;
9125  	tr->nr_topts++;
9126  
9127  	for (cnt = 0; opts[cnt].name; cnt++) {
9128  		create_trace_option_file(tr, &topts[cnt], flags,
9129  					 &opts[cnt]);
9130  		MEM_FAIL(topts[cnt].entry == NULL,
9131  			  "Failed to create trace option: %s",
9132  			  opts[cnt].name);
9133  	}
9134  }
9135  
9136  static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)9137  create_trace_option_core_file(struct trace_array *tr,
9138  			      const char *option, long index)
9139  {
9140  	struct dentry *t_options;
9141  
9142  	t_options = trace_options_init_dentry(tr);
9143  	if (!t_options)
9144  		return NULL;
9145  
9146  	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9147  				 (void *)&tr->trace_flags_index[index],
9148  				 &trace_options_core_fops);
9149  }
9150  
create_trace_options_dir(struct trace_array * tr)9151  static void create_trace_options_dir(struct trace_array *tr)
9152  {
9153  	struct dentry *t_options;
9154  	bool top_level = tr == &global_trace;
9155  	int i;
9156  
9157  	t_options = trace_options_init_dentry(tr);
9158  	if (!t_options)
9159  		return;
9160  
9161  	for (i = 0; trace_options[i]; i++) {
9162  		if (top_level ||
9163  		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9164  			create_trace_option_core_file(tr, trace_options[i], i);
9165  	}
9166  }
9167  
9168  static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9169  rb_simple_read(struct file *filp, char __user *ubuf,
9170  	       size_t cnt, loff_t *ppos)
9171  {
9172  	struct trace_array *tr = filp->private_data;
9173  	char buf[64];
9174  	int r;
9175  
9176  	r = tracer_tracing_is_on(tr);
9177  	r = sprintf(buf, "%d\n", r);
9178  
9179  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9180  }
9181  
9182  static ssize_t
rb_simple_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9183  rb_simple_write(struct file *filp, const char __user *ubuf,
9184  		size_t cnt, loff_t *ppos)
9185  {
9186  	struct trace_array *tr = filp->private_data;
9187  	struct trace_buffer *buffer = tr->array_buffer.buffer;
9188  	unsigned long val;
9189  	int ret;
9190  
9191  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9192  	if (ret)
9193  		return ret;
9194  
9195  	if (buffer) {
9196  		mutex_lock(&trace_types_lock);
9197  		if (!!val == tracer_tracing_is_on(tr)) {
9198  			val = 0; /* do nothing */
9199  		} else if (val) {
9200  			tracer_tracing_on(tr);
9201  			if (tr->current_trace->start)
9202  				tr->current_trace->start(tr);
9203  		} else {
9204  			tracer_tracing_off(tr);
9205  			if (tr->current_trace->stop)
9206  				tr->current_trace->stop(tr);
9207  			/* Wake up any waiters */
9208  			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9209  		}
9210  		mutex_unlock(&trace_types_lock);
9211  	}
9212  
9213  	(*ppos)++;
9214  
9215  	return cnt;
9216  }
9217  
9218  static const struct file_operations rb_simple_fops = {
9219  	.open		= tracing_open_generic_tr,
9220  	.read		= rb_simple_read,
9221  	.write		= rb_simple_write,
9222  	.release	= tracing_release_generic_tr,
9223  	.llseek		= default_llseek,
9224  };
9225  
9226  static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9227  buffer_percent_read(struct file *filp, char __user *ubuf,
9228  		    size_t cnt, loff_t *ppos)
9229  {
9230  	struct trace_array *tr = filp->private_data;
9231  	char buf[64];
9232  	int r;
9233  
9234  	r = tr->buffer_percent;
9235  	r = sprintf(buf, "%d\n", r);
9236  
9237  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9238  }
9239  
9240  static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9241  buffer_percent_write(struct file *filp, const char __user *ubuf,
9242  		     size_t cnt, loff_t *ppos)
9243  {
9244  	struct trace_array *tr = filp->private_data;
9245  	unsigned long val;
9246  	int ret;
9247  
9248  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9249  	if (ret)
9250  		return ret;
9251  
9252  	if (val > 100)
9253  		return -EINVAL;
9254  
9255  	tr->buffer_percent = val;
9256  
9257  	(*ppos)++;
9258  
9259  	return cnt;
9260  }
9261  
9262  static const struct file_operations buffer_percent_fops = {
9263  	.open		= tracing_open_generic_tr,
9264  	.read		= buffer_percent_read,
9265  	.write		= buffer_percent_write,
9266  	.release	= tracing_release_generic_tr,
9267  	.llseek		= default_llseek,
9268  };
9269  
9270  static struct dentry *trace_instance_dir;
9271  
9272  static void
9273  init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9274  
9275  static int
allocate_trace_buffer(struct trace_array * tr,struct array_buffer * buf,int size)9276  allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9277  {
9278  	enum ring_buffer_flags rb_flags;
9279  
9280  	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9281  
9282  	buf->tr = tr;
9283  
9284  	buf->buffer = ring_buffer_alloc(size, rb_flags);
9285  	if (!buf->buffer)
9286  		return -ENOMEM;
9287  
9288  	buf->data = alloc_percpu(struct trace_array_cpu);
9289  	if (!buf->data) {
9290  		ring_buffer_free(buf->buffer);
9291  		buf->buffer = NULL;
9292  		return -ENOMEM;
9293  	}
9294  
9295  	/* Allocate the first page for all buffers */
9296  	set_buffer_entries(&tr->array_buffer,
9297  			   ring_buffer_size(tr->array_buffer.buffer, 0));
9298  
9299  	return 0;
9300  }
9301  
free_trace_buffer(struct array_buffer * buf)9302  static void free_trace_buffer(struct array_buffer *buf)
9303  {
9304  	if (buf->buffer) {
9305  		ring_buffer_free(buf->buffer);
9306  		buf->buffer = NULL;
9307  		free_percpu(buf->data);
9308  		buf->data = NULL;
9309  	}
9310  }
9311  
allocate_trace_buffers(struct trace_array * tr,int size)9312  static int allocate_trace_buffers(struct trace_array *tr, int size)
9313  {
9314  	int ret;
9315  
9316  	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9317  	if (ret)
9318  		return ret;
9319  
9320  #ifdef CONFIG_TRACER_MAX_TRACE
9321  	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9322  				    allocate_snapshot ? size : 1);
9323  	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9324  		free_trace_buffer(&tr->array_buffer);
9325  		return -ENOMEM;
9326  	}
9327  	tr->allocated_snapshot = allocate_snapshot;
9328  
9329  	allocate_snapshot = false;
9330  #endif
9331  
9332  	return 0;
9333  }
9334  
free_trace_buffers(struct trace_array * tr)9335  static void free_trace_buffers(struct trace_array *tr)
9336  {
9337  	if (!tr)
9338  		return;
9339  
9340  	free_trace_buffer(&tr->array_buffer);
9341  
9342  #ifdef CONFIG_TRACER_MAX_TRACE
9343  	free_trace_buffer(&tr->max_buffer);
9344  #endif
9345  }
9346  
init_trace_flags_index(struct trace_array * tr)9347  static void init_trace_flags_index(struct trace_array *tr)
9348  {
9349  	int i;
9350  
9351  	/* Used by the trace options files */
9352  	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9353  		tr->trace_flags_index[i] = i;
9354  }
9355  
__update_tracer_options(struct trace_array * tr)9356  static void __update_tracer_options(struct trace_array *tr)
9357  {
9358  	struct tracer *t;
9359  
9360  	for (t = trace_types; t; t = t->next)
9361  		add_tracer_options(tr, t);
9362  }
9363  
update_tracer_options(struct trace_array * tr)9364  static void update_tracer_options(struct trace_array *tr)
9365  {
9366  	mutex_lock(&trace_types_lock);
9367  	tracer_options_updated = true;
9368  	__update_tracer_options(tr);
9369  	mutex_unlock(&trace_types_lock);
9370  }
9371  
9372  /* Must have trace_types_lock held */
trace_array_find(const char * instance)9373  struct trace_array *trace_array_find(const char *instance)
9374  {
9375  	struct trace_array *tr, *found = NULL;
9376  
9377  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9378  		if (tr->name && strcmp(tr->name, instance) == 0) {
9379  			found = tr;
9380  			break;
9381  		}
9382  	}
9383  
9384  	return found;
9385  }
9386  
trace_array_find_get(const char * instance)9387  struct trace_array *trace_array_find_get(const char *instance)
9388  {
9389  	struct trace_array *tr;
9390  
9391  	mutex_lock(&trace_types_lock);
9392  	tr = trace_array_find(instance);
9393  	if (tr)
9394  		tr->ref++;
9395  	mutex_unlock(&trace_types_lock);
9396  
9397  	return tr;
9398  }
9399  
trace_array_create_dir(struct trace_array * tr)9400  static int trace_array_create_dir(struct trace_array *tr)
9401  {
9402  	int ret;
9403  
9404  	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9405  	if (!tr->dir)
9406  		return -EINVAL;
9407  
9408  	ret = event_trace_add_tracer(tr->dir, tr);
9409  	if (ret) {
9410  		tracefs_remove(tr->dir);
9411  		return ret;
9412  	}
9413  
9414  	init_tracer_tracefs(tr, tr->dir);
9415  	__update_tracer_options(tr);
9416  
9417  	return ret;
9418  }
9419  
trace_array_create(const char * name)9420  static struct trace_array *trace_array_create(const char *name)
9421  {
9422  	struct trace_array *tr;
9423  	int ret;
9424  
9425  	ret = -ENOMEM;
9426  	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9427  	if (!tr)
9428  		return ERR_PTR(ret);
9429  
9430  	tr->name = kstrdup(name, GFP_KERNEL);
9431  	if (!tr->name)
9432  		goto out_free_tr;
9433  
9434  	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9435  		goto out_free_tr;
9436  
9437  	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9438  		goto out_free_tr;
9439  
9440  	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9441  
9442  	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9443  
9444  	raw_spin_lock_init(&tr->start_lock);
9445  
9446  	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9447  
9448  	tr->current_trace = &nop_trace;
9449  
9450  	INIT_LIST_HEAD(&tr->systems);
9451  	INIT_LIST_HEAD(&tr->events);
9452  	INIT_LIST_HEAD(&tr->hist_vars);
9453  	INIT_LIST_HEAD(&tr->err_log);
9454  
9455  	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9456  		goto out_free_tr;
9457  
9458  	if (ftrace_allocate_ftrace_ops(tr) < 0)
9459  		goto out_free_tr;
9460  
9461  	ftrace_init_trace_array(tr);
9462  
9463  	init_trace_flags_index(tr);
9464  
9465  	if (trace_instance_dir) {
9466  		ret = trace_array_create_dir(tr);
9467  		if (ret)
9468  			goto out_free_tr;
9469  	} else
9470  		__trace_early_add_events(tr);
9471  
9472  	list_add(&tr->list, &ftrace_trace_arrays);
9473  
9474  	tr->ref++;
9475  
9476  	return tr;
9477  
9478   out_free_tr:
9479  	ftrace_free_ftrace_ops(tr);
9480  	free_trace_buffers(tr);
9481  	free_cpumask_var(tr->pipe_cpumask);
9482  	free_cpumask_var(tr->tracing_cpumask);
9483  	kfree(tr->name);
9484  	kfree(tr);
9485  
9486  	return ERR_PTR(ret);
9487  }
9488  
instance_mkdir(const char * name)9489  static int instance_mkdir(const char *name)
9490  {
9491  	struct trace_array *tr;
9492  	int ret;
9493  
9494  	mutex_lock(&event_mutex);
9495  	mutex_lock(&trace_types_lock);
9496  
9497  	ret = -EEXIST;
9498  	if (trace_array_find(name))
9499  		goto out_unlock;
9500  
9501  	tr = trace_array_create(name);
9502  
9503  	ret = PTR_ERR_OR_ZERO(tr);
9504  
9505  out_unlock:
9506  	mutex_unlock(&trace_types_lock);
9507  	mutex_unlock(&event_mutex);
9508  	return ret;
9509  }
9510  
9511  /**
9512   * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9513   * @name: The name of the trace array to be looked up/created.
9514   *
9515   * Returns pointer to trace array with given name.
9516   * NULL, if it cannot be created.
9517   *
9518   * NOTE: This function increments the reference counter associated with the
9519   * trace array returned. This makes sure it cannot be freed while in use.
9520   * Use trace_array_put() once the trace array is no longer needed.
9521   * If the trace_array is to be freed, trace_array_destroy() needs to
9522   * be called after the trace_array_put(), or simply let user space delete
9523   * it from the tracefs instances directory. But until the
9524   * trace_array_put() is called, user space can not delete it.
9525   *
9526   */
trace_array_get_by_name(const char * name)9527  struct trace_array *trace_array_get_by_name(const char *name)
9528  {
9529  	struct trace_array *tr;
9530  
9531  	mutex_lock(&event_mutex);
9532  	mutex_lock(&trace_types_lock);
9533  
9534  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9535  		if (tr->name && strcmp(tr->name, name) == 0)
9536  			goto out_unlock;
9537  	}
9538  
9539  	tr = trace_array_create(name);
9540  
9541  	if (IS_ERR(tr))
9542  		tr = NULL;
9543  out_unlock:
9544  	if (tr)
9545  		tr->ref++;
9546  
9547  	mutex_unlock(&trace_types_lock);
9548  	mutex_unlock(&event_mutex);
9549  	return tr;
9550  }
9551  EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9552  
__remove_instance(struct trace_array * tr)9553  static int __remove_instance(struct trace_array *tr)
9554  {
9555  	int i;
9556  
9557  	/* Reference counter for a newly created trace array = 1. */
9558  	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9559  		return -EBUSY;
9560  
9561  	list_del(&tr->list);
9562  
9563  	/* Disable all the flags that were enabled coming in */
9564  	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9565  		if ((1 << i) & ZEROED_TRACE_FLAGS)
9566  			set_tracer_flag(tr, 1 << i, 0);
9567  	}
9568  
9569  	tracing_set_nop(tr);
9570  	clear_ftrace_function_probes(tr);
9571  	event_trace_del_tracer(tr);
9572  	ftrace_clear_pids(tr);
9573  	ftrace_destroy_function_files(tr);
9574  	tracefs_remove(tr->dir);
9575  	free_percpu(tr->last_func_repeats);
9576  	free_trace_buffers(tr);
9577  	clear_tracing_err_log(tr);
9578  
9579  	for (i = 0; i < tr->nr_topts; i++) {
9580  		kfree(tr->topts[i].topts);
9581  	}
9582  	kfree(tr->topts);
9583  
9584  	free_cpumask_var(tr->pipe_cpumask);
9585  	free_cpumask_var(tr->tracing_cpumask);
9586  	kfree(tr->name);
9587  	kfree(tr);
9588  
9589  	return 0;
9590  }
9591  
trace_array_destroy(struct trace_array * this_tr)9592  int trace_array_destroy(struct trace_array *this_tr)
9593  {
9594  	struct trace_array *tr;
9595  	int ret;
9596  
9597  	if (!this_tr)
9598  		return -EINVAL;
9599  
9600  	mutex_lock(&event_mutex);
9601  	mutex_lock(&trace_types_lock);
9602  
9603  	ret = -ENODEV;
9604  
9605  	/* Making sure trace array exists before destroying it. */
9606  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9607  		if (tr == this_tr) {
9608  			ret = __remove_instance(tr);
9609  			break;
9610  		}
9611  	}
9612  
9613  	mutex_unlock(&trace_types_lock);
9614  	mutex_unlock(&event_mutex);
9615  
9616  	return ret;
9617  }
9618  EXPORT_SYMBOL_GPL(trace_array_destroy);
9619  
instance_rmdir(const char * name)9620  static int instance_rmdir(const char *name)
9621  {
9622  	struct trace_array *tr;
9623  	int ret;
9624  
9625  	mutex_lock(&event_mutex);
9626  	mutex_lock(&trace_types_lock);
9627  
9628  	ret = -ENODEV;
9629  	tr = trace_array_find(name);
9630  	if (tr)
9631  		ret = __remove_instance(tr);
9632  
9633  	mutex_unlock(&trace_types_lock);
9634  	mutex_unlock(&event_mutex);
9635  
9636  	return ret;
9637  }
9638  
create_trace_instances(struct dentry * d_tracer)9639  static __init void create_trace_instances(struct dentry *d_tracer)
9640  {
9641  	struct trace_array *tr;
9642  
9643  	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9644  							 instance_mkdir,
9645  							 instance_rmdir);
9646  	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9647  		return;
9648  
9649  	mutex_lock(&event_mutex);
9650  	mutex_lock(&trace_types_lock);
9651  
9652  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9653  		if (!tr->name)
9654  			continue;
9655  		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9656  			     "Failed to create instance directory\n"))
9657  			break;
9658  	}
9659  
9660  	mutex_unlock(&trace_types_lock);
9661  	mutex_unlock(&event_mutex);
9662  }
9663  
9664  static void
init_tracer_tracefs(struct trace_array * tr,struct dentry * d_tracer)9665  init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9666  {
9667  	int cpu;
9668  
9669  	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9670  			tr, &show_traces_fops);
9671  
9672  	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9673  			tr, &set_tracer_fops);
9674  
9675  	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9676  			  tr, &tracing_cpumask_fops);
9677  
9678  	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9679  			  tr, &tracing_iter_fops);
9680  
9681  	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9682  			  tr, &tracing_fops);
9683  
9684  	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9685  			  tr, &tracing_pipe_fops);
9686  
9687  	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9688  			  tr, &tracing_entries_fops);
9689  
9690  	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9691  			  tr, &tracing_total_entries_fops);
9692  
9693  	trace_create_file("free_buffer", 0200, d_tracer,
9694  			  tr, &tracing_free_buffer_fops);
9695  
9696  	trace_create_file("trace_marker", 0220, d_tracer,
9697  			  tr, &tracing_mark_fops);
9698  
9699  	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9700  
9701  	trace_create_file("trace_marker_raw", 0220, d_tracer,
9702  			  tr, &tracing_mark_raw_fops);
9703  
9704  	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9705  			  &trace_clock_fops);
9706  
9707  	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9708  			  tr, &rb_simple_fops);
9709  
9710  	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9711  			  &trace_time_stamp_mode_fops);
9712  
9713  	tr->buffer_percent = 50;
9714  
9715  	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9716  			tr, &buffer_percent_fops);
9717  
9718  	create_trace_options_dir(tr);
9719  
9720  #ifdef CONFIG_TRACER_MAX_TRACE
9721  	trace_create_maxlat_file(tr, d_tracer);
9722  #endif
9723  
9724  	if (ftrace_create_function_files(tr, d_tracer))
9725  		MEM_FAIL(1, "Could not allocate function filter files");
9726  
9727  #ifdef CONFIG_TRACER_SNAPSHOT
9728  	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9729  			  tr, &snapshot_fops);
9730  #endif
9731  
9732  	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9733  			  tr, &tracing_err_log_fops);
9734  
9735  	for_each_tracing_cpu(cpu)
9736  		tracing_init_tracefs_percpu(tr, cpu);
9737  
9738  	ftrace_init_tracefs(tr, d_tracer);
9739  }
9740  
trace_automount(struct dentry * mntpt,void * ingore)9741  static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9742  {
9743  	struct vfsmount *mnt;
9744  	struct file_system_type *type;
9745  
9746  	/*
9747  	 * To maintain backward compatibility for tools that mount
9748  	 * debugfs to get to the tracing facility, tracefs is automatically
9749  	 * mounted to the debugfs/tracing directory.
9750  	 */
9751  	type = get_fs_type("tracefs");
9752  	if (!type)
9753  		return NULL;
9754  	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9755  	put_filesystem(type);
9756  	if (IS_ERR(mnt))
9757  		return NULL;
9758  	mntget(mnt);
9759  
9760  	return mnt;
9761  }
9762  
9763  /**
9764   * tracing_init_dentry - initialize top level trace array
9765   *
9766   * This is called when creating files or directories in the tracing
9767   * directory. It is called via fs_initcall() by any of the boot up code
9768   * and expects to return the dentry of the top level tracing directory.
9769   */
tracing_init_dentry(void)9770  int tracing_init_dentry(void)
9771  {
9772  	struct trace_array *tr = &global_trace;
9773  
9774  	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9775  		pr_warn("Tracing disabled due to lockdown\n");
9776  		return -EPERM;
9777  	}
9778  
9779  	/* The top level trace array uses  NULL as parent */
9780  	if (tr->dir)
9781  		return 0;
9782  
9783  	if (WARN_ON(!tracefs_initialized()))
9784  		return -ENODEV;
9785  
9786  	/*
9787  	 * As there may still be users that expect the tracing
9788  	 * files to exist in debugfs/tracing, we must automount
9789  	 * the tracefs file system there, so older tools still
9790  	 * work with the newer kernel.
9791  	 */
9792  	tr->dir = debugfs_create_automount("tracing", NULL,
9793  					   trace_automount, NULL);
9794  
9795  	return 0;
9796  }
9797  
9798  extern struct trace_eval_map *__start_ftrace_eval_maps[];
9799  extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9800  
9801  static struct workqueue_struct *eval_map_wq __initdata;
9802  static struct work_struct eval_map_work __initdata;
9803  static struct work_struct tracerfs_init_work __initdata;
9804  
eval_map_work_func(struct work_struct * work)9805  static void __init eval_map_work_func(struct work_struct *work)
9806  {
9807  	int len;
9808  
9809  	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9810  	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9811  }
9812  
trace_eval_init(void)9813  static int __init trace_eval_init(void)
9814  {
9815  	INIT_WORK(&eval_map_work, eval_map_work_func);
9816  
9817  	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9818  	if (!eval_map_wq) {
9819  		pr_err("Unable to allocate eval_map_wq\n");
9820  		/* Do work here */
9821  		eval_map_work_func(&eval_map_work);
9822  		return -ENOMEM;
9823  	}
9824  
9825  	queue_work(eval_map_wq, &eval_map_work);
9826  	return 0;
9827  }
9828  
9829  subsys_initcall(trace_eval_init);
9830  
trace_eval_sync(void)9831  static int __init trace_eval_sync(void)
9832  {
9833  	/* Make sure the eval map updates are finished */
9834  	if (eval_map_wq)
9835  		destroy_workqueue(eval_map_wq);
9836  	return 0;
9837  }
9838  
9839  late_initcall_sync(trace_eval_sync);
9840  
9841  
9842  #ifdef CONFIG_MODULES
trace_module_add_evals(struct module * mod)9843  static void trace_module_add_evals(struct module *mod)
9844  {
9845  	if (!mod->num_trace_evals)
9846  		return;
9847  
9848  	/*
9849  	 * Modules with bad taint do not have events created, do
9850  	 * not bother with enums either.
9851  	 */
9852  	if (trace_module_has_bad_taint(mod))
9853  		return;
9854  
9855  	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9856  }
9857  
9858  #ifdef CONFIG_TRACE_EVAL_MAP_FILE
trace_module_remove_evals(struct module * mod)9859  static void trace_module_remove_evals(struct module *mod)
9860  {
9861  	union trace_eval_map_item *map;
9862  	union trace_eval_map_item **last = &trace_eval_maps;
9863  
9864  	if (!mod->num_trace_evals)
9865  		return;
9866  
9867  	mutex_lock(&trace_eval_mutex);
9868  
9869  	map = trace_eval_maps;
9870  
9871  	while (map) {
9872  		if (map->head.mod == mod)
9873  			break;
9874  		map = trace_eval_jmp_to_tail(map);
9875  		last = &map->tail.next;
9876  		map = map->tail.next;
9877  	}
9878  	if (!map)
9879  		goto out;
9880  
9881  	*last = trace_eval_jmp_to_tail(map)->tail.next;
9882  	kfree(map);
9883   out:
9884  	mutex_unlock(&trace_eval_mutex);
9885  }
9886  #else
trace_module_remove_evals(struct module * mod)9887  static inline void trace_module_remove_evals(struct module *mod) { }
9888  #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9889  
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)9890  static int trace_module_notify(struct notifier_block *self,
9891  			       unsigned long val, void *data)
9892  {
9893  	struct module *mod = data;
9894  
9895  	switch (val) {
9896  	case MODULE_STATE_COMING:
9897  		trace_module_add_evals(mod);
9898  		break;
9899  	case MODULE_STATE_GOING:
9900  		trace_module_remove_evals(mod);
9901  		break;
9902  	}
9903  
9904  	return NOTIFY_OK;
9905  }
9906  
9907  static struct notifier_block trace_module_nb = {
9908  	.notifier_call = trace_module_notify,
9909  	.priority = 0,
9910  };
9911  #endif /* CONFIG_MODULES */
9912  
tracer_init_tracefs_work_func(struct work_struct * work)9913  static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9914  {
9915  
9916  	event_trace_init();
9917  
9918  	init_tracer_tracefs(&global_trace, NULL);
9919  	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9920  
9921  	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9922  			&global_trace, &tracing_thresh_fops);
9923  
9924  	trace_create_file("README", TRACE_MODE_READ, NULL,
9925  			NULL, &tracing_readme_fops);
9926  
9927  	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9928  			NULL, &tracing_saved_cmdlines_fops);
9929  
9930  	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9931  			  NULL, &tracing_saved_cmdlines_size_fops);
9932  
9933  	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9934  			NULL, &tracing_saved_tgids_fops);
9935  
9936  	trace_create_eval_file(NULL);
9937  
9938  #ifdef CONFIG_MODULES
9939  	register_module_notifier(&trace_module_nb);
9940  #endif
9941  
9942  #ifdef CONFIG_DYNAMIC_FTRACE
9943  	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9944  			NULL, &tracing_dyn_info_fops);
9945  #endif
9946  
9947  	create_trace_instances(NULL);
9948  
9949  	update_tracer_options(&global_trace);
9950  }
9951  
tracer_init_tracefs(void)9952  static __init int tracer_init_tracefs(void)
9953  {
9954  	int ret;
9955  
9956  	trace_access_lock_init();
9957  
9958  	ret = tracing_init_dentry();
9959  	if (ret)
9960  		return 0;
9961  
9962  	if (eval_map_wq) {
9963  		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9964  		queue_work(eval_map_wq, &tracerfs_init_work);
9965  	} else {
9966  		tracer_init_tracefs_work_func(NULL);
9967  	}
9968  
9969  	rv_init_interface();
9970  
9971  	return 0;
9972  }
9973  
9974  fs_initcall(tracer_init_tracefs);
9975  
9976  static int trace_die_panic_handler(struct notifier_block *self,
9977  				unsigned long ev, void *unused);
9978  
9979  static struct notifier_block trace_panic_notifier = {
9980  	.notifier_call = trace_die_panic_handler,
9981  	.priority = INT_MAX - 1,
9982  };
9983  
9984  static struct notifier_block trace_die_notifier = {
9985  	.notifier_call = trace_die_panic_handler,
9986  	.priority = INT_MAX - 1,
9987  };
9988  
9989  /*
9990   * The idea is to execute the following die/panic callback early, in order
9991   * to avoid showing irrelevant information in the trace (like other panic
9992   * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9993   * warnings get disabled (to prevent potential log flooding).
9994   */
trace_die_panic_handler(struct notifier_block * self,unsigned long ev,void * unused)9995  static int trace_die_panic_handler(struct notifier_block *self,
9996  				unsigned long ev, void *unused)
9997  {
9998  	if (!ftrace_dump_on_oops)
9999  		return NOTIFY_DONE;
10000  
10001  	/* The die notifier requires DIE_OOPS to trigger */
10002  	if (self == &trace_die_notifier && ev != DIE_OOPS)
10003  		return NOTIFY_DONE;
10004  
10005  	ftrace_dump(ftrace_dump_on_oops);
10006  
10007  	return NOTIFY_DONE;
10008  }
10009  
10010  /*
10011   * printk is set to max of 1024, we really don't need it that big.
10012   * Nothing should be printing 1000 characters anyway.
10013   */
10014  #define TRACE_MAX_PRINT		1000
10015  
10016  /*
10017   * Define here KERN_TRACE so that we have one place to modify
10018   * it if we decide to change what log level the ftrace dump
10019   * should be at.
10020   */
10021  #define KERN_TRACE		KERN_EMERG
10022  
10023  void
trace_printk_seq(struct trace_seq * s)10024  trace_printk_seq(struct trace_seq *s)
10025  {
10026  	/* Probably should print a warning here. */
10027  	if (s->seq.len >= TRACE_MAX_PRINT)
10028  		s->seq.len = TRACE_MAX_PRINT;
10029  
10030  	/*
10031  	 * More paranoid code. Although the buffer size is set to
10032  	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10033  	 * an extra layer of protection.
10034  	 */
10035  	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10036  		s->seq.len = s->seq.size - 1;
10037  
10038  	/* should be zero ended, but we are paranoid. */
10039  	s->buffer[s->seq.len] = 0;
10040  
10041  	printk(KERN_TRACE "%s", s->buffer);
10042  
10043  	trace_seq_init(s);
10044  }
10045  
trace_init_global_iter(struct trace_iterator * iter)10046  void trace_init_global_iter(struct trace_iterator *iter)
10047  {
10048  	iter->tr = &global_trace;
10049  	iter->trace = iter->tr->current_trace;
10050  	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10051  	iter->array_buffer = &global_trace.array_buffer;
10052  
10053  	if (iter->trace && iter->trace->open)
10054  		iter->trace->open(iter);
10055  
10056  	/* Annotate start of buffers if we had overruns */
10057  	if (ring_buffer_overruns(iter->array_buffer->buffer))
10058  		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10059  
10060  	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10061  	if (trace_clocks[iter->tr->clock_id].in_ns)
10062  		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10063  
10064  	/* Can not use kmalloc for iter.temp and iter.fmt */
10065  	iter->temp = static_temp_buf;
10066  	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10067  	iter->fmt = static_fmt_buf;
10068  	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10069  }
10070  
ftrace_dump(enum ftrace_dump_mode oops_dump_mode)10071  void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10072  {
10073  	/* use static because iter can be a bit big for the stack */
10074  	static struct trace_iterator iter;
10075  	static atomic_t dump_running;
10076  	struct trace_array *tr = &global_trace;
10077  	unsigned int old_userobj;
10078  	unsigned long flags;
10079  	int cnt = 0, cpu;
10080  
10081  	/* Only allow one dump user at a time. */
10082  	if (atomic_inc_return(&dump_running) != 1) {
10083  		atomic_dec(&dump_running);
10084  		return;
10085  	}
10086  
10087  	/*
10088  	 * Always turn off tracing when we dump.
10089  	 * We don't need to show trace output of what happens
10090  	 * between multiple crashes.
10091  	 *
10092  	 * If the user does a sysrq-z, then they can re-enable
10093  	 * tracing with echo 1 > tracing_on.
10094  	 */
10095  	tracing_off();
10096  
10097  	local_irq_save(flags);
10098  
10099  	/* Simulate the iterator */
10100  	trace_init_global_iter(&iter);
10101  
10102  	for_each_tracing_cpu(cpu) {
10103  		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10104  	}
10105  
10106  	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10107  
10108  	/* don't look at user memory in panic mode */
10109  	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10110  
10111  	switch (oops_dump_mode) {
10112  	case DUMP_ALL:
10113  		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10114  		break;
10115  	case DUMP_ORIG:
10116  		iter.cpu_file = raw_smp_processor_id();
10117  		break;
10118  	case DUMP_NONE:
10119  		goto out_enable;
10120  	default:
10121  		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10122  		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10123  	}
10124  
10125  	printk(KERN_TRACE "Dumping ftrace buffer:\n");
10126  
10127  	/* Did function tracer already get disabled? */
10128  	if (ftrace_is_dead()) {
10129  		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10130  		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10131  	}
10132  
10133  	/*
10134  	 * We need to stop all tracing on all CPUS to read
10135  	 * the next buffer. This is a bit expensive, but is
10136  	 * not done often. We fill all what we can read,
10137  	 * and then release the locks again.
10138  	 */
10139  
10140  	while (!trace_empty(&iter)) {
10141  
10142  		if (!cnt)
10143  			printk(KERN_TRACE "---------------------------------\n");
10144  
10145  		cnt++;
10146  
10147  		trace_iterator_reset(&iter);
10148  		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10149  
10150  		if (trace_find_next_entry_inc(&iter) != NULL) {
10151  			int ret;
10152  
10153  			ret = print_trace_line(&iter);
10154  			if (ret != TRACE_TYPE_NO_CONSUME)
10155  				trace_consume(&iter);
10156  		}
10157  		touch_nmi_watchdog();
10158  
10159  		trace_printk_seq(&iter.seq);
10160  	}
10161  
10162  	if (!cnt)
10163  		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10164  	else
10165  		printk(KERN_TRACE "---------------------------------\n");
10166  
10167   out_enable:
10168  	tr->trace_flags |= old_userobj;
10169  
10170  	for_each_tracing_cpu(cpu) {
10171  		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10172  	}
10173  	atomic_dec(&dump_running);
10174  	local_irq_restore(flags);
10175  }
10176  EXPORT_SYMBOL_GPL(ftrace_dump);
10177  
10178  #define WRITE_BUFSIZE  4096
10179  
trace_parse_run_command(struct file * file,const char __user * buffer,size_t count,loff_t * ppos,int (* createfn)(const char *))10180  ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10181  				size_t count, loff_t *ppos,
10182  				int (*createfn)(const char *))
10183  {
10184  	char *kbuf, *buf, *tmp;
10185  	int ret = 0;
10186  	size_t done = 0;
10187  	size_t size;
10188  
10189  	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10190  	if (!kbuf)
10191  		return -ENOMEM;
10192  
10193  	while (done < count) {
10194  		size = count - done;
10195  
10196  		if (size >= WRITE_BUFSIZE)
10197  			size = WRITE_BUFSIZE - 1;
10198  
10199  		if (copy_from_user(kbuf, buffer + done, size)) {
10200  			ret = -EFAULT;
10201  			goto out;
10202  		}
10203  		kbuf[size] = '\0';
10204  		buf = kbuf;
10205  		do {
10206  			tmp = strchr(buf, '\n');
10207  			if (tmp) {
10208  				*tmp = '\0';
10209  				size = tmp - buf + 1;
10210  			} else {
10211  				size = strlen(buf);
10212  				if (done + size < count) {
10213  					if (buf != kbuf)
10214  						break;
10215  					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10216  					pr_warn("Line length is too long: Should be less than %d\n",
10217  						WRITE_BUFSIZE - 2);
10218  					ret = -EINVAL;
10219  					goto out;
10220  				}
10221  			}
10222  			done += size;
10223  
10224  			/* Remove comments */
10225  			tmp = strchr(buf, '#');
10226  
10227  			if (tmp)
10228  				*tmp = '\0';
10229  
10230  			ret = createfn(buf);
10231  			if (ret)
10232  				goto out;
10233  			buf += size;
10234  
10235  		} while (done < count);
10236  	}
10237  	ret = done;
10238  
10239  out:
10240  	kfree(kbuf);
10241  
10242  	return ret;
10243  }
10244  
10245  #ifdef CONFIG_TRACER_MAX_TRACE
tr_needs_alloc_snapshot(const char * name)10246  __init static bool tr_needs_alloc_snapshot(const char *name)
10247  {
10248  	char *test;
10249  	int len = strlen(name);
10250  	bool ret;
10251  
10252  	if (!boot_snapshot_index)
10253  		return false;
10254  
10255  	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10256  	    boot_snapshot_info[len] == '\t')
10257  		return true;
10258  
10259  	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10260  	if (!test)
10261  		return false;
10262  
10263  	sprintf(test, "\t%s\t", name);
10264  	ret = strstr(boot_snapshot_info, test) == NULL;
10265  	kfree(test);
10266  	return ret;
10267  }
10268  
do_allocate_snapshot(const char * name)10269  __init static void do_allocate_snapshot(const char *name)
10270  {
10271  	if (!tr_needs_alloc_snapshot(name))
10272  		return;
10273  
10274  	/*
10275  	 * When allocate_snapshot is set, the next call to
10276  	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10277  	 * will allocate the snapshot buffer. That will alse clear
10278  	 * this flag.
10279  	 */
10280  	allocate_snapshot = true;
10281  }
10282  #else
do_allocate_snapshot(const char * name)10283  static inline void do_allocate_snapshot(const char *name) { }
10284  #endif
10285  
enable_instances(void)10286  __init static void enable_instances(void)
10287  {
10288  	struct trace_array *tr;
10289  	char *curr_str;
10290  	char *str;
10291  	char *tok;
10292  
10293  	/* A tab is always appended */
10294  	boot_instance_info[boot_instance_index - 1] = '\0';
10295  	str = boot_instance_info;
10296  
10297  	while ((curr_str = strsep(&str, "\t"))) {
10298  
10299  		tok = strsep(&curr_str, ",");
10300  
10301  		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10302  			do_allocate_snapshot(tok);
10303  
10304  		tr = trace_array_get_by_name(tok);
10305  		if (!tr) {
10306  			pr_warn("Failed to create instance buffer %s\n", curr_str);
10307  			continue;
10308  		}
10309  		/* Allow user space to delete it */
10310  		trace_array_put(tr);
10311  
10312  		while ((tok = strsep(&curr_str, ","))) {
10313  			early_enable_events(tr, tok, true);
10314  		}
10315  	}
10316  }
10317  
tracer_alloc_buffers(void)10318  __init static int tracer_alloc_buffers(void)
10319  {
10320  	int ring_buf_size;
10321  	int ret = -ENOMEM;
10322  
10323  
10324  	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10325  		pr_warn("Tracing disabled due to lockdown\n");
10326  		return -EPERM;
10327  	}
10328  
10329  	/*
10330  	 * Make sure we don't accidentally add more trace options
10331  	 * than we have bits for.
10332  	 */
10333  	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10334  
10335  	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10336  		goto out;
10337  
10338  	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10339  		goto out_free_buffer_mask;
10340  
10341  	/* Only allocate trace_printk buffers if a trace_printk exists */
10342  	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10343  		/* Must be called before global_trace.buffer is allocated */
10344  		trace_printk_init_buffers();
10345  
10346  	/* To save memory, keep the ring buffer size to its minimum */
10347  	if (ring_buffer_expanded)
10348  		ring_buf_size = trace_buf_size;
10349  	else
10350  		ring_buf_size = 1;
10351  
10352  	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10353  	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10354  
10355  	raw_spin_lock_init(&global_trace.start_lock);
10356  
10357  	/*
10358  	 * The prepare callbacks allocates some memory for the ring buffer. We
10359  	 * don't free the buffer if the CPU goes down. If we were to free
10360  	 * the buffer, then the user would lose any trace that was in the
10361  	 * buffer. The memory will be removed once the "instance" is removed.
10362  	 */
10363  	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10364  				      "trace/RB:prepare", trace_rb_cpu_prepare,
10365  				      NULL);
10366  	if (ret < 0)
10367  		goto out_free_cpumask;
10368  	/* Used for event triggers */
10369  	ret = -ENOMEM;
10370  	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10371  	if (!temp_buffer)
10372  		goto out_rm_hp_state;
10373  
10374  	if (trace_create_savedcmd() < 0)
10375  		goto out_free_temp_buffer;
10376  
10377  	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10378  		goto out_free_savedcmd;
10379  
10380  	/* TODO: make the number of buffers hot pluggable with CPUS */
10381  	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10382  		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10383  		goto out_free_pipe_cpumask;
10384  	}
10385  	if (global_trace.buffer_disabled)
10386  		tracing_off();
10387  
10388  	if (trace_boot_clock) {
10389  		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10390  		if (ret < 0)
10391  			pr_warn("Trace clock %s not defined, going back to default\n",
10392  				trace_boot_clock);
10393  	}
10394  
10395  	/*
10396  	 * register_tracer() might reference current_trace, so it
10397  	 * needs to be set before we register anything. This is
10398  	 * just a bootstrap of current_trace anyway.
10399  	 */
10400  	global_trace.current_trace = &nop_trace;
10401  
10402  	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10403  
10404  	ftrace_init_global_array_ops(&global_trace);
10405  
10406  	init_trace_flags_index(&global_trace);
10407  
10408  	register_tracer(&nop_trace);
10409  
10410  	/* Function tracing may start here (via kernel command line) */
10411  	init_function_trace();
10412  
10413  	/* All seems OK, enable tracing */
10414  	tracing_disabled = 0;
10415  
10416  	atomic_notifier_chain_register(&panic_notifier_list,
10417  				       &trace_panic_notifier);
10418  
10419  	register_die_notifier(&trace_die_notifier);
10420  
10421  	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10422  
10423  	INIT_LIST_HEAD(&global_trace.systems);
10424  	INIT_LIST_HEAD(&global_trace.events);
10425  	INIT_LIST_HEAD(&global_trace.hist_vars);
10426  	INIT_LIST_HEAD(&global_trace.err_log);
10427  	list_add(&global_trace.list, &ftrace_trace_arrays);
10428  
10429  	apply_trace_boot_options();
10430  
10431  	register_snapshot_cmd();
10432  
10433  	return 0;
10434  
10435  out_free_pipe_cpumask:
10436  	free_cpumask_var(global_trace.pipe_cpumask);
10437  out_free_savedcmd:
10438  	free_saved_cmdlines_buffer(savedcmd);
10439  out_free_temp_buffer:
10440  	ring_buffer_free(temp_buffer);
10441  out_rm_hp_state:
10442  	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10443  out_free_cpumask:
10444  	free_cpumask_var(global_trace.tracing_cpumask);
10445  out_free_buffer_mask:
10446  	free_cpumask_var(tracing_buffer_mask);
10447  out:
10448  	return ret;
10449  }
10450  
ftrace_boot_snapshot(void)10451  void __init ftrace_boot_snapshot(void)
10452  {
10453  #ifdef CONFIG_TRACER_MAX_TRACE
10454  	struct trace_array *tr;
10455  
10456  	if (!snapshot_at_boot)
10457  		return;
10458  
10459  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10460  		if (!tr->allocated_snapshot)
10461  			continue;
10462  
10463  		tracing_snapshot_instance(tr);
10464  		trace_array_puts(tr, "** Boot snapshot taken **\n");
10465  	}
10466  #endif
10467  }
10468  
early_trace_init(void)10469  void __init early_trace_init(void)
10470  {
10471  	if (tracepoint_printk) {
10472  		tracepoint_print_iter =
10473  			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10474  		if (MEM_FAIL(!tracepoint_print_iter,
10475  			     "Failed to allocate trace iterator\n"))
10476  			tracepoint_printk = 0;
10477  		else
10478  			static_key_enable(&tracepoint_printk_key.key);
10479  	}
10480  	tracer_alloc_buffers();
10481  
10482  	init_events();
10483  }
10484  
trace_init(void)10485  void __init trace_init(void)
10486  {
10487  	trace_event_init();
10488  
10489  	if (boot_instance_index)
10490  		enable_instances();
10491  }
10492  
clear_boot_tracer(void)10493  __init static void clear_boot_tracer(void)
10494  {
10495  	/*
10496  	 * The default tracer at boot buffer is an init section.
10497  	 * This function is called in lateinit. If we did not
10498  	 * find the boot tracer, then clear it out, to prevent
10499  	 * later registration from accessing the buffer that is
10500  	 * about to be freed.
10501  	 */
10502  	if (!default_bootup_tracer)
10503  		return;
10504  
10505  	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10506  	       default_bootup_tracer);
10507  	default_bootup_tracer = NULL;
10508  }
10509  
10510  #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
tracing_set_default_clock(void)10511  __init static void tracing_set_default_clock(void)
10512  {
10513  	/* sched_clock_stable() is determined in late_initcall */
10514  	if (!trace_boot_clock && !sched_clock_stable()) {
10515  		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10516  			pr_warn("Can not set tracing clock due to lockdown\n");
10517  			return;
10518  		}
10519  
10520  		printk(KERN_WARNING
10521  		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10522  		       "If you want to keep using the local clock, then add:\n"
10523  		       "  \"trace_clock=local\"\n"
10524  		       "on the kernel command line\n");
10525  		tracing_set_clock(&global_trace, "global");
10526  	}
10527  }
10528  #else
tracing_set_default_clock(void)10529  static inline void tracing_set_default_clock(void) { }
10530  #endif
10531  
late_trace_init(void)10532  __init static int late_trace_init(void)
10533  {
10534  	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10535  		static_key_disable(&tracepoint_printk_key.key);
10536  		tracepoint_printk = 0;
10537  	}
10538  
10539  	tracing_set_default_clock();
10540  	clear_boot_tracer();
10541  	return 0;
10542  }
10543  
10544  late_initcall_sync(late_trace_init);
10545