xref: /openbmc/linux/kernel/trace/trace.c (revision 05c618f3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53 
54 #include "trace.h"
55 #include "trace_output.h"
56 
57 /*
58  * On boot up, the ring buffer is set to the minimum size, so that
59  * we do not waste memory on systems that are not using tracing.
60  */
61 bool ring_buffer_expanded;
62 
63 #ifdef CONFIG_FTRACE_STARTUP_TEST
64 /*
 * We need to change this state when a selftest is running.
 * A selftest will peek into the ring buffer to count the
 * entries inserted during the selftest, but concurrent
 * insertions into the ring buffer (such as trace_printk) could occur
 * at the same time, giving false positive or negative results.
70  */
71 static bool __read_mostly tracing_selftest_running;
72 
73 /*
74  * If boot-time tracing including tracers/events via kernel cmdline
75  * is running, we do not want to run SELFTEST.
76  */
77 bool __read_mostly tracing_selftest_disabled;
78 
79 void __init disable_tracing_selftest(const char *reason)
80 {
81 	if (!tracing_selftest_disabled) {
82 		tracing_selftest_disabled = true;
83 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
84 	}
85 }
86 #else
87 #define tracing_selftest_running	0
88 #define tracing_selftest_disabled	0
89 #endif
90 
91 /* Pipe tracepoints to printk */
92 static struct trace_iterator *tracepoint_print_iter;
93 int tracepoint_printk;
94 static bool tracepoint_printk_stop_on_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96 
97 /* For tracers that don't implement custom flags */
98 static struct tracer_opt dummy_tracer_opt[] = {
99 	{ }
100 };
101 
102 static int
103 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
104 {
105 	return 0;
106 }
107 
108 /*
109  * To prevent the comm cache from being overwritten when no
110  * tracing is active, only save the comm when a trace event
111  * occurred.
112  */
113 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
114 
115 /*
116  * Kill all tracing for good (never come back).
117  * It is initialized to 1 but will turn to zero if the initialization
118  * of the tracer is successful. But that is the only place that sets
119  * this back to zero.
120  */
121 static int tracing_disabled = 1;
122 
123 cpumask_var_t __read_mostly	tracing_buffer_mask;
124 
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129  * is set, then ftrace_dump is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
133  *
134  * It is default off, but you can enable it with either specifying
135  * "ftrace_dump_on_oops" in the kernel command line, or setting
136  * /proc/sys/kernel/ftrace_dump_on_oops
137  * Set 1 if you want to dump buffers of all CPUs
138  * Set 2 if you want to dump the buffer of the CPU that triggered oops
139  */
140 
141 enum ftrace_dump_mode ftrace_dump_on_oops;
142 
143 /* When set, tracing will stop when a WARN*() is hit */
144 int __disable_trace_on_warning;
145 
146 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
147 /* Map of enums to their values, for "eval_map" file */
struct trace_eval_map_head {
	struct module			*mod;	/* module the maps belong to, if not built in */
	unsigned long			length;	/* count of the saved maps */
};
152 
153 union trace_eval_map_item;
154 
struct trace_eval_map_tail {
	/*
	 * "next" is the first member and "end" the second. "end" points
	 * to NULL as it must be different than "mod" or "eval_string".
	 */
	union trace_eval_map_item	*next;
	const char			*end;	/* points to NULL */
};
163 
164 static DEFINE_MUTEX(trace_eval_mutex);
165 
166 /*
167  * The trace_eval_maps are saved in an array with two extra elements,
168  * one at the beginning, and one at the end. The beginning item contains
169  * the count of the saved maps (head.length), and the module they
170  * belong to if not built in (head.mod). The ending item contains a
171  * pointer to the next array of saved eval_map items.
172  */
union trace_eval_map_item {
	struct trace_eval_map		map;	/* one saved eval map */
	struct trace_eval_map_head	head;	/* extra element at the beginning of the array */
	struct trace_eval_map_tail	tail;	/* extra element at the end of the array */
};
178 
179 static union trace_eval_map_item *trace_eval_maps;
180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
181 
182 int tracing_set_tracer(struct trace_array *tr, const char *buf);
183 static void ftrace_trace_userstack(struct trace_array *tr,
184 				   struct trace_buffer *buffer,
185 				   unsigned int trace_ctx);
186 
187 #define MAX_TRACER_SIZE		100
188 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
189 static char *default_bootup_tracer;
190 
191 static bool allocate_snapshot;
192 static bool snapshot_at_boot;
193 
194 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
195 static int boot_instance_index;
196 
197 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
198 static int boot_snapshot_index;
199 
200 static int __init set_cmdline_ftrace(char *str)
201 {
202 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
203 	default_bootup_tracer = bootup_tracer_buf;
204 	/* We are using ftrace early, expand it */
205 	ring_buffer_expanded = true;
206 	return 1;
207 }
208 __setup("ftrace=", set_cmdline_ftrace);
209 
210 static int __init set_ftrace_dump_on_oops(char *str)
211 {
212 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
213 		ftrace_dump_on_oops = DUMP_ALL;
214 		return 1;
215 	}
216 
217 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
218 		ftrace_dump_on_oops = DUMP_ORIG;
219                 return 1;
220         }
221 
222         return 0;
223 }
224 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
225 
226 static int __init stop_trace_on_warning(char *str)
227 {
228 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
229 		__disable_trace_on_warning = 1;
230 	return 1;
231 }
232 __setup("traceoff_on_warning", stop_trace_on_warning);
233 
234 static int __init boot_alloc_snapshot(char *str)
235 {
236 	char *slot = boot_snapshot_info + boot_snapshot_index;
237 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
238 	int ret;
239 
240 	if (str[0] == '=') {
241 		str++;
242 		if (strlen(str) >= left)
243 			return -1;
244 
245 		ret = snprintf(slot, left, "%s\t", str);
246 		boot_snapshot_index += ret;
247 	} else {
248 		allocate_snapshot = true;
249 		/* We also need the main ring buffer expanded */
250 		ring_buffer_expanded = true;
251 	}
252 	return 1;
253 }
254 __setup("alloc_snapshot", boot_alloc_snapshot);
255 
256 
257 static int __init boot_snapshot(char *str)
258 {
259 	snapshot_at_boot = true;
260 	boot_alloc_snapshot(str);
261 	return 1;
262 }
263 __setup("ftrace_boot_snapshot", boot_snapshot);
264 
265 
266 static int __init boot_instance(char *str)
267 {
268 	char *slot = boot_instance_info + boot_instance_index;
269 	int left = sizeof(boot_instance_info) - boot_instance_index;
270 	int ret;
271 
272 	if (strlen(str) >= left)
273 		return -1;
274 
275 	ret = snprintf(slot, left, "%s\t", str);
276 	boot_instance_index += ret;
277 
278 	return 1;
279 }
280 __setup("trace_instance=", boot_instance);
281 
282 
283 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
284 
285 static int __init set_trace_boot_options(char *str)
286 {
287 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
288 	return 1;
289 }
290 __setup("trace_options=", set_trace_boot_options);
291 
292 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
293 static char *trace_boot_clock __initdata;
294 
295 static int __init set_trace_boot_clock(char *str)
296 {
297 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
298 	trace_boot_clock = trace_boot_clock_buf;
299 	return 1;
300 }
301 __setup("trace_clock=", set_trace_boot_clock);
302 
303 static int __init set_tracepoint_printk(char *str)
304 {
305 	/* Ignore the "tp_printk_stop_on_boot" param */
306 	if (*str == '_')
307 		return 0;
308 
309 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
310 		tracepoint_printk = 1;
311 	return 1;
312 }
313 __setup("tp_printk", set_tracepoint_printk);
314 
315 static int __init set_tracepoint_printk_stop(char *str)
316 {
317 	tracepoint_printk_stop_on_boot = true;
318 	return 1;
319 }
320 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
321 
322 unsigned long long ns2usecs(u64 nsec)
323 {
324 	nsec += 500;
325 	do_div(nsec, 1000);
326 	return nsec;
327 }
328 
329 static void
330 trace_process_export(struct trace_export *export,
331 	       struct ring_buffer_event *event, int flag)
332 {
333 	struct trace_entry *entry;
334 	unsigned int size = 0;
335 
336 	if (export->flags & flag) {
337 		entry = ring_buffer_event_data(event);
338 		size = ring_buffer_event_length(event);
339 		export->write(export, entry, size);
340 	}
341 }
342 
343 static DEFINE_MUTEX(ftrace_export_lock);
344 
345 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
346 
347 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
348 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
350 
351 static inline void ftrace_exports_enable(struct trace_export *export)
352 {
353 	if (export->flags & TRACE_EXPORT_FUNCTION)
354 		static_branch_inc(&trace_function_exports_enabled);
355 
356 	if (export->flags & TRACE_EXPORT_EVENT)
357 		static_branch_inc(&trace_event_exports_enabled);
358 
359 	if (export->flags & TRACE_EXPORT_MARKER)
360 		static_branch_inc(&trace_marker_exports_enabled);
361 }
362 
363 static inline void ftrace_exports_disable(struct trace_export *export)
364 {
365 	if (export->flags & TRACE_EXPORT_FUNCTION)
366 		static_branch_dec(&trace_function_exports_enabled);
367 
368 	if (export->flags & TRACE_EXPORT_EVENT)
369 		static_branch_dec(&trace_event_exports_enabled);
370 
371 	if (export->flags & TRACE_EXPORT_MARKER)
372 		static_branch_dec(&trace_marker_exports_enabled);
373 }
374 
375 static void ftrace_exports(struct ring_buffer_event *event, int flag)
376 {
377 	struct trace_export *export;
378 
379 	preempt_disable_notrace();
380 
381 	export = rcu_dereference_raw_check(ftrace_exports_list);
382 	while (export) {
383 		trace_process_export(export, event, flag);
384 		export = rcu_dereference_raw_check(export->next);
385 	}
386 
387 	preempt_enable_notrace();
388 }
389 
390 static inline void
391 add_trace_export(struct trace_export **list, struct trace_export *export)
392 {
393 	rcu_assign_pointer(export->next, *list);
394 	/*
395 	 * We are entering export into the list but another
396 	 * CPU might be walking that list. We need to make sure
397 	 * the export->next pointer is valid before another CPU sees
398 	 * the export pointer included into the list.
399 	 */
400 	rcu_assign_pointer(*list, export);
401 }
402 
403 static inline int
404 rm_trace_export(struct trace_export **list, struct trace_export *export)
405 {
406 	struct trace_export **p;
407 
408 	for (p = list; *p != NULL; p = &(*p)->next)
409 		if (*p == export)
410 			break;
411 
412 	if (*p != export)
413 		return -1;
414 
415 	rcu_assign_pointer(*p, (*p)->next);
416 
417 	return 0;
418 }
419 
/*
 * Register @export on @list: the static keys for its export classes are
 * incremented first, then the export is linked into the list.
 */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);
	add_trace_export(list, export);
}
427 
/*
 * Unlink @export from @list and, only if it was actually on the list,
 * decrement the static keys for its export classes.
 *
 * Returns 0 on success, -1 if @export was not found on @list.
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);

	/*
	 * The keys were incremented only for exports that were added to
	 * the list. Decrementing them for an export that was not found
	 * would unbalance the key counts.
	 */
	if (!ret)
		ftrace_exports_disable(export);

	return ret;
}
438 
439 int register_ftrace_export(struct trace_export *export)
440 {
441 	if (WARN_ON_ONCE(!export->write))
442 		return -1;
443 
444 	mutex_lock(&ftrace_export_lock);
445 
446 	add_ftrace_export(&ftrace_exports_list, export);
447 
448 	mutex_unlock(&ftrace_export_lock);
449 
450 	return 0;
451 }
452 EXPORT_SYMBOL_GPL(register_ftrace_export);
453 
454 int unregister_ftrace_export(struct trace_export *export)
455 {
456 	int ret;
457 
458 	mutex_lock(&ftrace_export_lock);
459 
460 	ret = rm_ftrace_export(&ftrace_exports_list, export);
461 
462 	mutex_unlock(&ftrace_export_lock);
463 
464 	return ret;
465 }
466 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
467 
468 /* trace_flags holds trace_options default values */
469 #define TRACE_DEFAULT_FLAGS						\
470 	(FUNCTION_DEFAULT_FLAGS |					\
471 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
472 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
473 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
474 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
475 	 TRACE_ITER_HASH_PTR)
476 
477 /* trace_options that are only supported by global_trace */
478 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
479 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
480 
481 /* trace_flags that are default zero for instances */
482 #define ZEROED_TRACE_FLAGS \
483 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
484 
485 /*
486  * The global_trace is the descriptor that holds the top-level tracing
487  * buffers for the live tracing.
488  */
489 static struct trace_array global_trace = {
490 	.trace_flags = TRACE_DEFAULT_FLAGS,
491 };
492 
493 LIST_HEAD(ftrace_trace_arrays);
494 
495 int trace_array_get(struct trace_array *this_tr)
496 {
497 	struct trace_array *tr;
498 	int ret = -ENODEV;
499 
500 	mutex_lock(&trace_types_lock);
501 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
502 		if (tr == this_tr) {
503 			tr->ref++;
504 			ret = 0;
505 			break;
506 		}
507 	}
508 	mutex_unlock(&trace_types_lock);
509 
510 	return ret;
511 }
512 
513 static void __trace_array_put(struct trace_array *this_tr)
514 {
515 	WARN_ON(!this_tr->ref);
516 	this_tr->ref--;
517 }
518 
519 /**
520  * trace_array_put - Decrement the reference counter for this trace array.
521  * @this_tr : pointer to the trace array
522  *
523  * NOTE: Use this when we no longer need the trace array returned by
524  * trace_array_get_by_name(). This ensures the trace array can be later
525  * destroyed.
526  *
527  */
528 void trace_array_put(struct trace_array *this_tr)
529 {
530 	if (!this_tr)
531 		return;
532 
533 	mutex_lock(&trace_types_lock);
534 	__trace_array_put(this_tr);
535 	mutex_unlock(&trace_types_lock);
536 }
537 EXPORT_SYMBOL_GPL(trace_array_put);
538 
539 int tracing_check_open_get_tr(struct trace_array *tr)
540 {
541 	int ret;
542 
543 	ret = security_locked_down(LOCKDOWN_TRACEFS);
544 	if (ret)
545 		return ret;
546 
547 	if (tracing_disabled)
548 		return -ENODEV;
549 
550 	if (tr && trace_array_get(tr) < 0)
551 		return -ENODEV;
552 
553 	return 0;
554 }
555 
556 int call_filter_check_discard(struct trace_event_call *call, void *rec,
557 			      struct trace_buffer *buffer,
558 			      struct ring_buffer_event *event)
559 {
560 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
561 	    !filter_match_preds(call->filter, rec)) {
562 		__trace_event_discard_commit(buffer, event);
563 		return 1;
564 	}
565 
566 	return 0;
567 }
568 
569 /**
570  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
571  * @filtered_pids: The list of pids to check
572  * @search_pid: The PID to find in @filtered_pids
573  *
574  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
575  */
576 bool
577 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
578 {
579 	return trace_pid_list_is_set(filtered_pids, search_pid);
580 }
581 
582 /**
583  * trace_ignore_this_task - should a task be ignored for tracing
584  * @filtered_pids: The list of pids to check
585  * @filtered_no_pids: The list of pids not to be traced
586  * @task: The task that should be ignored if not filtered
587  *
588  * Checks if @task should be traced or not from @filtered_pids.
589  * Returns true if @task should *NOT* be traced.
590  * Returns false if @task should be traced.
591  */
592 bool
593 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
594 		       struct trace_pid_list *filtered_no_pids,
595 		       struct task_struct *task)
596 {
597 	/*
598 	 * If filtered_no_pids is not empty, and the task's pid is listed
599 	 * in filtered_no_pids, then return true.
600 	 * Otherwise, if filtered_pids is empty, that means we can
601 	 * trace all tasks. If it has content, then only trace pids
602 	 * within filtered_pids.
603 	 */
604 
605 	return (filtered_pids &&
606 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
607 		(filtered_no_pids &&
608 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
609 }
610 
611 /**
612  * trace_filter_add_remove_task - Add or remove a task from a pid_list
613  * @pid_list: The list to modify
614  * @self: The current task for fork or NULL for exit
615  * @task: The task to add or remove
616  *
617  * If adding a task, if @self is defined, the task is only added if @self
618  * is also included in @pid_list. This happens on fork and tasks should
619  * only be added when the parent is listed. If @self is NULL, then the
620  * @task pid will be removed from the list, which would happen on exit
621  * of a task.
622  */
623 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
624 				  struct task_struct *self,
625 				  struct task_struct *task)
626 {
627 	if (!pid_list)
628 		return;
629 
630 	/* For forks, we only add if the forking task is listed */
631 	if (self) {
632 		if (!trace_find_filtered_pid(pid_list, self->pid))
633 			return;
634 	}
635 
636 	/* "self" is set for forks, and NULL for exits */
637 	if (self)
638 		trace_pid_list_set(pid_list, task->pid);
639 	else
640 		trace_pid_list_clear(pid_list, task->pid);
641 }
642 
643 /**
644  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
645  * @pid_list: The pid list to show
646  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
647  * @pos: The position of the file
648  *
649  * This is used by the seq_file "next" operation to iterate the pids
650  * listed in a trace_pid_list structure.
651  *
652  * Returns the pid+1 as we want to display pid of zero, but NULL would
653  * stop the iteration.
654  */
655 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
656 {
657 	long pid = (unsigned long)v;
658 	unsigned int next;
659 
660 	(*pos)++;
661 
662 	/* pid already is +1 of the actual previous bit */
663 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
664 		return NULL;
665 
666 	pid = next;
667 
668 	/* Return pid + 1 to allow zero to be represented */
669 	return (void *)(pid + 1);
670 }
671 
672 /**
673  * trace_pid_start - Used for seq_file to start reading pid lists
674  * @pid_list: The pid list to show
675  * @pos: The position of the file
676  *
677  * This is used by seq_file "start" operation to start the iteration
678  * of listing pids.
679  *
680  * Returns the pid+1 as we want to display pid of zero, but NULL would
681  * stop the iteration.
682  */
683 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
684 {
685 	unsigned long pid;
686 	unsigned int first;
687 	loff_t l = 0;
688 
689 	if (trace_pid_list_first(pid_list, &first) < 0)
690 		return NULL;
691 
692 	pid = first;
693 
694 	/* Return pid + 1 so that zero can be the exit value */
695 	for (pid++; pid && l < *pos;
696 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
697 		;
698 	return (void *)pid;
699 }
700 
/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value. The +1 bias of @v is removed before printing.
 *
 * Always returns 0.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	unsigned long biased = (unsigned long)v;

	seq_printf(m, "%lu\n", biased - 1);

	return 0;
}
716 
717 /* 128 should be much more than enough */
718 #define PID_BUF_SIZE		127
719 
720 int trace_pid_write(struct trace_pid_list *filtered_pids,
721 		    struct trace_pid_list **new_pid_list,
722 		    const char __user *ubuf, size_t cnt)
723 {
724 	struct trace_pid_list *pid_list;
725 	struct trace_parser parser;
726 	unsigned long val;
727 	int nr_pids = 0;
728 	ssize_t read = 0;
729 	ssize_t ret;
730 	loff_t pos;
731 	pid_t pid;
732 
733 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
734 		return -ENOMEM;
735 
736 	/*
737 	 * Always recreate a new array. The write is an all or nothing
738 	 * operation. Always create a new array when adding new pids by
739 	 * the user. If the operation fails, then the current list is
740 	 * not modified.
741 	 */
742 	pid_list = trace_pid_list_alloc();
743 	if (!pid_list) {
744 		trace_parser_put(&parser);
745 		return -ENOMEM;
746 	}
747 
748 	if (filtered_pids) {
749 		/* copy the current bits to the new max */
750 		ret = trace_pid_list_first(filtered_pids, &pid);
751 		while (!ret) {
752 			trace_pid_list_set(pid_list, pid);
753 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
754 			nr_pids++;
755 		}
756 	}
757 
758 	ret = 0;
759 	while (cnt > 0) {
760 
761 		pos = 0;
762 
763 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
764 		if (ret < 0)
765 			break;
766 
767 		read += ret;
768 		ubuf += ret;
769 		cnt -= ret;
770 
771 		if (!trace_parser_loaded(&parser))
772 			break;
773 
774 		ret = -EINVAL;
775 		if (kstrtoul(parser.buffer, 0, &val))
776 			break;
777 
778 		pid = (pid_t)val;
779 
780 		if (trace_pid_list_set(pid_list, pid) < 0) {
781 			ret = -1;
782 			break;
783 		}
784 		nr_pids++;
785 
786 		trace_parser_clear(&parser);
787 		ret = 0;
788 	}
789 	trace_parser_put(&parser);
790 
791 	if (ret < 0) {
792 		trace_pid_list_free(pid_list);
793 		return ret;
794 	}
795 
796 	if (!nr_pids) {
797 		/* Cleared the list of pids */
798 		trace_pid_list_free(pid_list);
799 		pid_list = NULL;
800 	}
801 
802 	*new_pid_list = pid_list;
803 
804 	return read;
805 }
806 
807 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
808 {
809 	u64 ts;
810 
811 	/* Early boot up does not have a buffer yet */
812 	if (!buf->buffer)
813 		return trace_clock_local();
814 
815 	ts = ring_buffer_time_stamp(buf->buffer);
816 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
817 
818 	return ts;
819 }
820 
821 u64 ftrace_now(int cpu)
822 {
823 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
824 }
825 
826 /**
827  * tracing_is_enabled - Show if global_trace has been enabled
828  *
829  * Shows if the global trace has been enabled or not. It uses the
830  * mirror flag "buffer_disabled" to be used in fast paths such as for
831  * the irqsoff tracer. But it may be inaccurate due to races. If you
832  * need to know the accurate state, use tracing_is_on() which is a little
833  * slower, but accurate.
834  */
835 int tracing_is_enabled(void)
836 {
837 	/*
838 	 * For quick access (irqsoff uses this in fast path), just
839 	 * return the mirror variable of the state of the ring buffer.
840 	 * It's a little racy, but we don't really care.
841 	 */
842 	smp_rmb();
843 	return !global_trace.buffer_disabled;
844 }
845 
846 /*
847  * trace_buf_size is the size in bytes that is allocated
848  * for a buffer. Note, the number of bytes is always rounded
849  * to page size.
850  *
851  * This number is purposely set to a low number of 16384.
852  * If the dump on oops happens, it will be much appreciated
853  * to not have to wait for all that output. Anyway this can be
854  * boot time and run time configurable.
855  */
856 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
857 
858 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
859 
860 /* trace_types holds a link list of available tracers. */
861 static struct tracer		*trace_types __read_mostly;
862 
863 /*
864  * trace_types_lock is used to protect the trace_types list.
865  */
866 DEFINE_MUTEX(trace_types_lock);
867 
868 /*
869  * serialize the access of the ring buffer
870  *
 * The ring buffer serializes readers, but that is only low-level protection.
 * The validity of the events (as returned by ring_buffer_peek() etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow another process to
 * consume these events concurrently:
877  *   A) the page of the consumed events may become a normal page
878  *      (not reader page) in ring buffer, and this page will be rewritten
879  *      by events producer.
880  *   B) The page of the consumed events may become a page for splice_read,
881  *      and this page will be returned to system.
882  *
883  * These primitives allow multi process access to different cpu ring buffer
884  * concurrently.
885  *
886  * These primitives don't distinguish read-only and read-consume access.
887  * Multi read-only access are also serialized.
888  */
889 
890 #ifdef CONFIG_SMP
891 static DECLARE_RWSEM(all_cpu_access_lock);
892 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
893 
894 static inline void trace_access_lock(int cpu)
895 {
896 	if (cpu == RING_BUFFER_ALL_CPUS) {
897 		/* gain it for accessing the whole ring buffer. */
898 		down_write(&all_cpu_access_lock);
899 	} else {
900 		/* gain it for accessing a cpu ring buffer. */
901 
902 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
903 		down_read(&all_cpu_access_lock);
904 
905 		/* Secondly block other access to this @cpu ring buffer. */
906 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
907 	}
908 }
909 
910 static inline void trace_access_unlock(int cpu)
911 {
912 	if (cpu == RING_BUFFER_ALL_CPUS) {
913 		up_write(&all_cpu_access_lock);
914 	} else {
915 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
916 		up_read(&all_cpu_access_lock);
917 	}
918 }
919 
920 static inline void trace_access_lock_init(void)
921 {
922 	int cpu;
923 
924 	for_each_possible_cpu(cpu)
925 		mutex_init(&per_cpu(cpu_access_lock, cpu));
926 }
927 
928 #else
929 
930 static DEFINE_MUTEX(access_lock);
931 
932 static inline void trace_access_lock(int cpu)
933 {
934 	(void)cpu;
935 	mutex_lock(&access_lock);
936 }
937 
938 static inline void trace_access_unlock(int cpu)
939 {
940 	(void)cpu;
941 	mutex_unlock(&access_lock);
942 }
943 
/*
 * !CONFIG_SMP variant: access_lock is statically defined with
 * DEFINE_MUTEX(), so nothing is initialized here.
 */
static inline void trace_access_lock_init(void)
{
}
947 
948 #endif
949 
#ifdef CONFIG_STACKTRACE
/* Forward declarations; the definitions come later in this file. */
static void __ftrace_trace_stack(struct trace_buffer *buffer,
				 unsigned int trace_ctx,
				 int skip, struct pt_regs *regs);
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs);

#else
/*
 * Without CONFIG_STACKTRACE both helpers are empty stubs. Their
 * prototypes match the declarations above, so @trace_ctx is
 * "unsigned int" in both configurations.
 */
static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
					unsigned int trace_ctx,
					int skip, struct pt_regs *regs)
{
}
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs)
{
}

#endif
973 
974 static __always_inline void
975 trace_event_setup(struct ring_buffer_event *event,
976 		  int type, unsigned int trace_ctx)
977 {
978 	struct trace_entry *ent = ring_buffer_event_data(event);
979 
980 	tracing_generic_entry_update(ent, type, trace_ctx);
981 }
982 
983 static __always_inline struct ring_buffer_event *
984 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
985 			  int type,
986 			  unsigned long len,
987 			  unsigned int trace_ctx)
988 {
989 	struct ring_buffer_event *event;
990 
991 	event = ring_buffer_lock_reserve(buffer, len);
992 	if (event != NULL)
993 		trace_event_setup(event, type, trace_ctx);
994 
995 	return event;
996 }
997 
998 void tracer_tracing_on(struct trace_array *tr)
999 {
1000 	if (tr->array_buffer.buffer)
1001 		ring_buffer_record_on(tr->array_buffer.buffer);
1002 	/*
1003 	 * This flag is looked at when buffers haven't been allocated
1004 	 * yet, or by some tracers (like irqsoff), that just want to
1005 	 * know if the ring buffer has been disabled, but it can handle
1006 	 * races of where it gets disabled but we still do a record.
1007 	 * As the check is in the fast path of the tracers, it is more
1008 	 * important to be fast than accurate.
1009 	 */
1010 	tr->buffer_disabled = 0;
1011 	/* Make the flag seen by readers */
1012 	smp_wmb();
1013 }
1014 
1015 /**
1016  * tracing_on - enable tracing buffers
1017  *
1018  * This function enables tracing buffers that may have been
1019  * disabled with tracing_off.
1020  */
1021 void tracing_on(void)
1022 {
1023 	tracer_tracing_on(&global_trace);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_on);
1026 
1027 
1028 static __always_inline void
1029 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1030 {
1031 	__this_cpu_write(trace_taskinfo_save, true);
1032 
1033 	/* If this is the temp buffer, we need to commit fully */
1034 	if (this_cpu_read(trace_buffered_event) == event) {
1035 		/* Length is in event->array[0] */
1036 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1037 		/* Release the temp buffer */
1038 		this_cpu_dec(trace_buffered_event_cnt);
1039 		/* ring_buffer_unlock_commit() enables preemption */
1040 		preempt_enable_notrace();
1041 	} else
1042 		ring_buffer_unlock_commit(buffer);
1043 }
1044 
1045 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1046 		       const char *str, int size)
1047 {
1048 	struct ring_buffer_event *event;
1049 	struct trace_buffer *buffer;
1050 	struct print_entry *entry;
1051 	unsigned int trace_ctx;
1052 	int alloc;
1053 
1054 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1055 		return 0;
1056 
1057 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1058 		return 0;
1059 
1060 	if (unlikely(tracing_disabled))
1061 		return 0;
1062 
1063 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1064 
1065 	trace_ctx = tracing_gen_ctx();
1066 	buffer = tr->array_buffer.buffer;
1067 	ring_buffer_nest_start(buffer);
1068 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1069 					    trace_ctx);
1070 	if (!event) {
1071 		size = 0;
1072 		goto out;
1073 	}
1074 
1075 	entry = ring_buffer_event_data(event);
1076 	entry->ip = ip;
1077 
1078 	memcpy(&entry->buf, str, size);
1079 
1080 	/* Add a newline if necessary */
1081 	if (entry->buf[size - 1] != '\n') {
1082 		entry->buf[size] = '\n';
1083 		entry->buf[size + 1] = '\0';
1084 	} else
1085 		entry->buf[size] = '\0';
1086 
1087 	__buffer_unlock_commit(buffer, event);
1088 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1089  out:
1090 	ring_buffer_nest_end(buffer);
1091 	return size;
1092 }
1093 EXPORT_SYMBOL_GPL(__trace_array_puts);
1094 
1095 /**
1096  * __trace_puts - write a constant string into the trace buffer.
1097  * @ip:	   The address of the caller
1098  * @str:   The constant string to write
1099  * @size:  The size of the string.
1100  */
1101 int __trace_puts(unsigned long ip, const char *str, int size)
1102 {
1103 	return __trace_array_puts(&global_trace, ip, str, size);
1104 }
1105 EXPORT_SYMBOL_GPL(__trace_puts);
1106 
1107 /**
1108  * __trace_bputs - write the pointer to a constant string into trace buffer
1109  * @ip:	   The address of the caller
1110  * @str:   The constant string to write to the buffer to
1111  */
1112 int __trace_bputs(unsigned long ip, const char *str)
1113 {
1114 	struct ring_buffer_event *event;
1115 	struct trace_buffer *buffer;
1116 	struct bputs_entry *entry;
1117 	unsigned int trace_ctx;
1118 	int size = sizeof(struct bputs_entry);
1119 	int ret = 0;
1120 
1121 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1122 		return 0;
1123 
1124 	if (unlikely(tracing_selftest_running || tracing_disabled))
1125 		return 0;
1126 
1127 	trace_ctx = tracing_gen_ctx();
1128 	buffer = global_trace.array_buffer.buffer;
1129 
1130 	ring_buffer_nest_start(buffer);
1131 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1132 					    trace_ctx);
1133 	if (!event)
1134 		goto out;
1135 
1136 	entry = ring_buffer_event_data(event);
1137 	entry->ip			= ip;
1138 	entry->str			= str;
1139 
1140 	__buffer_unlock_commit(buffer, event);
1141 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1142 
1143 	ret = 1;
1144  out:
1145 	ring_buffer_nest_end(buffer);
1146 	return ret;
1147 }
1148 EXPORT_SYMBOL_GPL(__trace_bputs);
1149 
1150 #ifdef CONFIG_TRACER_SNAPSHOT
1151 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1152 					   void *cond_data)
1153 {
1154 	struct tracer *tracer = tr->current_trace;
1155 	unsigned long flags;
1156 
1157 	if (in_nmi()) {
1158 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1159 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1160 		return;
1161 	}
1162 
1163 	if (!tr->allocated_snapshot) {
1164 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1165 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1166 		tracer_tracing_off(tr);
1167 		return;
1168 	}
1169 
1170 	/* Note, snapshot can not be used when the tracer uses it */
1171 	if (tracer->use_max_tr) {
1172 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1173 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1174 		return;
1175 	}
1176 
1177 	local_irq_save(flags);
1178 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1179 	local_irq_restore(flags);
1180 }
1181 
/* Take an unconditional snapshot of @tr (no cond_data is passed along). */
void tracing_snapshot_instance(struct trace_array *tr)
{
	tracing_snapshot_instance_cond(tr, NULL);
}
1186 
1187 /**
1188  * tracing_snapshot - take a snapshot of the current buffer.
1189  *
1190  * This causes a swap between the snapshot buffer and the current live
1191  * tracing buffer. You can use this to take snapshots of the live
1192  * trace when some condition is triggered, but continue to trace.
1193  *
1194  * Note, make sure to allocate the snapshot with either
1195  * a tracing_snapshot_alloc(), or by doing it manually
1196  * with: echo 1 > /sys/kernel/tracing/snapshot
1197  *
1198  * If the snapshot buffer is not allocated, it will stop tracing.
1199  * Basically making a permanent snapshot.
1200  */
1201 void tracing_snapshot(void)
1202 {
1203 	struct trace_array *tr = &global_trace;
1204 
1205 	tracing_snapshot_instance(tr);
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot);
1208 
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * This is the same as tracing_snapshot() except that it works on the
 * given instance and the snapshot is conditional: it only happens if
 * the instance's cond_snapshot.update() implementation, which receives
 * @cond_data, returns true. That update() operation uses @cond_data to
 * decide whether the snapshot should be taken and, if so, presumably
 * saves it along with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1227 
1228 /**
1229  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1230  * @tr:		The tracing instance
1231  *
1232  * When the user enables a conditional snapshot using
1233  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1234  * with the snapshot.  This accessor is used to retrieve it.
1235  *
1236  * Should not be called from cond_snapshot.update(), since it takes
1237  * the tr->max_lock lock, which the code calling
1238  * cond_snapshot.update() has already done.
1239  *
1240  * Returns the cond_data associated with the trace array's snapshot.
1241  */
1242 void *tracing_cond_snapshot_data(struct trace_array *tr)
1243 {
1244 	void *cond_data = NULL;
1245 
1246 	local_irq_disable();
1247 	arch_spin_lock(&tr->max_lock);
1248 
1249 	if (tr->cond_snapshot)
1250 		cond_data = tr->cond_snapshot->cond_data;
1251 
1252 	arch_spin_unlock(&tr->max_lock);
1253 	local_irq_enable();
1254 
1255 	return cond_data;
1256 }
1257 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1258 
1259 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1260 					struct array_buffer *size_buf, int cpu_id);
1261 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1262 
1263 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1264 {
1265 	int ret;
1266 
1267 	if (!tr->allocated_snapshot) {
1268 
1269 		/* allocate spare buffer */
1270 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1271 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1272 		if (ret < 0)
1273 			return ret;
1274 
1275 		tr->allocated_snapshot = true;
1276 	}
1277 
1278 	return 0;
1279 }
1280 
1281 static void free_snapshot(struct trace_array *tr)
1282 {
1283 	/*
1284 	 * We don't free the ring buffer. instead, resize it because
1285 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1286 	 * we want preserve it.
1287 	 */
1288 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1289 	set_buffer_entries(&tr->max_buffer, 1);
1290 	tracing_reset_online_cpus(&tr->max_buffer);
1291 	tr->allocated_snapshot = false;
1292 }
1293 
1294 /**
1295  * tracing_alloc_snapshot - allocate snapshot buffer.
1296  *
1297  * This only allocates the snapshot buffer if it isn't already
1298  * allocated - it doesn't also take a snapshot.
1299  *
1300  * This is meant to be used in cases where the snapshot buffer needs
1301  * to be set up for events that can't sleep but need to be able to
1302  * trigger a snapshot.
1303  */
1304 int tracing_alloc_snapshot(void)
1305 {
1306 	struct trace_array *tr = &global_trace;
1307 	int ret;
1308 
1309 	ret = tracing_alloc_snapshot_instance(tr);
1310 	WARN_ON(ret < 0);
1311 
1312 	return ret;
1313 }
1314 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1315 
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * Like tracing_snapshot(), but the snapshot buffer is allocated first
 * if needed. Only use this where it is safe to sleep, as the allocation
 * may sleep. If the allocation fails, no snapshot is taken.
 *
 * The snapshot itself swaps the snapshot buffer with the live tracing
 * buffer, so the live trace can be captured when some condition is
 * triggered while tracing continues.
 */
void tracing_snapshot_alloc(void)
{
	if (tracing_alloc_snapshot() < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1338 
1339 /**
1340  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1341  * @tr:		The tracing instance
1342  * @cond_data:	User data to associate with the snapshot
1343  * @update:	Implementation of the cond_snapshot update function
1344  *
1345  * Check whether the conditional snapshot for the given instance has
1346  * already been enabled, or if the current tracer is already using a
1347  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1348  * save the cond_data and update function inside.
1349  *
1350  * Returns 0 if successful, error otherwise.
1351  */
1352 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1353 				 cond_update_fn_t update)
1354 {
1355 	struct cond_snapshot *cond_snapshot;
1356 	int ret = 0;
1357 
1358 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1359 	if (!cond_snapshot)
1360 		return -ENOMEM;
1361 
1362 	cond_snapshot->cond_data = cond_data;
1363 	cond_snapshot->update = update;
1364 
1365 	mutex_lock(&trace_types_lock);
1366 
1367 	ret = tracing_alloc_snapshot_instance(tr);
1368 	if (ret)
1369 		goto fail_unlock;
1370 
1371 	if (tr->current_trace->use_max_tr) {
1372 		ret = -EBUSY;
1373 		goto fail_unlock;
1374 	}
1375 
1376 	/*
1377 	 * The cond_snapshot can only change to NULL without the
1378 	 * trace_types_lock. We don't care if we race with it going
1379 	 * to NULL, but we want to make sure that it's not set to
1380 	 * something other than NULL when we get here, which we can
1381 	 * do safely with only holding the trace_types_lock and not
1382 	 * having to take the max_lock.
1383 	 */
1384 	if (tr->cond_snapshot) {
1385 		ret = -EBUSY;
1386 		goto fail_unlock;
1387 	}
1388 
1389 	local_irq_disable();
1390 	arch_spin_lock(&tr->max_lock);
1391 	tr->cond_snapshot = cond_snapshot;
1392 	arch_spin_unlock(&tr->max_lock);
1393 	local_irq_enable();
1394 
1395 	mutex_unlock(&trace_types_lock);
1396 
1397 	return ret;
1398 
1399  fail_unlock:
1400 	mutex_unlock(&trace_types_lock);
1401 	kfree(cond_snapshot);
1402 	return ret;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1405 
1406 /**
1407  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1408  * @tr:		The tracing instance
1409  *
1410  * Check whether the conditional snapshot for the given instance is
1411  * enabled; if so, free the cond_snapshot associated with it,
1412  * otherwise return -EINVAL.
1413  *
1414  * Returns 0 if successful, error otherwise.
1415  */
1416 int tracing_snapshot_cond_disable(struct trace_array *tr)
1417 {
1418 	int ret = 0;
1419 
1420 	local_irq_disable();
1421 	arch_spin_lock(&tr->max_lock);
1422 
1423 	if (!tr->cond_snapshot)
1424 		ret = -EINVAL;
1425 	else {
1426 		kfree(tr->cond_snapshot);
1427 		tr->cond_snapshot = NULL;
1428 	}
1429 
1430 	arch_spin_unlock(&tr->max_lock);
1431 	local_irq_enable();
1432 
1433 	return ret;
1434 }
1435 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1436 #else
1437 void tracing_snapshot(void)
1438 {
1439 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1440 }
1441 EXPORT_SYMBOL_GPL(tracing_snapshot);
1442 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1443 {
1444 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1447 int tracing_alloc_snapshot(void)
1448 {
1449 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1450 	return -ENODEV;
1451 }
1452 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1453 void tracing_snapshot_alloc(void)
1454 {
1455 	/* Give warning */
1456 	tracing_snapshot();
1457 }
1458 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1459 void *tracing_cond_snapshot_data(struct trace_array *tr)
1460 {
1461 	return NULL;
1462 }
1463 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1464 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1465 {
1466 	return -ENODEV;
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1469 int tracing_snapshot_cond_disable(struct trace_array *tr)
1470 {
1471 	return false;
1472 }
1473 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1474 #define free_snapshot(tr)	do { } while (0)
1475 #endif /* CONFIG_TRACER_SNAPSHOT */
1476 
1477 void tracer_tracing_off(struct trace_array *tr)
1478 {
1479 	if (tr->array_buffer.buffer)
1480 		ring_buffer_record_off(tr->array_buffer.buffer);
1481 	/*
1482 	 * This flag is looked at when buffers haven't been allocated
1483 	 * yet, or by some tracers (like irqsoff), that just want to
1484 	 * know if the ring buffer has been disabled, but it can handle
1485 	 * races of where it gets disabled but we still do a record.
1486 	 * As the check is in the fast path of the tracers, it is more
1487 	 * important to be fast than accurate.
1488 	 */
1489 	tr->buffer_disabled = 1;
1490 	/* Make the flag seen by readers */
1491 	smp_wmb();
1492 }
1493 
1494 /**
1495  * tracing_off - turn off tracing buffers
1496  *
1497  * This function stops the tracing buffers from recording data.
1498  * It does not disable any overhead the tracers themselves may
1499  * be causing. This function simply causes all recording to
1500  * the ring buffers to fail.
1501  */
1502 void tracing_off(void)
1503 {
1504 	tracer_tracing_off(&global_trace);
1505 }
1506 EXPORT_SYMBOL_GPL(tracing_off);
1507 
1508 void disable_trace_on_warning(void)
1509 {
1510 	if (__disable_trace_on_warning) {
1511 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1512 			"Disabling tracing due to warning\n");
1513 		tracing_off();
1514 	}
1515 }
1516 
1517 /**
1518  * tracer_tracing_is_on - show real state of ring buffer enabled
1519  * @tr : the trace array to know if ring buffer is enabled
1520  *
1521  * Shows real state of the ring buffer if it is enabled or not.
1522  */
1523 bool tracer_tracing_is_on(struct trace_array *tr)
1524 {
1525 	if (tr->array_buffer.buffer)
1526 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1527 	return !tr->buffer_disabled;
1528 }
1529 
/**
 * tracing_is_on - show state of ring buffers enabled
 *
 * Returns the result of tracer_tracing_is_on() for the global trace
 * array: non-zero if recording is enabled, 0 otherwise.
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);
1538 
1539 static int __init set_buf_size(char *str)
1540 {
1541 	unsigned long buf_size;
1542 
1543 	if (!str)
1544 		return 0;
1545 	buf_size = memparse(str, &str);
1546 	/*
1547 	 * nr_entries can not be zero and the startup
1548 	 * tests require some buffer space. Therefore
1549 	 * ensure we have at least 4096 bytes of buffer.
1550 	 */
1551 	trace_buf_size = max(4096UL, buf_size);
1552 	return 1;
1553 }
1554 __setup("trace_buf_size=", set_buf_size);
1555 
1556 static int __init set_tracing_thresh(char *str)
1557 {
1558 	unsigned long threshold;
1559 	int ret;
1560 
1561 	if (!str)
1562 		return 0;
1563 	ret = kstrtoul(str, 0, &threshold);
1564 	if (ret < 0)
1565 		return 0;
1566 	tracing_thresh = threshold * 1000;
1567 	return 1;
1568 }
1569 __setup("tracing_thresh=", set_tracing_thresh);
1570 
/* Convert nanoseconds to microseconds, discarding the remainder. */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	const unsigned long nsecs_per_usec = 1000;
	unsigned long usecs = nsecs / nsecs_per_usec;

	return usecs;
}
1575 
/*
 * TRACE_FLAGS is defined as a list of C(a, b) tuples that pair bit
 * masks with strings: 'a' is the eval (enum) name and 'b' is the string
 * that matches it. With "C(a, b) b", TRACE_FLAGS expands to the list of
 * strings only, in the order in which the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/*
 * Option names, NULL terminated.
 * These must match the bit positions in trace_iterator_flags.
 */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};
1590 
/*
 * Table of trace clocks. It is indexed by a trace array's clock_id
 * (see trace_clock_in_ns()). ARCH_TRACE_CLOCKS supplies any further
 * entries appended after the generic ones.
 */
static struct {
	u64 (*func)(void);	/* returns the clock value; units per in_ns */
	const char *name;	/* name of the clock */
	int in_ns;		/* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	{ ktime_get_tai_fast_ns,	"tai",		1 },
	ARCH_TRACE_CLOCKS
};
1607 
1608 bool trace_clock_in_ns(struct trace_array *tr)
1609 {
1610 	if (trace_clocks[tr->clock_id].in_ns)
1611 		return true;
1612 
1613 	return false;
1614 }
1615 
1616 /*
1617  * trace_parser_get_init - gets the buffer for trace parser
1618  */
1619 int trace_parser_get_init(struct trace_parser *parser, int size)
1620 {
1621 	memset(parser, 0, sizeof(*parser));
1622 
1623 	parser->buffer = kmalloc(size, GFP_KERNEL);
1624 	if (!parser->buffer)
1625 		return 1;
1626 
1627 	parser->size = size;
1628 	return 0;
1629 }
1630 
/*
 * trace_parser_put - frees the buffer for trace parser
 *
 * The buffer pointer is reset to NULL afterwards, so a repeated call
 * only ends up in kfree(NULL).
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
	parser->buffer = NULL;
}
1639 
/*
 * trace_get_user - reads the user input string separated by space
 * (matched by isspace(ch))
 *
 * @parser: parser state; its buffer receives the token
 * @ubuf:   user space buffer to read from
 * @cnt:    number of bytes available in @ubuf
 * @ppos:   file position; advanced by the number of bytes read
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns. A string that is cut off by the end of the
 * input is kept in the parser with parser->cont set, so that the next
 * call continues it.
 *
 * Returns number of bytes read, the non-zero value of a failing
 * get_user(), or -EINVAL if the string does not fit into the parser
 * buffer.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
	size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	/* A write at position 0 starts over with a clean parser */
	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto out;

	read++;
	cnt--;

	/*
	 * If the parser is not finished with the last write (parser->cont
	 * is set), continue reading the user input without skipping
	 * spaces. Otherwise a new string starts here.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto out;
			read++;
			cnt--;
		}

		parser->idx = 0;

		/* only spaces were written */
		if (isspace(ch) || !ch) {
			*ppos += read;
			ret = read;
			goto out;
		}
	}

	/* read the non-space input */
	while (cnt && !isspace(ch) && ch) {
		/* Always leave room for the terminating '\0' */
		if (parser->idx < parser->size - 1)
			parser->buffer[parser->idx++] = ch;
		else {
			ret = -EINVAL;
			goto out;
		}
		ret = get_user(ch, ubuf++);
		if (ret)
			goto out;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch) || !ch) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		/* Input ran out in the middle of a string: store ch, go on later */
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
		/* Make sure the parsed string always terminates with '\0'. */
		parser->buffer[parser->idx] = 0;
	} else {
		ret = -EINVAL;
		goto out;
	}

	*ppos += read;
	ret = read;

out:
	return ret;
}
1727 
1728 /* TODO add a seq_buf_to_buffer() */
1729 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1730 {
1731 	int len;
1732 
1733 	if (trace_seq_used(s) <= s->seq.readpos)
1734 		return -EBUSY;
1735 
1736 	len = trace_seq_used(s) - s->seq.readpos;
1737 	if (cnt > len)
1738 		cnt = len;
1739 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1740 
1741 	s->seq.readpos += cnt;
1742 	return cnt;
1743 }
1744 
1745 unsigned long __read_mostly	tracing_thresh;
1746 
1747 #ifdef CONFIG_TRACER_MAX_TRACE
1748 static const struct file_operations tracing_max_lat_fops;
1749 
1750 #ifdef LATENCY_FS_NOTIFY
1751 
1752 static struct workqueue_struct *fsnotify_wq;
1753 
/*
 * Work function of tr->fsnotify_work: sends an FS_MODIFY notification
 * for the inode behind the trace array's d_max_latency dentry.
 */
static void latency_fsnotify_workfn(struct work_struct *work)
{
	struct trace_array *tr = container_of(work, struct trace_array,
					      fsnotify_work);
	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
}
1760 
/*
 * irq_work callback of tr->fsnotify_irqwork: hands the notification
 * over to the fsnotify_wq workqueue by queueing tr->fsnotify_work.
 */
static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
{
	struct trace_array *tr = container_of(iwork, struct trace_array,
					      fsnotify_irqwork);
	queue_work(fsnotify_wq, &tr->fsnotify_work);
}
1767 
/*
 * Create the "tracing_max_latency" file of @tr under @d_tracer and set
 * up the work items used by latency_fsnotify(). The resulting dentry is
 * remembered in tr->d_max_latency.
 */
static void trace_create_maxlat_file(struct trace_array *tr,
				     struct dentry *d_tracer)
{
	/* Initialize both work items before the file is created */
	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
	tr->d_max_latency = trace_create_file("tracing_max_latency",
					      TRACE_MODE_WRITE,
					      d_tracer, &tr->max_latency,
					      &tracing_max_lat_fops);
}
1778 
1779 __init static int latency_fsnotify_init(void)
1780 {
1781 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1782 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1783 	if (!fsnotify_wq) {
1784 		pr_err("Unable to allocate tr_max_lat_wq\n");
1785 		return -ENOMEM;
1786 	}
1787 	return 0;
1788 }
1789 
1790 late_initcall_sync(latency_fsnotify_init);
1791 
1792 void latency_fsnotify(struct trace_array *tr)
1793 {
1794 	if (!fsnotify_wq)
1795 		return;
1796 	/*
1797 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1798 	 * possible that we are called from __schedule() or do_idle(), which
1799 	 * could cause a deadlock.
1800 	 */
1801 	irq_work_queue(&tr->fsnotify_irqwork);
1802 }
1803 
1804 #else /* !LATENCY_FS_NOTIFY */
1805 
/*
 * Without LATENCY_FS_NOTIFY only the "tracing_max_latency" file itself
 * is created. The arguments are parenthesized so that any expression
 * can be passed for @tr and @d_tracer.
 */
#define trace_create_maxlat_file(tr, d_tracer)				\
	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
			  (d_tracer), &(tr)->max_latency,		\
			  &tracing_max_lat_fops)
1809 
1810 #endif
1811 
1812 /*
1813  * Copy the new maximum trace into the separate maximum-trace
1814  * structure. (this way the maximum trace is permanently saved,
1815  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1816  */
1817 static void
1818 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1819 {
1820 	struct array_buffer *trace_buf = &tr->array_buffer;
1821 	struct array_buffer *max_buf = &tr->max_buffer;
1822 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1823 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1824 
1825 	max_buf->cpu = cpu;
1826 	max_buf->time_start = data->preempt_timestamp;
1827 
1828 	max_data->saved_latency = tr->max_latency;
1829 	max_data->critical_start = data->critical_start;
1830 	max_data->critical_end = data->critical_end;
1831 
1832 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1833 	max_data->pid = tsk->pid;
1834 	/*
1835 	 * If tsk == current, then use current_uid(), as that does not use
1836 	 * RCU. The irq tracer can be called out of RCU scope.
1837 	 */
1838 	if (tsk == current)
1839 		max_data->uid = current_uid();
1840 	else
1841 		max_data->uid = task_uid(tsk);
1842 
1843 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1844 	max_data->policy = tsk->policy;
1845 	max_data->rt_priority = tsk->rt_priority;
1846 
1847 	/* record this tasks comm */
1848 	tracing_record_cmdline(tsk);
1849 	latency_fsnotify(tr);
1850 }
1851 
/**
 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
 * @tr: tracer
 * @tsk: the task with the latency
 * @cpu: The cpu that initiated the trace.
 * @cond_data: User data associated with a conditional snapshot
 *
 * Flip the buffers between the @tr and the max_tr and record information
 * about which task was the cause of this latency.
 *
 * Nothing is done while @tr has a non-zero stop_count or has no
 * snapshot buffer allocated. Interrupts are expected to be disabled by
 * the caller (a warning is issued once otherwise).
 */
void
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
	      void *cond_data)
{
	if (tr->stop_count)
		return;

	WARN_ON_ONCE(!irqs_disabled());

	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	/* Inherit the recordable setting from array_buffer */
	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
		ring_buffer_record_on(tr->max_buffer.buffer);
	else
		ring_buffer_record_off(tr->max_buffer.buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	/*
	 * A conditional snapshot is only taken if its update() callback
	 * agrees. The callback runs with max_lock held.
	 */
	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
		arch_spin_unlock(&tr->max_lock);
		return;
	}
#endif
	/* The former live buffer becomes the snapshot and vice versa */
	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);

	__update_max_tr(tr, tsk, cpu);

	arch_spin_unlock(&tr->max_lock);
}
1897 
/**
 * update_max_tr_single - only copy one trace over, and reset the rest
 * @tr: tracer
 * @tsk: task with the latency
 * @cpu: the cpu of the buffer to copy.
 *
 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
 *
 * Nothing is done while @tr has a non-zero stop_count or has no
 * snapshot buffer allocated. Interrupts are expected to be disabled by
 * the caller (a warning is issued once otherwise).
 */
void
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	int ret;

	if (tr->stop_count)
		return;

	WARN_ON_ONCE(!irqs_disabled());
	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);

	if (ret == -EBUSY) {
		/*
		 * We failed to swap the buffer due to a commit taking
		 * place on this CPU. We fail to record, but we reset
		 * the max trace buffer (no one writes directly to it)
		 * and flag that it failed.
		 */
		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
			"Failed to swap buffers due to commit in progress\n");
	}

	/* -EAGAIN and -EBUSY are tolerated, any other error is unexpected */
	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);

	/* The task information is recorded even if the swap failed */
	__update_max_tr(tr, tsk, cpu);
	arch_spin_unlock(&tr->max_lock);
}
1941 
1942 #endif /* CONFIG_TRACER_MAX_TRACE */
1943 
1944 static int wait_on_pipe(struct trace_iterator *iter, int full)
1945 {
1946 	/* Iterators are static, they should be filled or empty */
1947 	if (trace_buffer_iter(iter, iter->cpu_file))
1948 		return 0;
1949 
1950 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1951 				full);
1952 }
1953 
1954 #ifdef CONFIG_FTRACE_STARTUP_TEST
1955 static bool selftests_can_run;
1956 
/* A tracer whose selftest was postponed (see save_selftest()) */
struct trace_selftests {
	struct list_head		list;	/* link in postponed_selftests */
	struct tracer			*type;	/* the tracer to test later */
};
1961 
1962 static LIST_HEAD(postponed_selftests);
1963 
1964 static int save_selftest(struct tracer *type)
1965 {
1966 	struct trace_selftests *selftest;
1967 
1968 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1969 	if (!selftest)
1970 		return -ENOMEM;
1971 
1972 	selftest->type = type;
1973 	list_add(&selftest->list, &postponed_selftests);
1974 	return 0;
1975 }
1976 
/*
 * Run the selftest of tracer @type on the global trace array.
 *
 * Returns 0 if the test passed or was not run (no selftest, selftests
 * disabled, tracing off, or the test was successfully postponed), -1 if
 * the test failed, or the error from save_selftest() when postponing
 * the test failed.
 */
static int run_tracer_selftest(struct tracer *type)
{
	struct trace_array *tr = &global_trace;
	struct tracer *saved_tracer = tr->current_trace;
	int ret;

	if (!type->selftest || tracing_selftest_disabled)
		return 0;

	/*
	 * If a tracer registers early in boot up (before scheduling is
	 * initialized and such), then do not run its selftests yet.
	 * Instead, run it a little later in the boot process.
	 */
	if (!selftests_can_run)
		return save_selftest(type);

	if (!tracing_is_on()) {
		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
			type->name);
		return 0;
	}

	/*
	 * Run a selftest on this tracer.
	 * Here we reset the trace buffer, and set the current
	 * tracer to be this tracer. The tracer can then run some
	 * internal tracing to verify that everything is in order.
	 * If we fail, we do not register this tracer.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

	tr->current_trace = type;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (type->use_max_tr) {
		/* If we expanded the buffers, make sure the max is expanded too */
		if (ring_buffer_expanded)
			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
		tr->allocated_snapshot = true;
	}
#endif

	/* the test is responsible for initializing and enabling */
	pr_info("Testing tracer %s: ", type->name);
	ret = type->selftest(type, tr);
	/* the test is responsible for resetting too */
	tr->current_trace = saved_tracer;
	if (ret) {
		printk(KERN_CONT "FAILED!\n");
		/* Add the warning after printing 'FAILED' */
		WARN_ON(1);
		return -1;
	}
	/* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	/* Undo the max buffer setup done before the test */
	if (type->use_max_tr) {
		tr->allocated_snapshot = false;

		/* Shrink the max buffer again */
		if (ring_buffer_expanded)
			ring_buffer_resize(tr->max_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
	}
#endif

	printk(KERN_CONT "PASSED\n");
	return 0;
}
2049 
2050 static int do_run_tracer_selftest(struct tracer *type)
2051 {
2052 	int ret;
2053 
2054 	/*
2055 	 * Tests can take a long time, especially if they are run one after the
2056 	 * other, as does happen during bootup when all the tracers are
2057 	 * registered. This could cause the soft lockup watchdog to trigger.
2058 	 */
2059 	cond_resched();
2060 
2061 	tracing_selftest_running = true;
2062 	ret = run_tracer_selftest(type);
2063 	tracing_selftest_running = false;
2064 
2065 	return ret;
2066 }
2067 
2068 static __init int init_trace_selftests(void)
2069 {
2070 	struct trace_selftests *p, *n;
2071 	struct tracer *t, **last;
2072 	int ret;
2073 
2074 	selftests_can_run = true;
2075 
2076 	mutex_lock(&trace_types_lock);
2077 
2078 	if (list_empty(&postponed_selftests))
2079 		goto out;
2080 
2081 	pr_info("Running postponed tracer tests:\n");
2082 
2083 	tracing_selftest_running = true;
2084 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2085 		/* This loop can take minutes when sanitizers are enabled, so
2086 		 * lets make sure we allow RCU processing.
2087 		 */
2088 		cond_resched();
2089 		ret = run_tracer_selftest(p->type);
2090 		/* If the test fails, then warn and remove from available_tracers */
2091 		if (ret < 0) {
2092 			WARN(1, "tracer: %s failed selftest, disabling\n",
2093 			     p->type->name);
2094 			last = &trace_types;
2095 			for (t = trace_types; t; t = t->next) {
2096 				if (t == p->type) {
2097 					*last = t->next;
2098 					break;
2099 				}
2100 				last = &t->next;
2101 			}
2102 		}
2103 		list_del(&p->list);
2104 		kfree(p);
2105 	}
2106 	tracing_selftest_running = false;
2107 
2108  out:
2109 	mutex_unlock(&trace_types_lock);
2110 
2111 	return 0;
2112 }
2113 core_initcall(init_trace_selftests);
2114 #else
/* Without CONFIG_FTRACE_STARTUP_TEST no selftest is run: report success */
static inline int run_tracer_selftest(struct tracer *type)
{
	return 0;
}
/* Without CONFIG_FTRACE_STARTUP_TEST no selftest is run: report success */
static inline int do_run_tracer_selftest(struct tracer *type)
{
	return 0;
}
2123 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2124 
2125 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2126 
2127 static void __init apply_trace_boot_options(void);
2128 
2129 /**
2130  * register_tracer - register a tracer with the ftrace system.
2131  * @type: the plugin for the tracer
2132  *
2133  * Register a new plugin tracer.
2134  */
2135 int __init register_tracer(struct tracer *type)
2136 {
2137 	struct tracer *t;
2138 	int ret = 0;
2139 
2140 	if (!type->name) {
2141 		pr_info("Tracer must have a name\n");
2142 		return -1;
2143 	}
2144 
2145 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2146 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2147 		return -1;
2148 	}
2149 
2150 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2151 		pr_warn("Can not register tracer %s due to lockdown\n",
2152 			   type->name);
2153 		return -EPERM;
2154 	}
2155 
2156 	mutex_lock(&trace_types_lock);
2157 
2158 	for (t = trace_types; t; t = t->next) {
2159 		if (strcmp(type->name, t->name) == 0) {
2160 			/* already found */
2161 			pr_info("Tracer %s already registered\n",
2162 				type->name);
2163 			ret = -1;
2164 			goto out;
2165 		}
2166 	}
2167 
2168 	if (!type->set_flag)
2169 		type->set_flag = &dummy_set_flag;
2170 	if (!type->flags) {
2171 		/*allocate a dummy tracer_flags*/
2172 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2173 		if (!type->flags) {
2174 			ret = -ENOMEM;
2175 			goto out;
2176 		}
2177 		type->flags->val = 0;
2178 		type->flags->opts = dummy_tracer_opt;
2179 	} else
2180 		if (!type->flags->opts)
2181 			type->flags->opts = dummy_tracer_opt;
2182 
2183 	/* store the tracer for __set_tracer_option */
2184 	type->flags->trace = type;
2185 
2186 	ret = do_run_tracer_selftest(type);
2187 	if (ret < 0)
2188 		goto out;
2189 
2190 	type->next = trace_types;
2191 	trace_types = type;
2192 	add_tracer_options(&global_trace, type);
2193 
2194  out:
2195 	mutex_unlock(&trace_types_lock);
2196 
2197 	if (ret || !default_bootup_tracer)
2198 		goto out_unlock;
2199 
2200 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2201 		goto out_unlock;
2202 
2203 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2204 	/* Do we want this tracer to start on bootup? */
2205 	tracing_set_tracer(&global_trace, type->name);
2206 	default_bootup_tracer = NULL;
2207 
2208 	apply_trace_boot_options();
2209 
2210 	/* disable other selftests, since this will break it. */
2211 	disable_tracing_selftest("running a tracer");
2212 
2213  out_unlock:
2214 	return ret;
2215 }
2216 
2217 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2218 {
2219 	struct trace_buffer *buffer = buf->buffer;
2220 
2221 	if (!buffer)
2222 		return;
2223 
2224 	ring_buffer_record_disable(buffer);
2225 
2226 	/* Make sure all commits have finished */
2227 	synchronize_rcu();
2228 	ring_buffer_reset_cpu(buffer, cpu);
2229 
2230 	ring_buffer_record_enable(buffer);
2231 }
2232 
2233 void tracing_reset_online_cpus(struct array_buffer *buf)
2234 {
2235 	struct trace_buffer *buffer = buf->buffer;
2236 
2237 	if (!buffer)
2238 		return;
2239 
2240 	ring_buffer_record_disable(buffer);
2241 
2242 	/* Make sure all commits have finished */
2243 	synchronize_rcu();
2244 
2245 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2246 
2247 	ring_buffer_reset_online_cpus(buffer);
2248 
2249 	ring_buffer_record_enable(buffer);
2250 }
2251 
2252 /* Must have trace_types_lock held */
2253 void tracing_reset_all_online_cpus_unlocked(void)
2254 {
2255 	struct trace_array *tr;
2256 
2257 	lockdep_assert_held(&trace_types_lock);
2258 
2259 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2260 		if (!tr->clear_trace)
2261 			continue;
2262 		tr->clear_trace = false;
2263 		tracing_reset_online_cpus(&tr->array_buffer);
2264 #ifdef CONFIG_TRACER_MAX_TRACE
2265 		tracing_reset_online_cpus(&tr->max_buffer);
2266 #endif
2267 	}
2268 }
2269 
/*
 * Locked variant of tracing_reset_all_online_cpus_unlocked(): takes
 * trace_types_lock around the call.
 */
void tracing_reset_all_online_cpus(void)
{
	mutex_lock(&trace_types_lock);
	tracing_reset_all_online_cpus_unlocked();
	mutex_unlock(&trace_types_lock);
}
2276 
/*
 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
 * is the tgid last observed corresponding to pid=i.
 */
static int *tgid_map;

/* The maximum valid index into tgid_map. */
static size_t tgid_map_max;

/* Number of comm slots requested by trace_create_savedcmd(). */
#define SAVED_CMDLINES_DEFAULT 128
/* Value stored in the map arrays to mean "no entry". */
#define NO_CMDLINE_MAP UINT_MAX
/*
 * Preemption must be disabled before acquiring trace_cmdline_lock.
 * The various trace_arrays' max_lock must be acquired in a context
 * where interrupt is disabled.
 */
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
/* Cache of task comms, looked up through a pid-indexed map. */
struct saved_cmdlines_buffer {
	/* Slot for (pid & (PID_MAX_DEFAULT - 1)), or NO_CMDLINE_MAP */
	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
	/* Full pid that currently owns each slot, or NO_CMDLINE_MAP */
	unsigned *map_cmdline_to_pid;
	/* Number of slots in map_cmdline_to_pid and saved_cmdlines */
	unsigned cmdline_num;
	/* Most recently assigned slot; the next one is taken round-robin */
	int cmdline_idx;
	/* cmdline_num consecutive comm strings of TASK_COMM_LEN bytes each */
	char *saved_cmdlines;
};
/* The active comm cache, allocated by trace_create_savedcmd(). */
static struct saved_cmdlines_buffer *savedcmd;
2302 
2303 static inline char *get_saved_cmdlines(int idx)
2304 {
2305 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2306 }
2307 
2308 static inline void set_cmdline(int idx, const char *cmdline)
2309 {
2310 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2311 }
2312 
2313 static int allocate_cmdlines_buffer(unsigned int val,
2314 				    struct saved_cmdlines_buffer *s)
2315 {
2316 	s->map_cmdline_to_pid = kmalloc_array(val,
2317 					      sizeof(*s->map_cmdline_to_pid),
2318 					      GFP_KERNEL);
2319 	if (!s->map_cmdline_to_pid)
2320 		return -ENOMEM;
2321 
2322 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2323 	if (!s->saved_cmdlines) {
2324 		kfree(s->map_cmdline_to_pid);
2325 		return -ENOMEM;
2326 	}
2327 
2328 	s->cmdline_idx = 0;
2329 	s->cmdline_num = val;
2330 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2331 	       sizeof(s->map_pid_to_cmdline));
2332 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2333 	       val * sizeof(*s->map_cmdline_to_pid));
2334 
2335 	return 0;
2336 }
2337 
2338 static int trace_create_savedcmd(void)
2339 {
2340 	int ret;
2341 
2342 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2343 	if (!savedcmd)
2344 		return -ENOMEM;
2345 
2346 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2347 	if (ret < 0) {
2348 		kfree(savedcmd);
2349 		savedcmd = NULL;
2350 		return -ENOMEM;
2351 	}
2352 
2353 	return 0;
2354 }
2355 
/*
 * Return the stop count of the global trace array; non-zero means
 * tracing_stop() has been called without a matching tracing_start().
 */
int is_tracing_stopped(void)
{
	return global_trace.stop_count;
}
2360 
2361 /**
2362  * tracing_start - quick start of the tracer
2363  *
2364  * If tracing is enabled but was stopped by tracing_stop,
2365  * this will start the tracer back up.
2366  */
2367 void tracing_start(void)
2368 {
2369 	struct trace_buffer *buffer;
2370 	unsigned long flags;
2371 
2372 	if (tracing_disabled)
2373 		return;
2374 
2375 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2376 	if (--global_trace.stop_count) {
2377 		if (global_trace.stop_count < 0) {
2378 			/* Someone screwed up their debugging */
2379 			WARN_ON_ONCE(1);
2380 			global_trace.stop_count = 0;
2381 		}
2382 		goto out;
2383 	}
2384 
2385 	/* Prevent the buffers from switching */
2386 	arch_spin_lock(&global_trace.max_lock);
2387 
2388 	buffer = global_trace.array_buffer.buffer;
2389 	if (buffer)
2390 		ring_buffer_record_enable(buffer);
2391 
2392 #ifdef CONFIG_TRACER_MAX_TRACE
2393 	buffer = global_trace.max_buffer.buffer;
2394 	if (buffer)
2395 		ring_buffer_record_enable(buffer);
2396 #endif
2397 
2398 	arch_spin_unlock(&global_trace.max_lock);
2399 
2400  out:
2401 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2402 }
2403 
2404 static void tracing_start_tr(struct trace_array *tr)
2405 {
2406 	struct trace_buffer *buffer;
2407 	unsigned long flags;
2408 
2409 	if (tracing_disabled)
2410 		return;
2411 
2412 	/* If global, we need to also start the max tracer */
2413 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2414 		return tracing_start();
2415 
2416 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2417 
2418 	if (--tr->stop_count) {
2419 		if (tr->stop_count < 0) {
2420 			/* Someone screwed up their debugging */
2421 			WARN_ON_ONCE(1);
2422 			tr->stop_count = 0;
2423 		}
2424 		goto out;
2425 	}
2426 
2427 	buffer = tr->array_buffer.buffer;
2428 	if (buffer)
2429 		ring_buffer_record_enable(buffer);
2430 
2431  out:
2432 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2433 }
2434 
2435 /**
2436  * tracing_stop - quick stop of the tracer
2437  *
2438  * Light weight way to stop tracing. Use in conjunction with
2439  * tracing_start.
2440  */
2441 void tracing_stop(void)
2442 {
2443 	struct trace_buffer *buffer;
2444 	unsigned long flags;
2445 
2446 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2447 	if (global_trace.stop_count++)
2448 		goto out;
2449 
2450 	/* Prevent the buffers from switching */
2451 	arch_spin_lock(&global_trace.max_lock);
2452 
2453 	buffer = global_trace.array_buffer.buffer;
2454 	if (buffer)
2455 		ring_buffer_record_disable(buffer);
2456 
2457 #ifdef CONFIG_TRACER_MAX_TRACE
2458 	buffer = global_trace.max_buffer.buffer;
2459 	if (buffer)
2460 		ring_buffer_record_disable(buffer);
2461 #endif
2462 
2463 	arch_spin_unlock(&global_trace.max_lock);
2464 
2465  out:
2466 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2467 }
2468 
2469 static void tracing_stop_tr(struct trace_array *tr)
2470 {
2471 	struct trace_buffer *buffer;
2472 	unsigned long flags;
2473 
2474 	/* If global, we need to also stop the max tracer */
2475 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2476 		return tracing_stop();
2477 
2478 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2479 	if (tr->stop_count++)
2480 		goto out;
2481 
2482 	buffer = tr->array_buffer.buffer;
2483 	if (buffer)
2484 		ring_buffer_record_disable(buffer);
2485 
2486  out:
2487 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2488 }
2489 
2490 static int trace_save_cmdline(struct task_struct *tsk)
2491 {
2492 	unsigned tpid, idx;
2493 
2494 	/* treat recording of idle task as a success */
2495 	if (!tsk->pid)
2496 		return 1;
2497 
2498 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2499 
2500 	/*
2501 	 * It's not the end of the world if we don't get
2502 	 * the lock, but we also don't want to spin
2503 	 * nor do we want to disable interrupts,
2504 	 * so if we miss here, then better luck next time.
2505 	 *
2506 	 * This is called within the scheduler and wake up, so interrupts
2507 	 * had better been disabled and run queue lock been held.
2508 	 */
2509 	lockdep_assert_preemption_disabled();
2510 	if (!arch_spin_trylock(&trace_cmdline_lock))
2511 		return 0;
2512 
2513 	idx = savedcmd->map_pid_to_cmdline[tpid];
2514 	if (idx == NO_CMDLINE_MAP) {
2515 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2516 
2517 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2518 		savedcmd->cmdline_idx = idx;
2519 	}
2520 
2521 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2522 	set_cmdline(idx, tsk->comm);
2523 
2524 	arch_spin_unlock(&trace_cmdline_lock);
2525 
2526 	return 1;
2527 }
2528 
2529 static void __trace_find_cmdline(int pid, char comm[])
2530 {
2531 	unsigned map;
2532 	int tpid;
2533 
2534 	if (!pid) {
2535 		strcpy(comm, "<idle>");
2536 		return;
2537 	}
2538 
2539 	if (WARN_ON_ONCE(pid < 0)) {
2540 		strcpy(comm, "<XXX>");
2541 		return;
2542 	}
2543 
2544 	tpid = pid & (PID_MAX_DEFAULT - 1);
2545 	map = savedcmd->map_pid_to_cmdline[tpid];
2546 	if (map != NO_CMDLINE_MAP) {
2547 		tpid = savedcmd->map_cmdline_to_pid[map];
2548 		if (tpid == pid) {
2549 			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2550 			return;
2551 		}
2552 	}
2553 	strcpy(comm, "<...>");
2554 }
2555 
/*
 * Look up the saved comm for @pid and copy it into @comm, with
 * trace_cmdline_lock held and preemption disabled around the lookup.
 * Up to TASK_COMM_LEN bytes are written to @comm.
 */
void trace_find_cmdline(int pid, char comm[])
{
	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	__trace_find_cmdline(pid, comm);

	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}
2566 
2567 static int *trace_find_tgid_ptr(int pid)
2568 {
2569 	/*
2570 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2571 	 * if we observe a non-NULL tgid_map then we also observe the correct
2572 	 * tgid_map_max.
2573 	 */
2574 	int *map = smp_load_acquire(&tgid_map);
2575 
2576 	if (unlikely(!map || pid > tgid_map_max))
2577 		return NULL;
2578 
2579 	return &map[pid];
2580 }
2581 
/* Return the tgid recorded for @pid, or 0 if there is no slot for it. */
int trace_find_tgid(int pid)
{
	int *slot = trace_find_tgid_ptr(pid);

	if (!slot)
		return 0;

	return *slot;
}
2588 
2589 static int trace_save_tgid(struct task_struct *tsk)
2590 {
2591 	int *ptr;
2592 
2593 	/* treat recording of idle task as a success */
2594 	if (!tsk->pid)
2595 		return 1;
2596 
2597 	ptr = trace_find_tgid_ptr(tsk->pid);
2598 	if (!ptr)
2599 		return 0;
2600 
2601 	*ptr = tsk->tgid;
2602 	return 1;
2603 }
2604 
2605 static bool tracing_record_taskinfo_skip(int flags)
2606 {
2607 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2608 		return true;
2609 	if (!__this_cpu_read(trace_taskinfo_save))
2610 		return true;
2611 	return false;
2612 }
2613 
2614 /**
2615  * tracing_record_taskinfo - record the task info of a task
2616  *
2617  * @task:  task to record
2618  * @flags: TRACE_RECORD_CMDLINE for recording comm
2619  *         TRACE_RECORD_TGID for recording tgid
2620  */
2621 void tracing_record_taskinfo(struct task_struct *task, int flags)
2622 {
2623 	bool done;
2624 
2625 	if (tracing_record_taskinfo_skip(flags))
2626 		return;
2627 
2628 	/*
2629 	 * Record as much task information as possible. If some fail, continue
2630 	 * to try to record the others.
2631 	 */
2632 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2633 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2634 
2635 	/* If recording any information failed, retry again soon. */
2636 	if (!done)
2637 		return;
2638 
2639 	__this_cpu_write(trace_taskinfo_save, false);
2640 }
2641 
2642 /**
2643  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2644  *
2645  * @prev: previous task during sched_switch
2646  * @next: next task during sched_switch
2647  * @flags: TRACE_RECORD_CMDLINE for recording comm
2648  *         TRACE_RECORD_TGID for recording tgid
2649  */
2650 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2651 					  struct task_struct *next, int flags)
2652 {
2653 	bool done;
2654 
2655 	if (tracing_record_taskinfo_skip(flags))
2656 		return;
2657 
2658 	/*
2659 	 * Record as much task information as possible. If some fail, continue
2660 	 * to try to record the others.
2661 	 */
2662 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2663 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2664 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2665 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2666 
2667 	/* If recording any information failed, retry again soon. */
2668 	if (!done)
2669 		return;
2670 
2671 	__this_cpu_write(trace_taskinfo_save, false);
2672 }
2673 
/* Helpers to record a specific task information */

/* Record only the comm of @task. */
void tracing_record_cmdline(struct task_struct *task)
{
	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
}
2679 
/* Record only the tgid of @task. */
void tracing_record_tgid(struct task_struct *task)
{
	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
}
2684 
2685 /*
2686  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2687  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2688  * simplifies those functions and keeps them in sync.
2689  */
2690 enum print_line_t trace_handle_return(struct trace_seq *s)
2691 {
2692 	return trace_seq_has_overflowed(s) ?
2693 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2694 }
2695 EXPORT_SYMBOL_GPL(trace_handle_return);
2696 
/*
 * Return current->migration_disabled on CONFIG_SMP kernels, and 0
 * otherwise.
 */
static unsigned short migration_disable_value(void)
{
	unsigned short depth = 0;

#ifdef CONFIG_SMP
	depth = current->migration_disabled;
#endif
	return depth;
}
2705 
2706 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2707 {
2708 	unsigned int trace_flags = irqs_status;
2709 	unsigned int pc;
2710 
2711 	pc = preempt_count();
2712 
2713 	if (pc & NMI_MASK)
2714 		trace_flags |= TRACE_FLAG_NMI;
2715 	if (pc & HARDIRQ_MASK)
2716 		trace_flags |= TRACE_FLAG_HARDIRQ;
2717 	if (in_serving_softirq())
2718 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2719 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2720 		trace_flags |= TRACE_FLAG_BH_OFF;
2721 
2722 	if (tif_need_resched())
2723 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2724 	if (test_preempt_need_resched())
2725 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2726 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2727 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2728 }
2729 
/*
 * Non-inline entry point that forwards all arguments unchanged to
 * __trace_buffer_lock_reserve() and returns its result.
 */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned int trace_ctx)
{
	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
}
2738 
/* Per-CPU temporary event page, NULL while buffering is not enabled. */
DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
/*
 * Per-CPU use count for trace_buffered_event: the page may only be used
 * by the context that raises this from 0 to 1.
 */
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
/* Number of trace_buffered_event_enable() calls not yet disabled. */
static int trace_buffered_event_ref;
2742 
2743 /**
2744  * trace_buffered_event_enable - enable buffering events
2745  *
2746  * When events are being filtered, it is quicker to use a temporary
2747  * buffer to write the event data into if there's a likely chance
2748  * that it will not be committed. The discard of the ring buffer
2749  * is not as fast as committing, and is much slower than copying
2750  * a commit.
2751  *
2752  * When an event is to be filtered, allocate per cpu buffers to
2753  * write the event data into, and if the event is filtered and discarded
2754  * it is simply dropped, otherwise, the entire data is to be committed
2755  * in one shot.
2756  */
2757 void trace_buffered_event_enable(void)
2758 {
2759 	struct ring_buffer_event *event;
2760 	struct page *page;
2761 	int cpu;
2762 
2763 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2764 
2765 	if (trace_buffered_event_ref++)
2766 		return;
2767 
2768 	for_each_tracing_cpu(cpu) {
2769 		page = alloc_pages_node(cpu_to_node(cpu),
2770 					GFP_KERNEL | __GFP_NORETRY, 0);
2771 		if (!page)
2772 			goto failed;
2773 
2774 		event = page_address(page);
2775 		memset(event, 0, sizeof(*event));
2776 
2777 		per_cpu(trace_buffered_event, cpu) = event;
2778 
2779 		preempt_disable();
2780 		if (cpu == smp_processor_id() &&
2781 		    __this_cpu_read(trace_buffered_event) !=
2782 		    per_cpu(trace_buffered_event, cpu))
2783 			WARN_ON_ONCE(1);
2784 		preempt_enable();
2785 	}
2786 
2787 	return;
2788  failed:
2789 	trace_buffered_event_disable();
2790 }
2791 
/*
 * Cross-CPU callback: drop this CPU's trace_buffered_event_cnt again,
 * undoing disable_trace_buffered_event(). @data is unused.
 */
static void enable_trace_buffered_event(void *data)
{
	/* Probably not needed, but do it anyway */
	smp_rmb();
	this_cpu_dec(trace_buffered_event_cnt);
}
2798 
/*
 * Cross-CPU callback: raise this CPU's trace_buffered_event_cnt so that
 * the per-CPU buffered event looks busy and is not handed out.
 * @data is unused.
 */
static void disable_trace_buffered_event(void *data)
{
	this_cpu_inc(trace_buffered_event_cnt);
}
2803 
2804 /**
2805  * trace_buffered_event_disable - disable buffering events
2806  *
2807  * When a filter is removed, it is faster to not use the buffered
2808  * events, and to commit directly into the ring buffer. Free up
2809  * the temp buffers when there are no more users. This requires
2810  * special synchronization with current events.
2811  */
2812 void trace_buffered_event_disable(void)
2813 {
2814 	int cpu;
2815 
2816 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2817 
2818 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2819 		return;
2820 
2821 	if (--trace_buffered_event_ref)
2822 		return;
2823 
2824 	preempt_disable();
2825 	/* For each CPU, set the buffer as used. */
2826 	smp_call_function_many(tracing_buffer_mask,
2827 			       disable_trace_buffered_event, NULL, 1);
2828 	preempt_enable();
2829 
2830 	/* Wait for all current users to finish */
2831 	synchronize_rcu();
2832 
2833 	for_each_tracing_cpu(cpu) {
2834 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2835 		per_cpu(trace_buffered_event, cpu) = NULL;
2836 	}
2837 	/*
2838 	 * Make sure trace_buffered_event is NULL before clearing
2839 	 * trace_buffered_event_cnt.
2840 	 */
2841 	smp_wmb();
2842 
2843 	preempt_disable();
2844 	/* Do the work on each cpu */
2845 	smp_call_function_many(tracing_buffer_mask,
2846 			       enable_trace_buffered_event, NULL, 1);
2847 	preempt_enable();
2848 }
2849 
/*
 * Fallback ring buffer used below when the real reserve fails but the
 * event file has conditional triggers that still need the event data.
 */
static struct trace_buffer *temp_buffer;

/*
 * Reserve room for an event of @len bytes and type @type for @trace_file.
 *
 * @current_rb: set to the ring buffer the event belongs to: the trace
 *              array's main buffer, or temp_buffer on the trigger fallback.
 * @trace_ctx:  trace context word stored with the event.
 *
 * If the file is soft-disabled or filtered and the trace array does not
 * have no_filter_buffering_ref set, the per-CPU buffered event is tried
 * first; when it is handed out, this returns with preemption disabled.
 * Otherwise the event is reserved with __trace_buffer_lock_reserve(), and
 * the result of that call (retried on temp_buffer for files with
 * EVENT_FILE_FL_TRIGGER_COND) is returned.
 */
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
			  struct trace_event_file *trace_file,
			  int type, unsigned long len,
			  unsigned int trace_ctx)
{
	struct ring_buffer_event *entry;
	struct trace_array *tr = trace_file->tr;
	int val;

	*current_rb = tr->array_buffer.buffer;

	if (!tr->no_filter_buffering_ref &&
	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
		preempt_disable_notrace();
		/*
		 * Filtering is on, so try to use the per cpu buffer first.
		 * This buffer will simulate a ring_buffer_event,
		 * where the type_len is zero and the array[0] will
		 * hold the full length.
		 * (see include/linux/ring-buffer.h for details on
		 *  how the ring_buffer_event is structured).
		 *
		 * Using a temp buffer during filtering and copying it
		 * on a matched filter is quicker than writing directly
		 * into the ring buffer and then discarding it when
		 * it doesn't match. That is because the discard
		 * requires several atomic operations to get right.
		 * Copying on match and doing nothing on a failed match
		 * is still quicker than no copy on match, but having
		 * to discard out of the ring buffer on a failed match.
		 */
		if ((entry = __this_cpu_read(trace_buffered_event))) {
			/* Payload room left in the page after the event header */
			int max_len = PAGE_SIZE - struct_size(entry, array, 1);

			val = this_cpu_inc_return(trace_buffered_event_cnt);

			/*
			 * Preemption is disabled, but interrupts and NMIs
			 * can still come in now. If that happens after
			 * the above increment, then it will have to go
			 * back to the old method of allocating the event
			 * on the ring buffer, and if the filter fails, it
			 * will have to call ring_buffer_discard_commit()
			 * to remove it.
			 *
			 * Need to also check the unlikely case that the
			 * length is bigger than the temp buffer size.
			 * If that happens, then the reserve is pretty much
			 * guaranteed to fail, as the ring buffer currently
			 * only allows events less than a page. But that may
			 * change in the future, so let the ring buffer reserve
			 * handle the failure in that case.
			 */
			if (val == 1 && likely(len <= max_len)) {
				trace_event_setup(entry, type, trace_ctx);
				entry->array[0] = len;
				/* Return with preemption disabled */
				return entry;
			}
			/* Buffer busy or event too large: give the count back */
			this_cpu_dec(trace_buffered_event_cnt);
		}
		/* __trace_buffer_lock_reserve() disables preemption */
		preempt_enable_notrace();
	}

	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
					    trace_ctx);
	/*
	 * If tracing is off, but we have triggers enabled
	 * we still need to look at the event data. Use the temp_buffer
	 * to store the trace event for the trigger to use. It's recursive
	 * safe and will not be recorded anywhere.
	 */
	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
		*current_rb = temp_buffer;
		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
						    trace_ctx);
	}
	return entry;
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2934 
/* Serializes use of tracepoint_print_iter in output_printk(). */
static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
/* Serializes tracepoint_printk updates in tracepoint_printk_sysctl(). */
static DEFINE_MUTEX(tracepoint_printk_mutex);
2937 
2938 static void output_printk(struct trace_event_buffer *fbuffer)
2939 {
2940 	struct trace_event_call *event_call;
2941 	struct trace_event_file *file;
2942 	struct trace_event *event;
2943 	unsigned long flags;
2944 	struct trace_iterator *iter = tracepoint_print_iter;
2945 
2946 	/* We should never get here if iter is NULL */
2947 	if (WARN_ON_ONCE(!iter))
2948 		return;
2949 
2950 	event_call = fbuffer->trace_file->event_call;
2951 	if (!event_call || !event_call->event.funcs ||
2952 	    !event_call->event.funcs->trace)
2953 		return;
2954 
2955 	file = fbuffer->trace_file;
2956 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2957 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2958 	     !filter_match_preds(file->filter, fbuffer->entry)))
2959 		return;
2960 
2961 	event = &fbuffer->trace_file->event_call->event;
2962 
2963 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2964 	trace_seq_init(&iter->seq);
2965 	iter->ent = fbuffer->entry;
2966 	event_call->event.funcs->trace(iter, 0, event);
2967 	trace_seq_putc(&iter->seq, 0);
2968 	printk("%s", iter->seq.buffer);
2969 
2970 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2971 }
2972 
2973 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2974 			     void *buffer, size_t *lenp,
2975 			     loff_t *ppos)
2976 {
2977 	int save_tracepoint_printk;
2978 	int ret;
2979 
2980 	mutex_lock(&tracepoint_printk_mutex);
2981 	save_tracepoint_printk = tracepoint_printk;
2982 
2983 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2984 
2985 	/*
2986 	 * This will force exiting early, as tracepoint_printk
2987 	 * is always zero when tracepoint_printk_iter is not allocated
2988 	 */
2989 	if (!tracepoint_print_iter)
2990 		tracepoint_printk = 0;
2991 
2992 	if (save_tracepoint_printk == tracepoint_printk)
2993 		goto out;
2994 
2995 	if (tracepoint_printk)
2996 		static_key_enable(&tracepoint_printk_key.key);
2997 	else
2998 		static_key_disable(&tracepoint_printk_key.key);
2999 
3000  out:
3001 	mutex_unlock(&tracepoint_printk_mutex);
3002 
3003 	return ret;
3004 }
3005 
3006 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
3007 {
3008 	enum event_trigger_type tt = ETT_NONE;
3009 	struct trace_event_file *file = fbuffer->trace_file;
3010 
3011 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
3012 			fbuffer->entry, &tt))
3013 		goto discard;
3014 
3015 	if (static_key_false(&tracepoint_printk_key.key))
3016 		output_printk(fbuffer);
3017 
3018 	if (static_branch_unlikely(&trace_event_exports_enabled))
3019 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3020 
3021 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3022 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3023 
3024 discard:
3025 	if (tt)
3026 		event_triggers_post_call(file, tt);
3027 
3028 }
3029 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3030 
/*
 * Skip 3:
 *
 *   trace_buffer_unlock_commit_regs()
 *   trace_event_buffer_commit()
 *   trace_event_raw_event_xxx()
 */
# define STACK_SKIP 3

/*
 * Commit @event to @buffer, then record the kernel and user stack traces
 * through ftrace_trace_stack() and ftrace_trace_userstack().
 */
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct trace_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned int trace_ctx,
				     struct pt_regs *regs)
{
	int skip = STACK_SKIP;

	__buffer_unlock_commit(buffer, event);

	/*
	 * If regs is not set, then skip the necessary functions.
	 * Note, we can still get here via blktrace, wakeup tracer
	 * and mmiotrace, but that's ok if they lose a function or
	 * two. They are not that meaningful.
	 */
	if (regs)
		skip = 0;

	ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
	ftrace_trace_userstack(tr, buffer, trace_ctx);
}
3057 
/*
 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack:
 * only commits @event to @buffer.
 */
void
trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
				   struct ring_buffer_event *event)
{
	__buffer_unlock_commit(buffer, event);
}
3067 
3068 void
3069 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3070 	       parent_ip, unsigned int trace_ctx)
3071 {
3072 	struct trace_event_call *call = &event_function;
3073 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3074 	struct ring_buffer_event *event;
3075 	struct ftrace_entry *entry;
3076 
3077 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3078 					    trace_ctx);
3079 	if (!event)
3080 		return;
3081 	entry	= ring_buffer_event_data(event);
3082 	entry->ip			= ip;
3083 	entry->parent_ip		= parent_ip;
3084 
3085 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3086 		if (static_branch_unlikely(&trace_function_exports_enabled))
3087 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3088 		__buffer_unlock_commit(buffer, event);
3089 	}
3090 }
3091 
3092 #ifdef CONFIG_STACKTRACE
3093 
/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
#define FTRACE_KSTACK_NESTING	4

/* Number of return addresses that fit in one nesting level's scratch stack */
#define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)

/* Scratch array that a stack trace is saved into before being copied out */
struct ftrace_stack {
	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
};


/* One scratch stack per nesting level */
struct ftrace_stacks {
	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
};

/* Per-CPU scratch stacks used by __ftrace_trace_stack() */
static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
/* Per-CPU count of scratch stacks in use, i.e. the current nesting depth */
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3110 
/*
 * Save a kernel stack trace into a TRACE_STACK event on @buffer.
 *
 * @trace_ctx: trace context word stored with the event.
 * @skip:      number of innermost stack entries to leave out.
 * @regs:      if set, unwind from these registers instead of from here.
 *
 * The trace is first saved into this CPU's scratch stack for the current
 * nesting level and then copied into the reserved event. Nothing is
 * recorded if the nesting limit is exceeded or the reserve fails.
 */
static void __ftrace_trace_stack(struct trace_buffer *buffer,
				 unsigned int trace_ctx,
				 int skip, struct pt_regs *regs)
{
	struct trace_event_call *call = &event_kernel_stack;
	struct ring_buffer_event *event;
	unsigned int size, nr_entries;
	struct ftrace_stack *fstack;
	struct stack_entry *entry;
	int stackidx;
	void *ptr;

	/*
	 * Add one, for this function and the call to save_stack_trace()
	 * If regs is set, then these functions will not be in the way.
	 */
#ifndef CONFIG_UNWINDER_ORC
	if (!regs)
		skip++;
#endif

	preempt_disable_notrace();

	/* Claim the scratch stack for this nesting level */
	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;

	/* This should never happen. If it does, yell once and skip */
	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
		goto out;

	/*
	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
	 * interrupt will either see the value pre increment or post
	 * increment. If the interrupt happens pre increment it will have
	 * restored the counter when it returns.  We just need a barrier to
	 * keep gcc from moving things around.
	 */
	barrier();

	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
	size = ARRAY_SIZE(fstack->calls);

	if (regs) {
		nr_entries = stack_trace_save_regs(regs, fstack->calls,
						   size, skip);
	} else {
		nr_entries = stack_trace_save(fstack->calls, size, skip);
	}

	/* From here on size is the number of bytes of saved entries */
	size = nr_entries * sizeof(unsigned long);
	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
				    (sizeof(*entry) - sizeof(entry->caller)) + size,
				    trace_ctx);
	if (!event)
		goto out;
	ptr = ring_buffer_event_data(event);
	entry = ptr;

	/*
	 * For backward compatibility reasons, the entry->caller is an
	 * array of 8 slots to store the stack. This is also exported
	 * to user space. The amount allocated on the ring buffer actually
	 * holds enough for the stack specified by nr_entries. This will
	 * go into the location of entry->caller. Due to string fortifiers
	 * checking the size of the destination of memcpy() it triggers
	 * when it detects that size is greater than 8. To hide this from
	 * the fortifiers, we use "ptr" and pointer arithmetic to assign caller.
	 *
	 * The below is really just:
	 *   memcpy(&entry->caller, fstack->calls, size);
	 */
	ptr += offsetof(typeof(*entry), caller);
	memcpy(ptr, fstack->calls, size);

	entry->size = nr_entries;

	if (!call_filter_check_discard(call, entry, buffer, event))
		__buffer_unlock_commit(buffer, event);

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
	/* Release the scratch stack claimed above */
	__this_cpu_dec(ftrace_stack_reserve);
	preempt_enable_notrace();

}
3196 
3197 static inline void ftrace_trace_stack(struct trace_array *tr,
3198 				      struct trace_buffer *buffer,
3199 				      unsigned int trace_ctx,
3200 				      int skip, struct pt_regs *regs)
3201 {
3202 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3203 		return;
3204 
3205 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3206 }
3207 
3208 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3209 		   int skip)
3210 {
3211 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3212 
3213 	if (rcu_is_watching()) {
3214 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3215 		return;
3216 	}
3217 
3218 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3219 		return;
3220 
3221 	/*
3222 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3223 	 * but if the above rcu_is_watching() failed, then the NMI
3224 	 * triggered someplace critical, and ct_irq_enter() should
3225 	 * not be called from NMI.
3226 	 */
3227 	if (unlikely(in_nmi()))
3228 		return;
3229 
3230 	ct_irq_enter_irqson();
3231 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3232 	ct_irq_exit_irqson();
3233 }
3234 
3235 /**
3236  * trace_dump_stack - record a stack back trace in the trace buffer
3237  * @skip: Number of functions to skip (helper handlers)
3238  */
3239 void trace_dump_stack(int skip)
3240 {
3241 	if (tracing_disabled || tracing_selftest_running)
3242 		return;
3243 
3244 #ifndef CONFIG_UNWINDER_ORC
3245 	/* Skip 1 to skip this function. */
3246 	skip++;
3247 #endif
3248 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3249 			     tracing_gen_ctx(), skip, NULL);
3250 }
3251 EXPORT_SYMBOL_GPL(trace_dump_stack);
3252 
#ifdef CONFIG_USER_STACKTRACE_SUPPORT
/* Per-CPU recursion guard: non-zero while a user stack is being saved */
static DEFINE_PER_CPU(int, user_stack_count);

/*
 * Record the current task's user space stack into @buffer as a
 * TRACE_USER_STACK event, if the userstacktrace option is set in @tr.
 */
static void
ftrace_trace_userstack(struct trace_array *tr,
		       struct trace_buffer *buffer, unsigned int trace_ctx)
{
	struct trace_event_call *call = &event_user_stack;
	struct ring_buffer_event *event;
	struct userstack_entry *entry;

	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
		return;

	/*
	 * NMIs can not handle page faults, even with fix ups.
	 * Saving the user stack can (and often does) fault.
	 */
	if (unlikely(in_nmi()))
		return;

	preempt_disable();

	/*
	 * Saving the user stack may trigger other kernel events; the
	 * per-CPU counter keeps those from recursing back in here.
	 */
	if (!__this_cpu_read(user_stack_count)) {
		__this_cpu_inc(user_stack_count);

		event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
						    sizeof(*entry), trace_ctx);
		if (event) {
			entry = ring_buffer_event_data(event);

			entry->tgid = current->tgid;
			/* Start from all-zero slots before saving the stack */
			memset(&entry->caller, 0, sizeof(entry->caller));

			stack_trace_save_user(entry->caller,
					      FTRACE_STACK_ENTRIES);
			if (!call_filter_check_discard(call, entry,
						       buffer, event))
				__buffer_unlock_commit(buffer, event);
		}

		__this_cpu_dec(user_stack_count);
	}

	preempt_enable();
}
#else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* No user stack trace support configured: nothing to record */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3309 
3310 #endif /* CONFIG_STACKTRACE */
3311 
3312 static inline void
3313 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3314 			  unsigned long long delta)
3315 {
3316 	entry->bottom_delta_ts = delta & U32_MAX;
3317 	entry->top_delta_ts = (delta >> 32);
3318 }
3319 
3320 void trace_last_func_repeats(struct trace_array *tr,
3321 			     struct trace_func_repeats *last_info,
3322 			     unsigned int trace_ctx)
3323 {
3324 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3325 	struct func_repeats_entry *entry;
3326 	struct ring_buffer_event *event;
3327 	u64 delta;
3328 
3329 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3330 					    sizeof(*entry), trace_ctx);
3331 	if (!event)
3332 		return;
3333 
3334 	delta = ring_buffer_event_time_stamp(buffer, event) -
3335 		last_info->ts_last_call;
3336 
3337 	entry = ring_buffer_event_data(event);
3338 	entry->ip = last_info->ip;
3339 	entry->parent_ip = last_info->parent_ip;
3340 	entry->count = last_info->count;
3341 	func_repeats_set_delta_ts(entry, delta);
3342 
3343 	__buffer_unlock_commit(buffer, event);
3344 }
3345 
3346 /* created for use with alloc_percpu */
3347 struct trace_buffer_struct {
3348 	int nesting;
3349 	char buffer[4][TRACE_BUF_SIZE];
3350 };
3351 
3352 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3353 
3354 /*
3355  * This allows for lockless recording.  If we're nested too deeply, then
3356  * this returns NULL.
3357  */
3358 static char *get_trace_buf(void)
3359 {
3360 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3361 
3362 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3363 		return NULL;
3364 
3365 	buffer->nesting++;
3366 
3367 	/* Interrupts must see nesting incremented before we use the buffer */
3368 	barrier();
3369 	return &buffer->buffer[buffer->nesting - 1][0];
3370 }
3371 
3372 static void put_trace_buf(void)
3373 {
3374 	/* Don't let the decrement of nesting leak before this */
3375 	barrier();
3376 	this_cpu_dec(trace_percpu_buffer->nesting);
3377 }
3378 
3379 static int alloc_percpu_trace_buffer(void)
3380 {
3381 	struct trace_buffer_struct __percpu *buffers;
3382 
3383 	if (trace_percpu_buffer)
3384 		return 0;
3385 
3386 	buffers = alloc_percpu(struct trace_buffer_struct);
3387 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3388 		return -ENOMEM;
3389 
3390 	trace_percpu_buffer = buffers;
3391 	return 0;
3392 }
3393 
3394 static int buffers_allocated;
3395 
3396 void trace_printk_init_buffers(void)
3397 {
3398 	if (buffers_allocated)
3399 		return;
3400 
3401 	if (alloc_percpu_trace_buffer())
3402 		return;
3403 
3404 	/* trace_printk() is for debug use only. Don't use it in production. */
3405 
3406 	pr_warn("\n");
3407 	pr_warn("**********************************************************\n");
3408 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3409 	pr_warn("**                                                      **\n");
3410 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3411 	pr_warn("**                                                      **\n");
3412 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3413 	pr_warn("** unsafe for production use.                           **\n");
3414 	pr_warn("**                                                      **\n");
3415 	pr_warn("** If you see this message and you are not debugging    **\n");
3416 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3417 	pr_warn("**                                                      **\n");
3418 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3419 	pr_warn("**********************************************************\n");
3420 
3421 	/* Expand the buffers to set size */
3422 	tracing_update_buffers();
3423 
3424 	buffers_allocated = 1;
3425 
3426 	/*
3427 	 * trace_printk_init_buffers() can be called by modules.
3428 	 * If that happens, then we need to start cmdline recording
3429 	 * directly here. If the global_trace.buffer is already
3430 	 * allocated here, then this was called by module code.
3431 	 */
3432 	if (global_trace.array_buffer.buffer)
3433 		tracing_start_cmdline_record();
3434 }
3435 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3436 
3437 void trace_printk_start_comm(void)
3438 {
3439 	/* Start tracing comms if trace printk is set */
3440 	if (!buffers_allocated)
3441 		return;
3442 	tracing_start_cmdline_record();
3443 }
3444 
3445 static void trace_printk_start_stop_comm(int enabled)
3446 {
3447 	if (!buffers_allocated)
3448 		return;
3449 
3450 	if (enabled)
3451 		tracing_start_cmdline_record();
3452 	else
3453 		tracing_stop_cmdline_record();
3454 }
3455 
3456 /**
3457  * trace_vbprintk - write binary msg to tracing buffer
3458  * @ip:    The address of the caller
3459  * @fmt:   The string format to write to the buffer
3460  * @args:  Arguments for @fmt
3461  */
3462 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3463 {
3464 	struct trace_event_call *call = &event_bprint;
3465 	struct ring_buffer_event *event;
3466 	struct trace_buffer *buffer;
3467 	struct trace_array *tr = &global_trace;
3468 	struct bprint_entry *entry;
3469 	unsigned int trace_ctx;
3470 	char *tbuffer;
3471 	int len = 0, size;
3472 
3473 	if (unlikely(tracing_selftest_running || tracing_disabled))
3474 		return 0;
3475 
3476 	/* Don't pollute graph traces with trace_vprintk internals */
3477 	pause_graph_tracing();
3478 
3479 	trace_ctx = tracing_gen_ctx();
3480 	preempt_disable_notrace();
3481 
3482 	tbuffer = get_trace_buf();
3483 	if (!tbuffer) {
3484 		len = 0;
3485 		goto out_nobuffer;
3486 	}
3487 
3488 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3489 
3490 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3491 		goto out_put;
3492 
3493 	size = sizeof(*entry) + sizeof(u32) * len;
3494 	buffer = tr->array_buffer.buffer;
3495 	ring_buffer_nest_start(buffer);
3496 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3497 					    trace_ctx);
3498 	if (!event)
3499 		goto out;
3500 	entry = ring_buffer_event_data(event);
3501 	entry->ip			= ip;
3502 	entry->fmt			= fmt;
3503 
3504 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3505 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3506 		__buffer_unlock_commit(buffer, event);
3507 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3508 	}
3509 
3510 out:
3511 	ring_buffer_nest_end(buffer);
3512 out_put:
3513 	put_trace_buf();
3514 
3515 out_nobuffer:
3516 	preempt_enable_notrace();
3517 	unpause_graph_tracing();
3518 
3519 	return len;
3520 }
3521 EXPORT_SYMBOL_GPL(trace_vbprintk);
3522 
3523 __printf(3, 0)
3524 static int
3525 __trace_array_vprintk(struct trace_buffer *buffer,
3526 		      unsigned long ip, const char *fmt, va_list args)
3527 {
3528 	struct trace_event_call *call = &event_print;
3529 	struct ring_buffer_event *event;
3530 	int len = 0, size;
3531 	struct print_entry *entry;
3532 	unsigned int trace_ctx;
3533 	char *tbuffer;
3534 
3535 	if (tracing_disabled)
3536 		return 0;
3537 
3538 	/* Don't pollute graph traces with trace_vprintk internals */
3539 	pause_graph_tracing();
3540 
3541 	trace_ctx = tracing_gen_ctx();
3542 	preempt_disable_notrace();
3543 
3544 
3545 	tbuffer = get_trace_buf();
3546 	if (!tbuffer) {
3547 		len = 0;
3548 		goto out_nobuffer;
3549 	}
3550 
3551 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3552 
3553 	size = sizeof(*entry) + len + 1;
3554 	ring_buffer_nest_start(buffer);
3555 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3556 					    trace_ctx);
3557 	if (!event)
3558 		goto out;
3559 	entry = ring_buffer_event_data(event);
3560 	entry->ip = ip;
3561 
3562 	memcpy(&entry->buf, tbuffer, len + 1);
3563 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3564 		__buffer_unlock_commit(buffer, event);
3565 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3566 	}
3567 
3568 out:
3569 	ring_buffer_nest_end(buffer);
3570 	put_trace_buf();
3571 
3572 out_nobuffer:
3573 	preempt_enable_notrace();
3574 	unpause_graph_tracing();
3575 
3576 	return len;
3577 }
3578 
3579 __printf(3, 0)
3580 int trace_array_vprintk(struct trace_array *tr,
3581 			unsigned long ip, const char *fmt, va_list args)
3582 {
3583 	if (tracing_selftest_running && tr == &global_trace)
3584 		return 0;
3585 
3586 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3587 }
3588 
3589 /**
3590  * trace_array_printk - Print a message to a specific instance
3591  * @tr: The instance trace_array descriptor
3592  * @ip: The instruction pointer that this is called from.
3593  * @fmt: The format to print (printf format)
3594  *
3595  * If a subsystem sets up its own instance, they have the right to
3596  * printk strings into their tracing instance buffer using this
3597  * function. Note, this function will not write into the top level
3598  * buffer (use trace_printk() for that), as writing into the top level
3599  * buffer should only have events that can be individually disabled.
3600  * trace_printk() is only used for debugging a kernel, and should not
3601  * be ever incorporated in normal use.
3602  *
3603  * trace_array_printk() can be used, as it will not add noise to the
3604  * top level tracing buffer.
3605  *
3606  * Note, trace_array_init_printk() must be called on @tr before this
3607  * can be used.
3608  */
3609 __printf(3, 0)
3610 int trace_array_printk(struct trace_array *tr,
3611 		       unsigned long ip, const char *fmt, ...)
3612 {
3613 	int ret;
3614 	va_list ap;
3615 
3616 	if (!tr)
3617 		return -ENOENT;
3618 
3619 	/* This is only allowed for created instances */
3620 	if (tr == &global_trace)
3621 		return 0;
3622 
3623 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3624 		return 0;
3625 
3626 	va_start(ap, fmt);
3627 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3628 	va_end(ap);
3629 	return ret;
3630 }
3631 EXPORT_SYMBOL_GPL(trace_array_printk);
3632 
3633 /**
3634  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3635  * @tr: The trace array to initialize the buffers for
3636  *
3637  * As trace_array_printk() only writes into instances, they are OK to
3638  * have in the kernel (unlike trace_printk()). This needs to be called
3639  * before trace_array_printk() can be used on a trace_array.
3640  */
3641 int trace_array_init_printk(struct trace_array *tr)
3642 {
3643 	if (!tr)
3644 		return -ENOENT;
3645 
3646 	/* This is only allowed for created instances */
3647 	if (tr == &global_trace)
3648 		return -EINVAL;
3649 
3650 	return alloc_percpu_trace_buffer();
3651 }
3652 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3653 
3654 __printf(3, 4)
3655 int trace_array_printk_buf(struct trace_buffer *buffer,
3656 			   unsigned long ip, const char *fmt, ...)
3657 {
3658 	int ret;
3659 	va_list ap;
3660 
3661 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3662 		return 0;
3663 
3664 	va_start(ap, fmt);
3665 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3666 	va_end(ap);
3667 	return ret;
3668 }
3669 
3670 __printf(2, 0)
3671 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3672 {
3673 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3674 }
3675 EXPORT_SYMBOL_GPL(trace_vprintk);
3676 
3677 static void trace_iterator_increment(struct trace_iterator *iter)
3678 {
3679 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3680 
3681 	iter->idx++;
3682 	if (buf_iter)
3683 		ring_buffer_iter_advance(buf_iter);
3684 }
3685 
3686 static struct trace_entry *
3687 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3688 		unsigned long *lost_events)
3689 {
3690 	struct ring_buffer_event *event;
3691 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3692 
3693 	if (buf_iter) {
3694 		event = ring_buffer_iter_peek(buf_iter, ts);
3695 		if (lost_events)
3696 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3697 				(unsigned long)-1 : 0;
3698 	} else {
3699 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3700 					 lost_events);
3701 	}
3702 
3703 	if (event) {
3704 		iter->ent_size = ring_buffer_event_length(event);
3705 		return ring_buffer_event_data(event);
3706 	}
3707 	iter->ent_size = 0;
3708 	return NULL;
3709 }
3710 
3711 static struct trace_entry *
3712 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3713 		  unsigned long *missing_events, u64 *ent_ts)
3714 {
3715 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3716 	struct trace_entry *ent, *next = NULL;
3717 	unsigned long lost_events = 0, next_lost = 0;
3718 	int cpu_file = iter->cpu_file;
3719 	u64 next_ts = 0, ts;
3720 	int next_cpu = -1;
3721 	int next_size = 0;
3722 	int cpu;
3723 
3724 	/*
3725 	 * If we are in a per_cpu trace file, don't bother by iterating over
3726 	 * all cpu and peek directly.
3727 	 */
3728 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3729 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3730 			return NULL;
3731 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3732 		if (ent_cpu)
3733 			*ent_cpu = cpu_file;
3734 
3735 		return ent;
3736 	}
3737 
3738 	for_each_tracing_cpu(cpu) {
3739 
3740 		if (ring_buffer_empty_cpu(buffer, cpu))
3741 			continue;
3742 
3743 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3744 
3745 		/*
3746 		 * Pick the entry with the smallest timestamp:
3747 		 */
3748 		if (ent && (!next || ts < next_ts)) {
3749 			next = ent;
3750 			next_cpu = cpu;
3751 			next_ts = ts;
3752 			next_lost = lost_events;
3753 			next_size = iter->ent_size;
3754 		}
3755 	}
3756 
3757 	iter->ent_size = next_size;
3758 
3759 	if (ent_cpu)
3760 		*ent_cpu = next_cpu;
3761 
3762 	if (ent_ts)
3763 		*ent_ts = next_ts;
3764 
3765 	if (missing_events)
3766 		*missing_events = next_lost;
3767 
3768 	return next;
3769 }
3770 
3771 #define STATIC_FMT_BUF_SIZE	128
3772 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3773 
3774 char *trace_iter_expand_format(struct trace_iterator *iter)
3775 {
3776 	char *tmp;
3777 
3778 	/*
3779 	 * iter->tr is NULL when used with tp_printk, which makes
3780 	 * this get called where it is not safe to call krealloc().
3781 	 */
3782 	if (!iter->tr || iter->fmt == static_fmt_buf)
3783 		return NULL;
3784 
3785 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3786 		       GFP_KERNEL);
3787 	if (tmp) {
3788 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3789 		iter->fmt = tmp;
3790 	}
3791 
3792 	return tmp;
3793 }
3794 
3795 /* Returns true if the string is safe to dereference from an event */
3796 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3797 			   bool star, int len)
3798 {
3799 	unsigned long addr = (unsigned long)str;
3800 	struct trace_event *trace_event;
3801 	struct trace_event_call *event;
3802 
3803 	/* Ignore strings with no length */
3804 	if (star && !len)
3805 		return true;
3806 
3807 	/* OK if part of the event data */
3808 	if ((addr >= (unsigned long)iter->ent) &&
3809 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3810 		return true;
3811 
3812 	/* OK if part of the temp seq buffer */
3813 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3814 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3815 		return true;
3816 
3817 	/* Core rodata can not be freed */
3818 	if (is_kernel_rodata(addr))
3819 		return true;
3820 
3821 	if (trace_is_tracepoint_string(str))
3822 		return true;
3823 
3824 	/*
3825 	 * Now this could be a module event, referencing core module
3826 	 * data, which is OK.
3827 	 */
3828 	if (!iter->ent)
3829 		return false;
3830 
3831 	trace_event = ftrace_find_event(iter->ent->type);
3832 	if (!trace_event)
3833 		return false;
3834 
3835 	event = container_of(trace_event, struct trace_event_call, event);
3836 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3837 		return false;
3838 
3839 	/* Would rather have rodata, but this will suffice */
3840 	if (within_module_core(addr, event->module))
3841 		return true;
3842 
3843 	return false;
3844 }
3845 
3846 static const char *show_buffer(struct trace_seq *s)
3847 {
3848 	struct seq_buf *seq = &s->seq;
3849 
3850 	seq_buf_terminate(seq);
3851 
3852 	return seq->buffer;
3853 }
3854 
/* Enabled by test_can_verify() when the string verifier can not be used */
static DEFINE_STATIC_KEY_FALSE(trace_no_verify);

/*
 * Probe whether vsnprintf() advances the caller's va_list: let it
 * consume one "%d" and return the int that va_arg() yields afterwards.
 */
static int test_can_verify_check(const char *fmt, ...)
{
	char scratch[16];
	va_list ap;
	int next_arg;

	/*
	 * The verifier depends on vsnprintf() modifying the va_list
	 * passed to it, which happens when the va_list is passed by
	 * reference. Some architectures (like x86_32) pass it by value,
	 * so vsnprintf() leaves the caller's va_list untouched and the
	 * verifier would need to understand every value vsnprintf() can
	 * consume. In that case the verifier is disabled.
	 */
	va_start(ap, fmt);
	vsnprintf(scratch, sizeof(scratch), "%d", ap);
	next_arg = va_arg(ap, int);
	va_end(ap);

	return next_arg;
}
3879 
3880 static void test_can_verify(void)
3881 {
3882 	if (!test_can_verify_check("%d %d", 0, 1)) {
3883 		pr_info("trace event string verifier disabled\n");
3884 		static_branch_inc(&trace_no_verify);
3885 	}
3886 }
3887 
3888 /**
3889  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3890  * @iter: The iterator that holds the seq buffer and the event being printed
3891  * @fmt: The format used to print the event
3892  * @ap: The va_list holding the data to print from @fmt.
3893  *
3894  * This writes the data into the @iter->seq buffer using the data from
3895  * @fmt and @ap. If the format has a %s, then the source of the string
3896  * is examined to make sure it is safe to print, otherwise it will
3897  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3898  * pointer.
3899  */
3900 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3901 			 va_list ap)
3902 {
3903 	const char *p = fmt;
3904 	const char *str;
3905 	int i, j;
3906 
3907 	if (WARN_ON_ONCE(!fmt))
3908 		return;
3909 
3910 	if (static_branch_unlikely(&trace_no_verify))
3911 		goto print;
3912 
3913 	/* Don't bother checking when doing a ftrace_dump() */
3914 	if (iter->fmt == static_fmt_buf)
3915 		goto print;
3916 
3917 	while (*p) {
3918 		bool star = false;
3919 		int len = 0;
3920 
3921 		j = 0;
3922 
3923 		/* We only care about %s and variants */
3924 		for (i = 0; p[i]; i++) {
3925 			if (i + 1 >= iter->fmt_size) {
3926 				/*
3927 				 * If we can't expand the copy buffer,
3928 				 * just print it.
3929 				 */
3930 				if (!trace_iter_expand_format(iter))
3931 					goto print;
3932 			}
3933 
3934 			if (p[i] == '\\' && p[i+1]) {
3935 				i++;
3936 				continue;
3937 			}
3938 			if (p[i] == '%') {
3939 				/* Need to test cases like %08.*s */
3940 				for (j = 1; p[i+j]; j++) {
3941 					if (isdigit(p[i+j]) ||
3942 					    p[i+j] == '.')
3943 						continue;
3944 					if (p[i+j] == '*') {
3945 						star = true;
3946 						continue;
3947 					}
3948 					break;
3949 				}
3950 				if (p[i+j] == 's')
3951 					break;
3952 				star = false;
3953 			}
3954 			j = 0;
3955 		}
3956 		/* If no %s found then just print normally */
3957 		if (!p[i])
3958 			break;
3959 
3960 		/* Copy up to the %s, and print that */
3961 		strncpy(iter->fmt, p, i);
3962 		iter->fmt[i] = '\0';
3963 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3964 
3965 		/*
3966 		 * If iter->seq is full, the above call no longer guarantees
3967 		 * that ap is in sync with fmt processing, and further calls
3968 		 * to va_arg() can return wrong positional arguments.
3969 		 *
3970 		 * Ensure that ap is no longer used in this case.
3971 		 */
3972 		if (iter->seq.full) {
3973 			p = "";
3974 			break;
3975 		}
3976 
3977 		if (star)
3978 			len = va_arg(ap, int);
3979 
3980 		/* The ap now points to the string data of the %s */
3981 		str = va_arg(ap, const char *);
3982 
3983 		/*
3984 		 * If you hit this warning, it is likely that the
3985 		 * trace event in question used %s on a string that
3986 		 * was saved at the time of the event, but may not be
3987 		 * around when the trace is read. Use __string(),
3988 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3989 		 * instead. See samples/trace_events/trace-events-sample.h
3990 		 * for reference.
3991 		 */
3992 		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3993 			      "fmt: '%s' current_buffer: '%s'",
3994 			      fmt, show_buffer(&iter->seq))) {
3995 			int ret;
3996 
3997 			/* Try to safely read the string */
3998 			if (star) {
3999 				if (len + 1 > iter->fmt_size)
4000 					len = iter->fmt_size - 1;
4001 				if (len < 0)
4002 					len = 0;
4003 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
4004 				iter->fmt[len] = 0;
4005 				star = false;
4006 			} else {
4007 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
4008 								  iter->fmt_size);
4009 			}
4010 			if (ret < 0)
4011 				trace_seq_printf(&iter->seq, "(0x%px)", str);
4012 			else
4013 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
4014 						 str, iter->fmt);
4015 			str = "[UNSAFE-MEMORY]";
4016 			strcpy(iter->fmt, "%s");
4017 		} else {
4018 			strncpy(iter->fmt, p + i, j + 1);
4019 			iter->fmt[j+1] = '\0';
4020 		}
4021 		if (star)
4022 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
4023 		else
4024 			trace_seq_printf(&iter->seq, iter->fmt, str);
4025 
4026 		p += i + j + 1;
4027 	}
4028  print:
4029 	if (*p)
4030 		trace_seq_vprintf(&iter->seq, p, ap);
4031 }
4032 
4033 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4034 {
4035 	const char *p, *new_fmt;
4036 	char *q;
4037 
4038 	if (WARN_ON_ONCE(!fmt))
4039 		return fmt;
4040 
4041 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4042 		return fmt;
4043 
4044 	p = fmt;
4045 	new_fmt = q = iter->fmt;
4046 	while (*p) {
4047 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4048 			if (!trace_iter_expand_format(iter))
4049 				return fmt;
4050 
4051 			q += iter->fmt - new_fmt;
4052 			new_fmt = iter->fmt;
4053 		}
4054 
4055 		*q++ = *p++;
4056 
4057 		/* Replace %p with %px */
4058 		if (p[-1] == '%') {
4059 			if (p[0] == '%') {
4060 				*q++ = *p++;
4061 			} else if (p[0] == 'p' && !isalnum(p[1])) {
4062 				*q++ = *p++;
4063 				*q++ = 'x';
4064 			}
4065 		}
4066 	}
4067 	*q = '\0';
4068 
4069 	return new_fmt;
4070 }
4071 
4072 #define STATIC_TEMP_BUF_SIZE	128
4073 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4074 
4075 /* Find the next real entry, without updating the iterator itself */
4076 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4077 					  int *ent_cpu, u64 *ent_ts)
4078 {
4079 	/* __find_next_entry will reset ent_size */
4080 	int ent_size = iter->ent_size;
4081 	struct trace_entry *entry;
4082 
4083 	/*
4084 	 * If called from ftrace_dump(), then the iter->temp buffer
4085 	 * will be the static_temp_buf and not created from kmalloc.
4086 	 * If the entry size is greater than the buffer, we can
4087 	 * not save it. Just return NULL in that case. This is only
4088 	 * used to add markers when two consecutive events' time
4089 	 * stamps have a large delta. See trace_print_lat_context()
4090 	 */
4091 	if (iter->temp == static_temp_buf &&
4092 	    STATIC_TEMP_BUF_SIZE < ent_size)
4093 		return NULL;
4094 
4095 	/*
4096 	 * The __find_next_entry() may call peek_next_entry(), which may
4097 	 * call ring_buffer_peek() that may make the contents of iter->ent
4098 	 * undefined. Need to copy iter->ent now.
4099 	 */
4100 	if (iter->ent && iter->ent != iter->temp) {
4101 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4102 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4103 			void *temp;
4104 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
4105 			if (!temp)
4106 				return NULL;
4107 			kfree(iter->temp);
4108 			iter->temp = temp;
4109 			iter->temp_size = iter->ent_size;
4110 		}
4111 		memcpy(iter->temp, iter->ent, iter->ent_size);
4112 		iter->ent = iter->temp;
4113 	}
4114 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4115 	/* Put back the original ent_size */
4116 	iter->ent_size = ent_size;
4117 
4118 	return entry;
4119 }
4120 
4121 /* Find the next real entry, and increment the iterator to the next entry */
4122 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4123 {
4124 	iter->ent = __find_next_entry(iter, &iter->cpu,
4125 				      &iter->lost_events, &iter->ts);
4126 
4127 	if (iter->ent)
4128 		trace_iterator_increment(iter);
4129 
4130 	return iter->ent ? iter : NULL;
4131 }
4132 
4133 static void trace_consume(struct trace_iterator *iter)
4134 {
4135 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4136 			    &iter->lost_events);
4137 }
4138 
4139 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4140 {
4141 	struct trace_iterator *iter = m->private;
4142 	int i = (int)*pos;
4143 	void *ent;
4144 
4145 	WARN_ON_ONCE(iter->leftover);
4146 
4147 	(*pos)++;
4148 
4149 	/* can't go backwards */
4150 	if (iter->idx > i)
4151 		return NULL;
4152 
4153 	if (iter->idx < 0)
4154 		ent = trace_find_next_entry_inc(iter);
4155 	else
4156 		ent = iter;
4157 
4158 	while (ent && iter->idx < i)
4159 		ent = trace_find_next_entry_inc(iter);
4160 
4161 	iter->pos = *pos;
4162 
4163 	return ent;
4164 }
4165 
4166 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4167 {
4168 	struct ring_buffer_iter *buf_iter;
4169 	unsigned long entries = 0;
4170 	u64 ts;
4171 
4172 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4173 
4174 	buf_iter = trace_buffer_iter(iter, cpu);
4175 	if (!buf_iter)
4176 		return;
4177 
4178 	ring_buffer_iter_reset(buf_iter);
4179 
4180 	/*
4181 	 * We could have the case with the max latency tracers
4182 	 * that a reset never took place on a cpu. This is evident
4183 	 * by the timestamp being before the start of the buffer.
4184 	 */
4185 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4186 		if (ts >= iter->array_buffer->time_start)
4187 			break;
4188 		entries++;
4189 		ring_buffer_iter_advance(buf_iter);
4190 	}
4191 
4192 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4193 }
4194 
4195 /*
4196  * The current tracer is copied to avoid a global locking
4197  * all around.
4198  */
4199 static void *s_start(struct seq_file *m, loff_t *pos)
4200 {
4201 	struct trace_iterator *iter = m->private;
4202 	struct trace_array *tr = iter->tr;
4203 	int cpu_file = iter->cpu_file;
4204 	void *p = NULL;
4205 	loff_t l = 0;
4206 	int cpu;
4207 
4208 	/*
4209 	 * copy the tracer to avoid using a global lock all around.
4210 	 * iter->trace is a copy of current_trace, the pointer to the
4211 	 * name may be used instead of a strcmp(), as iter->trace->name
4212 	 * will point to the same string as current_trace->name.
4213 	 */
4214 	mutex_lock(&trace_types_lock);
4215 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4216 		*iter->trace = *tr->current_trace;
4217 	mutex_unlock(&trace_types_lock);
4218 
4219 #ifdef CONFIG_TRACER_MAX_TRACE
4220 	if (iter->snapshot && iter->trace->use_max_tr)
4221 		return ERR_PTR(-EBUSY);
4222 #endif
4223 
4224 	if (*pos != iter->pos) {
4225 		iter->ent = NULL;
4226 		iter->cpu = 0;
4227 		iter->idx = -1;
4228 
4229 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4230 			for_each_tracing_cpu(cpu)
4231 				tracing_iter_reset(iter, cpu);
4232 		} else
4233 			tracing_iter_reset(iter, cpu_file);
4234 
4235 		iter->leftover = 0;
4236 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4237 			;
4238 
4239 	} else {
4240 		/*
4241 		 * If we overflowed the seq_file before, then we want
4242 		 * to just reuse the trace_seq buffer again.
4243 		 */
4244 		if (iter->leftover)
4245 			p = iter;
4246 		else {
4247 			l = *pos - 1;
4248 			p = s_next(m, p, &l);
4249 		}
4250 	}
4251 
4252 	trace_event_read_lock();
4253 	trace_access_lock(cpu_file);
4254 	return p;
4255 }
4256 
4257 static void s_stop(struct seq_file *m, void *p)
4258 {
4259 	struct trace_iterator *iter = m->private;
4260 
4261 #ifdef CONFIG_TRACER_MAX_TRACE
4262 	if (iter->snapshot && iter->trace->use_max_tr)
4263 		return;
4264 #endif
4265 
4266 	trace_access_unlock(iter->cpu_file);
4267 	trace_event_read_unlock();
4268 }
4269 
4270 static void
4271 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4272 		      unsigned long *entries, int cpu)
4273 {
4274 	unsigned long count;
4275 
4276 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4277 	/*
4278 	 * If this buffer has skipped entries, then we hold all
4279 	 * entries for the trace and we need to ignore the
4280 	 * ones before the time stamp.
4281 	 */
4282 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4283 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4284 		/* total is the same as the entries */
4285 		*total = count;
4286 	} else
4287 		*total = count +
4288 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4289 	*entries = count;
4290 }
4291 
4292 static void
4293 get_total_entries(struct array_buffer *buf,
4294 		  unsigned long *total, unsigned long *entries)
4295 {
4296 	unsigned long t, e;
4297 	int cpu;
4298 
4299 	*total = 0;
4300 	*entries = 0;
4301 
4302 	for_each_tracing_cpu(cpu) {
4303 		get_total_entries_cpu(buf, &t, &e, cpu);
4304 		*total += t;
4305 		*entries += e;
4306 	}
4307 }
4308 
4309 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4310 {
4311 	unsigned long total, entries;
4312 
4313 	if (!tr)
4314 		tr = &global_trace;
4315 
4316 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4317 
4318 	return entries;
4319 }
4320 
4321 unsigned long trace_total_entries(struct trace_array *tr)
4322 {
4323 	unsigned long total, entries;
4324 
4325 	if (!tr)
4326 		tr = &global_trace;
4327 
4328 	get_total_entries(&tr->array_buffer, &total, &entries);
4329 
4330 	return entries;
4331 }
4332 
/* Emit the column legend used by the latency trace format. */
static void print_lat_help_header(struct seq_file *m)
{
	static const char legend[] =
		"#                    _------=> CPU#            \n"
		"#                   / _-----=> irqs-off/BH-disabled\n"
		"#                  | / _----=> need-resched    \n"
		"#                  || / _---=> hardirq/softirq \n"
		"#                  ||| / _--=> preempt-depth   \n"
		"#                  |||| / _-=> migrate-disable \n"
		"#                  ||||| /     delay           \n"
		"#  cmd     pid     |||||| time  |   caller     \n"
		"#     \\   /        ||||||  \\    |    /       \n";

	seq_puts(m, legend);
}
4345 
/*
 * Print the "entries-in-buffer/entries-written" summary line for @buf
 * followed by an empty comment line.
 */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long written, in_buffer;

	get_total_entries(buf, &written, &in_buffer);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   in_buffer, written, num_online_cpus());
	seq_puts(m, "#\n");
}
4356 
4357 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4358 				   unsigned int flags)
4359 {
4360 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4361 
4362 	print_event_info(buf, m);
4363 
4364 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4365 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4366 }
4367 
4368 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4369 				       unsigned int flags)
4370 {
4371 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4372 	static const char space[] = "            ";
4373 	int prec = tgid ? 12 : 2;
4374 
4375 	print_event_info(buf, m);
4376 
4377 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4378 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4379 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4380 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4381 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4382 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4383 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4384 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4385 }
4386 
4387 void
4388 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4389 {
4390 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4391 	struct array_buffer *buf = iter->array_buffer;
4392 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4393 	struct tracer *type = iter->trace;
4394 	unsigned long entries;
4395 	unsigned long total;
4396 	const char *name = type->name;
4397 
4398 	get_total_entries(buf, &total, &entries);
4399 
4400 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4401 		   name, UTS_RELEASE);
4402 	seq_puts(m, "# -----------------------------------"
4403 		 "---------------------------------\n");
4404 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4405 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4406 		   nsecs_to_usecs(data->saved_latency),
4407 		   entries,
4408 		   total,
4409 		   buf->cpu,
4410 		   preempt_model_none()      ? "server" :
4411 		   preempt_model_voluntary() ? "desktop" :
4412 		   preempt_model_full()      ? "preempt" :
4413 		   preempt_model_rt()        ? "preempt_rt" :
4414 		   "unknown",
4415 		   /* These are reserved for later use */
4416 		   0, 0, 0, 0);
4417 #ifdef CONFIG_SMP
4418 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4419 #else
4420 	seq_puts(m, ")\n");
4421 #endif
4422 	seq_puts(m, "#    -----------------\n");
4423 	seq_printf(m, "#    | task: %.16s-%d "
4424 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4425 		   data->comm, data->pid,
4426 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4427 		   data->policy, data->rt_priority);
4428 	seq_puts(m, "#    -----------------\n");
4429 
4430 	if (data->critical_start) {
4431 		seq_puts(m, "#  => started at: ");
4432 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4433 		trace_print_seq(m, &iter->seq);
4434 		seq_puts(m, "\n#  => ended at:   ");
4435 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4436 		trace_print_seq(m, &iter->seq);
4437 		seq_puts(m, "\n#\n");
4438 	}
4439 
4440 	seq_puts(m, "#\n");
4441 }
4442 
4443 static void test_cpu_buff_start(struct trace_iterator *iter)
4444 {
4445 	struct trace_seq *s = &iter->seq;
4446 	struct trace_array *tr = iter->tr;
4447 
4448 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4449 		return;
4450 
4451 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4452 		return;
4453 
4454 	if (cpumask_available(iter->started) &&
4455 	    cpumask_test_cpu(iter->cpu, iter->started))
4456 		return;
4457 
4458 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4459 		return;
4460 
4461 	if (cpumask_available(iter->started))
4462 		cpumask_set_cpu(iter->cpu, iter->started);
4463 
4464 	/* Don't print started cpu buffer for the first entry of the trace */
4465 	if (iter->idx > 1)
4466 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4467 				iter->cpu);
4468 }
4469 
4470 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4471 {
4472 	struct trace_array *tr = iter->tr;
4473 	struct trace_seq *s = &iter->seq;
4474 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4475 	struct trace_entry *entry;
4476 	struct trace_event *event;
4477 
4478 	entry = iter->ent;
4479 
4480 	test_cpu_buff_start(iter);
4481 
4482 	event = ftrace_find_event(entry->type);
4483 
4484 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4485 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4486 			trace_print_lat_context(iter);
4487 		else
4488 			trace_print_context(iter);
4489 	}
4490 
4491 	if (trace_seq_has_overflowed(s))
4492 		return TRACE_TYPE_PARTIAL_LINE;
4493 
4494 	if (event) {
4495 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4496 			return print_event_fields(iter, event);
4497 		return event->funcs->trace(iter, sym_flags, event);
4498 	}
4499 
4500 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4501 
4502 	return trace_handle_return(s);
4503 }
4504 
4505 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4506 {
4507 	struct trace_array *tr = iter->tr;
4508 	struct trace_seq *s = &iter->seq;
4509 	struct trace_entry *entry;
4510 	struct trace_event *event;
4511 
4512 	entry = iter->ent;
4513 
4514 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4515 		trace_seq_printf(s, "%d %d %llu ",
4516 				 entry->pid, iter->cpu, iter->ts);
4517 
4518 	if (trace_seq_has_overflowed(s))
4519 		return TRACE_TYPE_PARTIAL_LINE;
4520 
4521 	event = ftrace_find_event(entry->type);
4522 	if (event)
4523 		return event->funcs->raw(iter, 0, event);
4524 
4525 	trace_seq_printf(s, "%d ?\n", entry->type);
4526 
4527 	return trace_handle_return(s);
4528 }
4529 
4530 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4531 {
4532 	struct trace_array *tr = iter->tr;
4533 	struct trace_seq *s = &iter->seq;
4534 	unsigned char newline = '\n';
4535 	struct trace_entry *entry;
4536 	struct trace_event *event;
4537 
4538 	entry = iter->ent;
4539 
4540 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4541 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4542 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4543 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4544 		if (trace_seq_has_overflowed(s))
4545 			return TRACE_TYPE_PARTIAL_LINE;
4546 	}
4547 
4548 	event = ftrace_find_event(entry->type);
4549 	if (event) {
4550 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4551 		if (ret != TRACE_TYPE_HANDLED)
4552 			return ret;
4553 	}
4554 
4555 	SEQ_PUT_FIELD(s, newline);
4556 
4557 	return trace_handle_return(s);
4558 }
4559 
4560 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4561 {
4562 	struct trace_array *tr = iter->tr;
4563 	struct trace_seq *s = &iter->seq;
4564 	struct trace_entry *entry;
4565 	struct trace_event *event;
4566 
4567 	entry = iter->ent;
4568 
4569 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4570 		SEQ_PUT_FIELD(s, entry->pid);
4571 		SEQ_PUT_FIELD(s, iter->cpu);
4572 		SEQ_PUT_FIELD(s, iter->ts);
4573 		if (trace_seq_has_overflowed(s))
4574 			return TRACE_TYPE_PARTIAL_LINE;
4575 	}
4576 
4577 	event = ftrace_find_event(entry->type);
4578 	return event ? event->funcs->binary(iter, 0, event) :
4579 		TRACE_TYPE_HANDLED;
4580 }
4581 
4582 int trace_empty(struct trace_iterator *iter)
4583 {
4584 	struct ring_buffer_iter *buf_iter;
4585 	int cpu;
4586 
4587 	/* If we are looking at one CPU buffer, only check that one */
4588 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4589 		cpu = iter->cpu_file;
4590 		buf_iter = trace_buffer_iter(iter, cpu);
4591 		if (buf_iter) {
4592 			if (!ring_buffer_iter_empty(buf_iter))
4593 				return 0;
4594 		} else {
4595 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4596 				return 0;
4597 		}
4598 		return 1;
4599 	}
4600 
4601 	for_each_tracing_cpu(cpu) {
4602 		buf_iter = trace_buffer_iter(iter, cpu);
4603 		if (buf_iter) {
4604 			if (!ring_buffer_iter_empty(buf_iter))
4605 				return 0;
4606 		} else {
4607 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4608 				return 0;
4609 		}
4610 	}
4611 
4612 	return 1;
4613 }
4614 
4615 /*  Called with trace_event_read_lock() held. */
4616 enum print_line_t print_trace_line(struct trace_iterator *iter)
4617 {
4618 	struct trace_array *tr = iter->tr;
4619 	unsigned long trace_flags = tr->trace_flags;
4620 	enum print_line_t ret;
4621 
4622 	if (iter->lost_events) {
4623 		if (iter->lost_events == (unsigned long)-1)
4624 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4625 					 iter->cpu);
4626 		else
4627 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4628 					 iter->cpu, iter->lost_events);
4629 		if (trace_seq_has_overflowed(&iter->seq))
4630 			return TRACE_TYPE_PARTIAL_LINE;
4631 	}
4632 
4633 	if (iter->trace && iter->trace->print_line) {
4634 		ret = iter->trace->print_line(iter);
4635 		if (ret != TRACE_TYPE_UNHANDLED)
4636 			return ret;
4637 	}
4638 
4639 	if (iter->ent->type == TRACE_BPUTS &&
4640 			trace_flags & TRACE_ITER_PRINTK &&
4641 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4642 		return trace_print_bputs_msg_only(iter);
4643 
4644 	if (iter->ent->type == TRACE_BPRINT &&
4645 			trace_flags & TRACE_ITER_PRINTK &&
4646 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4647 		return trace_print_bprintk_msg_only(iter);
4648 
4649 	if (iter->ent->type == TRACE_PRINT &&
4650 			trace_flags & TRACE_ITER_PRINTK &&
4651 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4652 		return trace_print_printk_msg_only(iter);
4653 
4654 	if (trace_flags & TRACE_ITER_BIN)
4655 		return print_bin_fmt(iter);
4656 
4657 	if (trace_flags & TRACE_ITER_HEX)
4658 		return print_hex_fmt(iter);
4659 
4660 	if (trace_flags & TRACE_ITER_RAW)
4661 		return print_raw_fmt(iter);
4662 
4663 	return print_trace_fmt(iter);
4664 }
4665 
4666 void trace_latency_header(struct seq_file *m)
4667 {
4668 	struct trace_iterator *iter = m->private;
4669 	struct trace_array *tr = iter->tr;
4670 
4671 	/* print nothing if the buffers are empty */
4672 	if (trace_empty(iter))
4673 		return;
4674 
4675 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4676 		print_trace_header(m, iter);
4677 
4678 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4679 		print_lat_help_header(m);
4680 }
4681 
4682 void trace_default_header(struct seq_file *m)
4683 {
4684 	struct trace_iterator *iter = m->private;
4685 	struct trace_array *tr = iter->tr;
4686 	unsigned long trace_flags = tr->trace_flags;
4687 
4688 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4689 		return;
4690 
4691 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4692 		/* print nothing if the buffers are empty */
4693 		if (trace_empty(iter))
4694 			return;
4695 		print_trace_header(m, iter);
4696 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4697 			print_lat_help_header(m);
4698 	} else {
4699 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4700 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4701 				print_func_help_header_irq(iter->array_buffer,
4702 							   m, trace_flags);
4703 			else
4704 				print_func_help_header(iter->array_buffer, m,
4705 						       trace_flags);
4706 		}
4707 	}
4708 }
4709 
/* Warn in the output when ftrace_is_dead() reports a dead ftrace. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "#          MAY BE MISSING FUNCTION EVENTS\n");
}
4717 
4718 #ifdef CONFIG_TRACER_MAX_TRACE
/* Usage text for the top-level (all CPUs) snapshot file. */
static void show_snapshot_main_help(struct seq_file *m)
{
	static const char usage[] =
		"# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer.\n"
		"# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		"#                      (Doesn't have to be '2' works with any number that\n"
		"#                       is not a '0' or '1')\n";

	seq_puts(m, usage);
}
4728 
/*
 * Usage text for a per-CPU snapshot file. The "echo 1" paragraph
 * depends on CONFIG_RING_BUFFER_ALLOW_SWAP.
 */
static void show_snapshot_percpu_help(struct seq_file *m)
{
	const char *echo_one;

#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	echo_one = "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		   "#                      Takes a snapshot of the main buffer for this cpu.\n";
#else
	echo_one = "# echo 1 > snapshot : Not supported with this kernel.\n"
		   "#                     Must use main snapshot file to allocate.\n";
#endif

	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
	seq_puts(m, echo_one);
	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "#                      (Doesn't have to be '2' works with any number that\n"
		    "#                       is not a '0' or '1')\n");
}
4743 
4744 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4745 {
4746 	if (iter->tr->allocated_snapshot)
4747 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4748 	else
4749 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4750 
4751 	seq_puts(m, "# Snapshot commands:\n");
4752 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4753 		show_snapshot_main_help(m);
4754 	else
4755 		show_snapshot_percpu_help(m);
4756 }
4757 #else
/*
 * Empty stand-in used when CONFIG_TRACER_MAX_TRACE is not set, so that
 * s_show() still compiles. Should never be called.
 */
static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4760 #endif
4761 
4762 static int s_show(struct seq_file *m, void *v)
4763 {
4764 	struct trace_iterator *iter = v;
4765 	int ret;
4766 
4767 	if (iter->ent == NULL) {
4768 		if (iter->tr) {
4769 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4770 			seq_puts(m, "#\n");
4771 			test_ftrace_alive(m);
4772 		}
4773 		if (iter->snapshot && trace_empty(iter))
4774 			print_snapshot_help(m, iter);
4775 		else if (iter->trace && iter->trace->print_header)
4776 			iter->trace->print_header(m);
4777 		else
4778 			trace_default_header(m);
4779 
4780 	} else if (iter->leftover) {
4781 		/*
4782 		 * If we filled the seq_file buffer earlier, we
4783 		 * want to just show it now.
4784 		 */
4785 		ret = trace_print_seq(m, &iter->seq);
4786 
4787 		/* ret should this time be zero, but you never know */
4788 		iter->leftover = ret;
4789 
4790 	} else {
4791 		print_trace_line(iter);
4792 		ret = trace_print_seq(m, &iter->seq);
4793 		/*
4794 		 * If we overflow the seq_file buffer, then it will
4795 		 * ask us for this data again at start up.
4796 		 * Use that instead.
4797 		 *  ret is 0 if seq_file write succeeded.
4798 		 *        -1 otherwise.
4799 		 */
4800 		iter->leftover = ret;
4801 	}
4802 
4803 	return 0;
4804 }
4805 
4806 /*
4807  * Should be used after trace_array_get(), trace_types_lock
4808  * ensures that i_cdev was already initialized.
4809  */
4810 static inline int tracing_get_cpu(struct inode *inode)
4811 {
4812 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4813 		return (long)inode->i_cdev - 1;
4814 	return RING_BUFFER_ALL_CPUS;
4815 }
4816 
/*
 * seq_file iteration callbacks for trace iterators; installed by
 * __tracing_open() through __seq_open_private().
 */
static const struct seq_operations tracer_seq_ops = {
	.start		= s_start,
	.next		= s_next,
	.stop		= s_stop,
	.show		= s_show,
};
4823 
/*
 * __tracing_open - set up a trace_iterator as seq_file private data.
 * @inode: inode being opened; i_private holds the trace array and
 *         i_cdev encodes the CPU (see tracing_get_cpu())
 * @file: file being opened
 * @snapshot: true when opening the snapshot (max) buffer
 *
 * Allocates the iterator and its helper buffers, copies the current
 * tracer, selects the buffer to read and prepares a ring buffer
 * iterator for every CPU covered by the file.
 *
 * Returns the iterator, ERR_PTR(-ENODEV) when tracing is disabled or
 * ERR_PTR(-ENOMEM) when an allocation fails.
 */
static struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;

	if (tracing_disabled)
		return ERR_PTR(-ENODEV);

	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
	if (!iter)
		return ERR_PTR(-ENOMEM);

	/* One ring buffer iterator slot per possible CPU id */
	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
				    GFP_KERNEL);
	if (!iter->buffer_iter)
		goto release;

	/*
	 * trace_find_next_entry() may need to save off iter->ent.
	 * It will place it into the iter->temp buffer. As most
	 * events are less than 128, allocate a buffer of that size.
	 * If one is greater, then trace_find_next_entry() will
	 * allocate a new buffer to adjust for the bigger iter->ent.
	 * It's not critical if it fails to get allocated here.
	 */
	iter->temp = kmalloc(128, GFP_KERNEL);
	if (iter->temp)
		iter->temp_size = 128;

	/*
	 * trace_event_printf() may need to modify given format
	 * string to replace %p with %px so that it shows real address
	 * instead of hash value. However, that is only for the event
	 * tracing, other tracer may not need. Defer the allocation
	 * until it is needed.
	 */
	iter->fmt = NULL;
	iter->fmt_size = 0;

	/*
	 * We make a copy of the current tracer to avoid concurrent
	 * changes on it while we are reading.
	 */
	mutex_lock(&trace_types_lock);
	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
	if (!iter->trace)
		goto fail;

	*iter->trace = *tr->current_trace;

	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
		goto fail;

	iter->tr = tr;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* Currently only the top directory has a snapshot */
	if (tr->current_trace->print_max || snapshot)
		iter->array_buffer = &tr->max_buffer;
	else
#endif
		iter->array_buffer = &tr->array_buffer;
	iter->snapshot = snapshot;
	/* -1 never equals a seq_file position, so the first s_start() rewinds */
	iter->pos = -1;
	iter->cpu_file = tracing_get_cpu(inode);
	mutex_init(&iter->mutex);

	/* Notify the tracer early; before we stop tracing. */
	if (iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/*
	 * If pause-on-trace is enabled, then stop the trace while
	 * dumping, unless this is the "snapshot" file
	 */
	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
		tracing_stop_tr(tr);

	/* Prepare all iterators first, sync once, then start and reset each */
	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			iter->buffer_iter[cpu] =
				ring_buffer_read_prepare(iter->array_buffer->buffer,
							 cpu, GFP_KERNEL);
		}
		ring_buffer_read_prepare_sync();
		for_each_tracing_cpu(cpu) {
			ring_buffer_read_start(iter->buffer_iter[cpu]);
			tracing_iter_reset(iter, cpu);
		}
	} else {
		cpu = iter->cpu_file;
		iter->buffer_iter[cpu] =
			ring_buffer_read_prepare(iter->array_buffer->buffer,
						 cpu, GFP_KERNEL);
		ring_buffer_read_prepare_sync();
		ring_buffer_read_start(iter->buffer_iter[cpu]);
		tracing_iter_reset(iter, cpu);
	}

	mutex_unlock(&trace_types_lock);

	return iter;

	/* Reached with trace_types_lock held */
 fail:
	mutex_unlock(&trace_types_lock);
	kfree(iter->trace);
	kfree(iter->temp);
	kfree(iter->buffer_iter);
release:
	seq_release_private(inode, file);
	return ERR_PTR(-ENOMEM);
}
4946 
4947 int tracing_open_generic(struct inode *inode, struct file *filp)
4948 {
4949 	int ret;
4950 
4951 	ret = tracing_check_open_get_tr(NULL);
4952 	if (ret)
4953 		return ret;
4954 
4955 	filp->private_data = inode->i_private;
4956 	return 0;
4957 }
4958 
4959 bool tracing_is_disabled(void)
4960 {
4961 	return (tracing_disabled) ? true: false;
4962 }
4963 
4964 /*
4965  * Open and update trace_array ref count.
4966  * Must have the current trace_array passed to it.
4967  */
4968 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4969 {
4970 	struct trace_array *tr = inode->i_private;
4971 	int ret;
4972 
4973 	ret = tracing_check_open_get_tr(tr);
4974 	if (ret)
4975 		return ret;
4976 
4977 	filp->private_data = inode->i_private;
4978 
4979 	return 0;
4980 }
4981 
/*
 * Open handler that calls stream_open() on the file before doing the
 * regular trace-array open.
 */
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
	int ret;

	stream_open(inode, filp);
	ret = tracing_open_generic_tr(inode, filp);

	return ret;
}
4987 
/*
 * Release handler for tracing_fops: undo what tracing_open() and
 * __tracing_open() set up. Files opened without FMODE_READ only hold
 * a trace array reference; readers also own a trace_iterator whose
 * ring buffer iterators, tracer copy and helper buffers are freed
 * here.
 */
static int tracing_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct seq_file *m = file->private_data;
	struct trace_iterator *iter;
	int cpu;

	if (!(file->f_mode & FMODE_READ)) {
		trace_array_put(tr);
		return 0;
	}

	/* Writes do not use seq_file */
	iter = m->private;
	mutex_lock(&trace_types_lock);

	for_each_tracing_cpu(cpu) {
		if (iter->buffer_iter[cpu])
			ring_buffer_read_finish(iter->buffer_iter[cpu]);
	}

	/* Counterpart of the ->open() call made when the iterator was set up */
	if (iter->trace && iter->trace->close)
		iter->trace->close(iter);

	if (!iter->snapshot && tr->stop_count)
		/* reenable tracing if it was previously enabled */
		tracing_start_tr(tr);

	/* Reference drop done while trace_types_lock is still held */
	__trace_array_put(tr);

	mutex_unlock(&trace_types_lock);

	mutex_destroy(&iter->mutex);
	free_cpumask_var(iter->started);
	kfree(iter->fmt);
	kfree(iter->temp);
	kfree(iter->trace);
	kfree(iter->buffer_iter);
	seq_release_private(inode, file);

	return 0;
}
5030 
5031 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5032 {
5033 	struct trace_array *tr = inode->i_private;
5034 
5035 	trace_array_put(tr);
5036 	return 0;
5037 }
5038 
5039 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5040 {
5041 	struct trace_array *tr = inode->i_private;
5042 
5043 	trace_array_put(tr);
5044 
5045 	return single_release(inode, file);
5046 }
5047 
5048 static int tracing_open(struct inode *inode, struct file *file)
5049 {
5050 	struct trace_array *tr = inode->i_private;
5051 	struct trace_iterator *iter;
5052 	int ret;
5053 
5054 	ret = tracing_check_open_get_tr(tr);
5055 	if (ret)
5056 		return ret;
5057 
5058 	/* If this file was open for write, then erase contents */
5059 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5060 		int cpu = tracing_get_cpu(inode);
5061 		struct array_buffer *trace_buf = &tr->array_buffer;
5062 
5063 #ifdef CONFIG_TRACER_MAX_TRACE
5064 		if (tr->current_trace->print_max)
5065 			trace_buf = &tr->max_buffer;
5066 #endif
5067 
5068 		if (cpu == RING_BUFFER_ALL_CPUS)
5069 			tracing_reset_online_cpus(trace_buf);
5070 		else
5071 			tracing_reset_cpu(trace_buf, cpu);
5072 	}
5073 
5074 	if (file->f_mode & FMODE_READ) {
5075 		iter = __tracing_open(inode, file, false);
5076 		if (IS_ERR(iter))
5077 			ret = PTR_ERR(iter);
5078 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5079 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
5080 	}
5081 
5082 	if (ret < 0)
5083 		trace_array_put(tr);
5084 
5085 	return ret;
5086 }
5087 
5088 /*
5089  * Some tracers are not suitable for instance buffers.
5090  * A tracer is always available for the global array (toplevel)
5091  * or if it explicitly states that it is.
5092  */
5093 static bool
5094 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5095 {
5096 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5097 }
5098 
5099 /* Find the next tracer that this trace array may use */
5100 static struct tracer *
5101 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5102 {
5103 	while (t && !trace_ok_for_array(t, tr))
5104 		t = t->next;
5105 
5106 	return t;
5107 }
5108 
5109 static void *
5110 t_next(struct seq_file *m, void *v, loff_t *pos)
5111 {
5112 	struct trace_array *tr = m->private;
5113 	struct tracer *t = v;
5114 
5115 	(*pos)++;
5116 
5117 	if (t)
5118 		t = get_tracer_for_array(tr, t->next);
5119 
5120 	return t;
5121 }
5122 
5123 static void *t_start(struct seq_file *m, loff_t *pos)
5124 {
5125 	struct trace_array *tr = m->private;
5126 	struct tracer *t;
5127 	loff_t l = 0;
5128 
5129 	mutex_lock(&trace_types_lock);
5130 
5131 	t = get_tracer_for_array(tr, trace_types);
5132 	for (; t && l < *pos; t = t_next(m, t, &l))
5133 			;
5134 
5135 	return t;
5136 }
5137 
/* seq_file ->stop() for the tracer list: drop the lock taken by t_start(). */
static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&trace_types_lock);
}
5142 
5143 static int t_show(struct seq_file *m, void *v)
5144 {
5145 	struct tracer *t = v;
5146 
5147 	if (!t)
5148 		return 0;
5149 
5150 	seq_puts(m, t->name);
5151 	if (t->next)
5152 		seq_putc(m, ' ');
5153 	else
5154 		seq_putc(m, '\n');
5155 
5156 	return 0;
5157 }
5158 
/*
 * seq_file callbacks that list the tracers usable by a trace array;
 * installed by show_traces_open().
 */
static const struct seq_operations show_traces_seq_ops = {
	.start		= t_start,
	.next		= t_next,
	.stop		= t_stop,
	.show		= t_show,
};
5165 
5166 static int show_traces_open(struct inode *inode, struct file *file)
5167 {
5168 	struct trace_array *tr = inode->i_private;
5169 	struct seq_file *m;
5170 	int ret;
5171 
5172 	ret = tracing_check_open_get_tr(tr);
5173 	if (ret)
5174 		return ret;
5175 
5176 	ret = seq_open(file, &show_traces_seq_ops);
5177 	if (ret) {
5178 		trace_array_put(tr);
5179 		return ret;
5180 	}
5181 
5182 	m = file->private_data;
5183 	m->private = tr;
5184 
5185 	return 0;
5186 }
5187 
5188 static int show_traces_release(struct inode *inode, struct file *file)
5189 {
5190 	struct trace_array *tr = inode->i_private;
5191 
5192 	trace_array_put(tr);
5193 	return seq_release(inode, file);
5194 }
5195 
/*
 * Write handler that ignores the data and reports the whole request
 * (@count bytes) as written.
 */
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	return count;
}
5202 
5203 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5204 {
5205 	int ret;
5206 
5207 	if (file->f_mode & FMODE_READ)
5208 		ret = seq_lseek(file, offset, whence);
5209 	else
5210 		file->f_pos = ret = 0;
5211 
5212 	return ret;
5213 }
5214 
/*
 * File operations pairing tracing_open() with tracing_release().
 * Reads go through the seq_file helpers; writes are accepted and
 * ignored by tracing_write_stub().
 */
static const struct file_operations tracing_fops = {
	.open		= tracing_open,
	.read		= seq_read,
	.read_iter	= seq_read_iter,
	.splice_read	= copy_splice_read,
	.write		= tracing_write_stub,
	.llseek		= tracing_lseek,
	.release	= tracing_release,
};
5224 
/* Read-only seq_file operations for the tracer list. */
static const struct file_operations show_traces_fops = {
	.open		= show_traces_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= show_traces_release,
};
5231 
5232 static ssize_t
5233 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5234 		     size_t count, loff_t *ppos)
5235 {
5236 	struct trace_array *tr = file_inode(filp)->i_private;
5237 	char *mask_str;
5238 	int len;
5239 
5240 	len = snprintf(NULL, 0, "%*pb\n",
5241 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5242 	mask_str = kmalloc(len, GFP_KERNEL);
5243 	if (!mask_str)
5244 		return -ENOMEM;
5245 
5246 	len = snprintf(mask_str, len, "%*pb\n",
5247 		       cpumask_pr_args(tr->tracing_cpumask));
5248 	if (len >= count) {
5249 		count = -EINVAL;
5250 		goto out_err;
5251 	}
5252 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5253 
5254 out_err:
5255 	kfree(mask_str);
5256 
5257 	return count;
5258 }
5259 
5260 int tracing_set_cpumask(struct trace_array *tr,
5261 			cpumask_var_t tracing_cpumask_new)
5262 {
5263 	int cpu;
5264 
5265 	if (!tr)
5266 		return -EINVAL;
5267 
5268 	local_irq_disable();
5269 	arch_spin_lock(&tr->max_lock);
5270 	for_each_tracing_cpu(cpu) {
5271 		/*
5272 		 * Increase/decrease the disabled counter if we are
5273 		 * about to flip a bit in the cpumask:
5274 		 */
5275 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5276 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5277 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5278 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5279 		}
5280 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5281 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5282 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5283 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5284 		}
5285 	}
5286 	arch_spin_unlock(&tr->max_lock);
5287 	local_irq_enable();
5288 
5289 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5290 
5291 	return 0;
5292 }
5293 
5294 static ssize_t
5295 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5296 		      size_t count, loff_t *ppos)
5297 {
5298 	struct trace_array *tr = file_inode(filp)->i_private;
5299 	cpumask_var_t tracing_cpumask_new;
5300 	int err;
5301 
5302 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5303 		return -ENOMEM;
5304 
5305 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5306 	if (err)
5307 		goto err_free;
5308 
5309 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5310 	if (err)
5311 		goto err_free;
5312 
5313 	free_cpumask_var(tracing_cpumask_new);
5314 
5315 	return count;
5316 
5317 err_free:
5318 	free_cpumask_var(tracing_cpumask_new);
5319 
5320 	return err;
5321 }
5322 
5323 static const struct file_operations tracing_cpumask_fops = {
5324 	.open		= tracing_open_generic_tr,
5325 	.read		= tracing_cpumask_read,
5326 	.write		= tracing_cpumask_write,
5327 	.release	= tracing_release_generic_tr,
5328 	.llseek		= generic_file_llseek,
5329 };
5330 
5331 static int tracing_trace_options_show(struct seq_file *m, void *v)
5332 {
5333 	struct tracer_opt *trace_opts;
5334 	struct trace_array *tr = m->private;
5335 	u32 tracer_flags;
5336 	int i;
5337 
5338 	mutex_lock(&trace_types_lock);
5339 	tracer_flags = tr->current_trace->flags->val;
5340 	trace_opts = tr->current_trace->flags->opts;
5341 
5342 	for (i = 0; trace_options[i]; i++) {
5343 		if (tr->trace_flags & (1 << i))
5344 			seq_printf(m, "%s\n", trace_options[i]);
5345 		else
5346 			seq_printf(m, "no%s\n", trace_options[i]);
5347 	}
5348 
5349 	for (i = 0; trace_opts[i].name; i++) {
5350 		if (tracer_flags & trace_opts[i].bit)
5351 			seq_printf(m, "%s\n", trace_opts[i].name);
5352 		else
5353 			seq_printf(m, "no%s\n", trace_opts[i].name);
5354 	}
5355 	mutex_unlock(&trace_types_lock);
5356 
5357 	return 0;
5358 }
5359 
5360 static int __set_tracer_option(struct trace_array *tr,
5361 			       struct tracer_flags *tracer_flags,
5362 			       struct tracer_opt *opts, int neg)
5363 {
5364 	struct tracer *trace = tracer_flags->trace;
5365 	int ret;
5366 
5367 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5368 	if (ret)
5369 		return ret;
5370 
5371 	if (neg)
5372 		tracer_flags->val &= ~opts->bit;
5373 	else
5374 		tracer_flags->val |= opts->bit;
5375 	return 0;
5376 }
5377 
5378 /* Try to assign a tracer specific option */
5379 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5380 {
5381 	struct tracer *trace = tr->current_trace;
5382 	struct tracer_flags *tracer_flags = trace->flags;
5383 	struct tracer_opt *opts = NULL;
5384 	int i;
5385 
5386 	for (i = 0; tracer_flags->opts[i].name; i++) {
5387 		opts = &tracer_flags->opts[i];
5388 
5389 		if (strcmp(cmp, opts->name) == 0)
5390 			return __set_tracer_option(tr, trace->flags, opts, neg);
5391 	}
5392 
5393 	return -EINVAL;
5394 }
5395 
5396 /* Some tracers require overwrite to stay enabled */
5397 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5398 {
5399 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5400 		return -1;
5401 
5402 	return 0;
5403 }
5404 
5405 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5406 {
5407 	int *map;
5408 
5409 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5410 	    (mask == TRACE_ITER_RECORD_CMD))
5411 		lockdep_assert_held(&event_mutex);
5412 
5413 	/* do nothing if flag is already set */
5414 	if (!!(tr->trace_flags & mask) == !!enabled)
5415 		return 0;
5416 
5417 	/* Give the tracer a chance to approve the change */
5418 	if (tr->current_trace->flag_changed)
5419 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5420 			return -EINVAL;
5421 
5422 	if (enabled)
5423 		tr->trace_flags |= mask;
5424 	else
5425 		tr->trace_flags &= ~mask;
5426 
5427 	if (mask == TRACE_ITER_RECORD_CMD)
5428 		trace_event_enable_cmd_record(enabled);
5429 
5430 	if (mask == TRACE_ITER_RECORD_TGID) {
5431 		if (!tgid_map) {
5432 			tgid_map_max = pid_max;
5433 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5434 				       GFP_KERNEL);
5435 
5436 			/*
5437 			 * Pairs with smp_load_acquire() in
5438 			 * trace_find_tgid_ptr() to ensure that if it observes
5439 			 * the tgid_map we just allocated then it also observes
5440 			 * the corresponding tgid_map_max value.
5441 			 */
5442 			smp_store_release(&tgid_map, map);
5443 		}
5444 		if (!tgid_map) {
5445 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5446 			return -ENOMEM;
5447 		}
5448 
5449 		trace_event_enable_tgid_record(enabled);
5450 	}
5451 
5452 	if (mask == TRACE_ITER_EVENT_FORK)
5453 		trace_event_follow_fork(tr, enabled);
5454 
5455 	if (mask == TRACE_ITER_FUNC_FORK)
5456 		ftrace_pid_follow_fork(tr, enabled);
5457 
5458 	if (mask == TRACE_ITER_OVERWRITE) {
5459 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5460 #ifdef CONFIG_TRACER_MAX_TRACE
5461 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5462 #endif
5463 	}
5464 
5465 	if (mask == TRACE_ITER_PRINTK) {
5466 		trace_printk_start_stop_comm(enabled);
5467 		trace_printk_control(enabled);
5468 	}
5469 
5470 	return 0;
5471 }
5472 
5473 int trace_set_options(struct trace_array *tr, char *option)
5474 {
5475 	char *cmp;
5476 	int neg = 0;
5477 	int ret;
5478 	size_t orig_len = strlen(option);
5479 	int len;
5480 
5481 	cmp = strstrip(option);
5482 
5483 	len = str_has_prefix(cmp, "no");
5484 	if (len)
5485 		neg = 1;
5486 
5487 	cmp += len;
5488 
5489 	mutex_lock(&event_mutex);
5490 	mutex_lock(&trace_types_lock);
5491 
5492 	ret = match_string(trace_options, -1, cmp);
5493 	/* If no option could be set, test the specific tracer options */
5494 	if (ret < 0)
5495 		ret = set_tracer_option(tr, cmp, neg);
5496 	else
5497 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5498 
5499 	mutex_unlock(&trace_types_lock);
5500 	mutex_unlock(&event_mutex);
5501 
5502 	/*
5503 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5504 	 * turn it back into a space.
5505 	 */
5506 	if (orig_len > strlen(option))
5507 		option[strlen(option)] = ' ';
5508 
5509 	return ret;
5510 }
5511 
5512 static void __init apply_trace_boot_options(void)
5513 {
5514 	char *buf = trace_boot_options_buf;
5515 	char *option;
5516 
5517 	while (true) {
5518 		option = strsep(&buf, ",");
5519 
5520 		if (!option)
5521 			break;
5522 
5523 		if (*option)
5524 			trace_set_options(&global_trace, option);
5525 
5526 		/* Put back the comma to allow this to be called again */
5527 		if (buf)
5528 			*(buf - 1) = ',';
5529 	}
5530 }
5531 
5532 static ssize_t
5533 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5534 			size_t cnt, loff_t *ppos)
5535 {
5536 	struct seq_file *m = filp->private_data;
5537 	struct trace_array *tr = m->private;
5538 	char buf[64];
5539 	int ret;
5540 
5541 	if (cnt >= sizeof(buf))
5542 		return -EINVAL;
5543 
5544 	if (copy_from_user(buf, ubuf, cnt))
5545 		return -EFAULT;
5546 
5547 	buf[cnt] = 0;
5548 
5549 	ret = trace_set_options(tr, buf);
5550 	if (ret < 0)
5551 		return ret;
5552 
5553 	*ppos += cnt;
5554 
5555 	return cnt;
5556 }
5557 
5558 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5559 {
5560 	struct trace_array *tr = inode->i_private;
5561 	int ret;
5562 
5563 	ret = tracing_check_open_get_tr(tr);
5564 	if (ret)
5565 		return ret;
5566 
5567 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5568 	if (ret < 0)
5569 		trace_array_put(tr);
5570 
5571 	return ret;
5572 }
5573 
5574 static const struct file_operations tracing_iter_fops = {
5575 	.open		= tracing_trace_options_open,
5576 	.read		= seq_read,
5577 	.llseek		= seq_lseek,
5578 	.release	= tracing_single_release_tr,
5579 	.write		= tracing_trace_options_write,
5580 };
5581 
/*
 * Help text handed out verbatim by tracing_readme_read().  Which sections
 * are present depends on the kernel configuration options tested below.
 */
static const char readme_msg[] =
	"tracing mini-HOWTO:\n\n"
	"# echo 0 > tracing_on : quick way to disable tracing\n"
	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
	" Important files:\n"
	"  trace\t\t\t- The static contents of the buffer\n"
	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
	"  current_tracer\t- function and latency tracers\n"
	"  available_tracers\t- list of configured tracers for current_tracer\n"
	"  error_log\t- error log for failed commands (that support it)\n"
	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
	"  trace_clock\t\t- change the clock used to order events\n"
	"       local:   Per cpu clock but may not be synced across CPUs\n"
	"      global:   Synced across CPUs but slows tracing down.\n"
	"     counter:   Not a clock, but just an increment\n"
	"      uptime:   Jiffy counter from time of boot\n"
	"        perf:   Same clock that perf events use\n"
#ifdef CONFIG_X86_64
	"     x86-tsc:   TSC cycle counter\n"
#endif
	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
	"       delta:   Delta difference against a buffer-wide timestamp\n"
	"    absolute:   Absolute (standalone) timestamp\n"
	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
	"  tracing_cpumask\t- Limit which CPUs to trace\n"
	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
	"\t\t\t  Remove sub-buffer with rmdir\n"
	"  trace_options\t\t- Set format or modify how tracing happens\n"
	"\t\t\t  Disable an option by prefixing 'no' to the\n"
	"\t\t\t  option name\n"
	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"\n  available_filter_functions - list of functions that can be filtered on\n"
	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
	"\t\t\t  functions\n"
	"\t     accepts: func_full_name or glob-matching-pattern\n"
	"\t     modules: Can select a group via module\n"
	"\t      Format: :mod:<module-name>\n"
	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
	"\t    triggers: a command to perform when function is hit\n"
	"\t      Format: <function>:<trigger>[:count]\n"
	"\t     trigger: traceon, traceoff\n"
	"\t\t      enable_event:<system>:<event>\n"
	"\t\t      disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
	"\t\t      stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t      snapshot\n"
#endif
	"\t\t      dump\n"
	"\t\t      cpudump\n"
	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
	"\t     The first one will disable tracing every time do_fault is hit\n"
	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
	"\t       The first time do trap is hit and it disables tracing, the\n"
	"\t       counter will decrement to 2. If tracing is already disabled,\n"
	"\t       the counter will not decrement. It only decrements when the\n"
	"\t       trigger did work\n"
	"\t     To remove trigger without count:\n"
	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
	"\t     To remove trigger with a count:\n"
	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"\t    modules: Can select a group via module command :mod:\n"
	"\t    Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
	"\t\t    (function)\n"
	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
	"\t\t    (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
	"\t\t\t  snapshot buffer. Read the contents for more\n"
	"\t\t\t  information\n"
#endif
#ifdef CONFIG_STACK_TRACER
	"  stack_trace\t\t- Shows the max stack trace when active\n"
	"  stack_max_size\t- Shows current max stack size that was traced\n"
	"\t\t\t  Write into this file to reset the max size (trigger a\n"
	"\t\t\t  new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
	"\t\t\t  traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
#ifdef CONFIG_DYNAMIC_EVENTS
	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_KPROBE_EVENTS
	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
    defined(CONFIG_FPROBE_EVENTS)
	"\t  accepts: event-definitions (one definition per line)\n"
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
#endif
#ifdef CONFIG_FPROBE_EVENTS
	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t           s:[synthetic/]<event> <field> [<field>]\n"
#endif
	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
	"\t           -:[<group>/][<event>]\n"
#ifdef CONFIG_KPROBE_EVENTS
	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
  "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
  "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
#endif
	"\t     args: <name>=fetcharg[:type]\n"
	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>, <argname>\n"
#else
	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#endif
#else
	"\t           $stack<index>, $stack, $retval, $comm,\n"
#endif
	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
	"\t           symstr, <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t    field: <stype> <name>;\n"
	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
	"\t           [unsigned] char/int/long\n"
#endif
	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
	"\t            of the <attached-group>/<attached-event>.\n"
#endif
	"  events/\t\t- Directory containing all trace event subsystems:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
	"\t\t\t  events\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"  events/<system>/<event>/\t- Directory containing control files for\n"
	"\t\t\t  <event>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"      trigger\t\t- If set, a command to perform when event is hit\n"
	"\t    Format: <trigger>[:count][if <filter>]\n"
	"\t   trigger: traceon, traceoff\n"
	"\t            enable_event:<system>:<event>\n"
	"\t            disable_event:<system>:<event>\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t            enable_hist:<system>:<event>\n"
	"\t            disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_STACKTRACE
	"\t\t    stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t    snapshot\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t\t    hist (see below)\n"
#endif
	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
	"\t                  events/block/block_unplug/trigger\n"
	"\t   The first disables tracing every time block_unplug is hit.\n"
	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
	"\t   Like function triggers, the counter is only decremented if it\n"
	"\t    enabled or disabled tracing.\n"
	"\t   To remove a trigger without a count:\n"
	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
	"\t   To remove a trigger with a count:\n"
	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
	"\t   Filters can be ignored when removing a trigger.\n"
#ifdef CONFIG_HIST_TRIGGERS
	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
	"\t    Format: hist:keys=<field1[,field2,...]>\n"
	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
	"\t            [:values=<field1[,field2,...]>]\n"
	"\t            [:sort=<field1[,field2,...]>]\n"
	"\t            [:size=#entries]\n"
	"\t            [:pause][:continue][:clear]\n"
	"\t            [:name=histname1]\n"
	"\t            [:nohitcount]\n"
	"\t            [:<handler>.<action>]\n"
	"\t            [if <filter>]\n\n"
	"\t    Note, special fields can be used as well:\n"
	"\t            common_timestamp - to record current timestamp\n"
	"\t            common_cpu - to record the CPU the event happened on\n"
	"\n"
	"\t    A hist trigger variable can be:\n"
	"\t        - a reference to a field e.g. x=current_timestamp,\n"
	"\t        - a reference to another variable e.g. y=$x,\n"
	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
	"\n"
	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
	"\t    variable reference, field or numeric literal.\n"
	"\n"
	"\t    When a matching event is hit, an entry is added to a hash\n"
	"\t    table using the key(s) and value(s) named, and the value of a\n"
	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
	"\t    correspond to fields in the event's format description.  Keys\n"
	"\t    can be any field, or the special string 'common_stacktrace'.\n"
	"\t    Compound keys consisting of up to two fields can be specified\n"
	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
	"\t    fields.  Sort keys consisting of up to two fields can be\n"
	"\t    specified using the 'sort' keyword.  The sort direction can\n"
	"\t    be modified by appending '.descending' or '.ascending' to a\n"
	"\t    sort field.  The 'size' parameter can be used to specify more\n"
	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
	"\t    its histogram data will be shared with other triggers of the\n"
	"\t    same name, and trigger hits will update this common data.\n\n"
	"\t    Reading the 'hist' file for the event will dump the hash\n"
	"\t    table in its entirety to stdout.  If there are multiple hist\n"
	"\t    triggers attached to an event, there will be a table for each\n"
	"\t    trigger in the output.  The table displayed for a named\n"
	"\t    trigger will be the same as any other instance having the\n"
	"\t    same name.  The default format used to display a given field\n"
	"\t    can be modified by appending any of the following modifiers\n"
	"\t    to the field name, as applicable:\n\n"
	"\t            .hex        display a number as a hex value\n"
	"\t            .sym        display an address as a symbol\n"
	"\t            .sym-offset display an address as a symbol and offset\n"
	"\t            .execname   display a common_pid as a program name\n"
	"\t            .syscall    display a syscall id as a syscall name\n"
	"\t            .log2       display log2 value rather than raw number\n"
	"\t            .buckets=size  display values in groups of size rather than raw number\n"
	"\t            .usecs      display a common_timestamp in microseconds\n"
	"\t            .percent    display a number of percentage value\n"
	"\t            .graph      display a bar-graph of a value\n\n"
	"\t    The 'pause' parameter can be used to pause an existing hist\n"
	"\t    trigger or to start a hist trigger but not log any events\n"
	"\t    until told to do so.  'continue' can be used to start or\n"
	"\t    restart a paused hist trigger.\n\n"
	"\t    The 'clear' parameter will clear the contents of a running\n"
	"\t    hist trigger and leave its current paused/active state\n"
	"\t    unchanged.\n\n"
	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
	"\t    raw hitcount in the histogram.\n\n"
	"\t    The enable_hist and disable_hist triggers can be used to\n"
	"\t    have one event conditionally start and stop another event's\n"
	"\t    already-attached hist trigger.  The syntax is analogous to\n"
	"\t    the enable_event and disable_event triggers.\n\n"
	"\t    Hist trigger handlers and actions are executed whenever a\n"
	"\t    histogram entry is added or updated.  They take the form:\n\n"
	"\t        <handler>.<action>\n\n"
	"\t    The available handlers are:\n\n"
	"\t        onmatch(matching.event)  - invoke on addition or update\n"
	"\t        onmax(var)               - invoke if var exceeds current max\n"
	"\t        onchange(var)            - invoke action if var changes\n\n"
	"\t    The available actions are:\n\n"
	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
	"\t        save(field,...)                      - save current event fields\n"
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t        snapshot()                           - snapshot the trace buffer\n\n"
#endif
#ifdef CONFIG_SYNTH_EVENTS
	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
	"\t  Write into this file to define/undefine new synthetic events.\n"
	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
#endif
#endif
;
5873 
5874 static ssize_t
5875 tracing_readme_read(struct file *filp, char __user *ubuf,
5876 		       size_t cnt, loff_t *ppos)
5877 {
5878 	return simple_read_from_buffer(ubuf, cnt, ppos,
5879 					readme_msg, strlen(readme_msg));
5880 }
5881 
5882 static const struct file_operations tracing_readme_fops = {
5883 	.open		= tracing_open_generic,
5884 	.read		= tracing_readme_read,
5885 	.llseek		= generic_file_llseek,
5886 };
5887 
5888 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5889 {
5890 	int pid = ++(*pos);
5891 
5892 	return trace_find_tgid_ptr(pid);
5893 }
5894 
5895 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5896 {
5897 	int pid = *pos;
5898 
5899 	return trace_find_tgid_ptr(pid);
5900 }
5901 
/* seq_file stop: nothing is acquired in saved_tgids_start(), so nothing to undo. */
static void saved_tgids_stop(struct seq_file *m, void *v)
{
}
5905 
5906 static int saved_tgids_show(struct seq_file *m, void *v)
5907 {
5908 	int *entry = (int *)v;
5909 	int pid = entry - tgid_map;
5910 	int tgid = *entry;
5911 
5912 	if (tgid == 0)
5913 		return SEQ_SKIP;
5914 
5915 	seq_printf(m, "%d %d\n", pid, tgid);
5916 	return 0;
5917 }
5918 
5919 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5920 	.start		= saved_tgids_start,
5921 	.stop		= saved_tgids_stop,
5922 	.next		= saved_tgids_next,
5923 	.show		= saved_tgids_show,
5924 };
5925 
5926 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5927 {
5928 	int ret;
5929 
5930 	ret = tracing_check_open_get_tr(NULL);
5931 	if (ret)
5932 		return ret;
5933 
5934 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5935 }
5936 
5937 
5938 static const struct file_operations tracing_saved_tgids_fops = {
5939 	.open		= tracing_saved_tgids_open,
5940 	.read		= seq_read,
5941 	.llseek		= seq_lseek,
5942 	.release	= seq_release,
5943 };
5944 
5945 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5946 {
5947 	unsigned int *ptr = v;
5948 
5949 	if (*pos || m->count)
5950 		ptr++;
5951 
5952 	(*pos)++;
5953 
5954 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5955 	     ptr++) {
5956 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5957 			continue;
5958 
5959 		return ptr;
5960 	}
5961 
5962 	return NULL;
5963 }
5964 
5965 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5966 {
5967 	void *v;
5968 	loff_t l = 0;
5969 
5970 	preempt_disable();
5971 	arch_spin_lock(&trace_cmdline_lock);
5972 
5973 	v = &savedcmd->map_cmdline_to_pid[0];
5974 	while (l <= *pos) {
5975 		v = saved_cmdlines_next(m, v, &l);
5976 		if (!v)
5977 			return NULL;
5978 	}
5979 
5980 	return v;
5981 }
5982 
/*
 * seq_file stop: release trace_cmdline_lock and re-enable preemption, the
 * reverse of what saved_cmdlines_start() does.
 */
static void saved_cmdlines_stop(struct seq_file *m, void *v)
{
	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}
5988 
5989 static int saved_cmdlines_show(struct seq_file *m, void *v)
5990 {
5991 	char buf[TASK_COMM_LEN];
5992 	unsigned int *pid = v;
5993 
5994 	__trace_find_cmdline(*pid, buf);
5995 	seq_printf(m, "%d %s\n", *pid, buf);
5996 	return 0;
5997 }
5998 
5999 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6000 	.start		= saved_cmdlines_start,
6001 	.next		= saved_cmdlines_next,
6002 	.stop		= saved_cmdlines_stop,
6003 	.show		= saved_cmdlines_show,
6004 };
6005 
6006 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6007 {
6008 	int ret;
6009 
6010 	ret = tracing_check_open_get_tr(NULL);
6011 	if (ret)
6012 		return ret;
6013 
6014 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6015 }
6016 
6017 static const struct file_operations tracing_saved_cmdlines_fops = {
6018 	.open		= tracing_saved_cmdlines_open,
6019 	.read		= seq_read,
6020 	.llseek		= seq_lseek,
6021 	.release	= seq_release,
6022 };
6023 
6024 static ssize_t
6025 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6026 				 size_t cnt, loff_t *ppos)
6027 {
6028 	char buf[64];
6029 	int r;
6030 
6031 	preempt_disable();
6032 	arch_spin_lock(&trace_cmdline_lock);
6033 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6034 	arch_spin_unlock(&trace_cmdline_lock);
6035 	preempt_enable();
6036 
6037 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6038 }
6039 
6040 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6041 {
6042 	kfree(s->saved_cmdlines);
6043 	kfree(s->map_cmdline_to_pid);
6044 	kfree(s);
6045 }
6046 
6047 static int tracing_resize_saved_cmdlines(unsigned int val)
6048 {
6049 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
6050 
6051 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6052 	if (!s)
6053 		return -ENOMEM;
6054 
6055 	if (allocate_cmdlines_buffer(val, s) < 0) {
6056 		kfree(s);
6057 		return -ENOMEM;
6058 	}
6059 
6060 	preempt_disable();
6061 	arch_spin_lock(&trace_cmdline_lock);
6062 	savedcmd_temp = savedcmd;
6063 	savedcmd = s;
6064 	arch_spin_unlock(&trace_cmdline_lock);
6065 	preempt_enable();
6066 	free_saved_cmdlines_buffer(savedcmd_temp);
6067 
6068 	return 0;
6069 }
6070 
6071 static ssize_t
6072 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6073 				  size_t cnt, loff_t *ppos)
6074 {
6075 	unsigned long val;
6076 	int ret;
6077 
6078 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6079 	if (ret)
6080 		return ret;
6081 
6082 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
6083 	if (!val || val > PID_MAX_DEFAULT)
6084 		return -EINVAL;
6085 
6086 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
6087 	if (ret < 0)
6088 		return ret;
6089 
6090 	*ppos += cnt;
6091 
6092 	return cnt;
6093 }
6094 
6095 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6096 	.open		= tracing_open_generic,
6097 	.read		= tracing_saved_cmdlines_size_read,
6098 	.write		= tracing_saved_cmdlines_size_write,
6099 };
6100 
6101 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6102 static union trace_eval_map_item *
6103 update_eval_map(union trace_eval_map_item *ptr)
6104 {
6105 	if (!ptr->map.eval_string) {
6106 		if (ptr->tail.next) {
6107 			ptr = ptr->tail.next;
6108 			/* Set ptr to the next real item (skip head) */
6109 			ptr++;
6110 		} else
6111 			return NULL;
6112 	}
6113 	return ptr;
6114 }
6115 
6116 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6117 {
6118 	union trace_eval_map_item *ptr = v;
6119 
6120 	/*
6121 	 * Paranoid! If ptr points to end, we don't want to increment past it.
6122 	 * This really should never happen.
6123 	 */
6124 	(*pos)++;
6125 	ptr = update_eval_map(ptr);
6126 	if (WARN_ON_ONCE(!ptr))
6127 		return NULL;
6128 
6129 	ptr++;
6130 	ptr = update_eval_map(ptr);
6131 
6132 	return ptr;
6133 }
6134 
6135 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6136 {
6137 	union trace_eval_map_item *v;
6138 	loff_t l = 0;
6139 
6140 	mutex_lock(&trace_eval_mutex);
6141 
6142 	v = trace_eval_maps;
6143 	if (v)
6144 		v++;
6145 
6146 	while (v && l < *pos) {
6147 		v = eval_map_next(m, v, &l);
6148 	}
6149 
6150 	return v;
6151 }
6152 
/* seq_file stop: release trace_eval_mutex, which eval_map_start() locks. */
static void eval_map_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&trace_eval_mutex);
}
6157 
6158 static int eval_map_show(struct seq_file *m, void *v)
6159 {
6160 	union trace_eval_map_item *ptr = v;
6161 
6162 	seq_printf(m, "%s %ld (%s)\n",
6163 		   ptr->map.eval_string, ptr->map.eval_value,
6164 		   ptr->map.system);
6165 
6166 	return 0;
6167 }
6168 
6169 static const struct seq_operations tracing_eval_map_seq_ops = {
6170 	.start		= eval_map_start,
6171 	.next		= eval_map_next,
6172 	.stop		= eval_map_stop,
6173 	.show		= eval_map_show,
6174 };
6175 
6176 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6177 {
6178 	int ret;
6179 
6180 	ret = tracing_check_open_get_tr(NULL);
6181 	if (ret)
6182 		return ret;
6183 
6184 	return seq_open(filp, &tracing_eval_map_seq_ops);
6185 }
6186 
6187 static const struct file_operations tracing_eval_map_fops = {
6188 	.open		= tracing_eval_map_open,
6189 	.read		= seq_read,
6190 	.llseek		= seq_lseek,
6191 	.release	= seq_release,
6192 };
6193 
6194 static inline union trace_eval_map_item *
6195 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6196 {
6197 	/* Return tail of array given the head */
6198 	return ptr + ptr->head.length + 1;
6199 }
6200 
6201 static void
6202 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6203 			   int len)
6204 {
6205 	struct trace_eval_map **stop;
6206 	struct trace_eval_map **map;
6207 	union trace_eval_map_item *map_array;
6208 	union trace_eval_map_item *ptr;
6209 
6210 	stop = start + len;
6211 
6212 	/*
6213 	 * The trace_eval_maps contains the map plus a head and tail item,
6214 	 * where the head holds the module and length of array, and the
6215 	 * tail holds a pointer to the next list.
6216 	 */
6217 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6218 	if (!map_array) {
6219 		pr_warn("Unable to allocate trace eval mapping\n");
6220 		return;
6221 	}
6222 
6223 	mutex_lock(&trace_eval_mutex);
6224 
6225 	if (!trace_eval_maps)
6226 		trace_eval_maps = map_array;
6227 	else {
6228 		ptr = trace_eval_maps;
6229 		for (;;) {
6230 			ptr = trace_eval_jmp_to_tail(ptr);
6231 			if (!ptr->tail.next)
6232 				break;
6233 			ptr = ptr->tail.next;
6234 
6235 		}
6236 		ptr->tail.next = map_array;
6237 	}
6238 	map_array->head.mod = mod;
6239 	map_array->head.length = len;
6240 	map_array++;
6241 
6242 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6243 		map_array->map = **map;
6244 		map_array++;
6245 	}
6246 	memset(map_array, 0, sizeof(*map_array));
6247 
6248 	mutex_unlock(&trace_eval_mutex);
6249 }
6250 
6251 static void trace_create_eval_file(struct dentry *d_tracer)
6252 {
6253 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6254 			  NULL, &tracing_eval_map_fops);
6255 }
6256 
6257 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* No eval_map file without CONFIG_TRACE_EVAL_MAP_FILE: nothing to create. */
static inline void trace_create_eval_file(struct dentry *d_tracer)
{
}
/* No eval_map file without CONFIG_TRACE_EVAL_MAP_FILE: nothing to record. */
static inline void trace_insert_eval_map_file(struct module *mod,
					      struct trace_eval_map **start,
					      int len)
{
}
6261 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6262 
/*
 * Hand the @len eval maps starting at @start to trace_event_eval_update()
 * and then record them for the eval_map file. Does nothing for len <= 0.
 */
static void trace_insert_eval_map(struct module *mod,
				  struct trace_eval_map **start, int len)
{
	if (len > 0) {
		trace_event_eval_update(start, len);
		trace_insert_eval_map_file(mod, start, len);
	}
}
6277 
6278 static ssize_t
6279 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6280 		       size_t cnt, loff_t *ppos)
6281 {
6282 	struct trace_array *tr = filp->private_data;
6283 	char buf[MAX_TRACER_SIZE+2];
6284 	int r;
6285 
6286 	mutex_lock(&trace_types_lock);
6287 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6288 	mutex_unlock(&trace_types_lock);
6289 
6290 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6291 }
6292 
6293 int tracer_init(struct tracer *t, struct trace_array *tr)
6294 {
6295 	tracing_reset_online_cpus(&tr->array_buffer);
6296 	return t->init(tr);
6297 }
6298 
6299 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6300 {
6301 	int cpu;
6302 
6303 	for_each_tracing_cpu(cpu)
6304 		per_cpu_ptr(buf->data, cpu)->entries = val;
6305 }
6306 
6307 #ifdef CONFIG_TRACER_MAX_TRACE
6308 /* resize @tr's buffer to the size of @size_tr's entries */
6309 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6310 					struct array_buffer *size_buf, int cpu_id)
6311 {
6312 	int cpu, ret = 0;
6313 
6314 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6315 		for_each_tracing_cpu(cpu) {
6316 			ret = ring_buffer_resize(trace_buf->buffer,
6317 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6318 			if (ret < 0)
6319 				break;
6320 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6321 				per_cpu_ptr(size_buf->data, cpu)->entries;
6322 		}
6323 	} else {
6324 		ret = ring_buffer_resize(trace_buf->buffer,
6325 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6326 		if (ret == 0)
6327 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6328 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6329 	}
6330 
6331 	return ret;
6332 }
6333 #endif /* CONFIG_TRACER_MAX_TRACE */
6334 
6335 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6336 					unsigned long size, int cpu)
6337 {
6338 	int ret;
6339 
6340 	/*
6341 	 * If kernel or user changes the size of the ring buffer
6342 	 * we use the size that was given, and we can forget about
6343 	 * expanding it later.
6344 	 */
6345 	ring_buffer_expanded = true;
6346 
6347 	/* May be called before buffers are initialized */
6348 	if (!tr->array_buffer.buffer)
6349 		return 0;
6350 
6351 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6352 	if (ret < 0)
6353 		return ret;
6354 
6355 #ifdef CONFIG_TRACER_MAX_TRACE
6356 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6357 	    !tr->current_trace->use_max_tr)
6358 		goto out;
6359 
6360 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6361 	if (ret < 0) {
6362 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6363 						     &tr->array_buffer, cpu);
6364 		if (r < 0) {
6365 			/*
6366 			 * AARGH! We are left with different
6367 			 * size max buffer!!!!
6368 			 * The max buffer is our "snapshot" buffer.
6369 			 * When a tracer needs a snapshot (one of the
6370 			 * latency tracers), it swaps the max buffer
6371 			 * with the saved snap shot. We succeeded to
6372 			 * update the size of the main buffer, but failed to
6373 			 * update the size of the max buffer. But when we tried
6374 			 * to reset the main buffer to the original size, we
6375 			 * failed there too. This is very unlikely to
6376 			 * happen, but if it does, warn and kill all
6377 			 * tracing.
6378 			 */
6379 			WARN_ON(1);
6380 			tracing_disabled = 1;
6381 		}
6382 		return ret;
6383 	}
6384 
6385 	if (cpu == RING_BUFFER_ALL_CPUS)
6386 		set_buffer_entries(&tr->max_buffer, size);
6387 	else
6388 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6389 
6390  out:
6391 #endif /* CONFIG_TRACER_MAX_TRACE */
6392 
6393 	if (cpu == RING_BUFFER_ALL_CPUS)
6394 		set_buffer_entries(&tr->array_buffer, size);
6395 	else
6396 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6397 
6398 	return ret;
6399 }
6400 
6401 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6402 				  unsigned long size, int cpu_id)
6403 {
6404 	int ret;
6405 
6406 	mutex_lock(&trace_types_lock);
6407 
6408 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6409 		/* make sure, this cpu is enabled in the mask */
6410 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6411 			ret = -EINVAL;
6412 			goto out;
6413 		}
6414 	}
6415 
6416 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6417 	if (ret < 0)
6418 		ret = -ENOMEM;
6419 
6420 out:
6421 	mutex_unlock(&trace_types_lock);
6422 
6423 	return ret;
6424 }
6425 
6426 
6427 /**
6428  * tracing_update_buffers - used by tracing facility to expand ring buffers
6429  *
6430  * To save on memory when the tracing is never used on a system with it
6431  * configured in. The ring buffers are set to a minimum size. But once
6432  * a user starts to use the tracing facility, then they need to grow
6433  * to their default size.
6434  *
6435  * This function is to be called when a tracer is about to be used.
6436  */
6437 int tracing_update_buffers(void)
6438 {
6439 	int ret = 0;
6440 
6441 	mutex_lock(&trace_types_lock);
6442 	if (!ring_buffer_expanded)
6443 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6444 						RING_BUFFER_ALL_CPUS);
6445 	mutex_unlock(&trace_types_lock);
6446 
6447 	return ret;
6448 }
6449 
struct trace_option_dentry;

/* Forward declaration: used by add_tracer_options() before its definition. */
static void create_trace_option_files(struct trace_array *tr,
				      struct tracer *tracer);
6454 
6455 /*
6456  * Used to clear out the tracer before deletion of an instance.
6457  * Must have trace_types_lock held.
6458  */
6459 static void tracing_set_nop(struct trace_array *tr)
6460 {
6461 	if (tr->current_trace == &nop_trace)
6462 		return;
6463 
6464 	tr->current_trace->enabled--;
6465 
6466 	if (tr->current_trace->reset)
6467 		tr->current_trace->reset(tr);
6468 
6469 	tr->current_trace = &nop_trace;
6470 }
6471 
6472 static bool tracer_options_updated;
6473 
6474 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6475 {
6476 	/* Only enable if the directory has been created already. */
6477 	if (!tr->dir)
6478 		return;
6479 
6480 	/* Only create trace option files after update_tracer_options finish */
6481 	if (!tracer_options_updated)
6482 		return;
6483 
6484 	create_trace_option_files(tr, t);
6485 }
6486 
/*
 * Make the tracer named @buf the current tracer of @tr.
 *
 * The whole switch runs under trace_types_lock. The ring buffer is expanded
 * to trace_buf_size first if that has not happened yet. The previous tracer
 * is disabled and reset, nop_trace is installed while the switch is in
 * progress, and the new tracer is initialized through tracer_init() when it
 * has an ->init callback.
 *
 * Returns 0 on success (also when @buf already is the current tracer, or
 * when a noboot tracer is ignored during boot), -EINVAL if no tracer of
 * that name is registered or it is not allowed for @tr, -EBUSY if a
 * conditional snapshot is active for a use_max_tr tracer or trace pipe
 * readers hold a reference, or the error of the resize, snapshot
 * allocation or tracer init step. If one of the latter two fails, @tr is
 * left with nop_trace as current tracer.
 */
int tracing_set_tracer(struct trace_array *tr, const char *buf)
{
	struct tracer *t;
#ifdef CONFIG_TRACER_MAX_TRACE
	bool had_max_tr;
#endif
	int ret = 0;

	mutex_lock(&trace_types_lock);

	if (!ring_buffer_expanded) {
		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			goto out;
		ret = 0;
	}

	/* Look the tracer up by name in the list of registered tracers */
	for (t = trace_types; t; t = t->next) {
		if (strcmp(t->name, buf) == 0)
			break;
	}
	if (!t) {
		ret = -EINVAL;
		goto out;
	}
	/* Nothing to do if it is already the current tracer */
	if (t == tr->current_trace)
		goto out;

#ifdef CONFIG_TRACER_SNAPSHOT
	if (t->use_max_tr) {
		/* cond_snapshot is checked under max_lock with interrupts off */
		local_irq_disable();
		arch_spin_lock(&tr->max_lock);
		if (tr->cond_snapshot)
			ret = -EBUSY;
		arch_spin_unlock(&tr->max_lock);
		local_irq_enable();
		if (ret)
			goto out;
	}
#endif
	/* Some tracers won't work on kernel command line */
	if (system_state < SYSTEM_RUNNING && t->noboot) {
		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
			t->name);
		goto out;
	}

	/* Some tracers are only allowed for the top level buffer */
	if (!trace_ok_for_array(t, tr)) {
		ret = -EINVAL;
		goto out;
	}

	/* If trace pipe files are being read, we can't change the tracer */
	if (tr->trace_ref) {
		ret = -EBUSY;
		goto out;
	}

	trace_branch_disable();

	/* Tear down the outgoing tracer */
	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

#ifdef CONFIG_TRACER_MAX_TRACE
	had_max_tr = tr->current_trace->use_max_tr;

	/* Current trace needs to be nop_trace before synchronize_rcu */
	tr->current_trace = &nop_trace;

	if (had_max_tr && !t->use_max_tr) {
		/*
		 * We need to make sure that the update_max_tr sees that
		 * current_trace changed to nop_trace to keep it from
		 * swapping the buffers after we resize it.
		 * The update_max_tr is called from interrupts disabled
		 * so the synchronize_rcu() below is sufficient.
		 */
		synchronize_rcu();
		free_snapshot(tr);
	}

	/* The incoming tracer needs a snapshot buffer and none exists yet */
	if (t->use_max_tr && !tr->allocated_snapshot) {
		ret = tracing_alloc_snapshot_instance(tr);
		if (ret < 0)
			goto out;
	}
#else
	tr->current_trace = &nop_trace;
#endif

	if (t->init) {
		ret = tracer_init(t, tr);
		if (ret)
			goto out;
	}

	/* Publish the new tracer only after it initialized successfully */
	tr->current_trace = t;
	tr->current_trace->enabled++;
	trace_branch_enable(tr);
 out:
	mutex_unlock(&trace_types_lock);

	return ret;
}
6595 
6596 static ssize_t
6597 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6598 			size_t cnt, loff_t *ppos)
6599 {
6600 	struct trace_array *tr = filp->private_data;
6601 	char buf[MAX_TRACER_SIZE+1];
6602 	char *name;
6603 	size_t ret;
6604 	int err;
6605 
6606 	ret = cnt;
6607 
6608 	if (cnt > MAX_TRACER_SIZE)
6609 		cnt = MAX_TRACER_SIZE;
6610 
6611 	if (copy_from_user(buf, ubuf, cnt))
6612 		return -EFAULT;
6613 
6614 	buf[cnt] = 0;
6615 
6616 	name = strim(buf);
6617 
6618 	err = tracing_set_tracer(tr, name);
6619 	if (err)
6620 		return err;
6621 
6622 	*ppos += ret;
6623 
6624 	return ret;
6625 }
6626 
6627 static ssize_t
6628 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6629 		   size_t cnt, loff_t *ppos)
6630 {
6631 	char buf[64];
6632 	int r;
6633 
6634 	r = snprintf(buf, sizeof(buf), "%ld\n",
6635 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6636 	if (r > sizeof(buf))
6637 		r = sizeof(buf);
6638 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6639 }
6640 
6641 static ssize_t
6642 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6643 		    size_t cnt, loff_t *ppos)
6644 {
6645 	unsigned long val;
6646 	int ret;
6647 
6648 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6649 	if (ret)
6650 		return ret;
6651 
6652 	*ptr = val * 1000;
6653 
6654 	return cnt;
6655 }
6656 
6657 static ssize_t
6658 tracing_thresh_read(struct file *filp, char __user *ubuf,
6659 		    size_t cnt, loff_t *ppos)
6660 {
6661 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6662 }
6663 
6664 static ssize_t
6665 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6666 		     size_t cnt, loff_t *ppos)
6667 {
6668 	struct trace_array *tr = filp->private_data;
6669 	int ret;
6670 
6671 	mutex_lock(&trace_types_lock);
6672 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6673 	if (ret < 0)
6674 		goto out;
6675 
6676 	if (tr->current_trace->update_thresh) {
6677 		ret = tr->current_trace->update_thresh(tr);
6678 		if (ret < 0)
6679 			goto out;
6680 	}
6681 
6682 	ret = cnt;
6683 out:
6684 	mutex_unlock(&trace_types_lock);
6685 
6686 	return ret;
6687 }
6688 
6689 #ifdef CONFIG_TRACER_MAX_TRACE
6690 
6691 static ssize_t
6692 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6693 		     size_t cnt, loff_t *ppos)
6694 {
6695 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6696 }
6697 
6698 static ssize_t
6699 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6700 		      size_t cnt, loff_t *ppos)
6701 {
6702 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6703 }
6704 
6705 #endif
6706 
6707 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6708 {
6709 	struct trace_array *tr = inode->i_private;
6710 	struct trace_iterator *iter;
6711 	int ret;
6712 
6713 	ret = tracing_check_open_get_tr(tr);
6714 	if (ret)
6715 		return ret;
6716 
6717 	mutex_lock(&trace_types_lock);
6718 
6719 	/* create a buffer to store the information to pass to userspace */
6720 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6721 	if (!iter) {
6722 		ret = -ENOMEM;
6723 		__trace_array_put(tr);
6724 		goto out;
6725 	}
6726 
6727 	trace_seq_init(&iter->seq);
6728 	iter->trace = tr->current_trace;
6729 
6730 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6731 		ret = -ENOMEM;
6732 		goto fail;
6733 	}
6734 
6735 	/* trace pipe does not show start of buffer */
6736 	cpumask_setall(iter->started);
6737 
6738 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6739 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6740 
6741 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6742 	if (trace_clocks[tr->clock_id].in_ns)
6743 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6744 
6745 	iter->tr = tr;
6746 	iter->array_buffer = &tr->array_buffer;
6747 	iter->cpu_file = tracing_get_cpu(inode);
6748 	mutex_init(&iter->mutex);
6749 	filp->private_data = iter;
6750 
6751 	if (iter->trace->pipe_open)
6752 		iter->trace->pipe_open(iter);
6753 
6754 	nonseekable_open(inode, filp);
6755 
6756 	tr->trace_ref++;
6757 out:
6758 	mutex_unlock(&trace_types_lock);
6759 	return ret;
6760 
6761 fail:
6762 	kfree(iter);
6763 	__trace_array_put(tr);
6764 	mutex_unlock(&trace_types_lock);
6765 	return ret;
6766 }
6767 
6768 static int tracing_release_pipe(struct inode *inode, struct file *file)
6769 {
6770 	struct trace_iterator *iter = file->private_data;
6771 	struct trace_array *tr = inode->i_private;
6772 
6773 	mutex_lock(&trace_types_lock);
6774 
6775 	tr->trace_ref--;
6776 
6777 	if (iter->trace->pipe_close)
6778 		iter->trace->pipe_close(iter);
6779 
6780 	mutex_unlock(&trace_types_lock);
6781 
6782 	free_cpumask_var(iter->started);
6783 	kfree(iter->fmt);
6784 	kfree(iter->temp);
6785 	mutex_destroy(&iter->mutex);
6786 	kfree(iter);
6787 
6788 	trace_array_put(tr);
6789 
6790 	return 0;
6791 }
6792 
6793 static __poll_t
6794 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6795 {
6796 	struct trace_array *tr = iter->tr;
6797 
6798 	/* Iterators are static, they should be filled or empty */
6799 	if (trace_buffer_iter(iter, iter->cpu_file))
6800 		return EPOLLIN | EPOLLRDNORM;
6801 
6802 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6803 		/*
6804 		 * Always select as readable when in blocking mode
6805 		 */
6806 		return EPOLLIN | EPOLLRDNORM;
6807 	else
6808 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6809 					     filp, poll_table, iter->tr->buffer_percent);
6810 }
6811 
6812 static __poll_t
6813 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6814 {
6815 	struct trace_iterator *iter = filp->private_data;
6816 
6817 	return trace_poll(iter, filp, poll_table);
6818 }
6819 
6820 /* Must be called with iter->mutex held. */
6821 static int tracing_wait_pipe(struct file *filp)
6822 {
6823 	struct trace_iterator *iter = filp->private_data;
6824 	int ret;
6825 
6826 	while (trace_empty(iter)) {
6827 
6828 		if ((filp->f_flags & O_NONBLOCK)) {
6829 			return -EAGAIN;
6830 		}
6831 
6832 		/*
6833 		 * We block until we read something and tracing is disabled.
6834 		 * We still block if tracing is disabled, but we have never
6835 		 * read anything. This allows a user to cat this file, and
6836 		 * then enable tracing. But after we have read something,
6837 		 * we give an EOF when tracing is again disabled.
6838 		 *
6839 		 * iter->pos will be 0 if we haven't read anything.
6840 		 */
6841 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6842 			break;
6843 
6844 		mutex_unlock(&iter->mutex);
6845 
6846 		ret = wait_on_pipe(iter, 0);
6847 
6848 		mutex_lock(&iter->mutex);
6849 
6850 		if (ret)
6851 			return ret;
6852 	}
6853 
6854 	return 1;
6855 }
6856 
/*
 * Consumer reader.
 *
 * ->read handler of the trace pipe files. Entries are formatted into
 * iter->seq and copied to user space. Each printed entry is consumed
 * unless print_trace_line() returns TRACE_TYPE_NO_CONSUME.
 *
 * Returns what trace_seq_to_user() reports for the copy (left over data
 * from a previous call is handed out first), a non-zero result of the
 * tracer's own ->read callback, 0 when there is nothing more to read, or
 * the non-positive result of tracing_wait_pipe().
 */
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	struct trace_iterator *iter = filp->private_data;
	ssize_t sret;

	/*
	 * Avoid more than one consumer on a single file descriptor
	 * This is just a matter of traces coherency, the ring buffer itself
	 * is protected.
	 */
	mutex_lock(&iter->mutex);

	/* return any leftover data */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (sret != -EBUSY)
		goto out;

	trace_seq_init(&iter->seq);

	/* Give the tracer's own ->read callback the first shot */
	if (iter->trace->read) {
		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
		if (sret)
			goto out;
	}

waitagain:
	sret = tracing_wait_pipe(filp);
	if (sret <= 0)
		goto out;

	/* stop when tracing is finished */
	if (trace_empty(iter)) {
		sret = 0;
		goto out;
	}

	/* Cap a single read at just below one page */
	if (cnt >= PAGE_SIZE)
		cnt = PAGE_SIZE - 1;

	/* reset all but tr, trace, and overruns */
	trace_iterator_reset(iter);
	cpumask_clear(iter->started);
	trace_seq_init(&iter->seq);

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);
	while (trace_find_next_entry_inc(iter) != NULL) {
		enum print_line_t ret;
		/* Length before this entry, to be able to drop a partial line */
		int save_len = iter->seq.seq.len;

		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/*
			 * If one print_trace_line() fills entire trace_seq in one shot,
			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
			 * In this case, we need to consume it, otherwise, loop will peek
			 * this event next time, resulting in an infinite loop.
			 */
			if (save_len == 0) {
				iter->seq.full = 0;
				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
				trace_consume(iter);
				break;
			}

			/* In other cases, don't print partial lines */
			iter->seq.seq.len = save_len;
			break;
		}
		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);

		/* Enough text collected to satisfy the request */
		if (trace_seq_used(&iter->seq) >= cnt)
			break;

		/*
		 * Setting the full flag means we reached the trace_seq buffer
		 * size and we should leave by partial output condition above.
		 * One of the trace_seq_* functions is not used properly.
		 */
		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
			  iter->ent->type);
	}
	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();

	/* Now copy what we have to the user */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	/* Everything handed out: start with an empty seq next time */
	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
		trace_seq_init(&iter->seq);

	/*
	 * If there was nothing to send to user, in spite of consuming trace
	 * entries, go back to wait for more entries.
	 */
	if (sret == -EBUSY)
		goto waitagain;

out:
	mutex_unlock(&iter->mutex);

	return sret;
}
6965 
6966 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6967 				     unsigned int idx)
6968 {
6969 	__free_page(spd->pages[idx]);
6970 }
6971 
/*
 * Format trace entries into iter->seq for one splice page.
 *
 * @rem is the number of bytes the splice request may still take. Entries
 * are printed one after the other until the seq buffer overflows, a partial
 * line is reported, the next line would not fit into @rem, or no further
 * entry is found. A line that does not fit is dropped from the seq again
 * and not consumed.
 *
 * Returns the remaining byte budget. 0 is returned when the budget is
 * exhausted or the entries ran out, which ends the caller's page loop.
 */
static size_t
tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
{
	size_t count;
	int save_len;
	int ret;

	/* Seq buffer is page-sized, exactly what we need. */
	for (;;) {
		/* Remember the length so the line can be rolled back */
		save_len = iter->seq.seq.len;
		ret = print_trace_line(iter);

		if (trace_seq_has_overflowed(&iter->seq)) {
			iter->seq.seq.len = save_len;
			break;
		}

		/*
		 * This should not be hit, because it should only
		 * be set if the iter->seq overflowed. But check it
		 * anyway to be safe.
		 */
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			iter->seq.seq.len = save_len;
			break;
		}

		/* Bytes added by this line */
		count = trace_seq_used(&iter->seq) - save_len;
		if (rem < count) {
			rem = 0;
			iter->seq.seq.len = save_len;
			break;
		}

		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);
		rem -= count;
		if (!trace_find_next_entry_inc(iter))	{
			rem = 0;
			iter->ent = NULL;
			break;
		}
	}

	return rem;
}
7018 
/*
 * ->splice_read handler of the trace pipe files.
 *
 * Formats trace entries page by page (see tracing_fill_pipe_page()), copies
 * each filled seq buffer into a freshly allocated page and splices the
 * pages into @pipe.
 *
 * Returns the result of splice_to_pipe(), 0 if no page could be filled,
 * -ENOMEM if the splice descriptor cannot be grown, a non-zero result of
 * the tracer's own ->splice_read callback, the non-positive result of
 * tracing_wait_pipe(), or -EFAULT when no entry can be found after waiting.
 */
static ssize_t tracing_splice_read_pipe(struct file *filp,
					loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len,
					unsigned int flags)
{
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct trace_iterator *iter = filp->private_data;
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages	= 0, /* This gets updated below. */
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &default_pipe_buf_ops,
		.spd_release	= tracing_spd_release_pipe,
	};
	ssize_t ret;
	size_t rem;
	unsigned int i;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	mutex_lock(&iter->mutex);

	/* Give the tracer's own ->splice_read callback the first shot */
	if (iter->trace->splice_read) {
		ret = iter->trace->splice_read(iter, filp,
					       ppos, pipe, len, flags);
		if (ret)
			goto out_err;
	}

	ret = tracing_wait_pipe(filp);
	if (ret <= 0)
		goto out_err;

	/* Position the iterator on an entry unless it already has one */
	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
		ret = -EFAULT;
		goto out_err;
	}

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);

	/* Fill as many pages as possible. */
	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
		spd.pages[i] = alloc_page(GFP_KERNEL);
		if (!spd.pages[i])
			break;

		rem = tracing_fill_pipe_page(rem, iter);

		/* Copy the data into the page, so we can start over. */
		ret = trace_seq_to_buffer(&iter->seq,
					  page_address(spd.pages[i]),
					  trace_seq_used(&iter->seq));
		if (ret < 0) {
			/* Page i is not counted: i is not incremented on break */
			__free_page(spd.pages[i]);
			break;
		}
		spd.partial[i].offset = 0;
		spd.partial[i].len = trace_seq_used(&iter->seq);

		trace_seq_init(&iter->seq);
	}

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
	mutex_unlock(&iter->mutex);

	/* i is the number of pages that were filled successfully */
	spd.nr_pages = i;

	if (i)
		ret = splice_to_pipe(pipe, &spd);
	else
		ret = 0;
out:
	splice_shrink_spd(&spd);
	return ret;

out_err:
	mutex_unlock(&iter->mutex);
	goto out;
}
7104 
7105 static ssize_t
7106 tracing_entries_read(struct file *filp, char __user *ubuf,
7107 		     size_t cnt, loff_t *ppos)
7108 {
7109 	struct inode *inode = file_inode(filp);
7110 	struct trace_array *tr = inode->i_private;
7111 	int cpu = tracing_get_cpu(inode);
7112 	char buf[64];
7113 	int r = 0;
7114 	ssize_t ret;
7115 
7116 	mutex_lock(&trace_types_lock);
7117 
7118 	if (cpu == RING_BUFFER_ALL_CPUS) {
7119 		int cpu, buf_size_same;
7120 		unsigned long size;
7121 
7122 		size = 0;
7123 		buf_size_same = 1;
7124 		/* check if all cpu sizes are same */
7125 		for_each_tracing_cpu(cpu) {
7126 			/* fill in the size from first enabled cpu */
7127 			if (size == 0)
7128 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7129 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7130 				buf_size_same = 0;
7131 				break;
7132 			}
7133 		}
7134 
7135 		if (buf_size_same) {
7136 			if (!ring_buffer_expanded)
7137 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7138 					    size >> 10,
7139 					    trace_buf_size >> 10);
7140 			else
7141 				r = sprintf(buf, "%lu\n", size >> 10);
7142 		} else
7143 			r = sprintf(buf, "X\n");
7144 	} else
7145 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7146 
7147 	mutex_unlock(&trace_types_lock);
7148 
7149 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7150 	return ret;
7151 }
7152 
7153 static ssize_t
7154 tracing_entries_write(struct file *filp, const char __user *ubuf,
7155 		      size_t cnt, loff_t *ppos)
7156 {
7157 	struct inode *inode = file_inode(filp);
7158 	struct trace_array *tr = inode->i_private;
7159 	unsigned long val;
7160 	int ret;
7161 
7162 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7163 	if (ret)
7164 		return ret;
7165 
7166 	/* must have at least 1 entry */
7167 	if (!val)
7168 		return -EINVAL;
7169 
7170 	/* value is in KB */
7171 	val <<= 10;
7172 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7173 	if (ret < 0)
7174 		return ret;
7175 
7176 	*ppos += cnt;
7177 
7178 	return cnt;
7179 }
7180 
7181 static ssize_t
7182 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7183 				size_t cnt, loff_t *ppos)
7184 {
7185 	struct trace_array *tr = filp->private_data;
7186 	char buf[64];
7187 	int r, cpu;
7188 	unsigned long size = 0, expanded_size = 0;
7189 
7190 	mutex_lock(&trace_types_lock);
7191 	for_each_tracing_cpu(cpu) {
7192 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7193 		if (!ring_buffer_expanded)
7194 			expanded_size += trace_buf_size >> 10;
7195 	}
7196 	if (ring_buffer_expanded)
7197 		r = sprintf(buf, "%lu\n", size);
7198 	else
7199 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7200 	mutex_unlock(&trace_types_lock);
7201 
7202 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7203 }
7204 
7205 static ssize_t
7206 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7207 			  size_t cnt, loff_t *ppos)
7208 {
7209 	/*
7210 	 * There is no need to read what the user has written, this function
7211 	 * is just to make sure that there is no error when "echo" is used
7212 	 */
7213 
7214 	*ppos += cnt;
7215 
7216 	return cnt;
7217 }
7218 
7219 static int
7220 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7221 {
7222 	struct trace_array *tr = inode->i_private;
7223 
7224 	/* disable tracing ? */
7225 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7226 		tracer_tracing_off(tr);
7227 	/* resize the ring buffer to 0 */
7228 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7229 
7230 	trace_array_put(tr);
7231 
7232 	return 0;
7233 }
7234 
7235 static ssize_t
7236 tracing_mark_write(struct file *filp, const char __user *ubuf,
7237 					size_t cnt, loff_t *fpos)
7238 {
7239 	struct trace_array *tr = filp->private_data;
7240 	struct ring_buffer_event *event;
7241 	enum event_trigger_type tt = ETT_NONE;
7242 	struct trace_buffer *buffer;
7243 	struct print_entry *entry;
7244 	ssize_t written;
7245 	int size;
7246 	int len;
7247 
7248 /* Used in tracing_mark_raw_write() as well */
7249 #define FAULTED_STR "<faulted>"
7250 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7251 
7252 	if (tracing_disabled)
7253 		return -EINVAL;
7254 
7255 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7256 		return -EINVAL;
7257 
7258 	if (cnt > TRACE_BUF_SIZE)
7259 		cnt = TRACE_BUF_SIZE;
7260 
7261 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7262 
7263 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7264 
7265 	/* If less than "<faulted>", then make sure we can still add that */
7266 	if (cnt < FAULTED_SIZE)
7267 		size += FAULTED_SIZE - cnt;
7268 
7269 	buffer = tr->array_buffer.buffer;
7270 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7271 					    tracing_gen_ctx());
7272 	if (unlikely(!event))
7273 		/* Ring buffer disabled, return as if not open for write */
7274 		return -EBADF;
7275 
7276 	entry = ring_buffer_event_data(event);
7277 	entry->ip = _THIS_IP_;
7278 
7279 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7280 	if (len) {
7281 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7282 		cnt = FAULTED_SIZE;
7283 		written = -EFAULT;
7284 	} else
7285 		written = cnt;
7286 
7287 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7288 		/* do not add \n before testing triggers, but add \0 */
7289 		entry->buf[cnt] = '\0';
7290 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7291 	}
7292 
7293 	if (entry->buf[cnt - 1] != '\n') {
7294 		entry->buf[cnt] = '\n';
7295 		entry->buf[cnt + 1] = '\0';
7296 	} else
7297 		entry->buf[cnt] = '\0';
7298 
7299 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7300 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7301 	__buffer_unlock_commit(buffer, event);
7302 
7303 	if (tt)
7304 		event_triggers_post_call(tr->trace_marker_file, tt);
7305 
7306 	return written;
7307 }
7308 
7309 /* Limit it for now to 3K (including tag) */
7310 #define RAW_DATA_MAX_SIZE (1024*3)
7311 
7312 static ssize_t
7313 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7314 					size_t cnt, loff_t *fpos)
7315 {
7316 	struct trace_array *tr = filp->private_data;
7317 	struct ring_buffer_event *event;
7318 	struct trace_buffer *buffer;
7319 	struct raw_data_entry *entry;
7320 	ssize_t written;
7321 	int size;
7322 	int len;
7323 
7324 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7325 
7326 	if (tracing_disabled)
7327 		return -EINVAL;
7328 
7329 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7330 		return -EINVAL;
7331 
7332 	/* The marker must at least have a tag id */
7333 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7334 		return -EINVAL;
7335 
7336 	if (cnt > TRACE_BUF_SIZE)
7337 		cnt = TRACE_BUF_SIZE;
7338 
7339 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7340 
7341 	size = sizeof(*entry) + cnt;
7342 	if (cnt < FAULT_SIZE_ID)
7343 		size += FAULT_SIZE_ID - cnt;
7344 
7345 	buffer = tr->array_buffer.buffer;
7346 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7347 					    tracing_gen_ctx());
7348 	if (!event)
7349 		/* Ring buffer disabled, return as if not open for write */
7350 		return -EBADF;
7351 
7352 	entry = ring_buffer_event_data(event);
7353 
7354 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7355 	if (len) {
7356 		entry->id = -1;
7357 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7358 		written = -EFAULT;
7359 	} else
7360 		written = cnt;
7361 
7362 	__buffer_unlock_commit(buffer, event);
7363 
7364 	return written;
7365 }
7366 
7367 static int tracing_clock_show(struct seq_file *m, void *v)
7368 {
7369 	struct trace_array *tr = m->private;
7370 	int i;
7371 
7372 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7373 		seq_printf(m,
7374 			"%s%s%s%s", i ? " " : "",
7375 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7376 			i == tr->clock_id ? "]" : "");
7377 	seq_putc(m, '\n');
7378 
7379 	return 0;
7380 }
7381 
7382 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7383 {
7384 	int i;
7385 
7386 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7387 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7388 			break;
7389 	}
7390 	if (i == ARRAY_SIZE(trace_clocks))
7391 		return -EINVAL;
7392 
7393 	mutex_lock(&trace_types_lock);
7394 
7395 	tr->clock_id = i;
7396 
7397 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7398 
7399 	/*
7400 	 * New clock may not be consistent with the previous clock.
7401 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7402 	 */
7403 	tracing_reset_online_cpus(&tr->array_buffer);
7404 
7405 #ifdef CONFIG_TRACER_MAX_TRACE
7406 	if (tr->max_buffer.buffer)
7407 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7408 	tracing_reset_online_cpus(&tr->max_buffer);
7409 #endif
7410 
7411 	mutex_unlock(&trace_types_lock);
7412 
7413 	return 0;
7414 }
7415 
7416 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7417 				   size_t cnt, loff_t *fpos)
7418 {
7419 	struct seq_file *m = filp->private_data;
7420 	struct trace_array *tr = m->private;
7421 	char buf[64];
7422 	const char *clockstr;
7423 	int ret;
7424 
7425 	if (cnt >= sizeof(buf))
7426 		return -EINVAL;
7427 
7428 	if (copy_from_user(buf, ubuf, cnt))
7429 		return -EFAULT;
7430 
7431 	buf[cnt] = 0;
7432 
7433 	clockstr = strstrip(buf);
7434 
7435 	ret = tracing_set_clock(tr, clockstr);
7436 	if (ret)
7437 		return ret;
7438 
7439 	*fpos += cnt;
7440 
7441 	return cnt;
7442 }
7443 
7444 static int tracing_clock_open(struct inode *inode, struct file *file)
7445 {
7446 	struct trace_array *tr = inode->i_private;
7447 	int ret;
7448 
7449 	ret = tracing_check_open_get_tr(tr);
7450 	if (ret)
7451 		return ret;
7452 
7453 	ret = single_open(file, tracing_clock_show, inode->i_private);
7454 	if (ret < 0)
7455 		trace_array_put(tr);
7456 
7457 	return ret;
7458 }
7459 
7460 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7461 {
7462 	struct trace_array *tr = m->private;
7463 
7464 	mutex_lock(&trace_types_lock);
7465 
7466 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7467 		seq_puts(m, "delta [absolute]\n");
7468 	else
7469 		seq_puts(m, "[delta] absolute\n");
7470 
7471 	mutex_unlock(&trace_types_lock);
7472 
7473 	return 0;
7474 }
7475 
7476 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7477 {
7478 	struct trace_array *tr = inode->i_private;
7479 	int ret;
7480 
7481 	ret = tracing_check_open_get_tr(tr);
7482 	if (ret)
7483 		return ret;
7484 
7485 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7486 	if (ret < 0)
7487 		trace_array_put(tr);
7488 
7489 	return ret;
7490 }
7491 
7492 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7493 {
7494 	if (rbe == this_cpu_read(trace_buffered_event))
7495 		return ring_buffer_time_stamp(buffer);
7496 
7497 	return ring_buffer_event_time_stamp(buffer, rbe);
7498 }
7499 
7500 /*
7501  * Set or disable using the per CPU trace_buffer_event when possible.
7502  */
7503 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7504 {
7505 	int ret = 0;
7506 
7507 	mutex_lock(&trace_types_lock);
7508 
7509 	if (set && tr->no_filter_buffering_ref++)
7510 		goto out;
7511 
7512 	if (!set) {
7513 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7514 			ret = -EINVAL;
7515 			goto out;
7516 		}
7517 
7518 		--tr->no_filter_buffering_ref;
7519 	}
7520  out:
7521 	mutex_unlock(&trace_types_lock);
7522 
7523 	return ret;
7524 }
7525 
/*
 * Per-open state used by the tracing_buffers_*() file handlers, which hand
 * out ring buffer data a page at a time.
 */
struct ftrace_buffer_info {
	struct trace_iterator	iter;
	/* Page from ring_buffer_alloc_read_page(); NULL until the first read */
	void			*spare;
	/* iter.cpu_file value in effect when @spare was allocated */
	unsigned int		spare_cpu;
	/*
	 * Offset into @spare that has already been copied to user space;
	 * initialised to (unsigned int)-1 to force a ring buffer read first.
	 */
	unsigned int		read;
};
7532 
7533 #ifdef CONFIG_TRACER_SNAPSHOT
7534 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7535 {
7536 	struct trace_array *tr = inode->i_private;
7537 	struct trace_iterator *iter;
7538 	struct seq_file *m;
7539 	int ret;
7540 
7541 	ret = tracing_check_open_get_tr(tr);
7542 	if (ret)
7543 		return ret;
7544 
7545 	if (file->f_mode & FMODE_READ) {
7546 		iter = __tracing_open(inode, file, true);
7547 		if (IS_ERR(iter))
7548 			ret = PTR_ERR(iter);
7549 	} else {
7550 		/* Writes still need the seq_file to hold the private data */
7551 		ret = -ENOMEM;
7552 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7553 		if (!m)
7554 			goto out;
7555 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7556 		if (!iter) {
7557 			kfree(m);
7558 			goto out;
7559 		}
7560 		ret = 0;
7561 
7562 		iter->tr = tr;
7563 		iter->array_buffer = &tr->max_buffer;
7564 		iter->cpu_file = tracing_get_cpu(inode);
7565 		m->private = iter;
7566 		file->private_data = m;
7567 	}
7568 out:
7569 	if (ret < 0)
7570 		trace_array_put(tr);
7571 
7572 	return ret;
7573 }
7574 
7575 static ssize_t
7576 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7577 		       loff_t *ppos)
7578 {
7579 	struct seq_file *m = filp->private_data;
7580 	struct trace_iterator *iter = m->private;
7581 	struct trace_array *tr = iter->tr;
7582 	unsigned long val;
7583 	int ret;
7584 
7585 	ret = tracing_update_buffers();
7586 	if (ret < 0)
7587 		return ret;
7588 
7589 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7590 	if (ret)
7591 		return ret;
7592 
7593 	mutex_lock(&trace_types_lock);
7594 
7595 	if (tr->current_trace->use_max_tr) {
7596 		ret = -EBUSY;
7597 		goto out;
7598 	}
7599 
7600 	local_irq_disable();
7601 	arch_spin_lock(&tr->max_lock);
7602 	if (tr->cond_snapshot)
7603 		ret = -EBUSY;
7604 	arch_spin_unlock(&tr->max_lock);
7605 	local_irq_enable();
7606 	if (ret)
7607 		goto out;
7608 
7609 	switch (val) {
7610 	case 0:
7611 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7612 			ret = -EINVAL;
7613 			break;
7614 		}
7615 		if (tr->allocated_snapshot)
7616 			free_snapshot(tr);
7617 		break;
7618 	case 1:
7619 /* Only allow per-cpu swap if the ring buffer supports it */
7620 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7621 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7622 			ret = -EINVAL;
7623 			break;
7624 		}
7625 #endif
7626 		if (tr->allocated_snapshot)
7627 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7628 					&tr->array_buffer, iter->cpu_file);
7629 		else
7630 			ret = tracing_alloc_snapshot_instance(tr);
7631 		if (ret < 0)
7632 			break;
7633 		local_irq_disable();
7634 		/* Now, we're going to swap */
7635 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7636 			update_max_tr(tr, current, smp_processor_id(), NULL);
7637 		else
7638 			update_max_tr_single(tr, current, iter->cpu_file);
7639 		local_irq_enable();
7640 		break;
7641 	default:
7642 		if (tr->allocated_snapshot) {
7643 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7644 				tracing_reset_online_cpus(&tr->max_buffer);
7645 			else
7646 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7647 		}
7648 		break;
7649 	}
7650 
7651 	if (ret >= 0) {
7652 		*ppos += cnt;
7653 		ret = cnt;
7654 	}
7655 out:
7656 	mutex_unlock(&trace_types_lock);
7657 	return ret;
7658 }
7659 
7660 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7661 {
7662 	struct seq_file *m = file->private_data;
7663 	int ret;
7664 
7665 	ret = tracing_release(inode, file);
7666 
7667 	if (file->f_mode & FMODE_READ)
7668 		return ret;
7669 
7670 	/* If write only, the seq_file is just a stub */
7671 	if (m)
7672 		kfree(m->private);
7673 	kfree(m);
7674 
7675 	return 0;
7676 }
7677 
/*
 * Forward declarations: snapshot_raw_open() and snapshot_raw_fops below
 * reference these tracing_buffers_*() handlers ahead of their definitions.
 */
static int tracing_buffers_open(struct inode *inode, struct file *filp);
static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
				    size_t count, loff_t *ppos);
static int tracing_buffers_release(struct inode *inode, struct file *file);
static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7684 
7685 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7686 {
7687 	struct ftrace_buffer_info *info;
7688 	int ret;
7689 
7690 	/* The following checks for tracefs lockdown */
7691 	ret = tracing_buffers_open(inode, filp);
7692 	if (ret < 0)
7693 		return ret;
7694 
7695 	info = filp->private_data;
7696 
7697 	if (info->iter.trace->use_max_tr) {
7698 		tracing_buffers_release(inode, filp);
7699 		return -EBUSY;
7700 	}
7701 
7702 	info->iter.snapshot = true;
7703 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7704 
7705 	return ret;
7706 }
7707 
7708 #endif /* CONFIG_TRACER_SNAPSHOT */
7709 
7710 
7711 static const struct file_operations tracing_thresh_fops = {
7712 	.open		= tracing_open_generic,
7713 	.read		= tracing_thresh_read,
7714 	.write		= tracing_thresh_write,
7715 	.llseek		= generic_file_llseek,
7716 };
7717 
7718 #ifdef CONFIG_TRACER_MAX_TRACE
7719 static const struct file_operations tracing_max_lat_fops = {
7720 	.open		= tracing_open_generic,
7721 	.read		= tracing_max_lat_read,
7722 	.write		= tracing_max_lat_write,
7723 	.llseek		= generic_file_llseek,
7724 };
7725 #endif
7726 
7727 static const struct file_operations set_tracer_fops = {
7728 	.open		= tracing_open_generic,
7729 	.read		= tracing_set_trace_read,
7730 	.write		= tracing_set_trace_write,
7731 	.llseek		= generic_file_llseek,
7732 };
7733 
7734 static const struct file_operations tracing_pipe_fops = {
7735 	.open		= tracing_open_pipe,
7736 	.poll		= tracing_poll_pipe,
7737 	.read		= tracing_read_pipe,
7738 	.splice_read	= tracing_splice_read_pipe,
7739 	.release	= tracing_release_pipe,
7740 	.llseek		= no_llseek,
7741 };
7742 
7743 static const struct file_operations tracing_entries_fops = {
7744 	.open		= tracing_open_generic_tr,
7745 	.read		= tracing_entries_read,
7746 	.write		= tracing_entries_write,
7747 	.llseek		= generic_file_llseek,
7748 	.release	= tracing_release_generic_tr,
7749 };
7750 
7751 static const struct file_operations tracing_total_entries_fops = {
7752 	.open		= tracing_open_generic_tr,
7753 	.read		= tracing_total_entries_read,
7754 	.llseek		= generic_file_llseek,
7755 	.release	= tracing_release_generic_tr,
7756 };
7757 
7758 static const struct file_operations tracing_free_buffer_fops = {
7759 	.open		= tracing_open_generic_tr,
7760 	.write		= tracing_free_buffer_write,
7761 	.release	= tracing_free_buffer_release,
7762 };
7763 
7764 static const struct file_operations tracing_mark_fops = {
7765 	.open		= tracing_mark_open,
7766 	.write		= tracing_mark_write,
7767 	.release	= tracing_release_generic_tr,
7768 };
7769 
7770 static const struct file_operations tracing_mark_raw_fops = {
7771 	.open		= tracing_mark_open,
7772 	.write		= tracing_mark_raw_write,
7773 	.release	= tracing_release_generic_tr,
7774 };
7775 
7776 static const struct file_operations trace_clock_fops = {
7777 	.open		= tracing_clock_open,
7778 	.read		= seq_read,
7779 	.llseek		= seq_lseek,
7780 	.release	= tracing_single_release_tr,
7781 	.write		= tracing_clock_write,
7782 };
7783 
7784 static const struct file_operations trace_time_stamp_mode_fops = {
7785 	.open		= tracing_time_stamp_mode_open,
7786 	.read		= seq_read,
7787 	.llseek		= seq_lseek,
7788 	.release	= tracing_single_release_tr,
7789 };
7790 
7791 #ifdef CONFIG_TRACER_SNAPSHOT
7792 static const struct file_operations snapshot_fops = {
7793 	.open		= tracing_snapshot_open,
7794 	.read		= seq_read,
7795 	.write		= tracing_snapshot_write,
7796 	.llseek		= tracing_lseek,
7797 	.release	= tracing_snapshot_release,
7798 };
7799 
7800 static const struct file_operations snapshot_raw_fops = {
7801 	.open		= snapshot_raw_open,
7802 	.read		= tracing_buffers_read,
7803 	.release	= tracing_buffers_release,
7804 	.splice_read	= tracing_buffers_splice_read,
7805 	.llseek		= no_llseek,
7806 };
7807 
7808 #endif /* CONFIG_TRACER_SNAPSHOT */
7809 
7810 /*
7811  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7812  * @filp: The active open file structure
7813  * @ubuf: The userspace provided buffer to read value into
7814  * @cnt: The maximum number of bytes to read
7815  * @ppos: The current "file" position
7816  *
7817  * This function implements the write interface for a struct trace_min_max_param.
7818  * The filp->private_data must point to a trace_min_max_param structure that
7819  * defines where to write the value, the min and the max acceptable values,
7820  * and a lock to protect the write.
7821  */
7822 static ssize_t
7823 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7824 {
7825 	struct trace_min_max_param *param = filp->private_data;
7826 	u64 val;
7827 	int err;
7828 
7829 	if (!param)
7830 		return -EFAULT;
7831 
7832 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7833 	if (err)
7834 		return err;
7835 
7836 	if (param->lock)
7837 		mutex_lock(param->lock);
7838 
7839 	if (param->min && val < *param->min)
7840 		err = -EINVAL;
7841 
7842 	if (param->max && val > *param->max)
7843 		err = -EINVAL;
7844 
7845 	if (!err)
7846 		*param->val = val;
7847 
7848 	if (param->lock)
7849 		mutex_unlock(param->lock);
7850 
7851 	if (err)
7852 		return err;
7853 
7854 	return cnt;
7855 }
7856 
7857 /*
7858  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7859  * @filp: The active open file structure
7860  * @ubuf: The userspace provided buffer to read value into
7861  * @cnt: The maximum number of bytes to read
7862  * @ppos: The current "file" position
7863  *
7864  * This function implements the read interface for a struct trace_min_max_param.
7865  * The filp->private_data must point to a trace_min_max_param struct with valid
7866  * data.
7867  */
7868 static ssize_t
7869 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7870 {
7871 	struct trace_min_max_param *param = filp->private_data;
7872 	char buf[U64_STR_SIZE];
7873 	int len;
7874 	u64 val;
7875 
7876 	if (!param)
7877 		return -EFAULT;
7878 
7879 	val = *param->val;
7880 
7881 	if (cnt > sizeof(buf))
7882 		cnt = sizeof(buf);
7883 
7884 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7885 
7886 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7887 }
7888 
7889 const struct file_operations trace_min_max_fops = {
7890 	.open		= tracing_open_generic,
7891 	.read		= trace_min_max_read,
7892 	.write		= trace_min_max_write,
7893 };
7894 
/* Cap on the number of entries allocated for one trace array's error log */
#define TRACING_LOG_ERRS_MAX	8
/* Size of the location string buffer in struct tracing_log_err */
#define TRACING_LOG_LOC_MAX	128

/* Text placed in front of the offending command in each log entry */
#define CMD_PREFIX "  Command: "
7899 
/* Details of one logged tracing error, as supplied to tracing_log_err() */
struct err_info {
	const char	**errs;	/* ptr to loc-specific array of err strings */
	u8		type;	/* index into errs -> specific err string */
	u16		pos;	/* caret position */
	u64		ts;	/* local_clock() value taken when the error was logged */
};
7906 
/*
 * One entry on a trace array's err_log list.  @cmd is allocated separately
 * from the entry itself, sized for the command being logged.
 */
struct tracing_log_err {
	struct list_head	list;
	struct err_info		info;
	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
	char			*cmd;                     /* what caused err */
};
7913 
/* Serializes updates to, and iteration over, the trace arrays' err_log lists */
static DEFINE_MUTEX(tracing_err_log_lock);
7915 
7916 static struct tracing_log_err *alloc_tracing_log_err(int len)
7917 {
7918 	struct tracing_log_err *err;
7919 
7920 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7921 	if (!err)
7922 		return ERR_PTR(-ENOMEM);
7923 
7924 	err->cmd = kzalloc(len, GFP_KERNEL);
7925 	if (!err->cmd) {
7926 		kfree(err);
7927 		return ERR_PTR(-ENOMEM);
7928 	}
7929 
7930 	return err;
7931 }
7932 
7933 static void free_tracing_log_err(struct tracing_log_err *err)
7934 {
7935 	kfree(err->cmd);
7936 	kfree(err);
7937 }
7938 
7939 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7940 						   int len)
7941 {
7942 	struct tracing_log_err *err;
7943 	char *cmd;
7944 
7945 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7946 		err = alloc_tracing_log_err(len);
7947 		if (PTR_ERR(err) != -ENOMEM)
7948 			tr->n_err_log_entries++;
7949 
7950 		return err;
7951 	}
7952 	cmd = kzalloc(len, GFP_KERNEL);
7953 	if (!cmd)
7954 		return ERR_PTR(-ENOMEM);
7955 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7956 	kfree(err->cmd);
7957 	err->cmd = cmd;
7958 	list_del(&err->list);
7959 
7960 	return err;
7961 }
7962 
/**
 * err_pos - find the position of a string within a command for error careting
 * @cmd: The tracing command that caused the error
 * @str: The string to position the caret at within @cmd
 *
 * Looks up the first occurrence of @str within @cmd.  The result is
 * suitable as the caret position argument of tracing_log_err().
 *
 * An empty @cmd triggers a WARN_ON().
 *
 * Returns the index within @cmd of the first occurrence of @str or 0
 * if @str was not found or @cmd is empty.
 */
unsigned int err_pos(char *cmd, const char *str)
{
	char *match;

	if (WARN_ON(!strlen(cmd)))
		return 0;

	match = strstr(cmd, str);
	if (!match)
		return 0;

	return match - cmd;
}
7988 
7989 /**
7990  * tracing_log_err - write an error to the tracing error log
7991  * @tr: The associated trace array for the error (NULL for top level array)
7992  * @loc: A string describing where the error occurred
7993  * @cmd: The tracing command that caused the error
7994  * @errs: The array of loc-specific static error strings
7995  * @type: The index into errs[], which produces the specific static err string
7996  * @pos: The position the caret should be placed in the cmd
7997  *
7998  * Writes an error into tracing/error_log of the form:
7999  *
8000  * <loc>: error: <text>
8001  *   Command: <cmd>
8002  *              ^
8003  *
8004  * tracing/error_log is a small log file containing the last
8005  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8006  * unless there has been a tracing error, and the error log can be
8007  * cleared and have its memory freed by writing the empty string in
8008  * truncation mode to it i.e. echo > tracing/error_log.
8009  *
8010  * NOTE: the @errs array along with the @type param are used to
8011  * produce a static error string - this string is not copied and saved
8012  * when the error is logged - only a pointer to it is saved.  See
8013  * existing callers for examples of how static strings are typically
8014  * defined for use with tracing_log_err().
8015  */
8016 void tracing_log_err(struct trace_array *tr,
8017 		     const char *loc, const char *cmd,
8018 		     const char **errs, u8 type, u16 pos)
8019 {
8020 	struct tracing_log_err *err;
8021 	int len = 0;
8022 
8023 	if (!tr)
8024 		tr = &global_trace;
8025 
8026 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8027 
8028 	mutex_lock(&tracing_err_log_lock);
8029 	err = get_tracing_log_err(tr, len);
8030 	if (PTR_ERR(err) == -ENOMEM) {
8031 		mutex_unlock(&tracing_err_log_lock);
8032 		return;
8033 	}
8034 
8035 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8036 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8037 
8038 	err->info.errs = errs;
8039 	err->info.type = type;
8040 	err->info.pos = pos;
8041 	err->info.ts = local_clock();
8042 
8043 	list_add_tail(&err->list, &tr->err_log);
8044 	mutex_unlock(&tracing_err_log_lock);
8045 }
8046 
8047 static void clear_tracing_err_log(struct trace_array *tr)
8048 {
8049 	struct tracing_log_err *err, *next;
8050 
8051 	mutex_lock(&tracing_err_log_lock);
8052 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8053 		list_del(&err->list);
8054 		free_tracing_log_err(err);
8055 	}
8056 
8057 	tr->n_err_log_entries = 0;
8058 	mutex_unlock(&tracing_err_log_lock);
8059 }
8060 
8061 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8062 {
8063 	struct trace_array *tr = m->private;
8064 
8065 	mutex_lock(&tracing_err_log_lock);
8066 
8067 	return seq_list_start(&tr->err_log, *pos);
8068 }
8069 
8070 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8071 {
8072 	struct trace_array *tr = m->private;
8073 
8074 	return seq_list_next(v, &tr->err_log, pos);
8075 }
8076 
8077 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8078 {
8079 	mutex_unlock(&tracing_err_log_lock);
8080 }
8081 
8082 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8083 {
8084 	u16 i;
8085 
8086 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8087 		seq_putc(m, ' ');
8088 	for (i = 0; i < pos; i++)
8089 		seq_putc(m, ' ');
8090 	seq_puts(m, "^\n");
8091 }
8092 
8093 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8094 {
8095 	struct tracing_log_err *err = v;
8096 
8097 	if (err) {
8098 		const char *err_text = err->info.errs[err->info.type];
8099 		u64 sec = err->info.ts;
8100 		u32 nsec;
8101 
8102 		nsec = do_div(sec, NSEC_PER_SEC);
8103 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8104 			   err->loc, err_text);
8105 		seq_printf(m, "%s", err->cmd);
8106 		tracing_err_log_show_pos(m, err->info.pos);
8107 	}
8108 
8109 	return 0;
8110 }
8111 
8112 static const struct seq_operations tracing_err_log_seq_ops = {
8113 	.start  = tracing_err_log_seq_start,
8114 	.next   = tracing_err_log_seq_next,
8115 	.stop   = tracing_err_log_seq_stop,
8116 	.show   = tracing_err_log_seq_show
8117 };
8118 
8119 static int tracing_err_log_open(struct inode *inode, struct file *file)
8120 {
8121 	struct trace_array *tr = inode->i_private;
8122 	int ret = 0;
8123 
8124 	ret = tracing_check_open_get_tr(tr);
8125 	if (ret)
8126 		return ret;
8127 
8128 	/* If this file was opened for write, then erase contents */
8129 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8130 		clear_tracing_err_log(tr);
8131 
8132 	if (file->f_mode & FMODE_READ) {
8133 		ret = seq_open(file, &tracing_err_log_seq_ops);
8134 		if (!ret) {
8135 			struct seq_file *m = file->private_data;
8136 			m->private = tr;
8137 		} else {
8138 			trace_array_put(tr);
8139 		}
8140 	}
8141 	return ret;
8142 }
8143 
/*
 * Write handler for the error log file.  The data is not looked at; the
 * whole write is reported as consumed.  (The log itself is cleared by
 * tracing_err_log_open() when the file is opened for write with O_TRUNC.)
 */
static ssize_t tracing_err_log_write(struct file *file,
				     const char __user *buffer,
				     size_t count, loff_t *ppos)
{
	return count;
}
8150 
8151 static int tracing_err_log_release(struct inode *inode, struct file *file)
8152 {
8153 	struct trace_array *tr = inode->i_private;
8154 
8155 	trace_array_put(tr);
8156 
8157 	if (file->f_mode & FMODE_READ)
8158 		seq_release(inode, file);
8159 
8160 	return 0;
8161 }
8162 
8163 static const struct file_operations tracing_err_log_fops = {
8164 	.open           = tracing_err_log_open,
8165 	.write		= tracing_err_log_write,
8166 	.read           = seq_read,
8167 	.llseek         = tracing_lseek,
8168 	.release        = tracing_err_log_release,
8169 };
8170 
8171 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8172 {
8173 	struct trace_array *tr = inode->i_private;
8174 	struct ftrace_buffer_info *info;
8175 	int ret;
8176 
8177 	ret = tracing_check_open_get_tr(tr);
8178 	if (ret)
8179 		return ret;
8180 
8181 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8182 	if (!info) {
8183 		trace_array_put(tr);
8184 		return -ENOMEM;
8185 	}
8186 
8187 	mutex_lock(&trace_types_lock);
8188 
8189 	info->iter.tr		= tr;
8190 	info->iter.cpu_file	= tracing_get_cpu(inode);
8191 	info->iter.trace	= tr->current_trace;
8192 	info->iter.array_buffer = &tr->array_buffer;
8193 	info->spare		= NULL;
8194 	/* Force reading ring buffer for first read */
8195 	info->read		= (unsigned int)-1;
8196 
8197 	filp->private_data = info;
8198 
8199 	tr->trace_ref++;
8200 
8201 	mutex_unlock(&trace_types_lock);
8202 
8203 	ret = nonseekable_open(inode, filp);
8204 	if (ret < 0)
8205 		trace_array_put(tr);
8206 
8207 	return ret;
8208 }
8209 
8210 static __poll_t
8211 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8212 {
8213 	struct ftrace_buffer_info *info = filp->private_data;
8214 	struct trace_iterator *iter = &info->iter;
8215 
8216 	return trace_poll(iter, filp, poll_table);
8217 }
8218 
8219 static ssize_t
8220 tracing_buffers_read(struct file *filp, char __user *ubuf,
8221 		     size_t count, loff_t *ppos)
8222 {
8223 	struct ftrace_buffer_info *info = filp->private_data;
8224 	struct trace_iterator *iter = &info->iter;
8225 	ssize_t ret = 0;
8226 	ssize_t size;
8227 
8228 	if (!count)
8229 		return 0;
8230 
8231 #ifdef CONFIG_TRACER_MAX_TRACE
8232 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8233 		return -EBUSY;
8234 #endif
8235 
8236 	if (!info->spare) {
8237 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8238 							  iter->cpu_file);
8239 		if (IS_ERR(info->spare)) {
8240 			ret = PTR_ERR(info->spare);
8241 			info->spare = NULL;
8242 		} else {
8243 			info->spare_cpu = iter->cpu_file;
8244 		}
8245 	}
8246 	if (!info->spare)
8247 		return ret;
8248 
8249 	/* Do we have previous read data to read? */
8250 	if (info->read < PAGE_SIZE)
8251 		goto read;
8252 
8253  again:
8254 	trace_access_lock(iter->cpu_file);
8255 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8256 				    &info->spare,
8257 				    count,
8258 				    iter->cpu_file, 0);
8259 	trace_access_unlock(iter->cpu_file);
8260 
8261 	if (ret < 0) {
8262 		if (trace_empty(iter)) {
8263 			if ((filp->f_flags & O_NONBLOCK))
8264 				return -EAGAIN;
8265 
8266 			ret = wait_on_pipe(iter, 0);
8267 			if (ret)
8268 				return ret;
8269 
8270 			goto again;
8271 		}
8272 		return 0;
8273 	}
8274 
8275 	info->read = 0;
8276  read:
8277 	size = PAGE_SIZE - info->read;
8278 	if (size > count)
8279 		size = count;
8280 
8281 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8282 	if (ret == size)
8283 		return -EFAULT;
8284 
8285 	size -= ret;
8286 
8287 	*ppos += size;
8288 	info->read += size;
8289 
8290 	return size;
8291 }
8292 
8293 static int tracing_buffers_release(struct inode *inode, struct file *file)
8294 {
8295 	struct ftrace_buffer_info *info = file->private_data;
8296 	struct trace_iterator *iter = &info->iter;
8297 
8298 	mutex_lock(&trace_types_lock);
8299 
8300 	iter->tr->trace_ref--;
8301 
8302 	__trace_array_put(iter->tr);
8303 
8304 	iter->wait_index++;
8305 	/* Make sure the waiters see the new wait_index */
8306 	smp_wmb();
8307 
8308 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8309 
8310 	if (info->spare)
8311 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8312 					   info->spare_cpu, info->spare);
8313 	kvfree(info);
8314 
8315 	mutex_unlock(&trace_types_lock);
8316 
8317 	return 0;
8318 }
8319 
/*
 * Reference-counted handle for one ring buffer read page that has been
 * handed to a pipe by tracing_buffers_splice_read().
 */
struct buffer_ref {
	struct trace_buffer	*buffer;	/* ring buffer @page was allocated from */
	void			*page;		/* page from ring_buffer_alloc_read_page() */
	int			cpu;		/* cpu passed to ring_buffer_free_read_page() */
	refcount_t		refcount;	/* @page and the ref are freed when this hits zero */
};
8326 
8327 static void buffer_ref_release(struct buffer_ref *ref)
8328 {
8329 	if (!refcount_dec_and_test(&ref->refcount))
8330 		return;
8331 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8332 	kfree(ref);
8333 }
8334 
8335 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8336 				    struct pipe_buffer *buf)
8337 {
8338 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8339 
8340 	buffer_ref_release(ref);
8341 	buf->private = 0;
8342 }
8343 
8344 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8345 				struct pipe_buffer *buf)
8346 {
8347 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8348 
8349 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8350 		return false;
8351 
8352 	refcount_inc(&ref->refcount);
8353 	return true;
8354 }
8355 
8356 /* Pipe buffer operations for a buffer. */
8357 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8358 	.release		= buffer_pipe_buf_release,
8359 	.get			= buffer_pipe_buf_get,
8360 };
8361 
8362 /*
8363  * Callback from splice_to_pipe(), if we need to release some pages
8364  * at the end of the spd in case we error'ed out in filling the pipe.
8365  */
8366 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8367 {
8368 	struct buffer_ref *ref =
8369 		(struct buffer_ref *)spd->partial[i].private;
8370 
8371 	buffer_ref_release(ref);
8372 	spd->partial[i].private = 0;
8373 }
8374 
/*
 * Splice handler for the raw ring buffer page files: move whole ring buffer
 * pages into @pipe without copying them through user space.
 *
 * *@ppos must be page aligned and @len at least one page; @len is rounded
 * down to a whole number of pages.  Each page read from the ring buffer is
 * wrapped in a buffer_ref (refcount 1) that travels with the pipe buffer
 * and is released through buffer_pipe_buf_ops / buffer_spd_release().
 *
 * When no page could be read, the call either fails with -EAGAIN
 * (non-blocking) or waits on the ring buffer and retries.
 *
 * Returns the splice_to_pipe() result, or a negative errno.
 */
static ssize_t
tracing_buffers_splice_read(struct file *file, loff_t *ppos,
			    struct pipe_inode_info *pipe, size_t len,
			    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &buffer_pipe_buf_ops,
		.spd_release	= buffer_spd_release,
	};
	struct buffer_ref *ref;
	int entries, i;
	ssize_t ret = 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	/* Only whole, page aligned transfers are supported */
	if (*ppos & (PAGE_SIZE - 1))
		return -EINVAL;

	if (len & (PAGE_SIZE - 1)) {
		if (len < PAGE_SIZE)
			return -EINVAL;
		len &= PAGE_MASK;
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);

	/*
	 * Fill one spd slot per iteration.  Every failure path leaves the
	 * loop with "break", which skips the i++, so i always equals the
	 * number of slots that were completely set up.
	 */
	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
		struct page *page;
		int r;

		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		refcount_set(&ref->refcount, 1);
		ref->buffer = iter->array_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (IS_ERR(ref->page)) {
			ret = PTR_ERR(ref->page);
			ref->page = NULL;
			kfree(ref);
			break;
		}
		ref->cpu = iter->cpu_file;

		r = ring_buffer_read_page(ref->buffer, &ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			/* Nothing was read: give the page back, ret stays as is */
			ring_buffer_free_read_page(ref->buffer, ref->cpu,
						   ref->page);
			kfree(ref);
			break;
		}

		page = virt_to_page(ref->page);

		spd.pages[i] = page;
		spd.partial[i].len = PAGE_SIZE;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += PAGE_SIZE;

		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	/* Authoritative count; also resets nr_pages before a possible retry */
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {
		long wait_index;

		/* An error from the loop above takes precedence over waiting */
		if (ret)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		/* Sampled before sleeping so that a bump during the wait is noticed */
		wait_index = READ_ONCE(iter->wait_index);

		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
		if (ret)
			goto out;

		/* No need to wait after waking up when tracing is off */
		if (!tracer_tracing_is_on(iter->tr))
			goto out;

		/* Make sure we see the new wait_index */
		smp_rmb();
		if (wait_index != iter->wait_index)
			goto out;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
out:
	splice_shrink_spd(&spd);

	return ret;
}
8496 
8497 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8498 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8499 {
8500 	struct ftrace_buffer_info *info = file->private_data;
8501 	struct trace_iterator *iter = &info->iter;
8502 
8503 	if (cmd)
8504 		return -ENOIOCTLCMD;
8505 
8506 	mutex_lock(&trace_types_lock);
8507 
8508 	iter->wait_index++;
8509 	/* Make sure the waiters see the new wait_index */
8510 	smp_wmb();
8511 
8512 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8513 
8514 	mutex_unlock(&trace_types_lock);
8515 	return 0;
8516 }
8517 
8518 static const struct file_operations tracing_buffers_fops = {
8519 	.open		= tracing_buffers_open,
8520 	.read		= tracing_buffers_read,
8521 	.poll		= tracing_buffers_poll,
8522 	.release	= tracing_buffers_release,
8523 	.splice_read	= tracing_buffers_splice_read,
8524 	.unlocked_ioctl = tracing_buffers_ioctl,
8525 	.llseek		= no_llseek,
8526 };
8527 
8528 static ssize_t
8529 tracing_stats_read(struct file *filp, char __user *ubuf,
8530 		   size_t count, loff_t *ppos)
8531 {
8532 	struct inode *inode = file_inode(filp);
8533 	struct trace_array *tr = inode->i_private;
8534 	struct array_buffer *trace_buf = &tr->array_buffer;
8535 	int cpu = tracing_get_cpu(inode);
8536 	struct trace_seq *s;
8537 	unsigned long cnt;
8538 	unsigned long long t;
8539 	unsigned long usec_rem;
8540 
8541 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8542 	if (!s)
8543 		return -ENOMEM;
8544 
8545 	trace_seq_init(s);
8546 
8547 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8548 	trace_seq_printf(s, "entries: %ld\n", cnt);
8549 
8550 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8551 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8552 
8553 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8554 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8555 
8556 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8557 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8558 
8559 	if (trace_clocks[tr->clock_id].in_ns) {
8560 		/* local or global for trace_clock */
8561 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8562 		usec_rem = do_div(t, USEC_PER_SEC);
8563 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8564 								t, usec_rem);
8565 
8566 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8567 		usec_rem = do_div(t, USEC_PER_SEC);
8568 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8569 	} else {
8570 		/* counter or tsc mode for trace_clock */
8571 		trace_seq_printf(s, "oldest event ts: %llu\n",
8572 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8573 
8574 		trace_seq_printf(s, "now ts: %llu\n",
8575 				ring_buffer_time_stamp(trace_buf->buffer));
8576 	}
8577 
8578 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8579 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8580 
8581 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8582 	trace_seq_printf(s, "read events: %ld\n", cnt);
8583 
8584 	count = simple_read_from_buffer(ubuf, count, ppos,
8585 					s->buffer, trace_seq_used(s));
8586 
8587 	kfree(s);
8588 
8589 	return count;
8590 }
8591 
8592 static const struct file_operations tracing_stats_fops = {
8593 	.open		= tracing_open_generic_tr,
8594 	.read		= tracing_stats_read,
8595 	.llseek		= generic_file_llseek,
8596 	.release	= tracing_release_generic_tr,
8597 };
8598 
8599 #ifdef CONFIG_DYNAMIC_FTRACE
8600 
8601 static ssize_t
8602 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8603 		  size_t cnt, loff_t *ppos)
8604 {
8605 	ssize_t ret;
8606 	char *buf;
8607 	int r;
8608 
8609 	/* 256 should be plenty to hold the amount needed */
8610 	buf = kmalloc(256, GFP_KERNEL);
8611 	if (!buf)
8612 		return -ENOMEM;
8613 
8614 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8615 		      ftrace_update_tot_cnt,
8616 		      ftrace_number_of_pages,
8617 		      ftrace_number_of_groups);
8618 
8619 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8620 	kfree(buf);
8621 	return ret;
8622 }
8623 
8624 static const struct file_operations tracing_dyn_info_fops = {
8625 	.open		= tracing_open_generic,
8626 	.read		= tracing_read_dyn_info,
8627 	.llseek		= generic_file_llseek,
8628 };
8629 #endif /* CONFIG_DYNAMIC_FTRACE */
8630 
8631 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/*
 * Probe callback for the unconditional "snapshot" function command:
 * takes a snapshot of @tr every time it is invoked. The remaining
 * arguments are unused.
 */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
		struct trace_array *tr, struct ftrace_probe_ops *ops,
		void *data)
{
	tracing_snapshot_instance(tr);
}
8639 
8640 static void
8641 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8642 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8643 		      void *data)
8644 {
8645 	struct ftrace_func_mapper *mapper = data;
8646 	long *count = NULL;
8647 
8648 	if (mapper)
8649 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8650 
8651 	if (count) {
8652 
8653 		if (*count <= 0)
8654 			return;
8655 
8656 		(*count)--;
8657 	}
8658 
8659 	tracing_snapshot_instance(tr);
8660 }
8661 
8662 static int
8663 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8664 		      struct ftrace_probe_ops *ops, void *data)
8665 {
8666 	struct ftrace_func_mapper *mapper = data;
8667 	long *count = NULL;
8668 
8669 	seq_printf(m, "%ps:", (void *)ip);
8670 
8671 	seq_puts(m, "snapshot");
8672 
8673 	if (mapper)
8674 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8675 
8676 	if (count)
8677 		seq_printf(m, ":count=%ld\n", *count);
8678 	else
8679 		seq_puts(m, ":unlimited\n");
8680 
8681 	return 0;
8682 }
8683 
8684 static int
8685 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8686 		     unsigned long ip, void *init_data, void **data)
8687 {
8688 	struct ftrace_func_mapper *mapper = *data;
8689 
8690 	if (!mapper) {
8691 		mapper = allocate_ftrace_func_mapper();
8692 		if (!mapper)
8693 			return -ENOMEM;
8694 		*data = mapper;
8695 	}
8696 
8697 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8698 }
8699 
8700 static void
8701 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8702 		     unsigned long ip, void *data)
8703 {
8704 	struct ftrace_func_mapper *mapper = data;
8705 
8706 	if (!ip) {
8707 		if (!mapper)
8708 			return;
8709 		free_ftrace_func_mapper(mapper, NULL);
8710 		return;
8711 	}
8712 
8713 	ftrace_func_mapper_remove_ip(mapper, ip);
8714 }
8715 
8716 static struct ftrace_probe_ops snapshot_probe_ops = {
8717 	.func			= ftrace_snapshot,
8718 	.print			= ftrace_snapshot_print,
8719 };
8720 
8721 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8722 	.func			= ftrace_count_snapshot,
8723 	.print			= ftrace_snapshot_print,
8724 	.init			= ftrace_snapshot_init,
8725 	.free			= ftrace_snapshot_free,
8726 };
8727 
8728 static int
8729 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8730 			       char *glob, char *cmd, char *param, int enable)
8731 {
8732 	struct ftrace_probe_ops *ops;
8733 	void *count = (void *)-1;
8734 	char *number;
8735 	int ret;
8736 
8737 	if (!tr)
8738 		return -ENODEV;
8739 
8740 	/* hash funcs only work with set_ftrace_filter */
8741 	if (!enable)
8742 		return -EINVAL;
8743 
8744 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8745 
8746 	if (glob[0] == '!')
8747 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8748 
8749 	if (!param)
8750 		goto out_reg;
8751 
8752 	number = strsep(&param, ":");
8753 
8754 	if (!strlen(number))
8755 		goto out_reg;
8756 
8757 	/*
8758 	 * We use the callback data field (which is a pointer)
8759 	 * as our counter.
8760 	 */
8761 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8762 	if (ret)
8763 		return ret;
8764 
8765  out_reg:
8766 	ret = tracing_alloc_snapshot_instance(tr);
8767 	if (ret < 0)
8768 		goto out;
8769 
8770 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8771 
8772  out:
8773 	return ret < 0 ? ret : 0;
8774 }
8775 
8776 static struct ftrace_func_command ftrace_snapshot_cmd = {
8777 	.name			= "snapshot",
8778 	.func			= ftrace_trace_snapshot_callback,
8779 };
8780 
8781 static __init int register_snapshot_cmd(void)
8782 {
8783 	return register_ftrace_command(&ftrace_snapshot_cmd);
8784 }
8785 #else
8786 static inline __init int register_snapshot_cmd(void) { return 0; }
8787 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8788 
8789 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8790 {
8791 	if (WARN_ON(!tr->dir))
8792 		return ERR_PTR(-ENODEV);
8793 
8794 	/* Top directory uses NULL as the parent */
8795 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8796 		return NULL;
8797 
8798 	/* All sub buffers have a descriptor */
8799 	return tr->dir;
8800 }
8801 
8802 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8803 {
8804 	struct dentry *d_tracer;
8805 
8806 	if (tr->percpu_dir)
8807 		return tr->percpu_dir;
8808 
8809 	d_tracer = tracing_get_dentry(tr);
8810 	if (IS_ERR(d_tracer))
8811 		return NULL;
8812 
8813 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8814 
8815 	MEM_FAIL(!tr->percpu_dir,
8816 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8817 
8818 	return tr->percpu_dir;
8819 }
8820 
8821 static struct dentry *
8822 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8823 		      void *data, long cpu, const struct file_operations *fops)
8824 {
8825 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8826 
8827 	if (ret) /* See tracing_get_cpu() */
8828 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8829 	return ret;
8830 }
8831 
8832 static void
8833 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8834 {
8835 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8836 	struct dentry *d_cpu;
8837 	char cpu_dir[30]; /* 30 characters should be more than enough */
8838 
8839 	if (!d_percpu)
8840 		return;
8841 
8842 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8843 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8844 	if (!d_cpu) {
8845 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8846 		return;
8847 	}
8848 
8849 	/* per cpu trace_pipe */
8850 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8851 				tr, cpu, &tracing_pipe_fops);
8852 
8853 	/* per cpu trace */
8854 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8855 				tr, cpu, &tracing_fops);
8856 
8857 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8858 				tr, cpu, &tracing_buffers_fops);
8859 
8860 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8861 				tr, cpu, &tracing_stats_fops);
8862 
8863 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8864 				tr, cpu, &tracing_entries_fops);
8865 
8866 #ifdef CONFIG_TRACER_SNAPSHOT
8867 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8868 				tr, cpu, &snapshot_fops);
8869 
8870 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8871 				tr, cpu, &snapshot_raw_fops);
8872 #endif
8873 }
8874 
8875 #ifdef CONFIG_FTRACE_SELFTEST
8876 /* Let selftest have access to static functions in this file */
8877 #include "trace_selftest.c"
8878 #endif
8879 
8880 static ssize_t
8881 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8882 			loff_t *ppos)
8883 {
8884 	struct trace_option_dentry *topt = filp->private_data;
8885 	char *buf;
8886 
8887 	if (topt->flags->val & topt->opt->bit)
8888 		buf = "1\n";
8889 	else
8890 		buf = "0\n";
8891 
8892 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8893 }
8894 
8895 static ssize_t
8896 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8897 			 loff_t *ppos)
8898 {
8899 	struct trace_option_dentry *topt = filp->private_data;
8900 	unsigned long val;
8901 	int ret;
8902 
8903 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8904 	if (ret)
8905 		return ret;
8906 
8907 	if (val != 0 && val != 1)
8908 		return -EINVAL;
8909 
8910 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8911 		mutex_lock(&trace_types_lock);
8912 		ret = __set_tracer_option(topt->tr, topt->flags,
8913 					  topt->opt, !val);
8914 		mutex_unlock(&trace_types_lock);
8915 		if (ret)
8916 			return ret;
8917 	}
8918 
8919 	*ppos += cnt;
8920 
8921 	return cnt;
8922 }
8923 
8924 
8925 static const struct file_operations trace_options_fops = {
8926 	.open = tracing_open_generic,
8927 	.read = trace_options_read,
8928 	.write = trace_options_write,
8929 	.llseek	= generic_file_llseek,
8930 };
8931 
8932 /*
8933  * In order to pass in both the trace_array descriptor as well as the index
8934  * to the flag that the trace option file represents, the trace_array
8935  * has a character array of trace_flags_index[], which holds the index
8936  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8937  * The address of this character array is passed to the flag option file
8938  * read/write callbacks.
8939  *
8940  * In order to extract both the index and the trace_array descriptor,
8941  * get_tr_index() uses the following algorithm.
8942  *
8943  *   idx = *ptr;
8944  *
8945  * As the pointer itself contains the address of the index (remember
8946  * index[1] == 1).
8947  *
8948  * Then to get the trace_array descriptor, by subtracting that index
8949  * from the ptr, we get to the start of the index itself.
8950  *
8951  *   ptr - idx == &index[0]
8952  *
8953  * Then a simple container_of() from that pointer gets us to the
8954  * trace_array descriptor.
8955  */
8956 static void get_tr_index(void *data, struct trace_array **ptr,
8957 			 unsigned int *pindex)
8958 {
8959 	*pindex = *(unsigned char *)data;
8960 
8961 	*ptr = container_of(data - *pindex, struct trace_array,
8962 			    trace_flags_index);
8963 }
8964 
8965 static ssize_t
8966 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8967 			loff_t *ppos)
8968 {
8969 	void *tr_index = filp->private_data;
8970 	struct trace_array *tr;
8971 	unsigned int index;
8972 	char *buf;
8973 
8974 	get_tr_index(tr_index, &tr, &index);
8975 
8976 	if (tr->trace_flags & (1 << index))
8977 		buf = "1\n";
8978 	else
8979 		buf = "0\n";
8980 
8981 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8982 }
8983 
8984 static ssize_t
8985 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8986 			 loff_t *ppos)
8987 {
8988 	void *tr_index = filp->private_data;
8989 	struct trace_array *tr;
8990 	unsigned int index;
8991 	unsigned long val;
8992 	int ret;
8993 
8994 	get_tr_index(tr_index, &tr, &index);
8995 
8996 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8997 	if (ret)
8998 		return ret;
8999 
9000 	if (val != 0 && val != 1)
9001 		return -EINVAL;
9002 
9003 	mutex_lock(&event_mutex);
9004 	mutex_lock(&trace_types_lock);
9005 	ret = set_tracer_flag(tr, 1 << index, val);
9006 	mutex_unlock(&trace_types_lock);
9007 	mutex_unlock(&event_mutex);
9008 
9009 	if (ret < 0)
9010 		return ret;
9011 
9012 	*ppos += cnt;
9013 
9014 	return cnt;
9015 }
9016 
9017 static const struct file_operations trace_options_core_fops = {
9018 	.open = tracing_open_generic,
9019 	.read = trace_options_core_read,
9020 	.write = trace_options_core_write,
9021 	.llseek = generic_file_llseek,
9022 };
9023 
9024 struct dentry *trace_create_file(const char *name,
9025 				 umode_t mode,
9026 				 struct dentry *parent,
9027 				 void *data,
9028 				 const struct file_operations *fops)
9029 {
9030 	struct dentry *ret;
9031 
9032 	ret = tracefs_create_file(name, mode, parent, data, fops);
9033 	if (!ret)
9034 		pr_warn("Could not create tracefs '%s' entry\n", name);
9035 
9036 	return ret;
9037 }
9038 
9039 
9040 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9041 {
9042 	struct dentry *d_tracer;
9043 
9044 	if (tr->options)
9045 		return tr->options;
9046 
9047 	d_tracer = tracing_get_dentry(tr);
9048 	if (IS_ERR(d_tracer))
9049 		return NULL;
9050 
9051 	tr->options = tracefs_create_dir("options", d_tracer);
9052 	if (!tr->options) {
9053 		pr_warn("Could not create tracefs directory 'options'\n");
9054 		return NULL;
9055 	}
9056 
9057 	return tr->options;
9058 }
9059 
9060 static void
9061 create_trace_option_file(struct trace_array *tr,
9062 			 struct trace_option_dentry *topt,
9063 			 struct tracer_flags *flags,
9064 			 struct tracer_opt *opt)
9065 {
9066 	struct dentry *t_options;
9067 
9068 	t_options = trace_options_init_dentry(tr);
9069 	if (!t_options)
9070 		return;
9071 
9072 	topt->flags = flags;
9073 	topt->opt = opt;
9074 	topt->tr = tr;
9075 
9076 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9077 					t_options, topt, &trace_options_fops);
9078 
9079 }
9080 
9081 static void
9082 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9083 {
9084 	struct trace_option_dentry *topts;
9085 	struct trace_options *tr_topts;
9086 	struct tracer_flags *flags;
9087 	struct tracer_opt *opts;
9088 	int cnt;
9089 	int i;
9090 
9091 	if (!tracer)
9092 		return;
9093 
9094 	flags = tracer->flags;
9095 
9096 	if (!flags || !flags->opts)
9097 		return;
9098 
9099 	/*
9100 	 * If this is an instance, only create flags for tracers
9101 	 * the instance may have.
9102 	 */
9103 	if (!trace_ok_for_array(tracer, tr))
9104 		return;
9105 
9106 	for (i = 0; i < tr->nr_topts; i++) {
9107 		/* Make sure there's no duplicate flags. */
9108 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9109 			return;
9110 	}
9111 
9112 	opts = flags->opts;
9113 
9114 	for (cnt = 0; opts[cnt].name; cnt++)
9115 		;
9116 
9117 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9118 	if (!topts)
9119 		return;
9120 
9121 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9122 			    GFP_KERNEL);
9123 	if (!tr_topts) {
9124 		kfree(topts);
9125 		return;
9126 	}
9127 
9128 	tr->topts = tr_topts;
9129 	tr->topts[tr->nr_topts].tracer = tracer;
9130 	tr->topts[tr->nr_topts].topts = topts;
9131 	tr->nr_topts++;
9132 
9133 	for (cnt = 0; opts[cnt].name; cnt++) {
9134 		create_trace_option_file(tr, &topts[cnt], flags,
9135 					 &opts[cnt]);
9136 		MEM_FAIL(topts[cnt].entry == NULL,
9137 			  "Failed to create trace option: %s",
9138 			  opts[cnt].name);
9139 	}
9140 }
9141 
9142 static struct dentry *
9143 create_trace_option_core_file(struct trace_array *tr,
9144 			      const char *option, long index)
9145 {
9146 	struct dentry *t_options;
9147 
9148 	t_options = trace_options_init_dentry(tr);
9149 	if (!t_options)
9150 		return NULL;
9151 
9152 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9153 				 (void *)&tr->trace_flags_index[index],
9154 				 &trace_options_core_fops);
9155 }
9156 
9157 static void create_trace_options_dir(struct trace_array *tr)
9158 {
9159 	struct dentry *t_options;
9160 	bool top_level = tr == &global_trace;
9161 	int i;
9162 
9163 	t_options = trace_options_init_dentry(tr);
9164 	if (!t_options)
9165 		return;
9166 
9167 	for (i = 0; trace_options[i]; i++) {
9168 		if (top_level ||
9169 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9170 			create_trace_option_core_file(tr, trace_options[i], i);
9171 	}
9172 }
9173 
9174 static ssize_t
9175 rb_simple_read(struct file *filp, char __user *ubuf,
9176 	       size_t cnt, loff_t *ppos)
9177 {
9178 	struct trace_array *tr = filp->private_data;
9179 	char buf[64];
9180 	int r;
9181 
9182 	r = tracer_tracing_is_on(tr);
9183 	r = sprintf(buf, "%d\n", r);
9184 
9185 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9186 }
9187 
9188 static ssize_t
9189 rb_simple_write(struct file *filp, const char __user *ubuf,
9190 		size_t cnt, loff_t *ppos)
9191 {
9192 	struct trace_array *tr = filp->private_data;
9193 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9194 	unsigned long val;
9195 	int ret;
9196 
9197 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9198 	if (ret)
9199 		return ret;
9200 
9201 	if (buffer) {
9202 		mutex_lock(&trace_types_lock);
9203 		if (!!val == tracer_tracing_is_on(tr)) {
9204 			val = 0; /* do nothing */
9205 		} else if (val) {
9206 			tracer_tracing_on(tr);
9207 			if (tr->current_trace->start)
9208 				tr->current_trace->start(tr);
9209 		} else {
9210 			tracer_tracing_off(tr);
9211 			if (tr->current_trace->stop)
9212 				tr->current_trace->stop(tr);
9213 			/* Wake up any waiters */
9214 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9215 		}
9216 		mutex_unlock(&trace_types_lock);
9217 	}
9218 
9219 	(*ppos)++;
9220 
9221 	return cnt;
9222 }
9223 
9224 static const struct file_operations rb_simple_fops = {
9225 	.open		= tracing_open_generic_tr,
9226 	.read		= rb_simple_read,
9227 	.write		= rb_simple_write,
9228 	.release	= tracing_release_generic_tr,
9229 	.llseek		= default_llseek,
9230 };
9231 
9232 static ssize_t
9233 buffer_percent_read(struct file *filp, char __user *ubuf,
9234 		    size_t cnt, loff_t *ppos)
9235 {
9236 	struct trace_array *tr = filp->private_data;
9237 	char buf[64];
9238 	int r;
9239 
9240 	r = tr->buffer_percent;
9241 	r = sprintf(buf, "%d\n", r);
9242 
9243 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9244 }
9245 
9246 static ssize_t
9247 buffer_percent_write(struct file *filp, const char __user *ubuf,
9248 		     size_t cnt, loff_t *ppos)
9249 {
9250 	struct trace_array *tr = filp->private_data;
9251 	unsigned long val;
9252 	int ret;
9253 
9254 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9255 	if (ret)
9256 		return ret;
9257 
9258 	if (val > 100)
9259 		return -EINVAL;
9260 
9261 	tr->buffer_percent = val;
9262 
9263 	(*ppos)++;
9264 
9265 	return cnt;
9266 }
9267 
9268 static const struct file_operations buffer_percent_fops = {
9269 	.open		= tracing_open_generic_tr,
9270 	.read		= buffer_percent_read,
9271 	.write		= buffer_percent_write,
9272 	.release	= tracing_release_generic_tr,
9273 	.llseek		= default_llseek,
9274 };
9275 
/* The tracefs "instances" directory; NULL for as long as it has not been created */
static struct dentry *trace_instance_dir;

/* Defined further down; needed here by trace_array_create_dir() */
static void init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9280 
9281 static int
9282 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9283 {
9284 	enum ring_buffer_flags rb_flags;
9285 
9286 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9287 
9288 	buf->tr = tr;
9289 
9290 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9291 	if (!buf->buffer)
9292 		return -ENOMEM;
9293 
9294 	buf->data = alloc_percpu(struct trace_array_cpu);
9295 	if (!buf->data) {
9296 		ring_buffer_free(buf->buffer);
9297 		buf->buffer = NULL;
9298 		return -ENOMEM;
9299 	}
9300 
9301 	/* Allocate the first page for all buffers */
9302 	set_buffer_entries(&tr->array_buffer,
9303 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9304 
9305 	return 0;
9306 }
9307 
9308 static void free_trace_buffer(struct array_buffer *buf)
9309 {
9310 	if (buf->buffer) {
9311 		ring_buffer_free(buf->buffer);
9312 		buf->buffer = NULL;
9313 		free_percpu(buf->data);
9314 		buf->data = NULL;
9315 	}
9316 }
9317 
9318 static int allocate_trace_buffers(struct trace_array *tr, int size)
9319 {
9320 	int ret;
9321 
9322 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9323 	if (ret)
9324 		return ret;
9325 
9326 #ifdef CONFIG_TRACER_MAX_TRACE
9327 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9328 				    allocate_snapshot ? size : 1);
9329 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9330 		free_trace_buffer(&tr->array_buffer);
9331 		return -ENOMEM;
9332 	}
9333 	tr->allocated_snapshot = allocate_snapshot;
9334 
9335 	allocate_snapshot = false;
9336 #endif
9337 
9338 	return 0;
9339 }
9340 
9341 static void free_trace_buffers(struct trace_array *tr)
9342 {
9343 	if (!tr)
9344 		return;
9345 
9346 	free_trace_buffer(&tr->array_buffer);
9347 
9348 #ifdef CONFIG_TRACER_MAX_TRACE
9349 	free_trace_buffer(&tr->max_buffer);
9350 #endif
9351 }
9352 
9353 static void init_trace_flags_index(struct trace_array *tr)
9354 {
9355 	int i;
9356 
9357 	/* Used by the trace options files */
9358 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9359 		tr->trace_flags_index[i] = i;
9360 }
9361 
9362 static void __update_tracer_options(struct trace_array *tr)
9363 {
9364 	struct tracer *t;
9365 
9366 	for (t = trace_types; t; t = t->next)
9367 		add_tracer_options(tr, t);
9368 }
9369 
9370 static void update_tracer_options(struct trace_array *tr)
9371 {
9372 	mutex_lock(&trace_types_lock);
9373 	tracer_options_updated = true;
9374 	__update_tracer_options(tr);
9375 	mutex_unlock(&trace_types_lock);
9376 }
9377 
9378 /* Must have trace_types_lock held */
9379 struct trace_array *trace_array_find(const char *instance)
9380 {
9381 	struct trace_array *tr, *found = NULL;
9382 
9383 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9384 		if (tr->name && strcmp(tr->name, instance) == 0) {
9385 			found = tr;
9386 			break;
9387 		}
9388 	}
9389 
9390 	return found;
9391 }
9392 
9393 struct trace_array *trace_array_find_get(const char *instance)
9394 {
9395 	struct trace_array *tr;
9396 
9397 	mutex_lock(&trace_types_lock);
9398 	tr = trace_array_find(instance);
9399 	if (tr)
9400 		tr->ref++;
9401 	mutex_unlock(&trace_types_lock);
9402 
9403 	return tr;
9404 }
9405 
9406 static int trace_array_create_dir(struct trace_array *tr)
9407 {
9408 	int ret;
9409 
9410 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9411 	if (!tr->dir)
9412 		return -EINVAL;
9413 
9414 	ret = event_trace_add_tracer(tr->dir, tr);
9415 	if (ret) {
9416 		tracefs_remove(tr->dir);
9417 		return ret;
9418 	}
9419 
9420 	init_tracer_tracefs(tr, tr->dir);
9421 	__update_tracer_options(tr);
9422 
9423 	return ret;
9424 }
9425 
9426 static struct trace_array *trace_array_create(const char *name)
9427 {
9428 	struct trace_array *tr;
9429 	int ret;
9430 
9431 	ret = -ENOMEM;
9432 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9433 	if (!tr)
9434 		return ERR_PTR(ret);
9435 
9436 	tr->name = kstrdup(name, GFP_KERNEL);
9437 	if (!tr->name)
9438 		goto out_free_tr;
9439 
9440 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9441 		goto out_free_tr;
9442 
9443 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9444 
9445 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9446 
9447 	raw_spin_lock_init(&tr->start_lock);
9448 
9449 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9450 
9451 	tr->current_trace = &nop_trace;
9452 
9453 	INIT_LIST_HEAD(&tr->systems);
9454 	INIT_LIST_HEAD(&tr->events);
9455 	INIT_LIST_HEAD(&tr->hist_vars);
9456 	INIT_LIST_HEAD(&tr->err_log);
9457 
9458 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9459 		goto out_free_tr;
9460 
9461 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9462 		goto out_free_tr;
9463 
9464 	ftrace_init_trace_array(tr);
9465 
9466 	init_trace_flags_index(tr);
9467 
9468 	if (trace_instance_dir) {
9469 		ret = trace_array_create_dir(tr);
9470 		if (ret)
9471 			goto out_free_tr;
9472 	} else
9473 		__trace_early_add_events(tr);
9474 
9475 	list_add(&tr->list, &ftrace_trace_arrays);
9476 
9477 	tr->ref++;
9478 
9479 	return tr;
9480 
9481  out_free_tr:
9482 	ftrace_free_ftrace_ops(tr);
9483 	free_trace_buffers(tr);
9484 	free_cpumask_var(tr->tracing_cpumask);
9485 	kfree(tr->name);
9486 	kfree(tr);
9487 
9488 	return ERR_PTR(ret);
9489 }
9490 
9491 static int instance_mkdir(const char *name)
9492 {
9493 	struct trace_array *tr;
9494 	int ret;
9495 
9496 	mutex_lock(&event_mutex);
9497 	mutex_lock(&trace_types_lock);
9498 
9499 	ret = -EEXIST;
9500 	if (trace_array_find(name))
9501 		goto out_unlock;
9502 
9503 	tr = trace_array_create(name);
9504 
9505 	ret = PTR_ERR_OR_ZERO(tr);
9506 
9507 out_unlock:
9508 	mutex_unlock(&trace_types_lock);
9509 	mutex_unlock(&event_mutex);
9510 	return ret;
9511 }
9512 
9513 /**
9514  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9515  * @name: The name of the trace array to be looked up/created.
9516  *
9517  * Returns pointer to trace array with given name.
9518  * NULL, if it cannot be created.
9519  *
9520  * NOTE: This function increments the reference counter associated with the
9521  * trace array returned. This makes sure it cannot be freed while in use.
9522  * Use trace_array_put() once the trace array is no longer needed.
9523  * If the trace_array is to be freed, trace_array_destroy() needs to
9524  * be called after the trace_array_put(), or simply let user space delete
9525  * it from the tracefs instances directory. But until the
9526  * trace_array_put() is called, user space can not delete it.
9527  *
9528  */
9529 struct trace_array *trace_array_get_by_name(const char *name)
9530 {
9531 	struct trace_array *tr;
9532 
9533 	mutex_lock(&event_mutex);
9534 	mutex_lock(&trace_types_lock);
9535 
9536 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9537 		if (tr->name && strcmp(tr->name, name) == 0)
9538 			goto out_unlock;
9539 	}
9540 
9541 	tr = trace_array_create(name);
9542 
9543 	if (IS_ERR(tr))
9544 		tr = NULL;
9545 out_unlock:
9546 	if (tr)
9547 		tr->ref++;
9548 
9549 	mutex_unlock(&trace_types_lock);
9550 	mutex_unlock(&event_mutex);
9551 	return tr;
9552 }
9553 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9554 
9555 static int __remove_instance(struct trace_array *tr)
9556 {
9557 	int i;
9558 
9559 	/* Reference counter for a newly created trace array = 1. */
9560 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9561 		return -EBUSY;
9562 
9563 	list_del(&tr->list);
9564 
9565 	/* Disable all the flags that were enabled coming in */
9566 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9567 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9568 			set_tracer_flag(tr, 1 << i, 0);
9569 	}
9570 
9571 	tracing_set_nop(tr);
9572 	clear_ftrace_function_probes(tr);
9573 	event_trace_del_tracer(tr);
9574 	ftrace_clear_pids(tr);
9575 	ftrace_destroy_function_files(tr);
9576 	tracefs_remove(tr->dir);
9577 	free_percpu(tr->last_func_repeats);
9578 	free_trace_buffers(tr);
9579 	clear_tracing_err_log(tr);
9580 
9581 	for (i = 0; i < tr->nr_topts; i++) {
9582 		kfree(tr->topts[i].topts);
9583 	}
9584 	kfree(tr->topts);
9585 
9586 	free_cpumask_var(tr->tracing_cpumask);
9587 	kfree(tr->name);
9588 	kfree(tr);
9589 
9590 	return 0;
9591 }
9592 
9593 int trace_array_destroy(struct trace_array *this_tr)
9594 {
9595 	struct trace_array *tr;
9596 	int ret;
9597 
9598 	if (!this_tr)
9599 		return -EINVAL;
9600 
9601 	mutex_lock(&event_mutex);
9602 	mutex_lock(&trace_types_lock);
9603 
9604 	ret = -ENODEV;
9605 
9606 	/* Making sure trace array exists before destroying it. */
9607 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9608 		if (tr == this_tr) {
9609 			ret = __remove_instance(tr);
9610 			break;
9611 		}
9612 	}
9613 
9614 	mutex_unlock(&trace_types_lock);
9615 	mutex_unlock(&event_mutex);
9616 
9617 	return ret;
9618 }
9619 EXPORT_SYMBOL_GPL(trace_array_destroy);
9620 
9621 static int instance_rmdir(const char *name)
9622 {
9623 	struct trace_array *tr;
9624 	int ret;
9625 
9626 	mutex_lock(&event_mutex);
9627 	mutex_lock(&trace_types_lock);
9628 
9629 	ret = -ENODEV;
9630 	tr = trace_array_find(name);
9631 	if (tr)
9632 		ret = __remove_instance(tr);
9633 
9634 	mutex_unlock(&trace_types_lock);
9635 	mutex_unlock(&event_mutex);
9636 
9637 	return ret;
9638 }
9639 
9640 static __init void create_trace_instances(struct dentry *d_tracer)
9641 {
9642 	struct trace_array *tr;
9643 
9644 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9645 							 instance_mkdir,
9646 							 instance_rmdir);
9647 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9648 		return;
9649 
9650 	mutex_lock(&event_mutex);
9651 	mutex_lock(&trace_types_lock);
9652 
9653 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9654 		if (!tr->name)
9655 			continue;
9656 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9657 			     "Failed to create instance directory\n"))
9658 			break;
9659 	}
9660 
9661 	mutex_unlock(&trace_types_lock);
9662 	mutex_unlock(&event_mutex);
9663 }
9664 
/*
 * init_tracer_tracefs - create the control files of a trace array
 * @tr: trace array the files operate on
 * @d_tracer: parent dentry passed to every trace_create_file() call
 *
 * Besides creating the files, this sets tr->buffer_percent to 50 and
 * records the "ftrace:print" event file in tr->trace_marker_file.
 */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	struct trace_event_file *file;
	int cpu;

	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
			tr, &show_traces_fops);

	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
			tr, &set_tracer_fops);

	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_cpumask_fops);

	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_iter_fops);

	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_fops);

	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_pipe_fops);

	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_entries_fops);

	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_total_entries_fops);

	/* The next two files use explicit write-only modes */
	trace_create_file("free_buffer", 0200, d_tracer,
			  tr, &tracing_free_buffer_fops);

	trace_create_file("trace_marker", 0220, d_tracer,
			  tr, &tracing_mark_fops);

	/*
	 * Give the ftrace:print event a "trigger" file when the event and
	 * its directory exist. The lookup result (possibly NULL) is kept
	 * in tr->trace_marker_file either way.
	 */
	file = __find_event_file(tr, "ftrace", "print");
	if (file && file->dir)
		trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
				  file, &event_trigger_fops);
	tr->trace_marker_file = file;

	trace_create_file("trace_marker_raw", 0220, d_tracer,
			  tr, &tracing_mark_raw_fops);

	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
			  &trace_clock_fops);

	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
			  tr, &rb_simple_fops);

	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
			  &trace_time_stamp_mode_fops);

	/* Initial value, set before the "buffer_percent" file is created */
	tr->buffer_percent = 50;

	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
			tr, &buffer_percent_fops);

	create_trace_options_dir(tr);

#ifdef CONFIG_TRACER_MAX_TRACE
	trace_create_maxlat_file(tr, d_tracer);
#endif

	/* A failure here is only reported; the remaining files are still created */
	if (ftrace_create_function_files(tr, d_tracer))
		MEM_FAIL(1, "Could not allocate function filter files");

#ifdef CONFIG_TRACER_SNAPSHOT
	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
			  tr, &snapshot_fops);
#endif

	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_err_log_fops);

	/* Per-CPU files, one set per tracing CPU */
	for_each_tracing_cpu(cpu)
		tracing_init_tracefs_percpu(tr, cpu);

	ftrace_init_tracefs(tr, d_tracer);
}
9746 
9747 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9748 {
9749 	struct vfsmount *mnt;
9750 	struct file_system_type *type;
9751 
9752 	/*
9753 	 * To maintain backward compatibility for tools that mount
9754 	 * debugfs to get to the tracing facility, tracefs is automatically
9755 	 * mounted to the debugfs/tracing directory.
9756 	 */
9757 	type = get_fs_type("tracefs");
9758 	if (!type)
9759 		return NULL;
9760 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9761 	put_filesystem(type);
9762 	if (IS_ERR(mnt))
9763 		return NULL;
9764 	mntget(mnt);
9765 
9766 	return mnt;
9767 }
9768 
9769 /**
9770  * tracing_init_dentry - initialize top level trace array
9771  *
9772  * This is called when creating files or directories in the tracing
9773  * directory. It is called via fs_initcall() by any of the boot up code
9774  * and expects to return the dentry of the top level tracing directory.
9775  */
9776 int tracing_init_dentry(void)
9777 {
9778 	struct trace_array *tr = &global_trace;
9779 
9780 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9781 		pr_warn("Tracing disabled due to lockdown\n");
9782 		return -EPERM;
9783 	}
9784 
9785 	/* The top level trace array uses  NULL as parent */
9786 	if (tr->dir)
9787 		return 0;
9788 
9789 	if (WARN_ON(!tracefs_initialized()))
9790 		return -ENODEV;
9791 
9792 	/*
9793 	 * As there may still be users that expect the tracing
9794 	 * files to exist in debugfs/tracing, we must automount
9795 	 * the tracefs file system there, so older tools still
9796 	 * work with the newer kernel.
9797 	 */
9798 	tr->dir = debugfs_create_automount("tracing", NULL,
9799 					   trace_automount, NULL);
9800 
9801 	return 0;
9802 }
9803 
/*
 * Start and end of the array of built-in eval map pointers. Both symbols
 * are defined outside this file (presumably by the linker script --
 * TODO confirm); eval_map_work_func() uses their difference as the
 * number of entries.
 */
extern struct trace_eval_map *__start_ftrace_eval_maps[];
extern struct trace_eval_map *__stop_ftrace_eval_maps[];

/*
 * Boot-time only (__initdata) workqueue and work items:
 * eval_map_work inserts the built-in eval maps, tracerfs_init_work
 * creates the top level tracefs files. Both are queued on eval_map_wq.
 */
static struct workqueue_struct *eval_map_wq __initdata;
static struct work_struct eval_map_work __initdata;
static struct work_struct tracerfs_init_work __initdata;
9810 
9811 static void __init eval_map_work_func(struct work_struct *work)
9812 {
9813 	int len;
9814 
9815 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9816 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9817 }
9818 
9819 static int __init trace_eval_init(void)
9820 {
9821 	INIT_WORK(&eval_map_work, eval_map_work_func);
9822 
9823 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9824 	if (!eval_map_wq) {
9825 		pr_err("Unable to allocate eval_map_wq\n");
9826 		/* Do work here */
9827 		eval_map_work_func(&eval_map_work);
9828 		return -ENOMEM;
9829 	}
9830 
9831 	queue_work(eval_map_wq, &eval_map_work);
9832 	return 0;
9833 }
9834 
9835 subsys_initcall(trace_eval_init);
9836 
9837 static int __init trace_eval_sync(void)
9838 {
9839 	/* Make sure the eval map updates are finished */
9840 	if (eval_map_wq)
9841 		destroy_workqueue(eval_map_wq);
9842 	return 0;
9843 }
9844 
9845 late_initcall_sync(trace_eval_sync);
9846 
9847 
9848 #ifdef CONFIG_MODULES
9849 static void trace_module_add_evals(struct module *mod)
9850 {
9851 	if (!mod->num_trace_evals)
9852 		return;
9853 
9854 	/*
9855 	 * Modules with bad taint do not have events created, do
9856 	 * not bother with enums either.
9857 	 */
9858 	if (trace_module_has_bad_taint(mod))
9859 		return;
9860 
9861 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9862 }
9863 
9864 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9865 static void trace_module_remove_evals(struct module *mod)
9866 {
9867 	union trace_eval_map_item *map;
9868 	union trace_eval_map_item **last = &trace_eval_maps;
9869 
9870 	if (!mod->num_trace_evals)
9871 		return;
9872 
9873 	mutex_lock(&trace_eval_mutex);
9874 
9875 	map = trace_eval_maps;
9876 
9877 	while (map) {
9878 		if (map->head.mod == mod)
9879 			break;
9880 		map = trace_eval_jmp_to_tail(map);
9881 		last = &map->tail.next;
9882 		map = map->tail.next;
9883 	}
9884 	if (!map)
9885 		goto out;
9886 
9887 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9888 	kfree(map);
9889  out:
9890 	mutex_unlock(&trace_eval_mutex);
9891 }
9892 #else
/* Without CONFIG_TRACE_EVAL_MAP_FILE there is nothing to remove for a module */
static inline void trace_module_remove_evals(struct module *mod) { }
9894 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9895 
9896 static int trace_module_notify(struct notifier_block *self,
9897 			       unsigned long val, void *data)
9898 {
9899 	struct module *mod = data;
9900 
9901 	switch (val) {
9902 	case MODULE_STATE_COMING:
9903 		trace_module_add_evals(mod);
9904 		break;
9905 	case MODULE_STATE_GOING:
9906 		trace_module_remove_evals(mod);
9907 		break;
9908 	}
9909 
9910 	return NOTIFY_OK;
9911 }
9912 
/*
 * Module notifier that keeps the eval maps in sync with module load and
 * unload; registered from tracer_init_tracefs_work_func().
 */
static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
9917 #endif /* CONFIG_MODULES */
9918 
/*
 * Create the top level tracefs files for global_trace.
 *
 * Runs either from eval_map_wq or directly from tracer_init_tracefs()
 * (which then passes a NULL @work); @work is not used. All top level
 * files are created with a NULL parent.
 */
static __init void tracer_init_tracefs_work_func(struct work_struct *work)
{

	event_trace_init();

	/* Files shared with instances, followed by the top-level-only ones */
	init_tracer_tracefs(&global_trace, NULL);
	ftrace_init_tracefs_toplevel(&global_trace, NULL);

	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", TRACE_MODE_READ, NULL,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_tgids_fops);

	trace_create_eval_file(NULL);

#ifdef CONFIG_MODULES
	/* From here on, module load/unload updates the eval maps */
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
			NULL, &tracing_dyn_info_fops);
#endif

	/* "instances" directory plus a directory for each existing named instance */
	create_trace_instances(NULL);

	update_tracer_options(&global_trace);
}
9957 
9958 static __init int tracer_init_tracefs(void)
9959 {
9960 	int ret;
9961 
9962 	trace_access_lock_init();
9963 
9964 	ret = tracing_init_dentry();
9965 	if (ret)
9966 		return 0;
9967 
9968 	if (eval_map_wq) {
9969 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9970 		queue_work(eval_map_wq, &tracerfs_init_work);
9971 	} else {
9972 		tracer_init_tracefs_work_func(NULL);
9973 	}
9974 
9975 	rv_init_interface();
9976 
9977 	return 0;
9978 }
9979 
9980 fs_initcall(tracer_init_tracefs);
9981 
/* Forward declaration: both notifier blocks below use this handler */
static int trace_die_panic_handler(struct notifier_block *self,
				unsigned long ev, void *unused);

/* Registered on panic_notifier_list by tracer_alloc_buffers() */
static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

/* Registered with register_die_notifier() by tracer_alloc_buffers() */
static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};
9994 
9995 /*
9996  * The idea is to execute the following die/panic callback early, in order
9997  * to avoid showing irrelevant information in the trace (like other panic
9998  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9999  * warnings get disabled (to prevent potential log flooding).
10000  */
10001 static int trace_die_panic_handler(struct notifier_block *self,
10002 				unsigned long ev, void *unused)
10003 {
10004 	if (!ftrace_dump_on_oops)
10005 		return NOTIFY_DONE;
10006 
10007 	/* The die notifier requires DIE_OOPS to trigger */
10008 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10009 		return NOTIFY_DONE;
10010 
10011 	ftrace_dump(ftrace_dump_on_oops);
10012 
10013 	return NOTIFY_DONE;
10014 }
10015 
/*
 * printk is limited to a maximum of 1024 characters; we really don't
 * need that much. Nothing should be printing 1000 characters anyway.
 * trace_printk_seq() truncates its output to this length.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define KERN_TRACE here so that there is one place to modify
 * if we decide to change the log level the ftrace dump
 * should be printed at.
 */
#define KERN_TRACE		KERN_EMERG
10028 
10029 void
10030 trace_printk_seq(struct trace_seq *s)
10031 {
10032 	/* Probably should print a warning here. */
10033 	if (s->seq.len >= TRACE_MAX_PRINT)
10034 		s->seq.len = TRACE_MAX_PRINT;
10035 
10036 	/*
10037 	 * More paranoid code. Although the buffer size is set to
10038 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10039 	 * an extra layer of protection.
10040 	 */
10041 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10042 		s->seq.len = s->seq.size - 1;
10043 
10044 	/* should be zero ended, but we are paranoid. */
10045 	s->buffer[s->seq.len] = 0;
10046 
10047 	printk(KERN_TRACE "%s", s->buffer);
10048 
10049 	trace_seq_init(s);
10050 }
10051 
10052 void trace_init_global_iter(struct trace_iterator *iter)
10053 {
10054 	iter->tr = &global_trace;
10055 	iter->trace = iter->tr->current_trace;
10056 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10057 	iter->array_buffer = &global_trace.array_buffer;
10058 
10059 	if (iter->trace && iter->trace->open)
10060 		iter->trace->open(iter);
10061 
10062 	/* Annotate start of buffers if we had overruns */
10063 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10064 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10065 
10066 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10067 	if (trace_clocks[iter->tr->clock_id].in_ns)
10068 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10069 
10070 	/* Can not use kmalloc for iter.temp and iter.fmt */
10071 	iter->temp = static_temp_buf;
10072 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10073 	iter->fmt = static_fmt_buf;
10074 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10075 }
10076 
/**
 * ftrace_dump - print the global trace buffer with printk()
 * @oops_dump_mode: DUMP_ALL dumps every CPU, DUMP_ORIG only the CPU this
 *	runs on, DUMP_NONE prints nothing; any other value is reported and
 *	treated like DUMP_ALL
 *
 * Tracing is turned off and is not turned back on here. Only one dump
 * runs at a time; a call made while another dump is in progress returns
 * immediately. The dump runs with local interrupts disabled and consumes
 * the entries it prints.
 */
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_global_iter(&iter);

	/* Bump the per-CPU disabled counters; undone at out_enable */
	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}

	/* Remember the flag so it can be restored at out_enable */
	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		/*
		 * NOTE(review): unlike every other message in this function,
		 * these two are printed without KERN_TRACE -- confirm whether
		 * that is intended.
		 */
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		/* Separator line before the first entry only */
		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		/* A long dump must not trip the NMI watchdog */
		touch_nmi_watchdog();

		/* Emit the formatted line and reset iter.seq for the next one */
		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	local_irq_restore(flags);
}
10182 EXPORT_SYMBOL_GPL(ftrace_dump);
10183 
/* Size of the kernel bounce buffer used by trace_parse_run_command() */
#define WRITE_BUFSIZE  4096

/**
 * trace_parse_run_command - split a user write into lines and run each one
 * @file: not used by this function
 * @buffer: user space buffer holding the commands
 * @count: number of bytes in @buffer
 * @ppos: not used by this function
 * @createfn: callback invoked once per line
 *
 * The user buffer is copied in chunks of at most WRITE_BUFSIZE - 1 bytes.
 * Each chunk is split at '\n', everything from the first '#' on a line
 * is cut off, and the remaining text (possibly empty) is passed to
 * @createfn. A line that is cut by the end of a chunk is re-read at the
 * start of the next chunk.
 *
 * Returns the number of bytes consumed, -ENOMEM if the bounce buffer
 * cannot be allocated, -EFAULT if copying from user space fails, -EINVAL
 * if a single line does not fit in a chunk, or the first non-zero value
 * returned by @createfn.
 */
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf, *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		/* Leave room for the terminating '\0' */
		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				/* Consume the line including its newline */
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					/*
					 * Partial line at the end of the chunk:
					 * leave it for the next copy_from_user().
					 */
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					ret = -EINVAL;
					goto out;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				goto out;
			buf += size;

		} while (done < count);
	}
	ret = done;

out:
	kfree(kbuf);

	return ret;
}
10250 
10251 #ifdef CONFIG_TRACER_MAX_TRACE
10252 __init static bool tr_needs_alloc_snapshot(const char *name)
10253 {
10254 	char *test;
10255 	int len = strlen(name);
10256 	bool ret;
10257 
10258 	if (!boot_snapshot_index)
10259 		return false;
10260 
10261 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10262 	    boot_snapshot_info[len] == '\t')
10263 		return true;
10264 
10265 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10266 	if (!test)
10267 		return false;
10268 
10269 	sprintf(test, "\t%s\t", name);
10270 	ret = strstr(boot_snapshot_info, test) == NULL;
10271 	kfree(test);
10272 	return ret;
10273 }
10274 
10275 __init static void do_allocate_snapshot(const char *name)
10276 {
10277 	if (!tr_needs_alloc_snapshot(name))
10278 		return;
10279 
10280 	/*
10281 	 * When allocate_snapshot is set, the next call to
10282 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10283 	 * will allocate the snapshot buffer. That will alse clear
10284 	 * this flag.
10285 	 */
10286 	allocate_snapshot = true;
10287 }
10288 #else
/* Without CONFIG_TRACER_MAX_TRACE there is no snapshot buffer to request */
static inline void do_allocate_snapshot(const char *name) { }
10290 #endif
10291 
10292 __init static void enable_instances(void)
10293 {
10294 	struct trace_array *tr;
10295 	char *curr_str;
10296 	char *str;
10297 	char *tok;
10298 
10299 	/* A tab is always appended */
10300 	boot_instance_info[boot_instance_index - 1] = '\0';
10301 	str = boot_instance_info;
10302 
10303 	while ((curr_str = strsep(&str, "\t"))) {
10304 
10305 		tok = strsep(&curr_str, ",");
10306 
10307 		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10308 			do_allocate_snapshot(tok);
10309 
10310 		tr = trace_array_get_by_name(tok);
10311 		if (!tr) {
10312 			pr_warn("Failed to create instance buffer %s\n", curr_str);
10313 			continue;
10314 		}
10315 		/* Allow user space to delete it */
10316 		trace_array_put(tr);
10317 
10318 		while ((tok = strsep(&curr_str, ","))) {
10319 			early_enable_events(tr, tok, true);
10320 		}
10321 	}
10322 }
10323 
/*
 * tracer_alloc_buffers - boot-time setup of the global trace array
 *
 * Allocates the cpumasks, the temp buffer, the saved cmdlines buffer and
 * the ring buffers of global_trace, registers the nop tracer and the
 * panic/die notifiers, adds global_trace to ftrace_trace_arrays and
 * clears tracing_disabled.
 *
 * Returns 0 on success, -EPERM when tracefs is locked down, the error
 * from cpuhp_setup_state_multi() if that fails, and -ENOMEM for the
 * other failures. On failure everything allocated so far is released
 * through the labels at the end of the function.
 */
__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;


	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callback allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	/* ret was overwritten above; the failures below report -ENOMEM again */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_savedcmd;
	}

	if (global_trace.buffer_disabled)
		tracing_off();

	/* A bad boot clock name is only warned about; setup continues */
	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	test_can_verify();

	return 0;

	/* Error unwinding: each label releases one more resource, newest first */
out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}
10454 
10455 void __init ftrace_boot_snapshot(void)
10456 {
10457 #ifdef CONFIG_TRACER_MAX_TRACE
10458 	struct trace_array *tr;
10459 
10460 	if (!snapshot_at_boot)
10461 		return;
10462 
10463 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10464 		if (!tr->allocated_snapshot)
10465 			continue;
10466 
10467 		tracing_snapshot_instance(tr);
10468 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10469 	}
10470 #endif
10471 }
10472 
10473 void __init early_trace_init(void)
10474 {
10475 	if (tracepoint_printk) {
10476 		tracepoint_print_iter =
10477 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10478 		if (MEM_FAIL(!tracepoint_print_iter,
10479 			     "Failed to allocate trace iterator\n"))
10480 			tracepoint_printk = 0;
10481 		else
10482 			static_key_enable(&tracepoint_printk_key.key);
10483 	}
10484 	tracer_alloc_buffers();
10485 
10486 	init_events();
10487 }
10488 
10489 void __init trace_init(void)
10490 {
10491 	trace_event_init();
10492 
10493 	if (boot_instance_index)
10494 		enable_instances();
10495 }
10496 
10497 __init static void clear_boot_tracer(void)
10498 {
10499 	/*
10500 	 * The default tracer at boot buffer is an init section.
10501 	 * This function is called in lateinit. If we did not
10502 	 * find the boot tracer, then clear it out, to prevent
10503 	 * later registration from accessing the buffer that is
10504 	 * about to be freed.
10505 	 */
10506 	if (!default_bootup_tracer)
10507 		return;
10508 
10509 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10510 	       default_bootup_tracer);
10511 	default_bootup_tracer = NULL;
10512 }
10513 
10514 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10515 __init static void tracing_set_default_clock(void)
10516 {
10517 	/* sched_clock_stable() is determined in late_initcall */
10518 	if (!trace_boot_clock && !sched_clock_stable()) {
10519 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10520 			pr_warn("Can not set tracing clock due to lockdown\n");
10521 			return;
10522 		}
10523 
10524 		printk(KERN_WARNING
10525 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10526 		       "If you want to keep using the local clock, then add:\n"
10527 		       "  \"trace_clock=local\"\n"
10528 		       "on the kernel command line\n");
10529 		tracing_set_clock(&global_trace, "global");
10530 	}
10531 }
10532 #else
/* Without CONFIG_HAVE_UNSTABLE_SCHED_CLOCK the default clock is left alone */
static inline void tracing_set_default_clock(void) { }
10534 #endif
10535 
10536 __init static int late_trace_init(void)
10537 {
10538 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10539 		static_key_disable(&tracepoint_printk_key.key);
10540 		tracepoint_printk = 0;
10541 	}
10542 
10543 	tracing_set_default_clock();
10544 	clear_boot_tracer();
10545 	return 0;
10546 }
10547 
10548 late_initcall_sync(late_trace_init);
10549