xref: /openbmc/linux/kernel/trace/trace.c (revision 19758688)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53 
54 #include "trace.h"
55 #include "trace_output.h"
56 
57 /*
58  * On boot up, the ring buffer is set to the minimum size, so that
59  * we do not waste memory on systems that are not using tracing.
60  */
61 bool ring_buffer_expanded;
62 
63 #ifdef CONFIG_FTRACE_STARTUP_TEST
64 /*
65  * We need to change this state when a selftest is running.
66  * A selftest will lurk into the ring-buffer to count the
67  * entries inserted during the selftest although some concurrent
68  * insertions into the ring-buffer such as trace_printk could occurred
69  * at the same time, giving false positive or negative results.
70  */
71 static bool __read_mostly tracing_selftest_running;
72 
73 /*
74  * If boot-time tracing including tracers/events via kernel cmdline
75  * is running, we do not want to run SELFTEST.
76  */
77 bool __read_mostly tracing_selftest_disabled;
78 
79 void __init disable_tracing_selftest(const char *reason)
80 {
81 	if (!tracing_selftest_disabled) {
82 		tracing_selftest_disabled = true;
83 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
84 	}
85 }
86 #else
87 #define tracing_selftest_running	0
88 #define tracing_selftest_disabled	0
89 #endif
90 
91 /* Pipe tracepoints to printk */
92 static struct trace_iterator *tracepoint_print_iter;
93 int tracepoint_printk;
94 static bool tracepoint_printk_stop_on_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96 
97 /* For tracers that don't implement custom flags */
98 static struct tracer_opt dummy_tracer_opt[] = {
99 	{ }
100 };
101 
102 static int
103 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
104 {
105 	return 0;
106 }
107 
108 /*
109  * To prevent the comm cache from being overwritten when no
110  * tracing is active, only save the comm when a trace event
111  * occurred.
112  */
113 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
114 
115 /*
116  * Kill all tracing for good (never come back).
117  * It is initialized to 1 but will turn to zero if the initialization
118  * of the tracer is successful. But that is the only place that sets
119  * this back to zero.
120  */
121 static int tracing_disabled = 1;
122 
123 cpumask_var_t __read_mostly	tracing_buffer_mask;
124 
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129  * is set, then ftrace_dump is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
131  * capturing traces that lead to crashes and outputing it to a
132  * serial console.
133  *
134  * It is default off, but you can enable it with either specifying
135  * "ftrace_dump_on_oops" in the kernel command line, or setting
136  * /proc/sys/kernel/ftrace_dump_on_oops
137  * Set 1 if you want to dump buffers of all CPUs
138  * Set 2 if you want to dump the buffer of the CPU that triggered oops
139  */
140 
141 enum ftrace_dump_mode ftrace_dump_on_oops;
142 
143 /* When set, tracing will stop when a WARN*() is hit */
144 int __disable_trace_on_warning;
145 
146 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
147 /* Map of enums to their values, for "eval_map" file */
148 struct trace_eval_map_head {
149 	struct module			*mod;
150 	unsigned long			length;
151 };
152 
153 union trace_eval_map_item;
154 
155 struct trace_eval_map_tail {
156 	/*
157 	 * "end" is first and points to NULL as it must be different
158 	 * than "mod" or "eval_string"
159 	 */
160 	union trace_eval_map_item	*next;
161 	const char			*end;	/* points to NULL */
162 };
163 
164 static DEFINE_MUTEX(trace_eval_mutex);
165 
166 /*
167  * The trace_eval_maps are saved in an array with two extra elements,
168  * one at the beginning, and one at the end. The beginning item contains
169  * the count of the saved maps (head.length), and the module they
170  * belong to if not built in (head.mod). The ending item contains a
171  * pointer to the next array of saved eval_map items.
172  */
173 union trace_eval_map_item {
174 	struct trace_eval_map		map;
175 	struct trace_eval_map_head	head;
176 	struct trace_eval_map_tail	tail;
177 };
178 
179 static union trace_eval_map_item *trace_eval_maps;
180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
181 
182 int tracing_set_tracer(struct trace_array *tr, const char *buf);
183 static void ftrace_trace_userstack(struct trace_array *tr,
184 				   struct trace_buffer *buffer,
185 				   unsigned int trace_ctx);
186 
187 #define MAX_TRACER_SIZE		100
188 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
189 static char *default_bootup_tracer;
190 
191 static bool allocate_snapshot;
192 static bool snapshot_at_boot;
193 
194 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
195 static int boot_instance_index;
196 
197 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
198 static int boot_snapshot_index;
199 
200 static int __init set_cmdline_ftrace(char *str)
201 {
202 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
203 	default_bootup_tracer = bootup_tracer_buf;
204 	/* We are using ftrace early, expand it */
205 	ring_buffer_expanded = true;
206 	return 1;
207 }
208 __setup("ftrace=", set_cmdline_ftrace);
209 
210 static int __init set_ftrace_dump_on_oops(char *str)
211 {
212 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
213 		ftrace_dump_on_oops = DUMP_ALL;
214 		return 1;
215 	}
216 
217 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
218 		ftrace_dump_on_oops = DUMP_ORIG;
219                 return 1;
220         }
221 
222         return 0;
223 }
224 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
225 
226 static int __init stop_trace_on_warning(char *str)
227 {
228 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
229 		__disable_trace_on_warning = 1;
230 	return 1;
231 }
232 __setup("traceoff_on_warning", stop_trace_on_warning);
233 
234 static int __init boot_alloc_snapshot(char *str)
235 {
236 	char *slot = boot_snapshot_info + boot_snapshot_index;
237 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
238 	int ret;
239 
240 	if (str[0] == '=') {
241 		str++;
242 		if (strlen(str) >= left)
243 			return -1;
244 
245 		ret = snprintf(slot, left, "%s\t", str);
246 		boot_snapshot_index += ret;
247 	} else {
248 		allocate_snapshot = true;
249 		/* We also need the main ring buffer expanded */
250 		ring_buffer_expanded = true;
251 	}
252 	return 1;
253 }
254 __setup("alloc_snapshot", boot_alloc_snapshot);
255 
256 
257 static int __init boot_snapshot(char *str)
258 {
259 	snapshot_at_boot = true;
260 	boot_alloc_snapshot(str);
261 	return 1;
262 }
263 __setup("ftrace_boot_snapshot", boot_snapshot);
264 
265 
266 static int __init boot_instance(char *str)
267 {
268 	char *slot = boot_instance_info + boot_instance_index;
269 	int left = sizeof(boot_instance_info) - boot_instance_index;
270 	int ret;
271 
272 	if (strlen(str) >= left)
273 		return -1;
274 
275 	ret = snprintf(slot, left, "%s\t", str);
276 	boot_instance_index += ret;
277 
278 	return 1;
279 }
280 __setup("trace_instance=", boot_instance);
281 
282 
283 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
284 
285 static int __init set_trace_boot_options(char *str)
286 {
287 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
288 	return 1;
289 }
290 __setup("trace_options=", set_trace_boot_options);
291 
292 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
293 static char *trace_boot_clock __initdata;
294 
295 static int __init set_trace_boot_clock(char *str)
296 {
297 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
298 	trace_boot_clock = trace_boot_clock_buf;
299 	return 1;
300 }
301 __setup("trace_clock=", set_trace_boot_clock);
302 
303 static int __init set_tracepoint_printk(char *str)
304 {
305 	/* Ignore the "tp_printk_stop_on_boot" param */
306 	if (*str == '_')
307 		return 0;
308 
309 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
310 		tracepoint_printk = 1;
311 	return 1;
312 }
313 __setup("tp_printk", set_tracepoint_printk);
314 
315 static int __init set_tracepoint_printk_stop(char *str)
316 {
317 	tracepoint_printk_stop_on_boot = true;
318 	return 1;
319 }
320 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
321 
322 unsigned long long ns2usecs(u64 nsec)
323 {
324 	nsec += 500;
325 	do_div(nsec, 1000);
326 	return nsec;
327 }
328 
329 static void
330 trace_process_export(struct trace_export *export,
331 	       struct ring_buffer_event *event, int flag)
332 {
333 	struct trace_entry *entry;
334 	unsigned int size = 0;
335 
336 	if (export->flags & flag) {
337 		entry = ring_buffer_event_data(event);
338 		size = ring_buffer_event_length(event);
339 		export->write(export, entry, size);
340 	}
341 }
342 
343 static DEFINE_MUTEX(ftrace_export_lock);
344 
345 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
346 
347 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
348 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
350 
351 static inline void ftrace_exports_enable(struct trace_export *export)
352 {
353 	if (export->flags & TRACE_EXPORT_FUNCTION)
354 		static_branch_inc(&trace_function_exports_enabled);
355 
356 	if (export->flags & TRACE_EXPORT_EVENT)
357 		static_branch_inc(&trace_event_exports_enabled);
358 
359 	if (export->flags & TRACE_EXPORT_MARKER)
360 		static_branch_inc(&trace_marker_exports_enabled);
361 }
362 
363 static inline void ftrace_exports_disable(struct trace_export *export)
364 {
365 	if (export->flags & TRACE_EXPORT_FUNCTION)
366 		static_branch_dec(&trace_function_exports_enabled);
367 
368 	if (export->flags & TRACE_EXPORT_EVENT)
369 		static_branch_dec(&trace_event_exports_enabled);
370 
371 	if (export->flags & TRACE_EXPORT_MARKER)
372 		static_branch_dec(&trace_marker_exports_enabled);
373 }
374 
375 static void ftrace_exports(struct ring_buffer_event *event, int flag)
376 {
377 	struct trace_export *export;
378 
379 	preempt_disable_notrace();
380 
381 	export = rcu_dereference_raw_check(ftrace_exports_list);
382 	while (export) {
383 		trace_process_export(export, event, flag);
384 		export = rcu_dereference_raw_check(export->next);
385 	}
386 
387 	preempt_enable_notrace();
388 }
389 
390 static inline void
391 add_trace_export(struct trace_export **list, struct trace_export *export)
392 {
393 	rcu_assign_pointer(export->next, *list);
394 	/*
395 	 * We are entering export into the list but another
396 	 * CPU might be walking that list. We need to make sure
397 	 * the export->next pointer is valid before another CPU sees
398 	 * the export pointer included into the list.
399 	 */
400 	rcu_assign_pointer(*list, export);
401 }
402 
403 static inline int
404 rm_trace_export(struct trace_export **list, struct trace_export *export)
405 {
406 	struct trace_export **p;
407 
408 	for (p = list; *p != NULL; p = &(*p)->next)
409 		if (*p == export)
410 			break;
411 
412 	if (*p != export)
413 		return -1;
414 
415 	rcu_assign_pointer(*p, (*p)->next);
416 
417 	return 0;
418 }
419 
420 static inline void
421 add_ftrace_export(struct trace_export **list, struct trace_export *export)
422 {
423 	ftrace_exports_enable(export);
424 
425 	add_trace_export(list, export);
426 }
427 
428 static inline int
429 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
430 {
431 	int ret;
432 
433 	ret = rm_trace_export(list, export);
434 	ftrace_exports_disable(export);
435 
436 	return ret;
437 }
438 
439 int register_ftrace_export(struct trace_export *export)
440 {
441 	if (WARN_ON_ONCE(!export->write))
442 		return -1;
443 
444 	mutex_lock(&ftrace_export_lock);
445 
446 	add_ftrace_export(&ftrace_exports_list, export);
447 
448 	mutex_unlock(&ftrace_export_lock);
449 
450 	return 0;
451 }
452 EXPORT_SYMBOL_GPL(register_ftrace_export);
453 
454 int unregister_ftrace_export(struct trace_export *export)
455 {
456 	int ret;
457 
458 	mutex_lock(&ftrace_export_lock);
459 
460 	ret = rm_ftrace_export(&ftrace_exports_list, export);
461 
462 	mutex_unlock(&ftrace_export_lock);
463 
464 	return ret;
465 }
466 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
467 
468 /* trace_flags holds trace_options default values */
469 #define TRACE_DEFAULT_FLAGS						\
470 	(FUNCTION_DEFAULT_FLAGS |					\
471 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
472 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
473 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
474 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
475 	 TRACE_ITER_HASH_PTR)
476 
477 /* trace_options that are only supported by global_trace */
478 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
479 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
480 
481 /* trace_flags that are default zero for instances */
482 #define ZEROED_TRACE_FLAGS \
483 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
484 
485 /*
486  * The global_trace is the descriptor that holds the top-level tracing
487  * buffers for the live tracing.
488  */
489 static struct trace_array global_trace = {
490 	.trace_flags = TRACE_DEFAULT_FLAGS,
491 };
492 
493 LIST_HEAD(ftrace_trace_arrays);
494 
495 int trace_array_get(struct trace_array *this_tr)
496 {
497 	struct trace_array *tr;
498 	int ret = -ENODEV;
499 
500 	mutex_lock(&trace_types_lock);
501 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
502 		if (tr == this_tr) {
503 			tr->ref++;
504 			ret = 0;
505 			break;
506 		}
507 	}
508 	mutex_unlock(&trace_types_lock);
509 
510 	return ret;
511 }
512 
513 static void __trace_array_put(struct trace_array *this_tr)
514 {
515 	WARN_ON(!this_tr->ref);
516 	this_tr->ref--;
517 }
518 
519 /**
520  * trace_array_put - Decrement the reference counter for this trace array.
521  * @this_tr : pointer to the trace array
522  *
523  * NOTE: Use this when we no longer need the trace array returned by
524  * trace_array_get_by_name(). This ensures the trace array can be later
525  * destroyed.
526  *
527  */
528 void trace_array_put(struct trace_array *this_tr)
529 {
530 	if (!this_tr)
531 		return;
532 
533 	mutex_lock(&trace_types_lock);
534 	__trace_array_put(this_tr);
535 	mutex_unlock(&trace_types_lock);
536 }
537 EXPORT_SYMBOL_GPL(trace_array_put);
538 
539 int tracing_check_open_get_tr(struct trace_array *tr)
540 {
541 	int ret;
542 
543 	ret = security_locked_down(LOCKDOWN_TRACEFS);
544 	if (ret)
545 		return ret;
546 
547 	if (tracing_disabled)
548 		return -ENODEV;
549 
550 	if (tr && trace_array_get(tr) < 0)
551 		return -ENODEV;
552 
553 	return 0;
554 }
555 
556 int call_filter_check_discard(struct trace_event_call *call, void *rec,
557 			      struct trace_buffer *buffer,
558 			      struct ring_buffer_event *event)
559 {
560 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
561 	    !filter_match_preds(call->filter, rec)) {
562 		__trace_event_discard_commit(buffer, event);
563 		return 1;
564 	}
565 
566 	return 0;
567 }
568 
569 /**
570  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
571  * @filtered_pids: The list of pids to check
572  * @search_pid: The PID to find in @filtered_pids
573  *
574  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
575  */
576 bool
577 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
578 {
579 	return trace_pid_list_is_set(filtered_pids, search_pid);
580 }
581 
582 /**
583  * trace_ignore_this_task - should a task be ignored for tracing
584  * @filtered_pids: The list of pids to check
585  * @filtered_no_pids: The list of pids not to be traced
586  * @task: The task that should be ignored if not filtered
587  *
588  * Checks if @task should be traced or not from @filtered_pids.
589  * Returns true if @task should *NOT* be traced.
590  * Returns false if @task should be traced.
591  */
592 bool
593 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
594 		       struct trace_pid_list *filtered_no_pids,
595 		       struct task_struct *task)
596 {
597 	/*
598 	 * If filtered_no_pids is not empty, and the task's pid is listed
599 	 * in filtered_no_pids, then return true.
600 	 * Otherwise, if filtered_pids is empty, that means we can
601 	 * trace all tasks. If it has content, then only trace pids
602 	 * within filtered_pids.
603 	 */
604 
605 	return (filtered_pids &&
606 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
607 		(filtered_no_pids &&
608 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
609 }
610 
611 /**
612  * trace_filter_add_remove_task - Add or remove a task from a pid_list
613  * @pid_list: The list to modify
614  * @self: The current task for fork or NULL for exit
615  * @task: The task to add or remove
616  *
617  * If adding a task, if @self is defined, the task is only added if @self
618  * is also included in @pid_list. This happens on fork and tasks should
619  * only be added when the parent is listed. If @self is NULL, then the
620  * @task pid will be removed from the list, which would happen on exit
621  * of a task.
622  */
623 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
624 				  struct task_struct *self,
625 				  struct task_struct *task)
626 {
627 	if (!pid_list)
628 		return;
629 
630 	/* For forks, we only add if the forking task is listed */
631 	if (self) {
632 		if (!trace_find_filtered_pid(pid_list, self->pid))
633 			return;
634 	}
635 
636 	/* "self" is set for forks, and NULL for exits */
637 	if (self)
638 		trace_pid_list_set(pid_list, task->pid);
639 	else
640 		trace_pid_list_clear(pid_list, task->pid);
641 }
642 
643 /**
644  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
645  * @pid_list: The pid list to show
646  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
647  * @pos: The position of the file
648  *
649  * This is used by the seq_file "next" operation to iterate the pids
650  * listed in a trace_pid_list structure.
651  *
652  * Returns the pid+1 as we want to display pid of zero, but NULL would
653  * stop the iteration.
654  */
655 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
656 {
657 	long pid = (unsigned long)v;
658 	unsigned int next;
659 
660 	(*pos)++;
661 
662 	/* pid already is +1 of the actual previous bit */
663 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
664 		return NULL;
665 
666 	pid = next;
667 
668 	/* Return pid + 1 to allow zero to be represented */
669 	return (void *)(pid + 1);
670 }
671 
672 /**
673  * trace_pid_start - Used for seq_file to start reading pid lists
674  * @pid_list: The pid list to show
675  * @pos: The position of the file
676  *
677  * This is used by seq_file "start" operation to start the iteration
678  * of listing pids.
679  *
680  * Returns the pid+1 as we want to display pid of zero, but NULL would
681  * stop the iteration.
682  */
683 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
684 {
685 	unsigned long pid;
686 	unsigned int first;
687 	loff_t l = 0;
688 
689 	if (trace_pid_list_first(pid_list, &first) < 0)
690 		return NULL;
691 
692 	pid = first;
693 
694 	/* Return pid + 1 so that zero can be the exit value */
695 	for (pid++; pid && l < *pos;
696 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
697 		;
698 	return (void *)pid;
699 }
700 
701 /**
702  * trace_pid_show - show the current pid in seq_file processing
703  * @m: The seq_file structure to write into
704  * @v: A void pointer of the pid (+1) value to display
705  *
706  * Can be directly used by seq_file operations to display the current
707  * pid value.
708  */
709 int trace_pid_show(struct seq_file *m, void *v)
710 {
711 	unsigned long pid = (unsigned long)v - 1;
712 
713 	seq_printf(m, "%lu\n", pid);
714 	return 0;
715 }
716 
717 /* 128 should be much more than enough */
718 #define PID_BUF_SIZE		127
719 
720 int trace_pid_write(struct trace_pid_list *filtered_pids,
721 		    struct trace_pid_list **new_pid_list,
722 		    const char __user *ubuf, size_t cnt)
723 {
724 	struct trace_pid_list *pid_list;
725 	struct trace_parser parser;
726 	unsigned long val;
727 	int nr_pids = 0;
728 	ssize_t read = 0;
729 	ssize_t ret;
730 	loff_t pos;
731 	pid_t pid;
732 
733 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
734 		return -ENOMEM;
735 
736 	/*
737 	 * Always recreate a new array. The write is an all or nothing
738 	 * operation. Always create a new array when adding new pids by
739 	 * the user. If the operation fails, then the current list is
740 	 * not modified.
741 	 */
742 	pid_list = trace_pid_list_alloc();
743 	if (!pid_list) {
744 		trace_parser_put(&parser);
745 		return -ENOMEM;
746 	}
747 
748 	if (filtered_pids) {
749 		/* copy the current bits to the new max */
750 		ret = trace_pid_list_first(filtered_pids, &pid);
751 		while (!ret) {
752 			trace_pid_list_set(pid_list, pid);
753 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
754 			nr_pids++;
755 		}
756 	}
757 
758 	ret = 0;
759 	while (cnt > 0) {
760 
761 		pos = 0;
762 
763 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
764 		if (ret < 0)
765 			break;
766 
767 		read += ret;
768 		ubuf += ret;
769 		cnt -= ret;
770 
771 		if (!trace_parser_loaded(&parser))
772 			break;
773 
774 		ret = -EINVAL;
775 		if (kstrtoul(parser.buffer, 0, &val))
776 			break;
777 
778 		pid = (pid_t)val;
779 
780 		if (trace_pid_list_set(pid_list, pid) < 0) {
781 			ret = -1;
782 			break;
783 		}
784 		nr_pids++;
785 
786 		trace_parser_clear(&parser);
787 		ret = 0;
788 	}
789 	trace_parser_put(&parser);
790 
791 	if (ret < 0) {
792 		trace_pid_list_free(pid_list);
793 		return ret;
794 	}
795 
796 	if (!nr_pids) {
797 		/* Cleared the list of pids */
798 		trace_pid_list_free(pid_list);
799 		pid_list = NULL;
800 	}
801 
802 	*new_pid_list = pid_list;
803 
804 	return read;
805 }
806 
807 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
808 {
809 	u64 ts;
810 
811 	/* Early boot up does not have a buffer yet */
812 	if (!buf->buffer)
813 		return trace_clock_local();
814 
815 	ts = ring_buffer_time_stamp(buf->buffer);
816 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
817 
818 	return ts;
819 }
820 
821 u64 ftrace_now(int cpu)
822 {
823 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
824 }
825 
826 /**
827  * tracing_is_enabled - Show if global_trace has been enabled
828  *
829  * Shows if the global trace has been enabled or not. It uses the
830  * mirror flag "buffer_disabled" to be used in fast paths such as for
831  * the irqsoff tracer. But it may be inaccurate due to races. If you
832  * need to know the accurate state, use tracing_is_on() which is a little
833  * slower, but accurate.
834  */
835 int tracing_is_enabled(void)
836 {
837 	/*
838 	 * For quick access (irqsoff uses this in fast path), just
839 	 * return the mirror variable of the state of the ring buffer.
840 	 * It's a little racy, but we don't really care.
841 	 */
842 	smp_rmb();
843 	return !global_trace.buffer_disabled;
844 }
845 
846 /*
847  * trace_buf_size is the size in bytes that is allocated
848  * for a buffer. Note, the number of bytes is always rounded
849  * to page size.
850  *
851  * This number is purposely set to a low number of 16384.
852  * If the dump on oops happens, it will be much appreciated
853  * to not have to wait for all that output. Anyway this can be
854  * boot time and run time configurable.
855  */
856 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
857 
858 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
859 
860 /* trace_types holds a link list of available tracers. */
861 static struct tracer		*trace_types __read_mostly;
862 
863 /*
864  * trace_types_lock is used to protect the trace_types list.
865  */
866 DEFINE_MUTEX(trace_types_lock);
867 
868 /*
869  * serialize the access of the ring buffer
870  *
871  * ring buffer serializes readers, but it is low level protection.
872  * The validity of the events (which returns by ring_buffer_peek() ..etc)
873  * are not protected by ring buffer.
874  *
875  * The content of events may become garbage if we allow other process consumes
876  * these events concurrently:
877  *   A) the page of the consumed events may become a normal page
878  *      (not reader page) in ring buffer, and this page will be rewritten
879  *      by events producer.
880  *   B) The page of the consumed events may become a page for splice_read,
881  *      and this page will be returned to system.
882  *
883  * These primitives allow multi process access to different cpu ring buffer
884  * concurrently.
885  *
886  * These primitives don't distinguish read-only and read-consume access.
887  * Multi read-only access are also serialized.
888  */
889 
890 #ifdef CONFIG_SMP
891 static DECLARE_RWSEM(all_cpu_access_lock);
892 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
893 
894 static inline void trace_access_lock(int cpu)
895 {
896 	if (cpu == RING_BUFFER_ALL_CPUS) {
897 		/* gain it for accessing the whole ring buffer. */
898 		down_write(&all_cpu_access_lock);
899 	} else {
900 		/* gain it for accessing a cpu ring buffer. */
901 
902 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
903 		down_read(&all_cpu_access_lock);
904 
905 		/* Secondly block other access to this @cpu ring buffer. */
906 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
907 	}
908 }
909 
910 static inline void trace_access_unlock(int cpu)
911 {
912 	if (cpu == RING_BUFFER_ALL_CPUS) {
913 		up_write(&all_cpu_access_lock);
914 	} else {
915 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
916 		up_read(&all_cpu_access_lock);
917 	}
918 }
919 
920 static inline void trace_access_lock_init(void)
921 {
922 	int cpu;
923 
924 	for_each_possible_cpu(cpu)
925 		mutex_init(&per_cpu(cpu_access_lock, cpu));
926 }
927 
928 #else
929 
930 static DEFINE_MUTEX(access_lock);
931 
932 static inline void trace_access_lock(int cpu)
933 {
934 	(void)cpu;
935 	mutex_lock(&access_lock);
936 }
937 
938 static inline void trace_access_unlock(int cpu)
939 {
940 	(void)cpu;
941 	mutex_unlock(&access_lock);
942 }
943 
944 static inline void trace_access_lock_init(void)
945 {
946 }
947 
948 #endif
949 
950 #ifdef CONFIG_STACKTRACE
951 static void __ftrace_trace_stack(struct trace_buffer *buffer,
952 				 unsigned int trace_ctx,
953 				 int skip, struct pt_regs *regs);
954 static inline void ftrace_trace_stack(struct trace_array *tr,
955 				      struct trace_buffer *buffer,
956 				      unsigned int trace_ctx,
957 				      int skip, struct pt_regs *regs);
958 
959 #else
960 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
961 					unsigned int trace_ctx,
962 					int skip, struct pt_regs *regs)
963 {
964 }
965 static inline void ftrace_trace_stack(struct trace_array *tr,
966 				      struct trace_buffer *buffer,
967 				      unsigned long trace_ctx,
968 				      int skip, struct pt_regs *regs)
969 {
970 }
971 
972 #endif
973 
974 static __always_inline void
975 trace_event_setup(struct ring_buffer_event *event,
976 		  int type, unsigned int trace_ctx)
977 {
978 	struct trace_entry *ent = ring_buffer_event_data(event);
979 
980 	tracing_generic_entry_update(ent, type, trace_ctx);
981 }
982 
983 static __always_inline struct ring_buffer_event *
984 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
985 			  int type,
986 			  unsigned long len,
987 			  unsigned int trace_ctx)
988 {
989 	struct ring_buffer_event *event;
990 
991 	event = ring_buffer_lock_reserve(buffer, len);
992 	if (event != NULL)
993 		trace_event_setup(event, type, trace_ctx);
994 
995 	return event;
996 }
997 
998 void tracer_tracing_on(struct trace_array *tr)
999 {
1000 	if (tr->array_buffer.buffer)
1001 		ring_buffer_record_on(tr->array_buffer.buffer);
1002 	/*
1003 	 * This flag is looked at when buffers haven't been allocated
1004 	 * yet, or by some tracers (like irqsoff), that just want to
1005 	 * know if the ring buffer has been disabled, but it can handle
1006 	 * races of where it gets disabled but we still do a record.
1007 	 * As the check is in the fast path of the tracers, it is more
1008 	 * important to be fast than accurate.
1009 	 */
1010 	tr->buffer_disabled = 0;
1011 	/* Make the flag seen by readers */
1012 	smp_wmb();
1013 }
1014 
1015 /**
1016  * tracing_on - enable tracing buffers
1017  *
1018  * This function enables tracing buffers that may have been
1019  * disabled with tracing_off.
1020  */
1021 void tracing_on(void)
1022 {
1023 	tracer_tracing_on(&global_trace);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_on);
1026 
1027 
1028 static __always_inline void
1029 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1030 {
1031 	__this_cpu_write(trace_taskinfo_save, true);
1032 
1033 	/* If this is the temp buffer, we need to commit fully */
1034 	if (this_cpu_read(trace_buffered_event) == event) {
1035 		/* Length is in event->array[0] */
1036 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1037 		/* Release the temp buffer */
1038 		this_cpu_dec(trace_buffered_event_cnt);
1039 		/* ring_buffer_unlock_commit() enables preemption */
1040 		preempt_enable_notrace();
1041 	} else
1042 		ring_buffer_unlock_commit(buffer);
1043 }
1044 
1045 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1046 		       const char *str, int size)
1047 {
1048 	struct ring_buffer_event *event;
1049 	struct trace_buffer *buffer;
1050 	struct print_entry *entry;
1051 	unsigned int trace_ctx;
1052 	int alloc;
1053 
1054 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1055 		return 0;
1056 
1057 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1058 		return 0;
1059 
1060 	if (unlikely(tracing_disabled))
1061 		return 0;
1062 
1063 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1064 
1065 	trace_ctx = tracing_gen_ctx();
1066 	buffer = tr->array_buffer.buffer;
1067 	ring_buffer_nest_start(buffer);
1068 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1069 					    trace_ctx);
1070 	if (!event) {
1071 		size = 0;
1072 		goto out;
1073 	}
1074 
1075 	entry = ring_buffer_event_data(event);
1076 	entry->ip = ip;
1077 
1078 	memcpy(&entry->buf, str, size);
1079 
1080 	/* Add a newline if necessary */
1081 	if (entry->buf[size - 1] != '\n') {
1082 		entry->buf[size] = '\n';
1083 		entry->buf[size + 1] = '\0';
1084 	} else
1085 		entry->buf[size] = '\0';
1086 
1087 	__buffer_unlock_commit(buffer, event);
1088 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1089  out:
1090 	ring_buffer_nest_end(buffer);
1091 	return size;
1092 }
1093 EXPORT_SYMBOL_GPL(__trace_array_puts);
1094 
1095 /**
1096  * __trace_puts - write a constant string into the trace buffer.
1097  * @ip:	   The address of the caller
1098  * @str:   The constant string to write
1099  * @size:  The size of the string.
1100  */
1101 int __trace_puts(unsigned long ip, const char *str, int size)
1102 {
1103 	return __trace_array_puts(&global_trace, ip, str, size);
1104 }
1105 EXPORT_SYMBOL_GPL(__trace_puts);
1106 
1107 /**
1108  * __trace_bputs - write the pointer to a constant string into trace buffer
1109  * @ip:	   The address of the caller
1110  * @str:   The constant string to write to the buffer to
1111  */
1112 int __trace_bputs(unsigned long ip, const char *str)
1113 {
1114 	struct ring_buffer_event *event;
1115 	struct trace_buffer *buffer;
1116 	struct bputs_entry *entry;
1117 	unsigned int trace_ctx;
1118 	int size = sizeof(struct bputs_entry);
1119 	int ret = 0;
1120 
1121 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1122 		return 0;
1123 
1124 	if (unlikely(tracing_selftest_running || tracing_disabled))
1125 		return 0;
1126 
1127 	trace_ctx = tracing_gen_ctx();
1128 	buffer = global_trace.array_buffer.buffer;
1129 
1130 	ring_buffer_nest_start(buffer);
1131 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1132 					    trace_ctx);
1133 	if (!event)
1134 		goto out;
1135 
1136 	entry = ring_buffer_event_data(event);
1137 	entry->ip			= ip;
1138 	entry->str			= str;
1139 
1140 	__buffer_unlock_commit(buffer, event);
1141 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1142 
1143 	ret = 1;
1144  out:
1145 	ring_buffer_nest_end(buffer);
1146 	return ret;
1147 }
1148 EXPORT_SYMBOL_GPL(__trace_bputs);
1149 
1150 #ifdef CONFIG_TRACER_SNAPSHOT
1151 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1152 					   void *cond_data)
1153 {
1154 	struct tracer *tracer = tr->current_trace;
1155 	unsigned long flags;
1156 
1157 	if (in_nmi()) {
1158 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1159 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1160 		return;
1161 	}
1162 
1163 	if (!tr->allocated_snapshot) {
1164 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1165 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1166 		tracer_tracing_off(tr);
1167 		return;
1168 	}
1169 
1170 	/* Note, snapshot can not be used when the tracer uses it */
1171 	if (tracer->use_max_tr) {
1172 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1173 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1174 		return;
1175 	}
1176 
1177 	local_irq_save(flags);
1178 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1179 	local_irq_restore(flags);
1180 }
1181 
1182 void tracing_snapshot_instance(struct trace_array *tr)
1183 {
1184 	tracing_snapshot_instance_cond(tr, NULL);
1185 }
1186 
1187 /**
1188  * tracing_snapshot - take a snapshot of the current buffer.
1189  *
1190  * This causes a swap between the snapshot buffer and the current live
1191  * tracing buffer. You can use this to take snapshots of the live
1192  * trace when some condition is triggered, but continue to trace.
1193  *
1194  * Note, make sure to allocate the snapshot with either
1195  * a tracing_snapshot_alloc(), or by doing it manually
1196  * with: echo 1 > /sys/kernel/tracing/snapshot
1197  *
1198  * If the snapshot buffer is not allocated, it will stop tracing.
1199  * Basically making a permanent snapshot.
1200  */
1201 void tracing_snapshot(void)
1202 {
1203 	struct trace_array *tr = &global_trace;
1204 
1205 	tracing_snapshot_instance(tr);
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot);
1208 
1209 /**
1210  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1211  * @tr:		The tracing instance to snapshot
1212  * @cond_data:	The data to be tested conditionally, and possibly saved
1213  *
1214  * This is the same as tracing_snapshot() except that the snapshot is
1215  * conditional - the snapshot will only happen if the
1216  * cond_snapshot.update() implementation receiving the cond_data
1217  * returns true, which means that the trace array's cond_snapshot
1218  * update() operation used the cond_data to determine whether the
1219  * snapshot should be taken, and if it was, presumably saved it along
1220  * with the snapshot.
1221  */
1222 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1223 {
1224 	tracing_snapshot_instance_cond(tr, cond_data);
1225 }
1226 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1227 
1228 /**
1229  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1230  * @tr:		The tracing instance
1231  *
1232  * When the user enables a conditional snapshot using
1233  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1234  * with the snapshot.  This accessor is used to retrieve it.
1235  *
1236  * Should not be called from cond_snapshot.update(), since it takes
1237  * the tr->max_lock lock, which the code calling
1238  * cond_snapshot.update() has already done.
1239  *
1240  * Returns the cond_data associated with the trace array's snapshot.
1241  */
1242 void *tracing_cond_snapshot_data(struct trace_array *tr)
1243 {
1244 	void *cond_data = NULL;
1245 
1246 	local_irq_disable();
1247 	arch_spin_lock(&tr->max_lock);
1248 
1249 	if (tr->cond_snapshot)
1250 		cond_data = tr->cond_snapshot->cond_data;
1251 
1252 	arch_spin_unlock(&tr->max_lock);
1253 	local_irq_enable();
1254 
1255 	return cond_data;
1256 }
1257 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1258 
1259 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1260 					struct array_buffer *size_buf, int cpu_id);
1261 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1262 
1263 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1264 {
1265 	int ret;
1266 
1267 	if (!tr->allocated_snapshot) {
1268 
1269 		/* allocate spare buffer */
1270 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1271 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1272 		if (ret < 0)
1273 			return ret;
1274 
1275 		tr->allocated_snapshot = true;
1276 	}
1277 
1278 	return 0;
1279 }
1280 
1281 static void free_snapshot(struct trace_array *tr)
1282 {
1283 	/*
1284 	 * We don't free the ring buffer. instead, resize it because
1285 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1286 	 * we want preserve it.
1287 	 */
1288 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1289 	set_buffer_entries(&tr->max_buffer, 1);
1290 	tracing_reset_online_cpus(&tr->max_buffer);
1291 	tr->allocated_snapshot = false;
1292 }
1293 
1294 /**
1295  * tracing_alloc_snapshot - allocate snapshot buffer.
1296  *
1297  * This only allocates the snapshot buffer if it isn't already
1298  * allocated - it doesn't also take a snapshot.
1299  *
1300  * This is meant to be used in cases where the snapshot buffer needs
1301  * to be set up for events that can't sleep but need to be able to
1302  * trigger a snapshot.
1303  */
1304 int tracing_alloc_snapshot(void)
1305 {
1306 	struct trace_array *tr = &global_trace;
1307 	int ret;
1308 
1309 	ret = tracing_alloc_snapshot_instance(tr);
1310 	WARN_ON(ret < 0);
1311 
1312 	return ret;
1313 }
1314 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1315 
1316 /**
1317  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1318  *
1319  * This is similar to tracing_snapshot(), but it will allocate the
1320  * snapshot buffer if it isn't already allocated. Use this only
1321  * where it is safe to sleep, as the allocation may sleep.
1322  *
1323  * This causes a swap between the snapshot buffer and the current live
1324  * tracing buffer. You can use this to take snapshots of the live
1325  * trace when some condition is triggered, but continue to trace.
1326  */
1327 void tracing_snapshot_alloc(void)
1328 {
1329 	int ret;
1330 
1331 	ret = tracing_alloc_snapshot();
1332 	if (ret < 0)
1333 		return;
1334 
1335 	tracing_snapshot();
1336 }
1337 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1338 
1339 /**
1340  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1341  * @tr:		The tracing instance
1342  * @cond_data:	User data to associate with the snapshot
1343  * @update:	Implementation of the cond_snapshot update function
1344  *
1345  * Check whether the conditional snapshot for the given instance has
1346  * already been enabled, or if the current tracer is already using a
1347  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1348  * save the cond_data and update function inside.
1349  *
1350  * Returns 0 if successful, error otherwise.
1351  */
1352 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1353 				 cond_update_fn_t update)
1354 {
1355 	struct cond_snapshot *cond_snapshot;
1356 	int ret = 0;
1357 
1358 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1359 	if (!cond_snapshot)
1360 		return -ENOMEM;
1361 
1362 	cond_snapshot->cond_data = cond_data;
1363 	cond_snapshot->update = update;
1364 
1365 	mutex_lock(&trace_types_lock);
1366 
1367 	ret = tracing_alloc_snapshot_instance(tr);
1368 	if (ret)
1369 		goto fail_unlock;
1370 
1371 	if (tr->current_trace->use_max_tr) {
1372 		ret = -EBUSY;
1373 		goto fail_unlock;
1374 	}
1375 
1376 	/*
1377 	 * The cond_snapshot can only change to NULL without the
1378 	 * trace_types_lock. We don't care if we race with it going
1379 	 * to NULL, but we want to make sure that it's not set to
1380 	 * something other than NULL when we get here, which we can
1381 	 * do safely with only holding the trace_types_lock and not
1382 	 * having to take the max_lock.
1383 	 */
1384 	if (tr->cond_snapshot) {
1385 		ret = -EBUSY;
1386 		goto fail_unlock;
1387 	}
1388 
1389 	local_irq_disable();
1390 	arch_spin_lock(&tr->max_lock);
1391 	tr->cond_snapshot = cond_snapshot;
1392 	arch_spin_unlock(&tr->max_lock);
1393 	local_irq_enable();
1394 
1395 	mutex_unlock(&trace_types_lock);
1396 
1397 	return ret;
1398 
1399  fail_unlock:
1400 	mutex_unlock(&trace_types_lock);
1401 	kfree(cond_snapshot);
1402 	return ret;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1405 
1406 /**
1407  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1408  * @tr:		The tracing instance
1409  *
1410  * Check whether the conditional snapshot for the given instance is
1411  * enabled; if so, free the cond_snapshot associated with it,
1412  * otherwise return -EINVAL.
1413  *
1414  * Returns 0 if successful, error otherwise.
1415  */
1416 int tracing_snapshot_cond_disable(struct trace_array *tr)
1417 {
1418 	int ret = 0;
1419 
1420 	local_irq_disable();
1421 	arch_spin_lock(&tr->max_lock);
1422 
1423 	if (!tr->cond_snapshot)
1424 		ret = -EINVAL;
1425 	else {
1426 		kfree(tr->cond_snapshot);
1427 		tr->cond_snapshot = NULL;
1428 	}
1429 
1430 	arch_spin_unlock(&tr->max_lock);
1431 	local_irq_enable();
1432 
1433 	return ret;
1434 }
1435 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1436 #else
1437 void tracing_snapshot(void)
1438 {
1439 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1440 }
1441 EXPORT_SYMBOL_GPL(tracing_snapshot);
1442 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1443 {
1444 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1447 int tracing_alloc_snapshot(void)
1448 {
1449 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1450 	return -ENODEV;
1451 }
1452 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1453 void tracing_snapshot_alloc(void)
1454 {
1455 	/* Give warning */
1456 	tracing_snapshot();
1457 }
1458 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1459 void *tracing_cond_snapshot_data(struct trace_array *tr)
1460 {
1461 	return NULL;
1462 }
1463 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1464 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1465 {
1466 	return -ENODEV;
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1469 int tracing_snapshot_cond_disable(struct trace_array *tr)
1470 {
1471 	return false;
1472 }
1473 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1474 #define free_snapshot(tr)	do { } while (0)
1475 #endif /* CONFIG_TRACER_SNAPSHOT */
1476 
1477 void tracer_tracing_off(struct trace_array *tr)
1478 {
1479 	if (tr->array_buffer.buffer)
1480 		ring_buffer_record_off(tr->array_buffer.buffer);
1481 	/*
1482 	 * This flag is looked at when buffers haven't been allocated
1483 	 * yet, or by some tracers (like irqsoff), that just want to
1484 	 * know if the ring buffer has been disabled, but it can handle
1485 	 * races of where it gets disabled but we still do a record.
1486 	 * As the check is in the fast path of the tracers, it is more
1487 	 * important to be fast than accurate.
1488 	 */
1489 	tr->buffer_disabled = 1;
1490 	/* Make the flag seen by readers */
1491 	smp_wmb();
1492 }
1493 
1494 /**
1495  * tracing_off - turn off tracing buffers
1496  *
1497  * This function stops the tracing buffers from recording data.
1498  * It does not disable any overhead the tracers themselves may
1499  * be causing. This function simply causes all recording to
1500  * the ring buffers to fail.
1501  */
1502 void tracing_off(void)
1503 {
1504 	tracer_tracing_off(&global_trace);
1505 }
1506 EXPORT_SYMBOL_GPL(tracing_off);
1507 
1508 void disable_trace_on_warning(void)
1509 {
1510 	if (__disable_trace_on_warning) {
1511 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1512 			"Disabling tracing due to warning\n");
1513 		tracing_off();
1514 	}
1515 }
1516 
1517 /**
1518  * tracer_tracing_is_on - show real state of ring buffer enabled
1519  * @tr : the trace array to know if ring buffer is enabled
1520  *
1521  * Shows real state of the ring buffer if it is enabled or not.
1522  */
1523 bool tracer_tracing_is_on(struct trace_array *tr)
1524 {
1525 	if (tr->array_buffer.buffer)
1526 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1527 	return !tr->buffer_disabled;
1528 }
1529 
1530 /**
1531  * tracing_is_on - show state of ring buffers enabled
1532  */
1533 int tracing_is_on(void)
1534 {
1535 	return tracer_tracing_is_on(&global_trace);
1536 }
1537 EXPORT_SYMBOL_GPL(tracing_is_on);
1538 
1539 static int __init set_buf_size(char *str)
1540 {
1541 	unsigned long buf_size;
1542 
1543 	if (!str)
1544 		return 0;
1545 	buf_size = memparse(str, &str);
1546 	/*
1547 	 * nr_entries can not be zero and the startup
1548 	 * tests require some buffer space. Therefore
1549 	 * ensure we have at least 4096 bytes of buffer.
1550 	 */
1551 	trace_buf_size = max(4096UL, buf_size);
1552 	return 1;
1553 }
1554 __setup("trace_buf_size=", set_buf_size);
1555 
1556 static int __init set_tracing_thresh(char *str)
1557 {
1558 	unsigned long threshold;
1559 	int ret;
1560 
1561 	if (!str)
1562 		return 0;
1563 	ret = kstrtoul(str, 0, &threshold);
1564 	if (ret < 0)
1565 		return 0;
1566 	tracing_thresh = threshold * 1000;
1567 	return 1;
1568 }
1569 __setup("tracing_thresh=", set_tracing_thresh);
1570 
1571 unsigned long nsecs_to_usecs(unsigned long nsecs)
1572 {
1573 	return nsecs / 1000;
1574 }
1575 
1576 /*
1577  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1578  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1579  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1580  * of strings in the order that the evals (enum) were defined.
1581  */
1582 #undef C
1583 #define C(a, b) b
1584 
1585 /* These must match the bit positions in trace_iterator_flags */
1586 static const char *trace_options[] = {
1587 	TRACE_FLAGS
1588 	NULL
1589 };
1590 
1591 static struct {
1592 	u64 (*func)(void);
1593 	const char *name;
1594 	int in_ns;		/* is this clock in nanoseconds? */
1595 } trace_clocks[] = {
1596 	{ trace_clock_local,		"local",	1 },
1597 	{ trace_clock_global,		"global",	1 },
1598 	{ trace_clock_counter,		"counter",	0 },
1599 	{ trace_clock_jiffies,		"uptime",	0 },
1600 	{ trace_clock,			"perf",		1 },
1601 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1602 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1603 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1604 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1605 	ARCH_TRACE_CLOCKS
1606 };
1607 
1608 bool trace_clock_in_ns(struct trace_array *tr)
1609 {
1610 	if (trace_clocks[tr->clock_id].in_ns)
1611 		return true;
1612 
1613 	return false;
1614 }
1615 
1616 /*
1617  * trace_parser_get_init - gets the buffer for trace parser
1618  */
1619 int trace_parser_get_init(struct trace_parser *parser, int size)
1620 {
1621 	memset(parser, 0, sizeof(*parser));
1622 
1623 	parser->buffer = kmalloc(size, GFP_KERNEL);
1624 	if (!parser->buffer)
1625 		return 1;
1626 
1627 	parser->size = size;
1628 	return 0;
1629 }
1630 
1631 /*
1632  * trace_parser_put - frees the buffer for trace parser
1633  */
1634 void trace_parser_put(struct trace_parser *parser)
1635 {
1636 	kfree(parser->buffer);
1637 	parser->buffer = NULL;
1638 }
1639 
1640 /*
1641  * trace_get_user - reads the user input string separated by  space
1642  * (matched by isspace(ch))
1643  *
1644  * For each string found the 'struct trace_parser' is updated,
1645  * and the function returns.
1646  *
1647  * Returns number of bytes read.
1648  *
1649  * See kernel/trace/trace.h for 'struct trace_parser' details.
1650  */
1651 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1652 	size_t cnt, loff_t *ppos)
1653 {
1654 	char ch;
1655 	size_t read = 0;
1656 	ssize_t ret;
1657 
1658 	if (!*ppos)
1659 		trace_parser_clear(parser);
1660 
1661 	ret = get_user(ch, ubuf++);
1662 	if (ret)
1663 		goto out;
1664 
1665 	read++;
1666 	cnt--;
1667 
1668 	/*
1669 	 * The parser is not finished with the last write,
1670 	 * continue reading the user input without skipping spaces.
1671 	 */
1672 	if (!parser->cont) {
1673 		/* skip white space */
1674 		while (cnt && isspace(ch)) {
1675 			ret = get_user(ch, ubuf++);
1676 			if (ret)
1677 				goto out;
1678 			read++;
1679 			cnt--;
1680 		}
1681 
1682 		parser->idx = 0;
1683 
1684 		/* only spaces were written */
1685 		if (isspace(ch) || !ch) {
1686 			*ppos += read;
1687 			ret = read;
1688 			goto out;
1689 		}
1690 	}
1691 
1692 	/* read the non-space input */
1693 	while (cnt && !isspace(ch) && ch) {
1694 		if (parser->idx < parser->size - 1)
1695 			parser->buffer[parser->idx++] = ch;
1696 		else {
1697 			ret = -EINVAL;
1698 			goto out;
1699 		}
1700 		ret = get_user(ch, ubuf++);
1701 		if (ret)
1702 			goto out;
1703 		read++;
1704 		cnt--;
1705 	}
1706 
1707 	/* We either got finished input or we have to wait for another call. */
1708 	if (isspace(ch) || !ch) {
1709 		parser->buffer[parser->idx] = 0;
1710 		parser->cont = false;
1711 	} else if (parser->idx < parser->size - 1) {
1712 		parser->cont = true;
1713 		parser->buffer[parser->idx++] = ch;
1714 		/* Make sure the parsed string always terminates with '\0'. */
1715 		parser->buffer[parser->idx] = 0;
1716 	} else {
1717 		ret = -EINVAL;
1718 		goto out;
1719 	}
1720 
1721 	*ppos += read;
1722 	ret = read;
1723 
1724 out:
1725 	return ret;
1726 }
1727 
1728 /* TODO add a seq_buf_to_buffer() */
1729 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1730 {
1731 	int len;
1732 
1733 	if (trace_seq_used(s) <= s->seq.readpos)
1734 		return -EBUSY;
1735 
1736 	len = trace_seq_used(s) - s->seq.readpos;
1737 	if (cnt > len)
1738 		cnt = len;
1739 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1740 
1741 	s->seq.readpos += cnt;
1742 	return cnt;
1743 }
1744 
1745 unsigned long __read_mostly	tracing_thresh;
1746 
1747 #ifdef CONFIG_TRACER_MAX_TRACE
1748 static const struct file_operations tracing_max_lat_fops;
1749 
1750 #ifdef LATENCY_FS_NOTIFY
1751 
1752 static struct workqueue_struct *fsnotify_wq;
1753 
1754 static void latency_fsnotify_workfn(struct work_struct *work)
1755 {
1756 	struct trace_array *tr = container_of(work, struct trace_array,
1757 					      fsnotify_work);
1758 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1759 }
1760 
1761 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1762 {
1763 	struct trace_array *tr = container_of(iwork, struct trace_array,
1764 					      fsnotify_irqwork);
1765 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1766 }
1767 
1768 static void trace_create_maxlat_file(struct trace_array *tr,
1769 				     struct dentry *d_tracer)
1770 {
1771 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1772 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1773 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1774 					      TRACE_MODE_WRITE,
1775 					      d_tracer, tr,
1776 					      &tracing_max_lat_fops);
1777 }
1778 
1779 __init static int latency_fsnotify_init(void)
1780 {
1781 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1782 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1783 	if (!fsnotify_wq) {
1784 		pr_err("Unable to allocate tr_max_lat_wq\n");
1785 		return -ENOMEM;
1786 	}
1787 	return 0;
1788 }
1789 
1790 late_initcall_sync(latency_fsnotify_init);
1791 
1792 void latency_fsnotify(struct trace_array *tr)
1793 {
1794 	if (!fsnotify_wq)
1795 		return;
1796 	/*
1797 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1798 	 * possible that we are called from __schedule() or do_idle(), which
1799 	 * could cause a deadlock.
1800 	 */
1801 	irq_work_queue(&tr->fsnotify_irqwork);
1802 }
1803 
1804 #else /* !LATENCY_FS_NOTIFY */
1805 
1806 #define trace_create_maxlat_file(tr, d_tracer)				\
1807 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1808 			  d_tracer, tr, &tracing_max_lat_fops)
1809 
1810 #endif
1811 
1812 /*
1813  * Copy the new maximum trace into the separate maximum-trace
1814  * structure. (this way the maximum trace is permanently saved,
1815  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1816  */
1817 static void
1818 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1819 {
1820 	struct array_buffer *trace_buf = &tr->array_buffer;
1821 	struct array_buffer *max_buf = &tr->max_buffer;
1822 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1823 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1824 
1825 	max_buf->cpu = cpu;
1826 	max_buf->time_start = data->preempt_timestamp;
1827 
1828 	max_data->saved_latency = tr->max_latency;
1829 	max_data->critical_start = data->critical_start;
1830 	max_data->critical_end = data->critical_end;
1831 
1832 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1833 	max_data->pid = tsk->pid;
1834 	/*
1835 	 * If tsk == current, then use current_uid(), as that does not use
1836 	 * RCU. The irq tracer can be called out of RCU scope.
1837 	 */
1838 	if (tsk == current)
1839 		max_data->uid = current_uid();
1840 	else
1841 		max_data->uid = task_uid(tsk);
1842 
1843 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1844 	max_data->policy = tsk->policy;
1845 	max_data->rt_priority = tsk->rt_priority;
1846 
1847 	/* record this tasks comm */
1848 	tracing_record_cmdline(tsk);
1849 	latency_fsnotify(tr);
1850 }
1851 
1852 /**
1853  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1854  * @tr: tracer
1855  * @tsk: the task with the latency
1856  * @cpu: The cpu that initiated the trace.
1857  * @cond_data: User data associated with a conditional snapshot
1858  *
1859  * Flip the buffers between the @tr and the max_tr and record information
1860  * about which task was the cause of this latency.
1861  */
1862 void
1863 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1864 	      void *cond_data)
1865 {
1866 	if (tr->stop_count)
1867 		return;
1868 
1869 	WARN_ON_ONCE(!irqs_disabled());
1870 
1871 	if (!tr->allocated_snapshot) {
1872 		/* Only the nop tracer should hit this when disabling */
1873 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1874 		return;
1875 	}
1876 
1877 	arch_spin_lock(&tr->max_lock);
1878 
1879 	/* Inherit the recordable setting from array_buffer */
1880 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1881 		ring_buffer_record_on(tr->max_buffer.buffer);
1882 	else
1883 		ring_buffer_record_off(tr->max_buffer.buffer);
1884 
1885 #ifdef CONFIG_TRACER_SNAPSHOT
1886 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1887 		arch_spin_unlock(&tr->max_lock);
1888 		return;
1889 	}
1890 #endif
1891 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1892 
1893 	__update_max_tr(tr, tsk, cpu);
1894 
1895 	arch_spin_unlock(&tr->max_lock);
1896 
1897 	/* Any waiters on the old snapshot buffer need to wake up */
1898 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1899 }
1900 
1901 /**
1902  * update_max_tr_single - only copy one trace over, and reset the rest
1903  * @tr: tracer
1904  * @tsk: task with the latency
1905  * @cpu: the cpu of the buffer to copy.
1906  *
1907  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1908  */
1909 void
1910 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1911 {
1912 	int ret;
1913 
1914 	if (tr->stop_count)
1915 		return;
1916 
1917 	WARN_ON_ONCE(!irqs_disabled());
1918 	if (!tr->allocated_snapshot) {
1919 		/* Only the nop tracer should hit this when disabling */
1920 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1921 		return;
1922 	}
1923 
1924 	arch_spin_lock(&tr->max_lock);
1925 
1926 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1927 
1928 	if (ret == -EBUSY) {
1929 		/*
1930 		 * We failed to swap the buffer due to a commit taking
1931 		 * place on this CPU. We fail to record, but we reset
1932 		 * the max trace buffer (no one writes directly to it)
1933 		 * and flag that it failed.
1934 		 * Another reason is resize is in progress.
1935 		 */
1936 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1937 			"Failed to swap buffers due to commit or resize in progress\n");
1938 	}
1939 
1940 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1941 
1942 	__update_max_tr(tr, tsk, cpu);
1943 	arch_spin_unlock(&tr->max_lock);
1944 }
1945 
1946 #endif /* CONFIG_TRACER_MAX_TRACE */
1947 
1948 static int wait_on_pipe(struct trace_iterator *iter, int full)
1949 {
1950 	int ret;
1951 
1952 	/* Iterators are static, they should be filled or empty */
1953 	if (trace_buffer_iter(iter, iter->cpu_file))
1954 		return 0;
1955 
1956 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1957 
1958 #ifdef CONFIG_TRACER_MAX_TRACE
1959 	/*
1960 	 * Make sure this is still the snapshot buffer, as if a snapshot were
1961 	 * to happen, this would now be the main buffer.
1962 	 */
1963 	if (iter->snapshot)
1964 		iter->array_buffer = &iter->tr->max_buffer;
1965 #endif
1966 	return ret;
1967 }
1968 
1969 #ifdef CONFIG_FTRACE_STARTUP_TEST
1970 static bool selftests_can_run;
1971 
1972 struct trace_selftests {
1973 	struct list_head		list;
1974 	struct tracer			*type;
1975 };
1976 
1977 static LIST_HEAD(postponed_selftests);
1978 
1979 static int save_selftest(struct tracer *type)
1980 {
1981 	struct trace_selftests *selftest;
1982 
1983 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1984 	if (!selftest)
1985 		return -ENOMEM;
1986 
1987 	selftest->type = type;
1988 	list_add(&selftest->list, &postponed_selftests);
1989 	return 0;
1990 }
1991 
1992 static int run_tracer_selftest(struct tracer *type)
1993 {
1994 	struct trace_array *tr = &global_trace;
1995 	struct tracer *saved_tracer = tr->current_trace;
1996 	int ret;
1997 
1998 	if (!type->selftest || tracing_selftest_disabled)
1999 		return 0;
2000 
2001 	/*
2002 	 * If a tracer registers early in boot up (before scheduling is
2003 	 * initialized and such), then do not run its selftests yet.
2004 	 * Instead, run it a little later in the boot process.
2005 	 */
2006 	if (!selftests_can_run)
2007 		return save_selftest(type);
2008 
2009 	if (!tracing_is_on()) {
2010 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2011 			type->name);
2012 		return 0;
2013 	}
2014 
2015 	/*
2016 	 * Run a selftest on this tracer.
2017 	 * Here we reset the trace buffer, and set the current
2018 	 * tracer to be this tracer. The tracer can then run some
2019 	 * internal tracing to verify that everything is in order.
2020 	 * If we fail, we do not register this tracer.
2021 	 */
2022 	tracing_reset_online_cpus(&tr->array_buffer);
2023 
2024 	tr->current_trace = type;
2025 
2026 #ifdef CONFIG_TRACER_MAX_TRACE
2027 	if (type->use_max_tr) {
2028 		/* If we expanded the buffers, make sure the max is expanded too */
2029 		if (ring_buffer_expanded)
2030 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2031 					   RING_BUFFER_ALL_CPUS);
2032 		tr->allocated_snapshot = true;
2033 	}
2034 #endif
2035 
2036 	/* the test is responsible for initializing and enabling */
2037 	pr_info("Testing tracer %s: ", type->name);
2038 	ret = type->selftest(type, tr);
2039 	/* the test is responsible for resetting too */
2040 	tr->current_trace = saved_tracer;
2041 	if (ret) {
2042 		printk(KERN_CONT "FAILED!\n");
2043 		/* Add the warning after printing 'FAILED' */
2044 		WARN_ON(1);
2045 		return -1;
2046 	}
2047 	/* Only reset on passing, to avoid touching corrupted buffers */
2048 	tracing_reset_online_cpus(&tr->array_buffer);
2049 
2050 #ifdef CONFIG_TRACER_MAX_TRACE
2051 	if (type->use_max_tr) {
2052 		tr->allocated_snapshot = false;
2053 
2054 		/* Shrink the max buffer again */
2055 		if (ring_buffer_expanded)
2056 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2057 					   RING_BUFFER_ALL_CPUS);
2058 	}
2059 #endif
2060 
2061 	printk(KERN_CONT "PASSED\n");
2062 	return 0;
2063 }
2064 
2065 static int do_run_tracer_selftest(struct tracer *type)
2066 {
2067 	int ret;
2068 
2069 	/*
2070 	 * Tests can take a long time, especially if they are run one after the
2071 	 * other, as does happen during bootup when all the tracers are
2072 	 * registered. This could cause the soft lockup watchdog to trigger.
2073 	 */
2074 	cond_resched();
2075 
2076 	tracing_selftest_running = true;
2077 	ret = run_tracer_selftest(type);
2078 	tracing_selftest_running = false;
2079 
2080 	return ret;
2081 }
2082 
2083 static __init int init_trace_selftests(void)
2084 {
2085 	struct trace_selftests *p, *n;
2086 	struct tracer *t, **last;
2087 	int ret;
2088 
2089 	selftests_can_run = true;
2090 
2091 	mutex_lock(&trace_types_lock);
2092 
2093 	if (list_empty(&postponed_selftests))
2094 		goto out;
2095 
2096 	pr_info("Running postponed tracer tests:\n");
2097 
2098 	tracing_selftest_running = true;
2099 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2100 		/* This loop can take minutes when sanitizers are enabled, so
2101 		 * lets make sure we allow RCU processing.
2102 		 */
2103 		cond_resched();
2104 		ret = run_tracer_selftest(p->type);
2105 		/* If the test fails, then warn and remove from available_tracers */
2106 		if (ret < 0) {
2107 			WARN(1, "tracer: %s failed selftest, disabling\n",
2108 			     p->type->name);
2109 			last = &trace_types;
2110 			for (t = trace_types; t; t = t->next) {
2111 				if (t == p->type) {
2112 					*last = t->next;
2113 					break;
2114 				}
2115 				last = &t->next;
2116 			}
2117 		}
2118 		list_del(&p->list);
2119 		kfree(p);
2120 	}
2121 	tracing_selftest_running = false;
2122 
2123  out:
2124 	mutex_unlock(&trace_types_lock);
2125 
2126 	return 0;
2127 }
2128 core_initcall(init_trace_selftests);
2129 #else
2130 static inline int run_tracer_selftest(struct tracer *type)
2131 {
2132 	return 0;
2133 }
2134 static inline int do_run_tracer_selftest(struct tracer *type)
2135 {
2136 	return 0;
2137 }
2138 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2139 
2140 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2141 
2142 static void __init apply_trace_boot_options(void);
2143 
2144 /**
2145  * register_tracer - register a tracer with the ftrace system.
2146  * @type: the plugin for the tracer
2147  *
2148  * Register a new plugin tracer.
2149  */
2150 int __init register_tracer(struct tracer *type)
2151 {
2152 	struct tracer *t;
2153 	int ret = 0;
2154 
2155 	if (!type->name) {
2156 		pr_info("Tracer must have a name\n");
2157 		return -1;
2158 	}
2159 
2160 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2161 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2162 		return -1;
2163 	}
2164 
2165 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2166 		pr_warn("Can not register tracer %s due to lockdown\n",
2167 			   type->name);
2168 		return -EPERM;
2169 	}
2170 
2171 	mutex_lock(&trace_types_lock);
2172 
2173 	for (t = trace_types; t; t = t->next) {
2174 		if (strcmp(type->name, t->name) == 0) {
2175 			/* already found */
2176 			pr_info("Tracer %s already registered\n",
2177 				type->name);
2178 			ret = -1;
2179 			goto out;
2180 		}
2181 	}
2182 
2183 	if (!type->set_flag)
2184 		type->set_flag = &dummy_set_flag;
2185 	if (!type->flags) {
2186 		/*allocate a dummy tracer_flags*/
2187 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2188 		if (!type->flags) {
2189 			ret = -ENOMEM;
2190 			goto out;
2191 		}
2192 		type->flags->val = 0;
2193 		type->flags->opts = dummy_tracer_opt;
2194 	} else
2195 		if (!type->flags->opts)
2196 			type->flags->opts = dummy_tracer_opt;
2197 
2198 	/* store the tracer for __set_tracer_option */
2199 	type->flags->trace = type;
2200 
2201 	ret = do_run_tracer_selftest(type);
2202 	if (ret < 0)
2203 		goto out;
2204 
2205 	type->next = trace_types;
2206 	trace_types = type;
2207 	add_tracer_options(&global_trace, type);
2208 
2209  out:
2210 	mutex_unlock(&trace_types_lock);
2211 
2212 	if (ret || !default_bootup_tracer)
2213 		goto out_unlock;
2214 
2215 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2216 		goto out_unlock;
2217 
2218 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2219 	/* Do we want this tracer to start on bootup? */
2220 	tracing_set_tracer(&global_trace, type->name);
2221 	default_bootup_tracer = NULL;
2222 
2223 	apply_trace_boot_options();
2224 
2225 	/* disable other selftests, since this will break it. */
2226 	disable_tracing_selftest("running a tracer");
2227 
2228  out_unlock:
2229 	return ret;
2230 }
2231 
2232 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2233 {
2234 	struct trace_buffer *buffer = buf->buffer;
2235 
2236 	if (!buffer)
2237 		return;
2238 
2239 	ring_buffer_record_disable(buffer);
2240 
2241 	/* Make sure all commits have finished */
2242 	synchronize_rcu();
2243 	ring_buffer_reset_cpu(buffer, cpu);
2244 
2245 	ring_buffer_record_enable(buffer);
2246 }
2247 
2248 void tracing_reset_online_cpus(struct array_buffer *buf)
2249 {
2250 	struct trace_buffer *buffer = buf->buffer;
2251 
2252 	if (!buffer)
2253 		return;
2254 
2255 	ring_buffer_record_disable(buffer);
2256 
2257 	/* Make sure all commits have finished */
2258 	synchronize_rcu();
2259 
2260 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2261 
2262 	ring_buffer_reset_online_cpus(buffer);
2263 
2264 	ring_buffer_record_enable(buffer);
2265 }
2266 
2267 /* Must have trace_types_lock held */
2268 void tracing_reset_all_online_cpus_unlocked(void)
2269 {
2270 	struct trace_array *tr;
2271 
2272 	lockdep_assert_held(&trace_types_lock);
2273 
2274 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2275 		if (!tr->clear_trace)
2276 			continue;
2277 		tr->clear_trace = false;
2278 		tracing_reset_online_cpus(&tr->array_buffer);
2279 #ifdef CONFIG_TRACER_MAX_TRACE
2280 		tracing_reset_online_cpus(&tr->max_buffer);
2281 #endif
2282 	}
2283 }
2284 
2285 void tracing_reset_all_online_cpus(void)
2286 {
2287 	mutex_lock(&trace_types_lock);
2288 	tracing_reset_all_online_cpus_unlocked();
2289 	mutex_unlock(&trace_types_lock);
2290 }
2291 
2292 /*
2293  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2294  * is the tgid last observed corresponding to pid=i.
2295  */
2296 static int *tgid_map;
2297 
2298 /* The maximum valid index into tgid_map. */
2299 static size_t tgid_map_max;
2300 
2301 #define SAVED_CMDLINES_DEFAULT 128
2302 #define NO_CMDLINE_MAP UINT_MAX
2303 /*
2304  * Preemption must be disabled before acquiring trace_cmdline_lock.
2305  * The various trace_arrays' max_lock must be acquired in a context
2306  * where interrupt is disabled.
2307  */
2308 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2309 struct saved_cmdlines_buffer {
2310 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2311 	unsigned *map_cmdline_to_pid;
2312 	unsigned cmdline_num;
2313 	int cmdline_idx;
2314 	char saved_cmdlines[];
2315 };
2316 static struct saved_cmdlines_buffer *savedcmd;
2317 
2318 static inline char *get_saved_cmdlines(int idx)
2319 {
2320 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2321 }
2322 
2323 static inline void set_cmdline(int idx, const char *cmdline)
2324 {
2325 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2326 }
2327 
2328 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2329 {
2330 	int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2331 
2332 	kfree(s->map_cmdline_to_pid);
2333 	free_pages((unsigned long)s, order);
2334 }
2335 
2336 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2337 {
2338 	struct saved_cmdlines_buffer *s;
2339 	struct page *page;
2340 	int orig_size, size;
2341 	int order;
2342 
2343 	/* Figure out how much is needed to hold the given number of cmdlines */
2344 	orig_size = sizeof(*s) + val * TASK_COMM_LEN;
2345 	order = get_order(orig_size);
2346 	size = 1 << (order + PAGE_SHIFT);
2347 	page = alloc_pages(GFP_KERNEL, order);
2348 	if (!page)
2349 		return NULL;
2350 
2351 	s = page_address(page);
2352 	memset(s, 0, sizeof(*s));
2353 
2354 	/* Round up to actual allocation */
2355 	val = (size - sizeof(*s)) / TASK_COMM_LEN;
2356 	s->cmdline_num = val;
2357 
2358 	s->map_cmdline_to_pid = kmalloc_array(val,
2359 					      sizeof(*s->map_cmdline_to_pid),
2360 					      GFP_KERNEL);
2361 	if (!s->map_cmdline_to_pid) {
2362 		free_saved_cmdlines_buffer(s);
2363 		return NULL;
2364 	}
2365 
2366 	s->cmdline_idx = 0;
2367 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2368 	       sizeof(s->map_pid_to_cmdline));
2369 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2370 	       val * sizeof(*s->map_cmdline_to_pid));
2371 
2372 	return s;
2373 }
2374 
2375 static int trace_create_savedcmd(void)
2376 {
2377 	savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2378 
2379 	return savedcmd ? 0 : -ENOMEM;
2380 }
2381 
2382 int is_tracing_stopped(void)
2383 {
2384 	return global_trace.stop_count;
2385 }
2386 
2387 static void tracing_start_tr(struct trace_array *tr)
2388 {
2389 	struct trace_buffer *buffer;
2390 	unsigned long flags;
2391 
2392 	if (tracing_disabled)
2393 		return;
2394 
2395 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2396 	if (--tr->stop_count) {
2397 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2398 			/* Someone screwed up their debugging */
2399 			tr->stop_count = 0;
2400 		}
2401 		goto out;
2402 	}
2403 
2404 	/* Prevent the buffers from switching */
2405 	arch_spin_lock(&tr->max_lock);
2406 
2407 	buffer = tr->array_buffer.buffer;
2408 	if (buffer)
2409 		ring_buffer_record_enable(buffer);
2410 
2411 #ifdef CONFIG_TRACER_MAX_TRACE
2412 	buffer = tr->max_buffer.buffer;
2413 	if (buffer)
2414 		ring_buffer_record_enable(buffer);
2415 #endif
2416 
2417 	arch_spin_unlock(&tr->max_lock);
2418 
2419  out:
2420 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2421 }
2422 
2423 /**
2424  * tracing_start - quick start of the tracer
2425  *
2426  * If tracing is enabled but was stopped by tracing_stop,
2427  * this will start the tracer back up.
2428  */
2429 void tracing_start(void)
2430 
2431 {
2432 	return tracing_start_tr(&global_trace);
2433 }
2434 
2435 static void tracing_stop_tr(struct trace_array *tr)
2436 {
2437 	struct trace_buffer *buffer;
2438 	unsigned long flags;
2439 
2440 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2441 	if (tr->stop_count++)
2442 		goto out;
2443 
2444 	/* Prevent the buffers from switching */
2445 	arch_spin_lock(&tr->max_lock);
2446 
2447 	buffer = tr->array_buffer.buffer;
2448 	if (buffer)
2449 		ring_buffer_record_disable(buffer);
2450 
2451 #ifdef CONFIG_TRACER_MAX_TRACE
2452 	buffer = tr->max_buffer.buffer;
2453 	if (buffer)
2454 		ring_buffer_record_disable(buffer);
2455 #endif
2456 
2457 	arch_spin_unlock(&tr->max_lock);
2458 
2459  out:
2460 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2461 }
2462 
2463 /**
2464  * tracing_stop - quick stop of the tracer
2465  *
2466  * Light weight way to stop tracing. Use in conjunction with
2467  * tracing_start.
2468  */
2469 void tracing_stop(void)
2470 {
2471 	return tracing_stop_tr(&global_trace);
2472 }
2473 
2474 static int trace_save_cmdline(struct task_struct *tsk)
2475 {
2476 	unsigned tpid, idx;
2477 
2478 	/* treat recording of idle task as a success */
2479 	if (!tsk->pid)
2480 		return 1;
2481 
2482 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2483 
2484 	/*
2485 	 * It's not the end of the world if we don't get
2486 	 * the lock, but we also don't want to spin
2487 	 * nor do we want to disable interrupts,
2488 	 * so if we miss here, then better luck next time.
2489 	 *
2490 	 * This is called within the scheduler and wake up, so interrupts
2491 	 * had better been disabled and run queue lock been held.
2492 	 */
2493 	lockdep_assert_preemption_disabled();
2494 	if (!arch_spin_trylock(&trace_cmdline_lock))
2495 		return 0;
2496 
2497 	idx = savedcmd->map_pid_to_cmdline[tpid];
2498 	if (idx == NO_CMDLINE_MAP) {
2499 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2500 
2501 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2502 		savedcmd->cmdline_idx = idx;
2503 	}
2504 
2505 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2506 	set_cmdline(idx, tsk->comm);
2507 
2508 	arch_spin_unlock(&trace_cmdline_lock);
2509 
2510 	return 1;
2511 }
2512 
2513 static void __trace_find_cmdline(int pid, char comm[])
2514 {
2515 	unsigned map;
2516 	int tpid;
2517 
2518 	if (!pid) {
2519 		strcpy(comm, "<idle>");
2520 		return;
2521 	}
2522 
2523 	if (WARN_ON_ONCE(pid < 0)) {
2524 		strcpy(comm, "<XXX>");
2525 		return;
2526 	}
2527 
2528 	tpid = pid & (PID_MAX_DEFAULT - 1);
2529 	map = savedcmd->map_pid_to_cmdline[tpid];
2530 	if (map != NO_CMDLINE_MAP) {
2531 		tpid = savedcmd->map_cmdline_to_pid[map];
2532 		if (tpid == pid) {
2533 			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2534 			return;
2535 		}
2536 	}
2537 	strcpy(comm, "<...>");
2538 }
2539 
2540 void trace_find_cmdline(int pid, char comm[])
2541 {
2542 	preempt_disable();
2543 	arch_spin_lock(&trace_cmdline_lock);
2544 
2545 	__trace_find_cmdline(pid, comm);
2546 
2547 	arch_spin_unlock(&trace_cmdline_lock);
2548 	preempt_enable();
2549 }
2550 
2551 static int *trace_find_tgid_ptr(int pid)
2552 {
2553 	/*
2554 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2555 	 * if we observe a non-NULL tgid_map then we also observe the correct
2556 	 * tgid_map_max.
2557 	 */
2558 	int *map = smp_load_acquire(&tgid_map);
2559 
2560 	if (unlikely(!map || pid > tgid_map_max))
2561 		return NULL;
2562 
2563 	return &map[pid];
2564 }
2565 
2566 int trace_find_tgid(int pid)
2567 {
2568 	int *ptr = trace_find_tgid_ptr(pid);
2569 
2570 	return ptr ? *ptr : 0;
2571 }
2572 
2573 static int trace_save_tgid(struct task_struct *tsk)
2574 {
2575 	int *ptr;
2576 
2577 	/* treat recording of idle task as a success */
2578 	if (!tsk->pid)
2579 		return 1;
2580 
2581 	ptr = trace_find_tgid_ptr(tsk->pid);
2582 	if (!ptr)
2583 		return 0;
2584 
2585 	*ptr = tsk->tgid;
2586 	return 1;
2587 }
2588 
2589 static bool tracing_record_taskinfo_skip(int flags)
2590 {
2591 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2592 		return true;
2593 	if (!__this_cpu_read(trace_taskinfo_save))
2594 		return true;
2595 	return false;
2596 }
2597 
2598 /**
2599  * tracing_record_taskinfo - record the task info of a task
2600  *
2601  * @task:  task to record
2602  * @flags: TRACE_RECORD_CMDLINE for recording comm
2603  *         TRACE_RECORD_TGID for recording tgid
2604  */
2605 void tracing_record_taskinfo(struct task_struct *task, int flags)
2606 {
2607 	bool done;
2608 
2609 	if (tracing_record_taskinfo_skip(flags))
2610 		return;
2611 
2612 	/*
2613 	 * Record as much task information as possible. If some fail, continue
2614 	 * to try to record the others.
2615 	 */
2616 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2617 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2618 
2619 	/* If recording any information failed, retry again soon. */
2620 	if (!done)
2621 		return;
2622 
2623 	__this_cpu_write(trace_taskinfo_save, false);
2624 }
2625 
2626 /**
2627  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2628  *
2629  * @prev: previous task during sched_switch
2630  * @next: next task during sched_switch
2631  * @flags: TRACE_RECORD_CMDLINE for recording comm
2632  *         TRACE_RECORD_TGID for recording tgid
2633  */
2634 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2635 					  struct task_struct *next, int flags)
2636 {
2637 	bool done;
2638 
2639 	if (tracing_record_taskinfo_skip(flags))
2640 		return;
2641 
2642 	/*
2643 	 * Record as much task information as possible. If some fail, continue
2644 	 * to try to record the others.
2645 	 */
2646 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2647 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2648 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2649 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2650 
2651 	/* If recording any information failed, retry again soon. */
2652 	if (!done)
2653 		return;
2654 
2655 	__this_cpu_write(trace_taskinfo_save, false);
2656 }
2657 
2658 /* Helpers to record a specific task information */
2659 void tracing_record_cmdline(struct task_struct *task)
2660 {
2661 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2662 }
2663 
2664 void tracing_record_tgid(struct task_struct *task)
2665 {
2666 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2667 }
2668 
2669 /*
2670  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2671  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2672  * simplifies those functions and keeps them in sync.
2673  */
2674 enum print_line_t trace_handle_return(struct trace_seq *s)
2675 {
2676 	return trace_seq_has_overflowed(s) ?
2677 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2678 }
2679 EXPORT_SYMBOL_GPL(trace_handle_return);
2680 
2681 static unsigned short migration_disable_value(void)
2682 {
2683 #if defined(CONFIG_SMP)
2684 	return current->migration_disabled;
2685 #else
2686 	return 0;
2687 #endif
2688 }
2689 
2690 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2691 {
2692 	unsigned int trace_flags = irqs_status;
2693 	unsigned int pc;
2694 
2695 	pc = preempt_count();
2696 
2697 	if (pc & NMI_MASK)
2698 		trace_flags |= TRACE_FLAG_NMI;
2699 	if (pc & HARDIRQ_MASK)
2700 		trace_flags |= TRACE_FLAG_HARDIRQ;
2701 	if (in_serving_softirq())
2702 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2703 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2704 		trace_flags |= TRACE_FLAG_BH_OFF;
2705 
2706 	if (tif_need_resched())
2707 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2708 	if (test_preempt_need_resched())
2709 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2710 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2711 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2712 }
2713 
2714 struct ring_buffer_event *
2715 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2716 			  int type,
2717 			  unsigned long len,
2718 			  unsigned int trace_ctx)
2719 {
2720 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2721 }
2722 
2723 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2724 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2725 static int trace_buffered_event_ref;
2726 
2727 /**
2728  * trace_buffered_event_enable - enable buffering events
2729  *
2730  * When events are being filtered, it is quicker to use a temporary
2731  * buffer to write the event data into if there's a likely chance
2732  * that it will not be committed. The discard of the ring buffer
2733  * is not as fast as committing, and is much slower than copying
2734  * a commit.
2735  *
2736  * When an event is to be filtered, allocate per cpu buffers to
2737  * write the event data into, and if the event is filtered and discarded
2738  * it is simply dropped, otherwise, the entire data is to be committed
2739  * in one shot.
2740  */
2741 void trace_buffered_event_enable(void)
2742 {
2743 	struct ring_buffer_event *event;
2744 	struct page *page;
2745 	int cpu;
2746 
2747 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2748 
2749 	if (trace_buffered_event_ref++)
2750 		return;
2751 
2752 	for_each_tracing_cpu(cpu) {
2753 		page = alloc_pages_node(cpu_to_node(cpu),
2754 					GFP_KERNEL | __GFP_NORETRY, 0);
2755 		/* This is just an optimization and can handle failures */
2756 		if (!page) {
2757 			pr_err("Failed to allocate event buffer\n");
2758 			break;
2759 		}
2760 
2761 		event = page_address(page);
2762 		memset(event, 0, sizeof(*event));
2763 
2764 		per_cpu(trace_buffered_event, cpu) = event;
2765 
2766 		preempt_disable();
2767 		if (cpu == smp_processor_id() &&
2768 		    __this_cpu_read(trace_buffered_event) !=
2769 		    per_cpu(trace_buffered_event, cpu))
2770 			WARN_ON_ONCE(1);
2771 		preempt_enable();
2772 	}
2773 }
2774 
2775 static void enable_trace_buffered_event(void *data)
2776 {
2777 	/* Probably not needed, but do it anyway */
2778 	smp_rmb();
2779 	this_cpu_dec(trace_buffered_event_cnt);
2780 }
2781 
2782 static void disable_trace_buffered_event(void *data)
2783 {
2784 	this_cpu_inc(trace_buffered_event_cnt);
2785 }
2786 
2787 /**
2788  * trace_buffered_event_disable - disable buffering events
2789  *
2790  * When a filter is removed, it is faster to not use the buffered
2791  * events, and to commit directly into the ring buffer. Free up
2792  * the temp buffers when there are no more users. This requires
2793  * special synchronization with current events.
2794  */
2795 void trace_buffered_event_disable(void)
2796 {
2797 	int cpu;
2798 
2799 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2800 
2801 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2802 		return;
2803 
2804 	if (--trace_buffered_event_ref)
2805 		return;
2806 
2807 	/* For each CPU, set the buffer as used. */
2808 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2809 			 NULL, true);
2810 
2811 	/* Wait for all current users to finish */
2812 	synchronize_rcu();
2813 
2814 	for_each_tracing_cpu(cpu) {
2815 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2816 		per_cpu(trace_buffered_event, cpu) = NULL;
2817 	}
2818 
2819 	/*
2820 	 * Wait for all CPUs that potentially started checking if they can use
2821 	 * their event buffer only after the previous synchronize_rcu() call and
2822 	 * they still read a valid pointer from trace_buffered_event. It must be
2823 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2824 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2825 	 */
2826 	synchronize_rcu();
2827 
2828 	/* For each CPU, relinquish the buffer */
2829 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2830 			 true);
2831 }
2832 
2833 static struct trace_buffer *temp_buffer;
2834 
2835 struct ring_buffer_event *
2836 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2837 			  struct trace_event_file *trace_file,
2838 			  int type, unsigned long len,
2839 			  unsigned int trace_ctx)
2840 {
2841 	struct ring_buffer_event *entry;
2842 	struct trace_array *tr = trace_file->tr;
2843 	int val;
2844 
2845 	*current_rb = tr->array_buffer.buffer;
2846 
2847 	if (!tr->no_filter_buffering_ref &&
2848 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2849 		preempt_disable_notrace();
2850 		/*
2851 		 * Filtering is on, so try to use the per cpu buffer first.
2852 		 * This buffer will simulate a ring_buffer_event,
2853 		 * where the type_len is zero and the array[0] will
2854 		 * hold the full length.
2855 		 * (see include/linux/ring-buffer.h for details on
2856 		 *  how the ring_buffer_event is structured).
2857 		 *
2858 		 * Using a temp buffer during filtering and copying it
2859 		 * on a matched filter is quicker than writing directly
2860 		 * into the ring buffer and then discarding it when
2861 		 * it doesn't match. That is because the discard
2862 		 * requires several atomic operations to get right.
2863 		 * Copying on match and doing nothing on a failed match
2864 		 * is still quicker than no copy on match, but having
2865 		 * to discard out of the ring buffer on a failed match.
2866 		 */
2867 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2868 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2869 
2870 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2871 
2872 			/*
2873 			 * Preemption is disabled, but interrupts and NMIs
2874 			 * can still come in now. If that happens after
2875 			 * the above increment, then it will have to go
2876 			 * back to the old method of allocating the event
2877 			 * on the ring buffer, and if the filter fails, it
2878 			 * will have to call ring_buffer_discard_commit()
2879 			 * to remove it.
2880 			 *
2881 			 * Need to also check the unlikely case that the
2882 			 * length is bigger than the temp buffer size.
2883 			 * If that happens, then the reserve is pretty much
2884 			 * guaranteed to fail, as the ring buffer currently
2885 			 * only allows events less than a page. But that may
2886 			 * change in the future, so let the ring buffer reserve
2887 			 * handle the failure in that case.
2888 			 */
2889 			if (val == 1 && likely(len <= max_len)) {
2890 				trace_event_setup(entry, type, trace_ctx);
2891 				entry->array[0] = len;
2892 				/* Return with preemption disabled */
2893 				return entry;
2894 			}
2895 			this_cpu_dec(trace_buffered_event_cnt);
2896 		}
2897 		/* __trace_buffer_lock_reserve() disables preemption */
2898 		preempt_enable_notrace();
2899 	}
2900 
2901 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2902 					    trace_ctx);
2903 	/*
2904 	 * If tracing is off, but we have triggers enabled
2905 	 * we still need to look at the event data. Use the temp_buffer
2906 	 * to store the trace event for the trigger to use. It's recursive
2907 	 * safe and will not be recorded anywhere.
2908 	 */
2909 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2910 		*current_rb = temp_buffer;
2911 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2912 						    trace_ctx);
2913 	}
2914 	return entry;
2915 }
2916 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2917 
2918 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2919 static DEFINE_MUTEX(tracepoint_printk_mutex);
2920 
2921 static void output_printk(struct trace_event_buffer *fbuffer)
2922 {
2923 	struct trace_event_call *event_call;
2924 	struct trace_event_file *file;
2925 	struct trace_event *event;
2926 	unsigned long flags;
2927 	struct trace_iterator *iter = tracepoint_print_iter;
2928 
2929 	/* We should never get here if iter is NULL */
2930 	if (WARN_ON_ONCE(!iter))
2931 		return;
2932 
2933 	event_call = fbuffer->trace_file->event_call;
2934 	if (!event_call || !event_call->event.funcs ||
2935 	    !event_call->event.funcs->trace)
2936 		return;
2937 
2938 	file = fbuffer->trace_file;
2939 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2940 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2941 	     !filter_match_preds(file->filter, fbuffer->entry)))
2942 		return;
2943 
2944 	event = &fbuffer->trace_file->event_call->event;
2945 
2946 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2947 	trace_seq_init(&iter->seq);
2948 	iter->ent = fbuffer->entry;
2949 	event_call->event.funcs->trace(iter, 0, event);
2950 	trace_seq_putc(&iter->seq, 0);
2951 	printk("%s", iter->seq.buffer);
2952 
2953 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2954 }
2955 
2956 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2957 			     void *buffer, size_t *lenp,
2958 			     loff_t *ppos)
2959 {
2960 	int save_tracepoint_printk;
2961 	int ret;
2962 
2963 	mutex_lock(&tracepoint_printk_mutex);
2964 	save_tracepoint_printk = tracepoint_printk;
2965 
2966 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2967 
2968 	/*
2969 	 * This will force exiting early, as tracepoint_printk
2970 	 * is always zero when tracepoint_printk_iter is not allocated
2971 	 */
2972 	if (!tracepoint_print_iter)
2973 		tracepoint_printk = 0;
2974 
2975 	if (save_tracepoint_printk == tracepoint_printk)
2976 		goto out;
2977 
2978 	if (tracepoint_printk)
2979 		static_key_enable(&tracepoint_printk_key.key);
2980 	else
2981 		static_key_disable(&tracepoint_printk_key.key);
2982 
2983  out:
2984 	mutex_unlock(&tracepoint_printk_mutex);
2985 
2986 	return ret;
2987 }
2988 
2989 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2990 {
2991 	enum event_trigger_type tt = ETT_NONE;
2992 	struct trace_event_file *file = fbuffer->trace_file;
2993 
2994 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2995 			fbuffer->entry, &tt))
2996 		goto discard;
2997 
2998 	if (static_key_false(&tracepoint_printk_key.key))
2999 		output_printk(fbuffer);
3000 
3001 	if (static_branch_unlikely(&trace_event_exports_enabled))
3002 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3003 
3004 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3005 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3006 
3007 discard:
3008 	if (tt)
3009 		event_triggers_post_call(file, tt);
3010 
3011 }
3012 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3013 
3014 /*
3015  * Skip 3:
3016  *
3017  *   trace_buffer_unlock_commit_regs()
3018  *   trace_event_buffer_commit()
3019  *   trace_event_raw_event_xxx()
3020  */
3021 # define STACK_SKIP 3
3022 
3023 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3024 				     struct trace_buffer *buffer,
3025 				     struct ring_buffer_event *event,
3026 				     unsigned int trace_ctx,
3027 				     struct pt_regs *regs)
3028 {
3029 	__buffer_unlock_commit(buffer, event);
3030 
3031 	/*
3032 	 * If regs is not set, then skip the necessary functions.
3033 	 * Note, we can still get here via blktrace, wakeup tracer
3034 	 * and mmiotrace, but that's ok if they lose a function or
3035 	 * two. They are not that meaningful.
3036 	 */
3037 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3038 	ftrace_trace_userstack(tr, buffer, trace_ctx);
3039 }
3040 
3041 /*
3042  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3043  */
3044 void
3045 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3046 				   struct ring_buffer_event *event)
3047 {
3048 	__buffer_unlock_commit(buffer, event);
3049 }
3050 
3051 void
3052 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3053 	       parent_ip, unsigned int trace_ctx)
3054 {
3055 	struct trace_event_call *call = &event_function;
3056 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3057 	struct ring_buffer_event *event;
3058 	struct ftrace_entry *entry;
3059 
3060 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3061 					    trace_ctx);
3062 	if (!event)
3063 		return;
3064 	entry	= ring_buffer_event_data(event);
3065 	entry->ip			= ip;
3066 	entry->parent_ip		= parent_ip;
3067 
3068 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3069 		if (static_branch_unlikely(&trace_function_exports_enabled))
3070 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3071 		__buffer_unlock_commit(buffer, event);
3072 	}
3073 }
3074 
3075 #ifdef CONFIG_STACKTRACE
3076 
3077 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3078 #define FTRACE_KSTACK_NESTING	4
3079 
3080 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
3081 
3082 struct ftrace_stack {
3083 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3084 };
3085 
3086 
3087 struct ftrace_stacks {
3088 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3089 };
3090 
3091 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3092 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3093 
3094 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3095 				 unsigned int trace_ctx,
3096 				 int skip, struct pt_regs *regs)
3097 {
3098 	struct trace_event_call *call = &event_kernel_stack;
3099 	struct ring_buffer_event *event;
3100 	unsigned int size, nr_entries;
3101 	struct ftrace_stack *fstack;
3102 	struct stack_entry *entry;
3103 	int stackidx;
3104 
3105 	/*
3106 	 * Add one, for this function and the call to save_stack_trace()
3107 	 * If regs is set, then these functions will not be in the way.
3108 	 */
3109 #ifndef CONFIG_UNWINDER_ORC
3110 	if (!regs)
3111 		skip++;
3112 #endif
3113 
3114 	preempt_disable_notrace();
3115 
3116 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3117 
3118 	/* This should never happen. If it does, yell once and skip */
3119 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3120 		goto out;
3121 
3122 	/*
3123 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3124 	 * interrupt will either see the value pre increment or post
3125 	 * increment. If the interrupt happens pre increment it will have
3126 	 * restored the counter when it returns.  We just need a barrier to
3127 	 * keep gcc from moving things around.
3128 	 */
3129 	barrier();
3130 
3131 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3132 	size = ARRAY_SIZE(fstack->calls);
3133 
3134 	if (regs) {
3135 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3136 						   size, skip);
3137 	} else {
3138 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3139 	}
3140 
3141 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3142 				    struct_size(entry, caller, nr_entries),
3143 				    trace_ctx);
3144 	if (!event)
3145 		goto out;
3146 	entry = ring_buffer_event_data(event);
3147 
3148 	entry->size = nr_entries;
3149 	memcpy(&entry->caller, fstack->calls,
3150 	       flex_array_size(entry, caller, nr_entries));
3151 
3152 	if (!call_filter_check_discard(call, entry, buffer, event))
3153 		__buffer_unlock_commit(buffer, event);
3154 
3155  out:
3156 	/* Again, don't let gcc optimize things here */
3157 	barrier();
3158 	__this_cpu_dec(ftrace_stack_reserve);
3159 	preempt_enable_notrace();
3160 
3161 }
3162 
3163 static inline void ftrace_trace_stack(struct trace_array *tr,
3164 				      struct trace_buffer *buffer,
3165 				      unsigned int trace_ctx,
3166 				      int skip, struct pt_regs *regs)
3167 {
3168 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3169 		return;
3170 
3171 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3172 }
3173 
3174 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3175 		   int skip)
3176 {
3177 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3178 
3179 	if (rcu_is_watching()) {
3180 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3181 		return;
3182 	}
3183 
3184 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3185 		return;
3186 
3187 	/*
3188 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3189 	 * but if the above rcu_is_watching() failed, then the NMI
3190 	 * triggered someplace critical, and ct_irq_enter() should
3191 	 * not be called from NMI.
3192 	 */
3193 	if (unlikely(in_nmi()))
3194 		return;
3195 
3196 	ct_irq_enter_irqson();
3197 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3198 	ct_irq_exit_irqson();
3199 }
3200 
3201 /**
3202  * trace_dump_stack - record a stack back trace in the trace buffer
3203  * @skip: Number of functions to skip (helper handlers)
3204  */
3205 void trace_dump_stack(int skip)
3206 {
3207 	if (tracing_disabled || tracing_selftest_running)
3208 		return;
3209 
3210 #ifndef CONFIG_UNWINDER_ORC
3211 	/* Skip 1 to skip this function. */
3212 	skip++;
3213 #endif
3214 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3215 			     tracing_gen_ctx(), skip, NULL);
3216 }
3217 EXPORT_SYMBOL_GPL(trace_dump_stack);
3218 
3219 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3220 static DEFINE_PER_CPU(int, user_stack_count);
3221 
3222 static void
3223 ftrace_trace_userstack(struct trace_array *tr,
3224 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3225 {
3226 	struct trace_event_call *call = &event_user_stack;
3227 	struct ring_buffer_event *event;
3228 	struct userstack_entry *entry;
3229 
3230 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3231 		return;
3232 
3233 	/*
3234 	 * NMIs can not handle page faults, even with fix ups.
3235 	 * The save user stack can (and often does) fault.
3236 	 */
3237 	if (unlikely(in_nmi()))
3238 		return;
3239 
3240 	/*
3241 	 * prevent recursion, since the user stack tracing may
3242 	 * trigger other kernel events.
3243 	 */
3244 	preempt_disable();
3245 	if (__this_cpu_read(user_stack_count))
3246 		goto out;
3247 
3248 	__this_cpu_inc(user_stack_count);
3249 
3250 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3251 					    sizeof(*entry), trace_ctx);
3252 	if (!event)
3253 		goto out_drop_count;
3254 	entry	= ring_buffer_event_data(event);
3255 
3256 	entry->tgid		= current->tgid;
3257 	memset(&entry->caller, 0, sizeof(entry->caller));
3258 
3259 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3260 	if (!call_filter_check_discard(call, entry, buffer, event))
3261 		__buffer_unlock_commit(buffer, event);
3262 
3263  out_drop_count:
3264 	__this_cpu_dec(user_stack_count);
3265  out:
3266 	preempt_enable();
3267 }
3268 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3269 static void ftrace_trace_userstack(struct trace_array *tr,
3270 				   struct trace_buffer *buffer,
3271 				   unsigned int trace_ctx)
3272 {
3273 }
3274 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3275 
3276 #endif /* CONFIG_STACKTRACE */
3277 
3278 static inline void
3279 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3280 			  unsigned long long delta)
3281 {
3282 	entry->bottom_delta_ts = delta & U32_MAX;
3283 	entry->top_delta_ts = (delta >> 32);
3284 }
3285 
3286 void trace_last_func_repeats(struct trace_array *tr,
3287 			     struct trace_func_repeats *last_info,
3288 			     unsigned int trace_ctx)
3289 {
3290 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3291 	struct func_repeats_entry *entry;
3292 	struct ring_buffer_event *event;
3293 	u64 delta;
3294 
3295 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3296 					    sizeof(*entry), trace_ctx);
3297 	if (!event)
3298 		return;
3299 
3300 	delta = ring_buffer_event_time_stamp(buffer, event) -
3301 		last_info->ts_last_call;
3302 
3303 	entry = ring_buffer_event_data(event);
3304 	entry->ip = last_info->ip;
3305 	entry->parent_ip = last_info->parent_ip;
3306 	entry->count = last_info->count;
3307 	func_repeats_set_delta_ts(entry, delta);
3308 
3309 	__buffer_unlock_commit(buffer, event);
3310 }
3311 
3312 /* created for use with alloc_percpu */
3313 struct trace_buffer_struct {
3314 	int nesting;
3315 	char buffer[4][TRACE_BUF_SIZE];
3316 };
3317 
3318 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3319 
3320 /*
3321  * This allows for lockless recording.  If we're nested too deeply, then
3322  * this returns NULL.
3323  */
3324 static char *get_trace_buf(void)
3325 {
3326 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3327 
3328 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3329 		return NULL;
3330 
3331 	buffer->nesting++;
3332 
3333 	/* Interrupts must see nesting incremented before we use the buffer */
3334 	barrier();
3335 	return &buffer->buffer[buffer->nesting - 1][0];
3336 }
3337 
3338 static void put_trace_buf(void)
3339 {
3340 	/* Don't let the decrement of nesting leak before this */
3341 	barrier();
3342 	this_cpu_dec(trace_percpu_buffer->nesting);
3343 }
3344 
3345 static int alloc_percpu_trace_buffer(void)
3346 {
3347 	struct trace_buffer_struct __percpu *buffers;
3348 
3349 	if (trace_percpu_buffer)
3350 		return 0;
3351 
3352 	buffers = alloc_percpu(struct trace_buffer_struct);
3353 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3354 		return -ENOMEM;
3355 
3356 	trace_percpu_buffer = buffers;
3357 	return 0;
3358 }
3359 
3360 static int buffers_allocated;
3361 
3362 void trace_printk_init_buffers(void)
3363 {
3364 	if (buffers_allocated)
3365 		return;
3366 
3367 	if (alloc_percpu_trace_buffer())
3368 		return;
3369 
3370 	/* trace_printk() is for debug use only. Don't use it in production. */
3371 
3372 	pr_warn("\n");
3373 	pr_warn("**********************************************************\n");
3374 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3375 	pr_warn("**                                                      **\n");
3376 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3377 	pr_warn("**                                                      **\n");
3378 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3379 	pr_warn("** unsafe for production use.                           **\n");
3380 	pr_warn("**                                                      **\n");
3381 	pr_warn("** If you see this message and you are not debugging    **\n");
3382 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3383 	pr_warn("**                                                      **\n");
3384 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3385 	pr_warn("**********************************************************\n");
3386 
3387 	/* Expand the buffers to set size */
3388 	tracing_update_buffers();
3389 
3390 	buffers_allocated = 1;
3391 
3392 	/*
3393 	 * trace_printk_init_buffers() can be called by modules.
3394 	 * If that happens, then we need to start cmdline recording
3395 	 * directly here. If the global_trace.buffer is already
3396 	 * allocated here, then this was called by module code.
3397 	 */
3398 	if (global_trace.array_buffer.buffer)
3399 		tracing_start_cmdline_record();
3400 }
3401 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3402 
3403 void trace_printk_start_comm(void)
3404 {
3405 	/* Start tracing comms if trace printk is set */
3406 	if (!buffers_allocated)
3407 		return;
3408 	tracing_start_cmdline_record();
3409 }
3410 
3411 static void trace_printk_start_stop_comm(int enabled)
3412 {
3413 	if (!buffers_allocated)
3414 		return;
3415 
3416 	if (enabled)
3417 		tracing_start_cmdline_record();
3418 	else
3419 		tracing_stop_cmdline_record();
3420 }
3421 
3422 /**
3423  * trace_vbprintk - write binary msg to tracing buffer
3424  * @ip:    The address of the caller
3425  * @fmt:   The string format to write to the buffer
3426  * @args:  Arguments for @fmt
3427  */
3428 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3429 {
3430 	struct trace_event_call *call = &event_bprint;
3431 	struct ring_buffer_event *event;
3432 	struct trace_buffer *buffer;
3433 	struct trace_array *tr = &global_trace;
3434 	struct bprint_entry *entry;
3435 	unsigned int trace_ctx;
3436 	char *tbuffer;
3437 	int len = 0, size;
3438 
3439 	if (unlikely(tracing_selftest_running || tracing_disabled))
3440 		return 0;
3441 
3442 	/* Don't pollute graph traces with trace_vprintk internals */
3443 	pause_graph_tracing();
3444 
3445 	trace_ctx = tracing_gen_ctx();
3446 	preempt_disable_notrace();
3447 
3448 	tbuffer = get_trace_buf();
3449 	if (!tbuffer) {
3450 		len = 0;
3451 		goto out_nobuffer;
3452 	}
3453 
3454 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3455 
3456 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3457 		goto out_put;
3458 
3459 	size = sizeof(*entry) + sizeof(u32) * len;
3460 	buffer = tr->array_buffer.buffer;
3461 	ring_buffer_nest_start(buffer);
3462 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3463 					    trace_ctx);
3464 	if (!event)
3465 		goto out;
3466 	entry = ring_buffer_event_data(event);
3467 	entry->ip			= ip;
3468 	entry->fmt			= fmt;
3469 
3470 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3471 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3472 		__buffer_unlock_commit(buffer, event);
3473 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3474 	}
3475 
3476 out:
3477 	ring_buffer_nest_end(buffer);
3478 out_put:
3479 	put_trace_buf();
3480 
3481 out_nobuffer:
3482 	preempt_enable_notrace();
3483 	unpause_graph_tracing();
3484 
3485 	return len;
3486 }
3487 EXPORT_SYMBOL_GPL(trace_vbprintk);
3488 
3489 __printf(3, 0)
3490 static int
3491 __trace_array_vprintk(struct trace_buffer *buffer,
3492 		      unsigned long ip, const char *fmt, va_list args)
3493 {
3494 	struct trace_event_call *call = &event_print;
3495 	struct ring_buffer_event *event;
3496 	int len = 0, size;
3497 	struct print_entry *entry;
3498 	unsigned int trace_ctx;
3499 	char *tbuffer;
3500 
3501 	if (tracing_disabled)
3502 		return 0;
3503 
3504 	/* Don't pollute graph traces with trace_vprintk internals */
3505 	pause_graph_tracing();
3506 
3507 	trace_ctx = tracing_gen_ctx();
3508 	preempt_disable_notrace();
3509 
3510 
3511 	tbuffer = get_trace_buf();
3512 	if (!tbuffer) {
3513 		len = 0;
3514 		goto out_nobuffer;
3515 	}
3516 
3517 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3518 
3519 	size = sizeof(*entry) + len + 1;
3520 	ring_buffer_nest_start(buffer);
3521 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3522 					    trace_ctx);
3523 	if (!event)
3524 		goto out;
3525 	entry = ring_buffer_event_data(event);
3526 	entry->ip = ip;
3527 
3528 	memcpy(&entry->buf, tbuffer, len + 1);
3529 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3530 		__buffer_unlock_commit(buffer, event);
3531 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3532 	}
3533 
3534 out:
3535 	ring_buffer_nest_end(buffer);
3536 	put_trace_buf();
3537 
3538 out_nobuffer:
3539 	preempt_enable_notrace();
3540 	unpause_graph_tracing();
3541 
3542 	return len;
3543 }
3544 
3545 __printf(3, 0)
3546 int trace_array_vprintk(struct trace_array *tr,
3547 			unsigned long ip, const char *fmt, va_list args)
3548 {
3549 	if (tracing_selftest_running && tr == &global_trace)
3550 		return 0;
3551 
3552 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3553 }
3554 
3555 /**
3556  * trace_array_printk - Print a message to a specific instance
3557  * @tr: The instance trace_array descriptor
3558  * @ip: The instruction pointer that this is called from.
3559  * @fmt: The format to print (printf format)
3560  *
3561  * If a subsystem sets up its own instance, they have the right to
3562  * printk strings into their tracing instance buffer using this
3563  * function. Note, this function will not write into the top level
3564  * buffer (use trace_printk() for that), as writing into the top level
3565  * buffer should only have events that can be individually disabled.
3566  * trace_printk() is only used for debugging a kernel, and should not
3567  * be ever incorporated in normal use.
3568  *
3569  * trace_array_printk() can be used, as it will not add noise to the
3570  * top level tracing buffer.
3571  *
3572  * Note, trace_array_init_printk() must be called on @tr before this
3573  * can be used.
3574  */
3575 __printf(3, 0)
3576 int trace_array_printk(struct trace_array *tr,
3577 		       unsigned long ip, const char *fmt, ...)
3578 {
3579 	int ret;
3580 	va_list ap;
3581 
3582 	if (!tr)
3583 		return -ENOENT;
3584 
3585 	/* This is only allowed for created instances */
3586 	if (tr == &global_trace)
3587 		return 0;
3588 
3589 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3590 		return 0;
3591 
3592 	va_start(ap, fmt);
3593 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3594 	va_end(ap);
3595 	return ret;
3596 }
3597 EXPORT_SYMBOL_GPL(trace_array_printk);
3598 
3599 /**
3600  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3601  * @tr: The trace array to initialize the buffers for
3602  *
3603  * As trace_array_printk() only writes into instances, they are OK to
3604  * have in the kernel (unlike trace_printk()). This needs to be called
3605  * before trace_array_printk() can be used on a trace_array.
3606  */
3607 int trace_array_init_printk(struct trace_array *tr)
3608 {
3609 	if (!tr)
3610 		return -ENOENT;
3611 
3612 	/* This is only allowed for created instances */
3613 	if (tr == &global_trace)
3614 		return -EINVAL;
3615 
3616 	return alloc_percpu_trace_buffer();
3617 }
3618 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3619 
3620 __printf(3, 4)
3621 int trace_array_printk_buf(struct trace_buffer *buffer,
3622 			   unsigned long ip, const char *fmt, ...)
3623 {
3624 	int ret;
3625 	va_list ap;
3626 
3627 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3628 		return 0;
3629 
3630 	va_start(ap, fmt);
3631 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3632 	va_end(ap);
3633 	return ret;
3634 }
3635 
3636 __printf(2, 0)
3637 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3638 {
3639 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3640 }
3641 EXPORT_SYMBOL_GPL(trace_vprintk);
3642 
3643 static void trace_iterator_increment(struct trace_iterator *iter)
3644 {
3645 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3646 
3647 	iter->idx++;
3648 	if (buf_iter)
3649 		ring_buffer_iter_advance(buf_iter);
3650 }
3651 
3652 static struct trace_entry *
3653 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3654 		unsigned long *lost_events)
3655 {
3656 	struct ring_buffer_event *event;
3657 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3658 
3659 	if (buf_iter) {
3660 		event = ring_buffer_iter_peek(buf_iter, ts);
3661 		if (lost_events)
3662 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3663 				(unsigned long)-1 : 0;
3664 	} else {
3665 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3666 					 lost_events);
3667 	}
3668 
3669 	if (event) {
3670 		iter->ent_size = ring_buffer_event_length(event);
3671 		return ring_buffer_event_data(event);
3672 	}
3673 	iter->ent_size = 0;
3674 	return NULL;
3675 }
3676 
3677 static struct trace_entry *
3678 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3679 		  unsigned long *missing_events, u64 *ent_ts)
3680 {
3681 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3682 	struct trace_entry *ent, *next = NULL;
3683 	unsigned long lost_events = 0, next_lost = 0;
3684 	int cpu_file = iter->cpu_file;
3685 	u64 next_ts = 0, ts;
3686 	int next_cpu = -1;
3687 	int next_size = 0;
3688 	int cpu;
3689 
3690 	/*
3691 	 * If we are in a per_cpu trace file, don't bother by iterating over
3692 	 * all cpu and peek directly.
3693 	 */
3694 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3695 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3696 			return NULL;
3697 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3698 		if (ent_cpu)
3699 			*ent_cpu = cpu_file;
3700 
3701 		return ent;
3702 	}
3703 
3704 	for_each_tracing_cpu(cpu) {
3705 
3706 		if (ring_buffer_empty_cpu(buffer, cpu))
3707 			continue;
3708 
3709 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3710 
3711 		/*
3712 		 * Pick the entry with the smallest timestamp:
3713 		 */
3714 		if (ent && (!next || ts < next_ts)) {
3715 			next = ent;
3716 			next_cpu = cpu;
3717 			next_ts = ts;
3718 			next_lost = lost_events;
3719 			next_size = iter->ent_size;
3720 		}
3721 	}
3722 
3723 	iter->ent_size = next_size;
3724 
3725 	if (ent_cpu)
3726 		*ent_cpu = next_cpu;
3727 
3728 	if (ent_ts)
3729 		*ent_ts = next_ts;
3730 
3731 	if (missing_events)
3732 		*missing_events = next_lost;
3733 
3734 	return next;
3735 }
3736 
3737 #define STATIC_FMT_BUF_SIZE	128
3738 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3739 
3740 char *trace_iter_expand_format(struct trace_iterator *iter)
3741 {
3742 	char *tmp;
3743 
3744 	/*
3745 	 * iter->tr is NULL when used with tp_printk, which makes
3746 	 * this get called where it is not safe to call krealloc().
3747 	 */
3748 	if (!iter->tr || iter->fmt == static_fmt_buf)
3749 		return NULL;
3750 
3751 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3752 		       GFP_KERNEL);
3753 	if (tmp) {
3754 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3755 		iter->fmt = tmp;
3756 	}
3757 
3758 	return tmp;
3759 }
3760 
3761 /* Returns true if the string is safe to dereference from an event */
3762 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3763 			   bool star, int len)
3764 {
3765 	unsigned long addr = (unsigned long)str;
3766 	struct trace_event *trace_event;
3767 	struct trace_event_call *event;
3768 
3769 	/* Ignore strings with no length */
3770 	if (star && !len)
3771 		return true;
3772 
3773 	/* OK if part of the event data */
3774 	if ((addr >= (unsigned long)iter->ent) &&
3775 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3776 		return true;
3777 
3778 	/* OK if part of the temp seq buffer */
3779 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3780 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3781 		return true;
3782 
3783 	/* Core rodata can not be freed */
3784 	if (is_kernel_rodata(addr))
3785 		return true;
3786 
3787 	if (trace_is_tracepoint_string(str))
3788 		return true;
3789 
3790 	/*
3791 	 * Now this could be a module event, referencing core module
3792 	 * data, which is OK.
3793 	 */
3794 	if (!iter->ent)
3795 		return false;
3796 
3797 	trace_event = ftrace_find_event(iter->ent->type);
3798 	if (!trace_event)
3799 		return false;
3800 
3801 	event = container_of(trace_event, struct trace_event_call, event);
3802 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3803 		return false;
3804 
3805 	/* Would rather have rodata, but this will suffice */
3806 	if (within_module_core(addr, event->module))
3807 		return true;
3808 
3809 	return false;
3810 }
3811 
3812 static const char *show_buffer(struct trace_seq *s)
3813 {
3814 	struct seq_buf *seq = &s->seq;
3815 
3816 	seq_buf_terminate(seq);
3817 
3818 	return seq->buffer;
3819 }
3820 
3821 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3822 
3823 static int test_can_verify_check(const char *fmt, ...)
3824 {
3825 	char buf[16];
3826 	va_list ap;
3827 	int ret;
3828 
3829 	/*
3830 	 * The verifier is dependent on vsnprintf() modifies the va_list
3831 	 * passed to it, where it is sent as a reference. Some architectures
3832 	 * (like x86_32) passes it by value, which means that vsnprintf()
3833 	 * does not modify the va_list passed to it, and the verifier
3834 	 * would then need to be able to understand all the values that
3835 	 * vsnprintf can use. If it is passed by value, then the verifier
3836 	 * is disabled.
3837 	 */
3838 	va_start(ap, fmt);
3839 	vsnprintf(buf, 16, "%d", ap);
3840 	ret = va_arg(ap, int);
3841 	va_end(ap);
3842 
3843 	return ret;
3844 }
3845 
3846 static void test_can_verify(void)
3847 {
3848 	if (!test_can_verify_check("%d %d", 0, 1)) {
3849 		pr_info("trace event string verifier disabled\n");
3850 		static_branch_inc(&trace_no_verify);
3851 	}
3852 }
3853 
3854 /**
3855  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3856  * @iter: The iterator that holds the seq buffer and the event being printed
3857  * @fmt: The format used to print the event
3858  * @ap: The va_list holding the data to print from @fmt.
3859  *
3860  * This writes the data into the @iter->seq buffer using the data from
3861  * @fmt and @ap. If the format has a %s, then the source of the string
3862  * is examined to make sure it is safe to print, otherwise it will
3863  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3864  * pointer.
3865  */
3866 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3867 			 va_list ap)
3868 {
3869 	const char *p = fmt;
3870 	const char *str;
3871 	int i, j;
3872 
3873 	if (WARN_ON_ONCE(!fmt))
3874 		return;
3875 
3876 	if (static_branch_unlikely(&trace_no_verify))
3877 		goto print;
3878 
3879 	/* Don't bother checking when doing a ftrace_dump() */
3880 	if (iter->fmt == static_fmt_buf)
3881 		goto print;
3882 
3883 	while (*p) {
3884 		bool star = false;
3885 		int len = 0;
3886 
3887 		j = 0;
3888 
3889 		/* We only care about %s and variants */
3890 		for (i = 0; p[i]; i++) {
3891 			if (i + 1 >= iter->fmt_size) {
3892 				/*
3893 				 * If we can't expand the copy buffer,
3894 				 * just print it.
3895 				 */
3896 				if (!trace_iter_expand_format(iter))
3897 					goto print;
3898 			}
3899 
3900 			if (p[i] == '\\' && p[i+1]) {
3901 				i++;
3902 				continue;
3903 			}
3904 			if (p[i] == '%') {
3905 				/* Need to test cases like %08.*s */
3906 				for (j = 1; p[i+j]; j++) {
3907 					if (isdigit(p[i+j]) ||
3908 					    p[i+j] == '.')
3909 						continue;
3910 					if (p[i+j] == '*') {
3911 						star = true;
3912 						continue;
3913 					}
3914 					break;
3915 				}
3916 				if (p[i+j] == 's')
3917 					break;
3918 				star = false;
3919 			}
3920 			j = 0;
3921 		}
3922 		/* If no %s found then just print normally */
3923 		if (!p[i])
3924 			break;
3925 
3926 		/* Copy up to the %s, and print that */
3927 		strncpy(iter->fmt, p, i);
3928 		iter->fmt[i] = '\0';
3929 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3930 
3931 		/*
3932 		 * If iter->seq is full, the above call no longer guarantees
3933 		 * that ap is in sync with fmt processing, and further calls
3934 		 * to va_arg() can return wrong positional arguments.
3935 		 *
3936 		 * Ensure that ap is no longer used in this case.
3937 		 */
3938 		if (iter->seq.full) {
3939 			p = "";
3940 			break;
3941 		}
3942 
3943 		if (star)
3944 			len = va_arg(ap, int);
3945 
3946 		/* The ap now points to the string data of the %s */
3947 		str = va_arg(ap, const char *);
3948 
3949 		/*
3950 		 * If you hit this warning, it is likely that the
3951 		 * trace event in question used %s on a string that
3952 		 * was saved at the time of the event, but may not be
3953 		 * around when the trace is read. Use __string(),
3954 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3955 		 * instead. See samples/trace_events/trace-events-sample.h
3956 		 * for reference.
3957 		 */
3958 		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3959 			      "fmt: '%s' current_buffer: '%s'",
3960 			      fmt, show_buffer(&iter->seq))) {
3961 			int ret;
3962 
3963 			/* Try to safely read the string */
3964 			if (star) {
3965 				if (len + 1 > iter->fmt_size)
3966 					len = iter->fmt_size - 1;
3967 				if (len < 0)
3968 					len = 0;
3969 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3970 				iter->fmt[len] = 0;
3971 				star = false;
3972 			} else {
3973 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3974 								  iter->fmt_size);
3975 			}
3976 			if (ret < 0)
3977 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3978 			else
3979 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3980 						 str, iter->fmt);
3981 			str = "[UNSAFE-MEMORY]";
3982 			strcpy(iter->fmt, "%s");
3983 		} else {
3984 			strncpy(iter->fmt, p + i, j + 1);
3985 			iter->fmt[j+1] = '\0';
3986 		}
3987 		if (star)
3988 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3989 		else
3990 			trace_seq_printf(&iter->seq, iter->fmt, str);
3991 
3992 		p += i + j + 1;
3993 	}
3994  print:
3995 	if (*p)
3996 		trace_seq_vprintf(&iter->seq, p, ap);
3997 }
3998 
3999 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4000 {
4001 	const char *p, *new_fmt;
4002 	char *q;
4003 
4004 	if (WARN_ON_ONCE(!fmt))
4005 		return fmt;
4006 
4007 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4008 		return fmt;
4009 
4010 	p = fmt;
4011 	new_fmt = q = iter->fmt;
4012 	while (*p) {
4013 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4014 			if (!trace_iter_expand_format(iter))
4015 				return fmt;
4016 
4017 			q += iter->fmt - new_fmt;
4018 			new_fmt = iter->fmt;
4019 		}
4020 
4021 		*q++ = *p++;
4022 
4023 		/* Replace %p with %px */
4024 		if (p[-1] == '%') {
4025 			if (p[0] == '%') {
4026 				*q++ = *p++;
4027 			} else if (p[0] == 'p' && !isalnum(p[1])) {
4028 				*q++ = *p++;
4029 				*q++ = 'x';
4030 			}
4031 		}
4032 	}
4033 	*q = '\0';
4034 
4035 	return new_fmt;
4036 }
4037 
4038 #define STATIC_TEMP_BUF_SIZE	128
4039 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4040 
4041 /* Find the next real entry, without updating the iterator itself */
4042 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4043 					  int *ent_cpu, u64 *ent_ts)
4044 {
4045 	/* __find_next_entry will reset ent_size */
4046 	int ent_size = iter->ent_size;
4047 	struct trace_entry *entry;
4048 
4049 	/*
4050 	 * If called from ftrace_dump(), then the iter->temp buffer
4051 	 * will be the static_temp_buf and not created from kmalloc.
4052 	 * If the entry size is greater than the buffer, we can
4053 	 * not save it. Just return NULL in that case. This is only
4054 	 * used to add markers when two consecutive events' time
4055 	 * stamps have a large delta. See trace_print_lat_context()
4056 	 */
4057 	if (iter->temp == static_temp_buf &&
4058 	    STATIC_TEMP_BUF_SIZE < ent_size)
4059 		return NULL;
4060 
4061 	/*
4062 	 * The __find_next_entry() may call peek_next_entry(), which may
4063 	 * call ring_buffer_peek() that may make the contents of iter->ent
4064 	 * undefined. Need to copy iter->ent now.
4065 	 */
4066 	if (iter->ent && iter->ent != iter->temp) {
4067 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4068 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4069 			void *temp;
4070 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
4071 			if (!temp)
4072 				return NULL;
4073 			kfree(iter->temp);
4074 			iter->temp = temp;
4075 			iter->temp_size = iter->ent_size;
4076 		}
4077 		memcpy(iter->temp, iter->ent, iter->ent_size);
4078 		iter->ent = iter->temp;
4079 	}
4080 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4081 	/* Put back the original ent_size */
4082 	iter->ent_size = ent_size;
4083 
4084 	return entry;
4085 }
4086 
4087 /* Find the next real entry, and increment the iterator to the next entry */
4088 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4089 {
4090 	iter->ent = __find_next_entry(iter, &iter->cpu,
4091 				      &iter->lost_events, &iter->ts);
4092 
4093 	if (iter->ent)
4094 		trace_iterator_increment(iter);
4095 
4096 	return iter->ent ? iter : NULL;
4097 }
4098 
4099 static void trace_consume(struct trace_iterator *iter)
4100 {
4101 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4102 			    &iter->lost_events);
4103 }
4104 
4105 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4106 {
4107 	struct trace_iterator *iter = m->private;
4108 	int i = (int)*pos;
4109 	void *ent;
4110 
4111 	WARN_ON_ONCE(iter->leftover);
4112 
4113 	(*pos)++;
4114 
4115 	/* can't go backwards */
4116 	if (iter->idx > i)
4117 		return NULL;
4118 
4119 	if (iter->idx < 0)
4120 		ent = trace_find_next_entry_inc(iter);
4121 	else
4122 		ent = iter;
4123 
4124 	while (ent && iter->idx < i)
4125 		ent = trace_find_next_entry_inc(iter);
4126 
4127 	iter->pos = *pos;
4128 
4129 	return ent;
4130 }
4131 
4132 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4133 {
4134 	struct ring_buffer_iter *buf_iter;
4135 	unsigned long entries = 0;
4136 	u64 ts;
4137 
4138 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4139 
4140 	buf_iter = trace_buffer_iter(iter, cpu);
4141 	if (!buf_iter)
4142 		return;
4143 
4144 	ring_buffer_iter_reset(buf_iter);
4145 
4146 	/*
4147 	 * We could have the case with the max latency tracers
4148 	 * that a reset never took place on a cpu. This is evident
4149 	 * by the timestamp being before the start of the buffer.
4150 	 */
4151 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4152 		if (ts >= iter->array_buffer->time_start)
4153 			break;
4154 		entries++;
4155 		ring_buffer_iter_advance(buf_iter);
4156 	}
4157 
4158 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4159 }
4160 
4161 /*
4162  * The current tracer is copied to avoid a global locking
4163  * all around.
4164  */
4165 static void *s_start(struct seq_file *m, loff_t *pos)
4166 {
4167 	struct trace_iterator *iter = m->private;
4168 	struct trace_array *tr = iter->tr;
4169 	int cpu_file = iter->cpu_file;
4170 	void *p = NULL;
4171 	loff_t l = 0;
4172 	int cpu;
4173 
4174 	mutex_lock(&trace_types_lock);
4175 	if (unlikely(tr->current_trace != iter->trace)) {
4176 		/* Close iter->trace before switching to the new current tracer */
4177 		if (iter->trace->close)
4178 			iter->trace->close(iter);
4179 		iter->trace = tr->current_trace;
4180 		/* Reopen the new current tracer */
4181 		if (iter->trace->open)
4182 			iter->trace->open(iter);
4183 	}
4184 	mutex_unlock(&trace_types_lock);
4185 
4186 #ifdef CONFIG_TRACER_MAX_TRACE
4187 	if (iter->snapshot && iter->trace->use_max_tr)
4188 		return ERR_PTR(-EBUSY);
4189 #endif
4190 
4191 	if (*pos != iter->pos) {
4192 		iter->ent = NULL;
4193 		iter->cpu = 0;
4194 		iter->idx = -1;
4195 
4196 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4197 			for_each_tracing_cpu(cpu)
4198 				tracing_iter_reset(iter, cpu);
4199 		} else
4200 			tracing_iter_reset(iter, cpu_file);
4201 
4202 		iter->leftover = 0;
4203 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4204 			;
4205 
4206 	} else {
4207 		/*
4208 		 * If we overflowed the seq_file before, then we want
4209 		 * to just reuse the trace_seq buffer again.
4210 		 */
4211 		if (iter->leftover)
4212 			p = iter;
4213 		else {
4214 			l = *pos - 1;
4215 			p = s_next(m, p, &l);
4216 		}
4217 	}
4218 
4219 	trace_event_read_lock();
4220 	trace_access_lock(cpu_file);
4221 	return p;
4222 }
4223 
4224 static void s_stop(struct seq_file *m, void *p)
4225 {
4226 	struct trace_iterator *iter = m->private;
4227 
4228 #ifdef CONFIG_TRACER_MAX_TRACE
4229 	if (iter->snapshot && iter->trace->use_max_tr)
4230 		return;
4231 #endif
4232 
4233 	trace_access_unlock(iter->cpu_file);
4234 	trace_event_read_unlock();
4235 }
4236 
4237 static void
4238 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4239 		      unsigned long *entries, int cpu)
4240 {
4241 	unsigned long count;
4242 
4243 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4244 	/*
4245 	 * If this buffer has skipped entries, then we hold all
4246 	 * entries for the trace and we need to ignore the
4247 	 * ones before the time stamp.
4248 	 */
4249 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4250 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4251 		/* total is the same as the entries */
4252 		*total = count;
4253 	} else
4254 		*total = count +
4255 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4256 	*entries = count;
4257 }
4258 
4259 static void
4260 get_total_entries(struct array_buffer *buf,
4261 		  unsigned long *total, unsigned long *entries)
4262 {
4263 	unsigned long t, e;
4264 	int cpu;
4265 
4266 	*total = 0;
4267 	*entries = 0;
4268 
4269 	for_each_tracing_cpu(cpu) {
4270 		get_total_entries_cpu(buf, &t, &e, cpu);
4271 		*total += t;
4272 		*entries += e;
4273 	}
4274 }
4275 
4276 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4277 {
4278 	unsigned long total, entries;
4279 
4280 	if (!tr)
4281 		tr = &global_trace;
4282 
4283 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4284 
4285 	return entries;
4286 }
4287 
4288 unsigned long trace_total_entries(struct trace_array *tr)
4289 {
4290 	unsigned long total, entries;
4291 
4292 	if (!tr)
4293 		tr = &global_trace;
4294 
4295 	get_total_entries(&tr->array_buffer, &total, &entries);
4296 
4297 	return entries;
4298 }
4299 
4300 static void print_lat_help_header(struct seq_file *m)
4301 {
4302 	seq_puts(m, "#                    _------=> CPU#            \n"
4303 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4304 		    "#                  | / _----=> need-resched    \n"
4305 		    "#                  || / _---=> hardirq/softirq \n"
4306 		    "#                  ||| / _--=> preempt-depth   \n"
4307 		    "#                  |||| / _-=> migrate-disable \n"
4308 		    "#                  ||||| /     delay           \n"
4309 		    "#  cmd     pid     |||||| time  |   caller     \n"
4310 		    "#     \\   /        ||||||  \\    |    /       \n");
4311 }
4312 
4313 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4314 {
4315 	unsigned long total;
4316 	unsigned long entries;
4317 
4318 	get_total_entries(buf, &total, &entries);
4319 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4320 		   entries, total, num_online_cpus());
4321 	seq_puts(m, "#\n");
4322 }
4323 
4324 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4325 				   unsigned int flags)
4326 {
4327 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4328 
4329 	print_event_info(buf, m);
4330 
4331 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4332 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4333 }
4334 
4335 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4336 				       unsigned int flags)
4337 {
4338 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4339 	static const char space[] = "            ";
4340 	int prec = tgid ? 12 : 2;
4341 
4342 	print_event_info(buf, m);
4343 
4344 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4345 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4346 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4347 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4348 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4349 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4350 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4351 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4352 }
4353 
4354 void
4355 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4356 {
4357 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4358 	struct array_buffer *buf = iter->array_buffer;
4359 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4360 	struct tracer *type = iter->trace;
4361 	unsigned long entries;
4362 	unsigned long total;
4363 	const char *name = type->name;
4364 
4365 	get_total_entries(buf, &total, &entries);
4366 
4367 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4368 		   name, UTS_RELEASE);
4369 	seq_puts(m, "# -----------------------------------"
4370 		 "---------------------------------\n");
4371 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4372 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4373 		   nsecs_to_usecs(data->saved_latency),
4374 		   entries,
4375 		   total,
4376 		   buf->cpu,
4377 		   preempt_model_none()      ? "server" :
4378 		   preempt_model_voluntary() ? "desktop" :
4379 		   preempt_model_full()      ? "preempt" :
4380 		   preempt_model_rt()        ? "preempt_rt" :
4381 		   "unknown",
4382 		   /* These are reserved for later use */
4383 		   0, 0, 0, 0);
4384 #ifdef CONFIG_SMP
4385 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4386 #else
4387 	seq_puts(m, ")\n");
4388 #endif
4389 	seq_puts(m, "#    -----------------\n");
4390 	seq_printf(m, "#    | task: %.16s-%d "
4391 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4392 		   data->comm, data->pid,
4393 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4394 		   data->policy, data->rt_priority);
4395 	seq_puts(m, "#    -----------------\n");
4396 
4397 	if (data->critical_start) {
4398 		seq_puts(m, "#  => started at: ");
4399 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4400 		trace_print_seq(m, &iter->seq);
4401 		seq_puts(m, "\n#  => ended at:   ");
4402 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4403 		trace_print_seq(m, &iter->seq);
4404 		seq_puts(m, "\n#\n");
4405 	}
4406 
4407 	seq_puts(m, "#\n");
4408 }
4409 
4410 static void test_cpu_buff_start(struct trace_iterator *iter)
4411 {
4412 	struct trace_seq *s = &iter->seq;
4413 	struct trace_array *tr = iter->tr;
4414 
4415 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4416 		return;
4417 
4418 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4419 		return;
4420 
4421 	if (cpumask_available(iter->started) &&
4422 	    cpumask_test_cpu(iter->cpu, iter->started))
4423 		return;
4424 
4425 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4426 		return;
4427 
4428 	if (cpumask_available(iter->started))
4429 		cpumask_set_cpu(iter->cpu, iter->started);
4430 
4431 	/* Don't print started cpu buffer for the first entry of the trace */
4432 	if (iter->idx > 1)
4433 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4434 				iter->cpu);
4435 }
4436 
4437 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4438 {
4439 	struct trace_array *tr = iter->tr;
4440 	struct trace_seq *s = &iter->seq;
4441 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4442 	struct trace_entry *entry;
4443 	struct trace_event *event;
4444 
4445 	entry = iter->ent;
4446 
4447 	test_cpu_buff_start(iter);
4448 
4449 	event = ftrace_find_event(entry->type);
4450 
4451 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4452 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4453 			trace_print_lat_context(iter);
4454 		else
4455 			trace_print_context(iter);
4456 	}
4457 
4458 	if (trace_seq_has_overflowed(s))
4459 		return TRACE_TYPE_PARTIAL_LINE;
4460 
4461 	if (event) {
4462 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4463 			return print_event_fields(iter, event);
4464 		return event->funcs->trace(iter, sym_flags, event);
4465 	}
4466 
4467 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4468 
4469 	return trace_handle_return(s);
4470 }
4471 
4472 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4473 {
4474 	struct trace_array *tr = iter->tr;
4475 	struct trace_seq *s = &iter->seq;
4476 	struct trace_entry *entry;
4477 	struct trace_event *event;
4478 
4479 	entry = iter->ent;
4480 
4481 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4482 		trace_seq_printf(s, "%d %d %llu ",
4483 				 entry->pid, iter->cpu, iter->ts);
4484 
4485 	if (trace_seq_has_overflowed(s))
4486 		return TRACE_TYPE_PARTIAL_LINE;
4487 
4488 	event = ftrace_find_event(entry->type);
4489 	if (event)
4490 		return event->funcs->raw(iter, 0, event);
4491 
4492 	trace_seq_printf(s, "%d ?\n", entry->type);
4493 
4494 	return trace_handle_return(s);
4495 }
4496 
4497 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4498 {
4499 	struct trace_array *tr = iter->tr;
4500 	struct trace_seq *s = &iter->seq;
4501 	unsigned char newline = '\n';
4502 	struct trace_entry *entry;
4503 	struct trace_event *event;
4504 
4505 	entry = iter->ent;
4506 
4507 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4508 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4509 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4510 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4511 		if (trace_seq_has_overflowed(s))
4512 			return TRACE_TYPE_PARTIAL_LINE;
4513 	}
4514 
4515 	event = ftrace_find_event(entry->type);
4516 	if (event) {
4517 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4518 		if (ret != TRACE_TYPE_HANDLED)
4519 			return ret;
4520 	}
4521 
4522 	SEQ_PUT_FIELD(s, newline);
4523 
4524 	return trace_handle_return(s);
4525 }
4526 
4527 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4528 {
4529 	struct trace_array *tr = iter->tr;
4530 	struct trace_seq *s = &iter->seq;
4531 	struct trace_entry *entry;
4532 	struct trace_event *event;
4533 
4534 	entry = iter->ent;
4535 
4536 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4537 		SEQ_PUT_FIELD(s, entry->pid);
4538 		SEQ_PUT_FIELD(s, iter->cpu);
4539 		SEQ_PUT_FIELD(s, iter->ts);
4540 		if (trace_seq_has_overflowed(s))
4541 			return TRACE_TYPE_PARTIAL_LINE;
4542 	}
4543 
4544 	event = ftrace_find_event(entry->type);
4545 	return event ? event->funcs->binary(iter, 0, event) :
4546 		TRACE_TYPE_HANDLED;
4547 }
4548 
4549 int trace_empty(struct trace_iterator *iter)
4550 {
4551 	struct ring_buffer_iter *buf_iter;
4552 	int cpu;
4553 
4554 	/* If we are looking at one CPU buffer, only check that one */
4555 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4556 		cpu = iter->cpu_file;
4557 		buf_iter = trace_buffer_iter(iter, cpu);
4558 		if (buf_iter) {
4559 			if (!ring_buffer_iter_empty(buf_iter))
4560 				return 0;
4561 		} else {
4562 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4563 				return 0;
4564 		}
4565 		return 1;
4566 	}
4567 
4568 	for_each_tracing_cpu(cpu) {
4569 		buf_iter = trace_buffer_iter(iter, cpu);
4570 		if (buf_iter) {
4571 			if (!ring_buffer_iter_empty(buf_iter))
4572 				return 0;
4573 		} else {
4574 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4575 				return 0;
4576 		}
4577 	}
4578 
4579 	return 1;
4580 }
4581 
4582 /*  Called with trace_event_read_lock() held. */
4583 enum print_line_t print_trace_line(struct trace_iterator *iter)
4584 {
4585 	struct trace_array *tr = iter->tr;
4586 	unsigned long trace_flags = tr->trace_flags;
4587 	enum print_line_t ret;
4588 
4589 	if (iter->lost_events) {
4590 		if (iter->lost_events == (unsigned long)-1)
4591 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4592 					 iter->cpu);
4593 		else
4594 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4595 					 iter->cpu, iter->lost_events);
4596 		if (trace_seq_has_overflowed(&iter->seq))
4597 			return TRACE_TYPE_PARTIAL_LINE;
4598 	}
4599 
4600 	if (iter->trace && iter->trace->print_line) {
4601 		ret = iter->trace->print_line(iter);
4602 		if (ret != TRACE_TYPE_UNHANDLED)
4603 			return ret;
4604 	}
4605 
4606 	if (iter->ent->type == TRACE_BPUTS &&
4607 			trace_flags & TRACE_ITER_PRINTK &&
4608 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4609 		return trace_print_bputs_msg_only(iter);
4610 
4611 	if (iter->ent->type == TRACE_BPRINT &&
4612 			trace_flags & TRACE_ITER_PRINTK &&
4613 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4614 		return trace_print_bprintk_msg_only(iter);
4615 
4616 	if (iter->ent->type == TRACE_PRINT &&
4617 			trace_flags & TRACE_ITER_PRINTK &&
4618 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4619 		return trace_print_printk_msg_only(iter);
4620 
4621 	if (trace_flags & TRACE_ITER_BIN)
4622 		return print_bin_fmt(iter);
4623 
4624 	if (trace_flags & TRACE_ITER_HEX)
4625 		return print_hex_fmt(iter);
4626 
4627 	if (trace_flags & TRACE_ITER_RAW)
4628 		return print_raw_fmt(iter);
4629 
4630 	return print_trace_fmt(iter);
4631 }
4632 
4633 void trace_latency_header(struct seq_file *m)
4634 {
4635 	struct trace_iterator *iter = m->private;
4636 	struct trace_array *tr = iter->tr;
4637 
4638 	/* print nothing if the buffers are empty */
4639 	if (trace_empty(iter))
4640 		return;
4641 
4642 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4643 		print_trace_header(m, iter);
4644 
4645 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4646 		print_lat_help_header(m);
4647 }
4648 
4649 void trace_default_header(struct seq_file *m)
4650 {
4651 	struct trace_iterator *iter = m->private;
4652 	struct trace_array *tr = iter->tr;
4653 	unsigned long trace_flags = tr->trace_flags;
4654 
4655 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4656 		return;
4657 
4658 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4659 		/* print nothing if the buffers are empty */
4660 		if (trace_empty(iter))
4661 			return;
4662 		print_trace_header(m, iter);
4663 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4664 			print_lat_help_header(m);
4665 	} else {
4666 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4667 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4668 				print_func_help_header_irq(iter->array_buffer,
4669 							   m, trace_flags);
4670 			else
4671 				print_func_help_header(iter->array_buffer, m,
4672 						       trace_flags);
4673 		}
4674 	}
4675 }
4676 
4677 static void test_ftrace_alive(struct seq_file *m)
4678 {
4679 	if (!ftrace_is_dead())
4680 		return;
4681 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4682 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4683 }
4684 
4685 #ifdef CONFIG_TRACER_MAX_TRACE
4686 static void show_snapshot_main_help(struct seq_file *m)
4687 {
4688 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4689 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4690 		    "#                      Takes a snapshot of the main buffer.\n"
4691 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4692 		    "#                      (Doesn't have to be '2' works with any number that\n"
4693 		    "#                       is not a '0' or '1')\n");
4694 }
4695 
4696 static void show_snapshot_percpu_help(struct seq_file *m)
4697 {
4698 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4699 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4700 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4701 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4702 #else
4703 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4704 		    "#                     Must use main snapshot file to allocate.\n");
4705 #endif
4706 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4707 		    "#                      (Doesn't have to be '2' works with any number that\n"
4708 		    "#                       is not a '0' or '1')\n");
4709 }
4710 
4711 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4712 {
4713 	if (iter->tr->allocated_snapshot)
4714 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4715 	else
4716 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4717 
4718 	seq_puts(m, "# Snapshot commands:\n");
4719 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4720 		show_snapshot_main_help(m);
4721 	else
4722 		show_snapshot_percpu_help(m);
4723 }
4724 #else
4725 /* Should never be called */
4726 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4727 #endif
4728 
4729 static int s_show(struct seq_file *m, void *v)
4730 {
4731 	struct trace_iterator *iter = v;
4732 	int ret;
4733 
4734 	if (iter->ent == NULL) {
4735 		if (iter->tr) {
4736 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4737 			seq_puts(m, "#\n");
4738 			test_ftrace_alive(m);
4739 		}
4740 		if (iter->snapshot && trace_empty(iter))
4741 			print_snapshot_help(m, iter);
4742 		else if (iter->trace && iter->trace->print_header)
4743 			iter->trace->print_header(m);
4744 		else
4745 			trace_default_header(m);
4746 
4747 	} else if (iter->leftover) {
4748 		/*
4749 		 * If we filled the seq_file buffer earlier, we
4750 		 * want to just show it now.
4751 		 */
4752 		ret = trace_print_seq(m, &iter->seq);
4753 
4754 		/* ret should this time be zero, but you never know */
4755 		iter->leftover = ret;
4756 
4757 	} else {
4758 		ret = print_trace_line(iter);
4759 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4760 			iter->seq.full = 0;
4761 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4762 		}
4763 		ret = trace_print_seq(m, &iter->seq);
4764 		/*
4765 		 * If we overflow the seq_file buffer, then it will
4766 		 * ask us for this data again at start up.
4767 		 * Use that instead.
4768 		 *  ret is 0 if seq_file write succeeded.
4769 		 *        -1 otherwise.
4770 		 */
4771 		iter->leftover = ret;
4772 	}
4773 
4774 	return 0;
4775 }
4776 
4777 /*
4778  * Should be used after trace_array_get(), trace_types_lock
4779  * ensures that i_cdev was already initialized.
4780  */
4781 static inline int tracing_get_cpu(struct inode *inode)
4782 {
4783 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4784 		return (long)inode->i_cdev - 1;
4785 	return RING_BUFFER_ALL_CPUS;
4786 }
4787 
4788 static const struct seq_operations tracer_seq_ops = {
4789 	.start		= s_start,
4790 	.next		= s_next,
4791 	.stop		= s_stop,
4792 	.show		= s_show,
4793 };
4794 
4795 /*
4796  * Note, as iter itself can be allocated and freed in different
4797  * ways, this function is only used to free its content, and not
4798  * the iterator itself. The only requirement to all the allocations
4799  * is that it must zero all fields (kzalloc), as freeing works with
4800  * ethier allocated content or NULL.
4801  */
4802 static void free_trace_iter_content(struct trace_iterator *iter)
4803 {
4804 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4805 	if (iter->fmt != static_fmt_buf)
4806 		kfree(iter->fmt);
4807 
4808 	kfree(iter->temp);
4809 	kfree(iter->buffer_iter);
4810 	mutex_destroy(&iter->mutex);
4811 	free_cpumask_var(iter->started);
4812 }
4813 
4814 static struct trace_iterator *
4815 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4816 {
4817 	struct trace_array *tr = inode->i_private;
4818 	struct trace_iterator *iter;
4819 	int cpu;
4820 
4821 	if (tracing_disabled)
4822 		return ERR_PTR(-ENODEV);
4823 
4824 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4825 	if (!iter)
4826 		return ERR_PTR(-ENOMEM);
4827 
4828 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4829 				    GFP_KERNEL);
4830 	if (!iter->buffer_iter)
4831 		goto release;
4832 
4833 	/*
4834 	 * trace_find_next_entry() may need to save off iter->ent.
4835 	 * It will place it into the iter->temp buffer. As most
4836 	 * events are less than 128, allocate a buffer of that size.
4837 	 * If one is greater, then trace_find_next_entry() will
4838 	 * allocate a new buffer to adjust for the bigger iter->ent.
4839 	 * It's not critical if it fails to get allocated here.
4840 	 */
4841 	iter->temp = kmalloc(128, GFP_KERNEL);
4842 	if (iter->temp)
4843 		iter->temp_size = 128;
4844 
4845 	/*
4846 	 * trace_event_printf() may need to modify given format
4847 	 * string to replace %p with %px so that it shows real address
4848 	 * instead of hash value. However, that is only for the event
4849 	 * tracing, other tracer may not need. Defer the allocation
4850 	 * until it is needed.
4851 	 */
4852 	iter->fmt = NULL;
4853 	iter->fmt_size = 0;
4854 
4855 	mutex_lock(&trace_types_lock);
4856 	iter->trace = tr->current_trace;
4857 
4858 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4859 		goto fail;
4860 
4861 	iter->tr = tr;
4862 
4863 #ifdef CONFIG_TRACER_MAX_TRACE
4864 	/* Currently only the top directory has a snapshot */
4865 	if (tr->current_trace->print_max || snapshot)
4866 		iter->array_buffer = &tr->max_buffer;
4867 	else
4868 #endif
4869 		iter->array_buffer = &tr->array_buffer;
4870 	iter->snapshot = snapshot;
4871 	iter->pos = -1;
4872 	iter->cpu_file = tracing_get_cpu(inode);
4873 	mutex_init(&iter->mutex);
4874 
4875 	/* Notify the tracer early; before we stop tracing. */
4876 	if (iter->trace->open)
4877 		iter->trace->open(iter);
4878 
4879 	/* Annotate start of buffers if we had overruns */
4880 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4881 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4882 
4883 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4884 	if (trace_clocks[tr->clock_id].in_ns)
4885 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4886 
4887 	/*
4888 	 * If pause-on-trace is enabled, then stop the trace while
4889 	 * dumping, unless this is the "snapshot" file
4890 	 */
4891 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4892 		tracing_stop_tr(tr);
4893 
4894 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4895 		for_each_tracing_cpu(cpu) {
4896 			iter->buffer_iter[cpu] =
4897 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4898 							 cpu, GFP_KERNEL);
4899 		}
4900 		ring_buffer_read_prepare_sync();
4901 		for_each_tracing_cpu(cpu) {
4902 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4903 			tracing_iter_reset(iter, cpu);
4904 		}
4905 	} else {
4906 		cpu = iter->cpu_file;
4907 		iter->buffer_iter[cpu] =
4908 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4909 						 cpu, GFP_KERNEL);
4910 		ring_buffer_read_prepare_sync();
4911 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4912 		tracing_iter_reset(iter, cpu);
4913 	}
4914 
4915 	mutex_unlock(&trace_types_lock);
4916 
4917 	return iter;
4918 
4919  fail:
4920 	mutex_unlock(&trace_types_lock);
4921 	free_trace_iter_content(iter);
4922 release:
4923 	seq_release_private(inode, file);
4924 	return ERR_PTR(-ENOMEM);
4925 }
4926 
4927 int tracing_open_generic(struct inode *inode, struct file *filp)
4928 {
4929 	int ret;
4930 
4931 	ret = tracing_check_open_get_tr(NULL);
4932 	if (ret)
4933 		return ret;
4934 
4935 	filp->private_data = inode->i_private;
4936 	return 0;
4937 }
4938 
4939 bool tracing_is_disabled(void)
4940 {
4941 	return (tracing_disabled) ? true: false;
4942 }
4943 
4944 /*
4945  * Open and update trace_array ref count.
4946  * Must have the current trace_array passed to it.
4947  */
4948 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4949 {
4950 	struct trace_array *tr = inode->i_private;
4951 	int ret;
4952 
4953 	ret = tracing_check_open_get_tr(tr);
4954 	if (ret)
4955 		return ret;
4956 
4957 	filp->private_data = inode->i_private;
4958 
4959 	return 0;
4960 }
4961 
4962 /*
4963  * The private pointer of the inode is the trace_event_file.
4964  * Update the tr ref count associated to it.
4965  */
4966 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4967 {
4968 	struct trace_event_file *file = inode->i_private;
4969 	int ret;
4970 
4971 	ret = tracing_check_open_get_tr(file->tr);
4972 	if (ret)
4973 		return ret;
4974 
4975 	mutex_lock(&event_mutex);
4976 
4977 	/* Fail if the file is marked for removal */
4978 	if (file->flags & EVENT_FILE_FL_FREED) {
4979 		trace_array_put(file->tr);
4980 		ret = -ENODEV;
4981 	} else {
4982 		event_file_get(file);
4983 	}
4984 
4985 	mutex_unlock(&event_mutex);
4986 	if (ret)
4987 		return ret;
4988 
4989 	filp->private_data = inode->i_private;
4990 
4991 	return 0;
4992 }
4993 
4994 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4995 {
4996 	struct trace_event_file *file = inode->i_private;
4997 
4998 	trace_array_put(file->tr);
4999 	event_file_put(file);
5000 
5001 	return 0;
5002 }
5003 
5004 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
5005 {
5006 	tracing_release_file_tr(inode, filp);
5007 	return single_release(inode, filp);
5008 }
5009 
5010 static int tracing_mark_open(struct inode *inode, struct file *filp)
5011 {
5012 	stream_open(inode, filp);
5013 	return tracing_open_generic_tr(inode, filp);
5014 }
5015 
5016 static int tracing_release(struct inode *inode, struct file *file)
5017 {
5018 	struct trace_array *tr = inode->i_private;
5019 	struct seq_file *m = file->private_data;
5020 	struct trace_iterator *iter;
5021 	int cpu;
5022 
5023 	if (!(file->f_mode & FMODE_READ)) {
5024 		trace_array_put(tr);
5025 		return 0;
5026 	}
5027 
5028 	/* Writes do not use seq_file */
5029 	iter = m->private;
5030 	mutex_lock(&trace_types_lock);
5031 
5032 	for_each_tracing_cpu(cpu) {
5033 		if (iter->buffer_iter[cpu])
5034 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
5035 	}
5036 
5037 	if (iter->trace && iter->trace->close)
5038 		iter->trace->close(iter);
5039 
5040 	if (!iter->snapshot && tr->stop_count)
5041 		/* reenable tracing if it was previously enabled */
5042 		tracing_start_tr(tr);
5043 
5044 	__trace_array_put(tr);
5045 
5046 	mutex_unlock(&trace_types_lock);
5047 
5048 	free_trace_iter_content(iter);
5049 	seq_release_private(inode, file);
5050 
5051 	return 0;
5052 }
5053 
5054 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5055 {
5056 	struct trace_array *tr = inode->i_private;
5057 
5058 	trace_array_put(tr);
5059 	return 0;
5060 }
5061 
5062 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5063 {
5064 	struct trace_array *tr = inode->i_private;
5065 
5066 	trace_array_put(tr);
5067 
5068 	return single_release(inode, file);
5069 }
5070 
5071 static int tracing_open(struct inode *inode, struct file *file)
5072 {
5073 	struct trace_array *tr = inode->i_private;
5074 	struct trace_iterator *iter;
5075 	int ret;
5076 
5077 	ret = tracing_check_open_get_tr(tr);
5078 	if (ret)
5079 		return ret;
5080 
5081 	/* If this file was open for write, then erase contents */
5082 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5083 		int cpu = tracing_get_cpu(inode);
5084 		struct array_buffer *trace_buf = &tr->array_buffer;
5085 
5086 #ifdef CONFIG_TRACER_MAX_TRACE
5087 		if (tr->current_trace->print_max)
5088 			trace_buf = &tr->max_buffer;
5089 #endif
5090 
5091 		if (cpu == RING_BUFFER_ALL_CPUS)
5092 			tracing_reset_online_cpus(trace_buf);
5093 		else
5094 			tracing_reset_cpu(trace_buf, cpu);
5095 	}
5096 
5097 	if (file->f_mode & FMODE_READ) {
5098 		iter = __tracing_open(inode, file, false);
5099 		if (IS_ERR(iter))
5100 			ret = PTR_ERR(iter);
5101 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5102 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
5103 	}
5104 
5105 	if (ret < 0)
5106 		trace_array_put(tr);
5107 
5108 	return ret;
5109 }
5110 
5111 /*
5112  * Some tracers are not suitable for instance buffers.
5113  * A tracer is always available for the global array (toplevel)
5114  * or if it explicitly states that it is.
5115  */
5116 static bool
5117 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5118 {
5119 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5120 }
5121 
5122 /* Find the next tracer that this trace array may use */
5123 static struct tracer *
5124 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5125 {
5126 	while (t && !trace_ok_for_array(t, tr))
5127 		t = t->next;
5128 
5129 	return t;
5130 }
5131 
5132 static void *
5133 t_next(struct seq_file *m, void *v, loff_t *pos)
5134 {
5135 	struct trace_array *tr = m->private;
5136 	struct tracer *t = v;
5137 
5138 	(*pos)++;
5139 
5140 	if (t)
5141 		t = get_tracer_for_array(tr, t->next);
5142 
5143 	return t;
5144 }
5145 
5146 static void *t_start(struct seq_file *m, loff_t *pos)
5147 {
5148 	struct trace_array *tr = m->private;
5149 	struct tracer *t;
5150 	loff_t l = 0;
5151 
5152 	mutex_lock(&trace_types_lock);
5153 
5154 	t = get_tracer_for_array(tr, trace_types);
5155 	for (; t && l < *pos; t = t_next(m, t, &l))
5156 			;
5157 
5158 	return t;
5159 }
5160 
5161 static void t_stop(struct seq_file *m, void *p)
5162 {
5163 	mutex_unlock(&trace_types_lock);
5164 }
5165 
5166 static int t_show(struct seq_file *m, void *v)
5167 {
5168 	struct tracer *t = v;
5169 
5170 	if (!t)
5171 		return 0;
5172 
5173 	seq_puts(m, t->name);
5174 	if (t->next)
5175 		seq_putc(m, ' ');
5176 	else
5177 		seq_putc(m, '\n');
5178 
5179 	return 0;
5180 }
5181 
5182 static const struct seq_operations show_traces_seq_ops = {
5183 	.start		= t_start,
5184 	.next		= t_next,
5185 	.stop		= t_stop,
5186 	.show		= t_show,
5187 };
5188 
5189 static int show_traces_open(struct inode *inode, struct file *file)
5190 {
5191 	struct trace_array *tr = inode->i_private;
5192 	struct seq_file *m;
5193 	int ret;
5194 
5195 	ret = tracing_check_open_get_tr(tr);
5196 	if (ret)
5197 		return ret;
5198 
5199 	ret = seq_open(file, &show_traces_seq_ops);
5200 	if (ret) {
5201 		trace_array_put(tr);
5202 		return ret;
5203 	}
5204 
5205 	m = file->private_data;
5206 	m->private = tr;
5207 
5208 	return 0;
5209 }
5210 
5211 static int show_traces_release(struct inode *inode, struct file *file)
5212 {
5213 	struct trace_array *tr = inode->i_private;
5214 
5215 	trace_array_put(tr);
5216 	return seq_release(inode, file);
5217 }
5218 
5219 static ssize_t
5220 tracing_write_stub(struct file *filp, const char __user *ubuf,
5221 		   size_t count, loff_t *ppos)
5222 {
5223 	return count;
5224 }
5225 
5226 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5227 {
5228 	int ret;
5229 
5230 	if (file->f_mode & FMODE_READ)
5231 		ret = seq_lseek(file, offset, whence);
5232 	else
5233 		file->f_pos = ret = 0;
5234 
5235 	return ret;
5236 }
5237 
5238 static const struct file_operations tracing_fops = {
5239 	.open		= tracing_open,
5240 	.read		= seq_read,
5241 	.read_iter	= seq_read_iter,
5242 	.splice_read	= copy_splice_read,
5243 	.write		= tracing_write_stub,
5244 	.llseek		= tracing_lseek,
5245 	.release	= tracing_release,
5246 };
5247 
5248 static const struct file_operations show_traces_fops = {
5249 	.open		= show_traces_open,
5250 	.read		= seq_read,
5251 	.llseek		= seq_lseek,
5252 	.release	= show_traces_release,
5253 };
5254 
5255 static ssize_t
5256 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5257 		     size_t count, loff_t *ppos)
5258 {
5259 	struct trace_array *tr = file_inode(filp)->i_private;
5260 	char *mask_str;
5261 	int len;
5262 
5263 	len = snprintf(NULL, 0, "%*pb\n",
5264 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5265 	mask_str = kmalloc(len, GFP_KERNEL);
5266 	if (!mask_str)
5267 		return -ENOMEM;
5268 
5269 	len = snprintf(mask_str, len, "%*pb\n",
5270 		       cpumask_pr_args(tr->tracing_cpumask));
5271 	if (len >= count) {
5272 		count = -EINVAL;
5273 		goto out_err;
5274 	}
5275 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5276 
5277 out_err:
5278 	kfree(mask_str);
5279 
5280 	return count;
5281 }
5282 
5283 int tracing_set_cpumask(struct trace_array *tr,
5284 			cpumask_var_t tracing_cpumask_new)
5285 {
5286 	int cpu;
5287 
5288 	if (!tr)
5289 		return -EINVAL;
5290 
5291 	local_irq_disable();
5292 	arch_spin_lock(&tr->max_lock);
5293 	for_each_tracing_cpu(cpu) {
5294 		/*
5295 		 * Increase/decrease the disabled counter if we are
5296 		 * about to flip a bit in the cpumask:
5297 		 */
5298 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5299 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5300 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5301 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5302 #ifdef CONFIG_TRACER_MAX_TRACE
5303 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5304 #endif
5305 		}
5306 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5307 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5308 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5309 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5310 #ifdef CONFIG_TRACER_MAX_TRACE
5311 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5312 #endif
5313 		}
5314 	}
5315 	arch_spin_unlock(&tr->max_lock);
5316 	local_irq_enable();
5317 
5318 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5319 
5320 	return 0;
5321 }
5322 
5323 static ssize_t
5324 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5325 		      size_t count, loff_t *ppos)
5326 {
5327 	struct trace_array *tr = file_inode(filp)->i_private;
5328 	cpumask_var_t tracing_cpumask_new;
5329 	int err;
5330 
5331 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5332 		return -ENOMEM;
5333 
5334 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5335 	if (err)
5336 		goto err_free;
5337 
5338 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5339 	if (err)
5340 		goto err_free;
5341 
5342 	free_cpumask_var(tracing_cpumask_new);
5343 
5344 	return count;
5345 
5346 err_free:
5347 	free_cpumask_var(tracing_cpumask_new);
5348 
5349 	return err;
5350 }
5351 
5352 static const struct file_operations tracing_cpumask_fops = {
5353 	.open		= tracing_open_generic_tr,
5354 	.read		= tracing_cpumask_read,
5355 	.write		= tracing_cpumask_write,
5356 	.release	= tracing_release_generic_tr,
5357 	.llseek		= generic_file_llseek,
5358 };
5359 
5360 static int tracing_trace_options_show(struct seq_file *m, void *v)
5361 {
5362 	struct tracer_opt *trace_opts;
5363 	struct trace_array *tr = m->private;
5364 	u32 tracer_flags;
5365 	int i;
5366 
5367 	mutex_lock(&trace_types_lock);
5368 	tracer_flags = tr->current_trace->flags->val;
5369 	trace_opts = tr->current_trace->flags->opts;
5370 
5371 	for (i = 0; trace_options[i]; i++) {
5372 		if (tr->trace_flags & (1 << i))
5373 			seq_printf(m, "%s\n", trace_options[i]);
5374 		else
5375 			seq_printf(m, "no%s\n", trace_options[i]);
5376 	}
5377 
5378 	for (i = 0; trace_opts[i].name; i++) {
5379 		if (tracer_flags & trace_opts[i].bit)
5380 			seq_printf(m, "%s\n", trace_opts[i].name);
5381 		else
5382 			seq_printf(m, "no%s\n", trace_opts[i].name);
5383 	}
5384 	mutex_unlock(&trace_types_lock);
5385 
5386 	return 0;
5387 }
5388 
5389 static int __set_tracer_option(struct trace_array *tr,
5390 			       struct tracer_flags *tracer_flags,
5391 			       struct tracer_opt *opts, int neg)
5392 {
5393 	struct tracer *trace = tracer_flags->trace;
5394 	int ret;
5395 
5396 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5397 	if (ret)
5398 		return ret;
5399 
5400 	if (neg)
5401 		tracer_flags->val &= ~opts->bit;
5402 	else
5403 		tracer_flags->val |= opts->bit;
5404 	return 0;
5405 }
5406 
5407 /* Try to assign a tracer specific option */
5408 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5409 {
5410 	struct tracer *trace = tr->current_trace;
5411 	struct tracer_flags *tracer_flags = trace->flags;
5412 	struct tracer_opt *opts = NULL;
5413 	int i;
5414 
5415 	for (i = 0; tracer_flags->opts[i].name; i++) {
5416 		opts = &tracer_flags->opts[i];
5417 
5418 		if (strcmp(cmp, opts->name) == 0)
5419 			return __set_tracer_option(tr, trace->flags, opts, neg);
5420 	}
5421 
5422 	return -EINVAL;
5423 }
5424 
5425 /* Some tracers require overwrite to stay enabled */
5426 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5427 {
5428 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5429 		return -1;
5430 
5431 	return 0;
5432 }
5433 
5434 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5435 {
5436 	int *map;
5437 
5438 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5439 	    (mask == TRACE_ITER_RECORD_CMD))
5440 		lockdep_assert_held(&event_mutex);
5441 
5442 	/* do nothing if flag is already set */
5443 	if (!!(tr->trace_flags & mask) == !!enabled)
5444 		return 0;
5445 
5446 	/* Give the tracer a chance to approve the change */
5447 	if (tr->current_trace->flag_changed)
5448 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5449 			return -EINVAL;
5450 
5451 	if (enabled)
5452 		tr->trace_flags |= mask;
5453 	else
5454 		tr->trace_flags &= ~mask;
5455 
5456 	if (mask == TRACE_ITER_RECORD_CMD)
5457 		trace_event_enable_cmd_record(enabled);
5458 
5459 	if (mask == TRACE_ITER_RECORD_TGID) {
5460 		if (!tgid_map) {
5461 			tgid_map_max = pid_max;
5462 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5463 				       GFP_KERNEL);
5464 
5465 			/*
5466 			 * Pairs with smp_load_acquire() in
5467 			 * trace_find_tgid_ptr() to ensure that if it observes
5468 			 * the tgid_map we just allocated then it also observes
5469 			 * the corresponding tgid_map_max value.
5470 			 */
5471 			smp_store_release(&tgid_map, map);
5472 		}
5473 		if (!tgid_map) {
5474 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5475 			return -ENOMEM;
5476 		}
5477 
5478 		trace_event_enable_tgid_record(enabled);
5479 	}
5480 
5481 	if (mask == TRACE_ITER_EVENT_FORK)
5482 		trace_event_follow_fork(tr, enabled);
5483 
5484 	if (mask == TRACE_ITER_FUNC_FORK)
5485 		ftrace_pid_follow_fork(tr, enabled);
5486 
5487 	if (mask == TRACE_ITER_OVERWRITE) {
5488 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5489 #ifdef CONFIG_TRACER_MAX_TRACE
5490 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5491 #endif
5492 	}
5493 
5494 	if (mask == TRACE_ITER_PRINTK) {
5495 		trace_printk_start_stop_comm(enabled);
5496 		trace_printk_control(enabled);
5497 	}
5498 
5499 	return 0;
5500 }
5501 
5502 int trace_set_options(struct trace_array *tr, char *option)
5503 {
5504 	char *cmp;
5505 	int neg = 0;
5506 	int ret;
5507 	size_t orig_len = strlen(option);
5508 	int len;
5509 
5510 	cmp = strstrip(option);
5511 
5512 	len = str_has_prefix(cmp, "no");
5513 	if (len)
5514 		neg = 1;
5515 
5516 	cmp += len;
5517 
5518 	mutex_lock(&event_mutex);
5519 	mutex_lock(&trace_types_lock);
5520 
5521 	ret = match_string(trace_options, -1, cmp);
5522 	/* If no option could be set, test the specific tracer options */
5523 	if (ret < 0)
5524 		ret = set_tracer_option(tr, cmp, neg);
5525 	else
5526 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5527 
5528 	mutex_unlock(&trace_types_lock);
5529 	mutex_unlock(&event_mutex);
5530 
5531 	/*
5532 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5533 	 * turn it back into a space.
5534 	 */
5535 	if (orig_len > strlen(option))
5536 		option[strlen(option)] = ' ';
5537 
5538 	return ret;
5539 }
5540 
5541 static void __init apply_trace_boot_options(void)
5542 {
5543 	char *buf = trace_boot_options_buf;
5544 	char *option;
5545 
5546 	while (true) {
5547 		option = strsep(&buf, ",");
5548 
5549 		if (!option)
5550 			break;
5551 
5552 		if (*option)
5553 			trace_set_options(&global_trace, option);
5554 
5555 		/* Put back the comma to allow this to be called again */
5556 		if (buf)
5557 			*(buf - 1) = ',';
5558 	}
5559 }
5560 
5561 static ssize_t
5562 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5563 			size_t cnt, loff_t *ppos)
5564 {
5565 	struct seq_file *m = filp->private_data;
5566 	struct trace_array *tr = m->private;
5567 	char buf[64];
5568 	int ret;
5569 
5570 	if (cnt >= sizeof(buf))
5571 		return -EINVAL;
5572 
5573 	if (copy_from_user(buf, ubuf, cnt))
5574 		return -EFAULT;
5575 
5576 	buf[cnt] = 0;
5577 
5578 	ret = trace_set_options(tr, buf);
5579 	if (ret < 0)
5580 		return ret;
5581 
5582 	*ppos += cnt;
5583 
5584 	return cnt;
5585 }
5586 
5587 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5588 {
5589 	struct trace_array *tr = inode->i_private;
5590 	int ret;
5591 
5592 	ret = tracing_check_open_get_tr(tr);
5593 	if (ret)
5594 		return ret;
5595 
5596 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5597 	if (ret < 0)
5598 		trace_array_put(tr);
5599 
5600 	return ret;
5601 }
5602 
5603 static const struct file_operations tracing_iter_fops = {
5604 	.open		= tracing_trace_options_open,
5605 	.read		= seq_read,
5606 	.llseek		= seq_lseek,
5607 	.release	= tracing_single_release_tr,
5608 	.write		= tracing_trace_options_write,
5609 };
5610 
5611 static const char readme_msg[] =
5612 	"tracing mini-HOWTO:\n\n"
5613 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5614 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5615 	" Important files:\n"
5616 	"  trace\t\t\t- The static contents of the buffer\n"
5617 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5618 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5619 	"  current_tracer\t- function and latency tracers\n"
5620 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5621 	"  error_log\t- error log for failed commands (that support it)\n"
5622 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5623 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5624 	"  trace_clock\t\t- change the clock used to order events\n"
5625 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5626 	"      global:   Synced across CPUs but slows tracing down.\n"
5627 	"     counter:   Not a clock, but just an increment\n"
5628 	"      uptime:   Jiffy counter from time of boot\n"
5629 	"        perf:   Same clock that perf events use\n"
5630 #ifdef CONFIG_X86_64
5631 	"     x86-tsc:   TSC cycle counter\n"
5632 #endif
5633 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5634 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5635 	"    absolute:   Absolute (standalone) timestamp\n"
5636 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5637 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5638 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5639 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5640 	"\t\t\t  Remove sub-buffer with rmdir\n"
5641 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5642 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5643 	"\t\t\t  option name\n"
5644 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5645 #ifdef CONFIG_DYNAMIC_FTRACE
5646 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5647 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5648 	"\t\t\t  functions\n"
5649 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5650 	"\t     modules: Can select a group via module\n"
5651 	"\t      Format: :mod:<module-name>\n"
5652 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5653 	"\t    triggers: a command to perform when function is hit\n"
5654 	"\t      Format: <function>:<trigger>[:count]\n"
5655 	"\t     trigger: traceon, traceoff\n"
5656 	"\t\t      enable_event:<system>:<event>\n"
5657 	"\t\t      disable_event:<system>:<event>\n"
5658 #ifdef CONFIG_STACKTRACE
5659 	"\t\t      stacktrace\n"
5660 #endif
5661 #ifdef CONFIG_TRACER_SNAPSHOT
5662 	"\t\t      snapshot\n"
5663 #endif
5664 	"\t\t      dump\n"
5665 	"\t\t      cpudump\n"
5666 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5667 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5668 	"\t     The first one will disable tracing every time do_fault is hit\n"
5669 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5670 	"\t       The first time do trap is hit and it disables tracing, the\n"
5671 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5672 	"\t       the counter will not decrement. It only decrements when the\n"
5673 	"\t       trigger did work\n"
5674 	"\t     To remove trigger without count:\n"
5675 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5676 	"\t     To remove trigger with a count:\n"
5677 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5678 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5679 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5680 	"\t    modules: Can select a group via module command :mod:\n"
5681 	"\t    Does not accept triggers\n"
5682 #endif /* CONFIG_DYNAMIC_FTRACE */
5683 #ifdef CONFIG_FUNCTION_TRACER
5684 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5685 	"\t\t    (function)\n"
5686 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5687 	"\t\t    (function)\n"
5688 #endif
5689 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5690 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5691 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5692 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5693 #endif
5694 #ifdef CONFIG_TRACER_SNAPSHOT
5695 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5696 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5697 	"\t\t\t  information\n"
5698 #endif
5699 #ifdef CONFIG_STACK_TRACER
5700 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5701 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5702 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5703 	"\t\t\t  new trace)\n"
5704 #ifdef CONFIG_DYNAMIC_FTRACE
5705 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5706 	"\t\t\t  traces\n"
5707 #endif
5708 #endif /* CONFIG_STACK_TRACER */
5709 #ifdef CONFIG_DYNAMIC_EVENTS
5710 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5711 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5712 #endif
5713 #ifdef CONFIG_KPROBE_EVENTS
5714 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5715 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5716 #endif
5717 #ifdef CONFIG_UPROBE_EVENTS
5718 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5719 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5720 #endif
5721 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5722     defined(CONFIG_FPROBE_EVENTS)
5723 	"\t  accepts: event-definitions (one definition per line)\n"
5724 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5725 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5726 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5727 #endif
5728 #ifdef CONFIG_FPROBE_EVENTS
5729 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5730 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5731 #endif
5732 #ifdef CONFIG_HIST_TRIGGERS
5733 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5734 #endif
5735 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5736 	"\t           -:[<group>/][<event>]\n"
5737 #ifdef CONFIG_KPROBE_EVENTS
5738 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5739   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5740 #endif
5741 #ifdef CONFIG_UPROBE_EVENTS
5742   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5743 #endif
5744 	"\t     args: <name>=fetcharg[:type]\n"
5745 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5746 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5747 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5748 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5749 	"\t           <argname>[->field[->field|.field...]],\n"
5750 #else
5751 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5752 #endif
5753 #else
5754 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5755 #endif
5756 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5757 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5758 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5759 	"\t           symstr, <type>\\[<array-size>\\]\n"
5760 #ifdef CONFIG_HIST_TRIGGERS
5761 	"\t    field: <stype> <name>;\n"
5762 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5763 	"\t           [unsigned] char/int/long\n"
5764 #endif
5765 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5766 	"\t            of the <attached-group>/<attached-event>.\n"
5767 #endif
5768 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5769 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5770 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5771 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5772 	"\t\t\t  events\n"
5773 	"      filter\t\t- If set, only events passing filter are traced\n"
5774 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5775 	"\t\t\t  <event>:\n"
5776 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5777 	"      filter\t\t- If set, only events passing filter are traced\n"
5778 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5779 	"\t    Format: <trigger>[:count][if <filter>]\n"
5780 	"\t   trigger: traceon, traceoff\n"
5781 	"\t            enable_event:<system>:<event>\n"
5782 	"\t            disable_event:<system>:<event>\n"
5783 #ifdef CONFIG_HIST_TRIGGERS
5784 	"\t            enable_hist:<system>:<event>\n"
5785 	"\t            disable_hist:<system>:<event>\n"
5786 #endif
5787 #ifdef CONFIG_STACKTRACE
5788 	"\t\t    stacktrace\n"
5789 #endif
5790 #ifdef CONFIG_TRACER_SNAPSHOT
5791 	"\t\t    snapshot\n"
5792 #endif
5793 #ifdef CONFIG_HIST_TRIGGERS
5794 	"\t\t    hist (see below)\n"
5795 #endif
5796 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5797 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5798 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5799 	"\t                  events/block/block_unplug/trigger\n"
5800 	"\t   The first disables tracing every time block_unplug is hit.\n"
5801 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5802 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5803 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5804 	"\t   Like function triggers, the counter is only decremented if it\n"
5805 	"\t    enabled or disabled tracing.\n"
5806 	"\t   To remove a trigger without a count:\n"
5807 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5808 	"\t   To remove a trigger with a count:\n"
5809 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5810 	"\t   Filters can be ignored when removing a trigger.\n"
5811 #ifdef CONFIG_HIST_TRIGGERS
5812 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5813 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5814 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5815 	"\t            [:values=<field1[,field2,...]>]\n"
5816 	"\t            [:sort=<field1[,field2,...]>]\n"
5817 	"\t            [:size=#entries]\n"
5818 	"\t            [:pause][:continue][:clear]\n"
5819 	"\t            [:name=histname1]\n"
5820 	"\t            [:nohitcount]\n"
5821 	"\t            [:<handler>.<action>]\n"
5822 	"\t            [if <filter>]\n\n"
5823 	"\t    Note, special fields can be used as well:\n"
5824 	"\t            common_timestamp - to record current timestamp\n"
5825 	"\t            common_cpu - to record the CPU the event happened on\n"
5826 	"\n"
5827 	"\t    A hist trigger variable can be:\n"
5828 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5829 	"\t        - a reference to another variable e.g. y=$x,\n"
5830 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5831 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5832 	"\n"
5833 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5834 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5835 	"\t    variable reference, field or numeric literal.\n"
5836 	"\n"
5837 	"\t    When a matching event is hit, an entry is added to a hash\n"
5838 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5839 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5840 	"\t    correspond to fields in the event's format description.  Keys\n"
5841 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5842 	"\t    Compound keys consisting of up to two fields can be specified\n"
5843 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5844 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5845 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5846 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5847 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5848 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5849 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5850 	"\t    its histogram data will be shared with other triggers of the\n"
5851 	"\t    same name, and trigger hits will update this common data.\n\n"
5852 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5853 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5854 	"\t    triggers attached to an event, there will be a table for each\n"
5855 	"\t    trigger in the output.  The table displayed for a named\n"
5856 	"\t    trigger will be the same as any other instance having the\n"
5857 	"\t    same name.  The default format used to display a given field\n"
5858 	"\t    can be modified by appending any of the following modifiers\n"
5859 	"\t    to the field name, as applicable:\n\n"
5860 	"\t            .hex        display a number as a hex value\n"
5861 	"\t            .sym        display an address as a symbol\n"
5862 	"\t            .sym-offset display an address as a symbol and offset\n"
5863 	"\t            .execname   display a common_pid as a program name\n"
5864 	"\t            .syscall    display a syscall id as a syscall name\n"
5865 	"\t            .log2       display log2 value rather than raw number\n"
5866 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5867 	"\t            .usecs      display a common_timestamp in microseconds\n"
5868 	"\t            .percent    display a number of percentage value\n"
5869 	"\t            .graph      display a bar-graph of a value\n\n"
5870 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5871 	"\t    trigger or to start a hist trigger but not log any events\n"
5872 	"\t    until told to do so.  'continue' can be used to start or\n"
5873 	"\t    restart a paused hist trigger.\n\n"
5874 	"\t    The 'clear' parameter will clear the contents of a running\n"
5875 	"\t    hist trigger and leave its current paused/active state\n"
5876 	"\t    unchanged.\n\n"
5877 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5878 	"\t    raw hitcount in the histogram.\n\n"
5879 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5880 	"\t    have one event conditionally start and stop another event's\n"
5881 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5882 	"\t    the enable_event and disable_event triggers.\n\n"
5883 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5884 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5885 	"\t        <handler>.<action>\n\n"
5886 	"\t    The available handlers are:\n\n"
5887 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5888 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5889 	"\t        onchange(var)            - invoke action if var changes\n\n"
5890 	"\t    The available actions are:\n\n"
5891 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5892 	"\t        save(field,...)                      - save current event fields\n"
5893 #ifdef CONFIG_TRACER_SNAPSHOT
5894 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5895 #endif
5896 #ifdef CONFIG_SYNTH_EVENTS
5897 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5898 	"\t  Write into this file to define/undefine new synthetic events.\n"
5899 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5900 #endif
5901 #endif
5902 ;
5903 
5904 static ssize_t
5905 tracing_readme_read(struct file *filp, char __user *ubuf,
5906 		       size_t cnt, loff_t *ppos)
5907 {
5908 	return simple_read_from_buffer(ubuf, cnt, ppos,
5909 					readme_msg, strlen(readme_msg));
5910 }
5911 
5912 static const struct file_operations tracing_readme_fops = {
5913 	.open		= tracing_open_generic,
5914 	.read		= tracing_readme_read,
5915 	.llseek		= generic_file_llseek,
5916 };
5917 
5918 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5919 {
5920 	int pid = ++(*pos);
5921 
5922 	return trace_find_tgid_ptr(pid);
5923 }
5924 
5925 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5926 {
5927 	int pid = *pos;
5928 
5929 	return trace_find_tgid_ptr(pid);
5930 }
5931 
5932 static void saved_tgids_stop(struct seq_file *m, void *v)
5933 {
5934 }
5935 
5936 static int saved_tgids_show(struct seq_file *m, void *v)
5937 {
5938 	int *entry = (int *)v;
5939 	int pid = entry - tgid_map;
5940 	int tgid = *entry;
5941 
5942 	if (tgid == 0)
5943 		return SEQ_SKIP;
5944 
5945 	seq_printf(m, "%d %d\n", pid, tgid);
5946 	return 0;
5947 }
5948 
5949 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5950 	.start		= saved_tgids_start,
5951 	.stop		= saved_tgids_stop,
5952 	.next		= saved_tgids_next,
5953 	.show		= saved_tgids_show,
5954 };
5955 
5956 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5957 {
5958 	int ret;
5959 
5960 	ret = tracing_check_open_get_tr(NULL);
5961 	if (ret)
5962 		return ret;
5963 
5964 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5965 }
5966 
5967 
5968 static const struct file_operations tracing_saved_tgids_fops = {
5969 	.open		= tracing_saved_tgids_open,
5970 	.read		= seq_read,
5971 	.llseek		= seq_lseek,
5972 	.release	= seq_release,
5973 };
5974 
5975 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5976 {
5977 	unsigned int *ptr = v;
5978 
5979 	if (*pos || m->count)
5980 		ptr++;
5981 
5982 	(*pos)++;
5983 
5984 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5985 	     ptr++) {
5986 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5987 			continue;
5988 
5989 		return ptr;
5990 	}
5991 
5992 	return NULL;
5993 }
5994 
5995 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5996 {
5997 	void *v;
5998 	loff_t l = 0;
5999 
6000 	preempt_disable();
6001 	arch_spin_lock(&trace_cmdline_lock);
6002 
6003 	v = &savedcmd->map_cmdline_to_pid[0];
6004 	while (l <= *pos) {
6005 		v = saved_cmdlines_next(m, v, &l);
6006 		if (!v)
6007 			return NULL;
6008 	}
6009 
6010 	return v;
6011 }
6012 
6013 static void saved_cmdlines_stop(struct seq_file *m, void *v)
6014 {
6015 	arch_spin_unlock(&trace_cmdline_lock);
6016 	preempt_enable();
6017 }
6018 
6019 static int saved_cmdlines_show(struct seq_file *m, void *v)
6020 {
6021 	char buf[TASK_COMM_LEN];
6022 	unsigned int *pid = v;
6023 
6024 	__trace_find_cmdline(*pid, buf);
6025 	seq_printf(m, "%d %s\n", *pid, buf);
6026 	return 0;
6027 }
6028 
6029 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6030 	.start		= saved_cmdlines_start,
6031 	.next		= saved_cmdlines_next,
6032 	.stop		= saved_cmdlines_stop,
6033 	.show		= saved_cmdlines_show,
6034 };
6035 
6036 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6037 {
6038 	int ret;
6039 
6040 	ret = tracing_check_open_get_tr(NULL);
6041 	if (ret)
6042 		return ret;
6043 
6044 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6045 }
6046 
6047 static const struct file_operations tracing_saved_cmdlines_fops = {
6048 	.open		= tracing_saved_cmdlines_open,
6049 	.read		= seq_read,
6050 	.llseek		= seq_lseek,
6051 	.release	= seq_release,
6052 };
6053 
6054 static ssize_t
6055 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6056 				 size_t cnt, loff_t *ppos)
6057 {
6058 	char buf[64];
6059 	int r;
6060 
6061 	preempt_disable();
6062 	arch_spin_lock(&trace_cmdline_lock);
6063 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6064 	arch_spin_unlock(&trace_cmdline_lock);
6065 	preempt_enable();
6066 
6067 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6068 }
6069 
6070 static int tracing_resize_saved_cmdlines(unsigned int val)
6071 {
6072 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
6073 
6074 	s = allocate_cmdlines_buffer(val);
6075 	if (!s)
6076 		return -ENOMEM;
6077 
6078 	preempt_disable();
6079 	arch_spin_lock(&trace_cmdline_lock);
6080 	savedcmd_temp = savedcmd;
6081 	savedcmd = s;
6082 	arch_spin_unlock(&trace_cmdline_lock);
6083 	preempt_enable();
6084 	free_saved_cmdlines_buffer(savedcmd_temp);
6085 
6086 	return 0;
6087 }
6088 
6089 static ssize_t
6090 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6091 				  size_t cnt, loff_t *ppos)
6092 {
6093 	unsigned long val;
6094 	int ret;
6095 
6096 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6097 	if (ret)
6098 		return ret;
6099 
6100 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
6101 	if (!val || val > PID_MAX_DEFAULT)
6102 		return -EINVAL;
6103 
6104 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
6105 	if (ret < 0)
6106 		return ret;
6107 
6108 	*ppos += cnt;
6109 
6110 	return cnt;
6111 }
6112 
6113 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6114 	.open		= tracing_open_generic,
6115 	.read		= tracing_saved_cmdlines_size_read,
6116 	.write		= tracing_saved_cmdlines_size_write,
6117 };
6118 
6119 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6120 static union trace_eval_map_item *
6121 update_eval_map(union trace_eval_map_item *ptr)
6122 {
6123 	if (!ptr->map.eval_string) {
6124 		if (ptr->tail.next) {
6125 			ptr = ptr->tail.next;
6126 			/* Set ptr to the next real item (skip head) */
6127 			ptr++;
6128 		} else
6129 			return NULL;
6130 	}
6131 	return ptr;
6132 }
6133 
6134 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6135 {
6136 	union trace_eval_map_item *ptr = v;
6137 
6138 	/*
6139 	 * Paranoid! If ptr points to end, we don't want to increment past it.
6140 	 * This really should never happen.
6141 	 */
6142 	(*pos)++;
6143 	ptr = update_eval_map(ptr);
6144 	if (WARN_ON_ONCE(!ptr))
6145 		return NULL;
6146 
6147 	ptr++;
6148 	ptr = update_eval_map(ptr);
6149 
6150 	return ptr;
6151 }
6152 
6153 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6154 {
6155 	union trace_eval_map_item *v;
6156 	loff_t l = 0;
6157 
6158 	mutex_lock(&trace_eval_mutex);
6159 
6160 	v = trace_eval_maps;
6161 	if (v)
6162 		v++;
6163 
6164 	while (v && l < *pos) {
6165 		v = eval_map_next(m, v, &l);
6166 	}
6167 
6168 	return v;
6169 }
6170 
6171 static void eval_map_stop(struct seq_file *m, void *v)
6172 {
6173 	mutex_unlock(&trace_eval_mutex);
6174 }
6175 
6176 static int eval_map_show(struct seq_file *m, void *v)
6177 {
6178 	union trace_eval_map_item *ptr = v;
6179 
6180 	seq_printf(m, "%s %ld (%s)\n",
6181 		   ptr->map.eval_string, ptr->map.eval_value,
6182 		   ptr->map.system);
6183 
6184 	return 0;
6185 }
6186 
6187 static const struct seq_operations tracing_eval_map_seq_ops = {
6188 	.start		= eval_map_start,
6189 	.next		= eval_map_next,
6190 	.stop		= eval_map_stop,
6191 	.show		= eval_map_show,
6192 };
6193 
6194 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6195 {
6196 	int ret;
6197 
6198 	ret = tracing_check_open_get_tr(NULL);
6199 	if (ret)
6200 		return ret;
6201 
6202 	return seq_open(filp, &tracing_eval_map_seq_ops);
6203 }
6204 
6205 static const struct file_operations tracing_eval_map_fops = {
6206 	.open		= tracing_eval_map_open,
6207 	.read		= seq_read,
6208 	.llseek		= seq_lseek,
6209 	.release	= seq_release,
6210 };
6211 
6212 static inline union trace_eval_map_item *
6213 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6214 {
6215 	/* Return tail of array given the head */
6216 	return ptr + ptr->head.length + 1;
6217 }
6218 
6219 static void
6220 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6221 			   int len)
6222 {
6223 	struct trace_eval_map **stop;
6224 	struct trace_eval_map **map;
6225 	union trace_eval_map_item *map_array;
6226 	union trace_eval_map_item *ptr;
6227 
6228 	stop = start + len;
6229 
6230 	/*
6231 	 * The trace_eval_maps contains the map plus a head and tail item,
6232 	 * where the head holds the module and length of array, and the
6233 	 * tail holds a pointer to the next list.
6234 	 */
6235 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6236 	if (!map_array) {
6237 		pr_warn("Unable to allocate trace eval mapping\n");
6238 		return;
6239 	}
6240 
6241 	mutex_lock(&trace_eval_mutex);
6242 
6243 	if (!trace_eval_maps)
6244 		trace_eval_maps = map_array;
6245 	else {
6246 		ptr = trace_eval_maps;
6247 		for (;;) {
6248 			ptr = trace_eval_jmp_to_tail(ptr);
6249 			if (!ptr->tail.next)
6250 				break;
6251 			ptr = ptr->tail.next;
6252 
6253 		}
6254 		ptr->tail.next = map_array;
6255 	}
6256 	map_array->head.mod = mod;
6257 	map_array->head.length = len;
6258 	map_array++;
6259 
6260 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6261 		map_array->map = **map;
6262 		map_array++;
6263 	}
6264 	memset(map_array, 0, sizeof(*map_array));
6265 
6266 	mutex_unlock(&trace_eval_mutex);
6267 }
6268 
6269 static void trace_create_eval_file(struct dentry *d_tracer)
6270 {
6271 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6272 			  NULL, &tracing_eval_map_fops);
6273 }
6274 
6275 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6276 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6277 static inline void trace_insert_eval_map_file(struct module *mod,
6278 			      struct trace_eval_map **start, int len) { }
6279 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6280 
6281 static void trace_insert_eval_map(struct module *mod,
6282 				  struct trace_eval_map **start, int len)
6283 {
6284 	struct trace_eval_map **map;
6285 
6286 	if (len <= 0)
6287 		return;
6288 
6289 	map = start;
6290 
6291 	trace_event_eval_update(map, len);
6292 
6293 	trace_insert_eval_map_file(mod, start, len);
6294 }
6295 
6296 static ssize_t
6297 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6298 		       size_t cnt, loff_t *ppos)
6299 {
6300 	struct trace_array *tr = filp->private_data;
6301 	char buf[MAX_TRACER_SIZE+2];
6302 	int r;
6303 
6304 	mutex_lock(&trace_types_lock);
6305 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6306 	mutex_unlock(&trace_types_lock);
6307 
6308 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6309 }
6310 
6311 int tracer_init(struct tracer *t, struct trace_array *tr)
6312 {
6313 	tracing_reset_online_cpus(&tr->array_buffer);
6314 	return t->init(tr);
6315 }
6316 
6317 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6318 {
6319 	int cpu;
6320 
6321 	for_each_tracing_cpu(cpu)
6322 		per_cpu_ptr(buf->data, cpu)->entries = val;
6323 }
6324 
6325 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6326 {
6327 	if (cpu == RING_BUFFER_ALL_CPUS) {
6328 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6329 	} else {
6330 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6331 	}
6332 }
6333 
6334 #ifdef CONFIG_TRACER_MAX_TRACE
6335 /* resize @tr's buffer to the size of @size_tr's entries */
6336 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6337 					struct array_buffer *size_buf, int cpu_id)
6338 {
6339 	int cpu, ret = 0;
6340 
6341 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6342 		for_each_tracing_cpu(cpu) {
6343 			ret = ring_buffer_resize(trace_buf->buffer,
6344 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6345 			if (ret < 0)
6346 				break;
6347 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6348 				per_cpu_ptr(size_buf->data, cpu)->entries;
6349 		}
6350 	} else {
6351 		ret = ring_buffer_resize(trace_buf->buffer,
6352 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6353 		if (ret == 0)
6354 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6355 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6356 	}
6357 
6358 	return ret;
6359 }
6360 #endif /* CONFIG_TRACER_MAX_TRACE */
6361 
6362 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6363 					unsigned long size, int cpu)
6364 {
6365 	int ret;
6366 
6367 	/*
6368 	 * If kernel or user changes the size of the ring buffer
6369 	 * we use the size that was given, and we can forget about
6370 	 * expanding it later.
6371 	 */
6372 	ring_buffer_expanded = true;
6373 
6374 	/* May be called before buffers are initialized */
6375 	if (!tr->array_buffer.buffer)
6376 		return 0;
6377 
6378 	/* Do not allow tracing while resizing ring buffer */
6379 	tracing_stop_tr(tr);
6380 
6381 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6382 	if (ret < 0)
6383 		goto out_start;
6384 
6385 #ifdef CONFIG_TRACER_MAX_TRACE
6386 	if (!tr->allocated_snapshot)
6387 		goto out;
6388 
6389 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6390 	if (ret < 0) {
6391 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6392 						     &tr->array_buffer, cpu);
6393 		if (r < 0) {
6394 			/*
6395 			 * AARGH! We are left with different
6396 			 * size max buffer!!!!
6397 			 * The max buffer is our "snapshot" buffer.
6398 			 * When a tracer needs a snapshot (one of the
6399 			 * latency tracers), it swaps the max buffer
6400 			 * with the saved snap shot. We succeeded to
6401 			 * update the size of the main buffer, but failed to
6402 			 * update the size of the max buffer. But when we tried
6403 			 * to reset the main buffer to the original size, we
6404 			 * failed there too. This is very unlikely to
6405 			 * happen, but if it does, warn and kill all
6406 			 * tracing.
6407 			 */
6408 			WARN_ON(1);
6409 			tracing_disabled = 1;
6410 		}
6411 		goto out_start;
6412 	}
6413 
6414 	update_buffer_entries(&tr->max_buffer, cpu);
6415 
6416  out:
6417 #endif /* CONFIG_TRACER_MAX_TRACE */
6418 
6419 	update_buffer_entries(&tr->array_buffer, cpu);
6420  out_start:
6421 	tracing_start_tr(tr);
6422 	return ret;
6423 }
6424 
6425 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6426 				  unsigned long size, int cpu_id)
6427 {
6428 	int ret;
6429 
6430 	mutex_lock(&trace_types_lock);
6431 
6432 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6433 		/* make sure, this cpu is enabled in the mask */
6434 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6435 			ret = -EINVAL;
6436 			goto out;
6437 		}
6438 	}
6439 
6440 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6441 	if (ret < 0)
6442 		ret = -ENOMEM;
6443 
6444 out:
6445 	mutex_unlock(&trace_types_lock);
6446 
6447 	return ret;
6448 }
6449 
6450 
6451 /**
6452  * tracing_update_buffers - used by tracing facility to expand ring buffers
6453  *
6454  * To save on memory when the tracing is never used on a system with it
6455  * configured in. The ring buffers are set to a minimum size. But once
6456  * a user starts to use the tracing facility, then they need to grow
6457  * to their default size.
6458  *
6459  * This function is to be called when a tracer is about to be used.
6460  */
6461 int tracing_update_buffers(void)
6462 {
6463 	int ret = 0;
6464 
6465 	mutex_lock(&trace_types_lock);
6466 	if (!ring_buffer_expanded)
6467 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6468 						RING_BUFFER_ALL_CPUS);
6469 	mutex_unlock(&trace_types_lock);
6470 
6471 	return ret;
6472 }
6473 
6474 struct trace_option_dentry;
6475 
6476 static void
6477 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6478 
6479 /*
6480  * Used to clear out the tracer before deletion of an instance.
6481  * Must have trace_types_lock held.
6482  */
6483 static void tracing_set_nop(struct trace_array *tr)
6484 {
6485 	if (tr->current_trace == &nop_trace)
6486 		return;
6487 
6488 	tr->current_trace->enabled--;
6489 
6490 	if (tr->current_trace->reset)
6491 		tr->current_trace->reset(tr);
6492 
6493 	tr->current_trace = &nop_trace;
6494 }
6495 
6496 static bool tracer_options_updated;
6497 
6498 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6499 {
6500 	/* Only enable if the directory has been created already. */
6501 	if (!tr->dir)
6502 		return;
6503 
6504 	/* Only create trace option files after update_tracer_options finish */
6505 	if (!tracer_options_updated)
6506 		return;
6507 
6508 	create_trace_option_files(tr, t);
6509 }
6510 
6511 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6512 {
6513 	struct tracer *t;
6514 #ifdef CONFIG_TRACER_MAX_TRACE
6515 	bool had_max_tr;
6516 #endif
6517 	int ret = 0;
6518 
6519 	mutex_lock(&trace_types_lock);
6520 
6521 	if (!ring_buffer_expanded) {
6522 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6523 						RING_BUFFER_ALL_CPUS);
6524 		if (ret < 0)
6525 			goto out;
6526 		ret = 0;
6527 	}
6528 
6529 	for (t = trace_types; t; t = t->next) {
6530 		if (strcmp(t->name, buf) == 0)
6531 			break;
6532 	}
6533 	if (!t) {
6534 		ret = -EINVAL;
6535 		goto out;
6536 	}
6537 	if (t == tr->current_trace)
6538 		goto out;
6539 
6540 #ifdef CONFIG_TRACER_SNAPSHOT
6541 	if (t->use_max_tr) {
6542 		local_irq_disable();
6543 		arch_spin_lock(&tr->max_lock);
6544 		if (tr->cond_snapshot)
6545 			ret = -EBUSY;
6546 		arch_spin_unlock(&tr->max_lock);
6547 		local_irq_enable();
6548 		if (ret)
6549 			goto out;
6550 	}
6551 #endif
6552 	/* Some tracers won't work on kernel command line */
6553 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6554 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6555 			t->name);
6556 		goto out;
6557 	}
6558 
6559 	/* Some tracers are only allowed for the top level buffer */
6560 	if (!trace_ok_for_array(t, tr)) {
6561 		ret = -EINVAL;
6562 		goto out;
6563 	}
6564 
6565 	/* If trace pipe files are being read, we can't change the tracer */
6566 	if (tr->trace_ref) {
6567 		ret = -EBUSY;
6568 		goto out;
6569 	}
6570 
6571 	trace_branch_disable();
6572 
6573 	tr->current_trace->enabled--;
6574 
6575 	if (tr->current_trace->reset)
6576 		tr->current_trace->reset(tr);
6577 
6578 #ifdef CONFIG_TRACER_MAX_TRACE
6579 	had_max_tr = tr->current_trace->use_max_tr;
6580 
6581 	/* Current trace needs to be nop_trace before synchronize_rcu */
6582 	tr->current_trace = &nop_trace;
6583 
6584 	if (had_max_tr && !t->use_max_tr) {
6585 		/*
6586 		 * We need to make sure that the update_max_tr sees that
6587 		 * current_trace changed to nop_trace to keep it from
6588 		 * swapping the buffers after we resize it.
6589 		 * The update_max_tr is called from interrupts disabled
6590 		 * so a synchronized_sched() is sufficient.
6591 		 */
6592 		synchronize_rcu();
6593 		free_snapshot(tr);
6594 	}
6595 
6596 	if (t->use_max_tr && !tr->allocated_snapshot) {
6597 		ret = tracing_alloc_snapshot_instance(tr);
6598 		if (ret < 0)
6599 			goto out;
6600 	}
6601 #else
6602 	tr->current_trace = &nop_trace;
6603 #endif
6604 
6605 	if (t->init) {
6606 		ret = tracer_init(t, tr);
6607 		if (ret)
6608 			goto out;
6609 	}
6610 
6611 	tr->current_trace = t;
6612 	tr->current_trace->enabled++;
6613 	trace_branch_enable(tr);
6614  out:
6615 	mutex_unlock(&trace_types_lock);
6616 
6617 	return ret;
6618 }
6619 
6620 static ssize_t
6621 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6622 			size_t cnt, loff_t *ppos)
6623 {
6624 	struct trace_array *tr = filp->private_data;
6625 	char buf[MAX_TRACER_SIZE+1];
6626 	char *name;
6627 	size_t ret;
6628 	int err;
6629 
6630 	ret = cnt;
6631 
6632 	if (cnt > MAX_TRACER_SIZE)
6633 		cnt = MAX_TRACER_SIZE;
6634 
6635 	if (copy_from_user(buf, ubuf, cnt))
6636 		return -EFAULT;
6637 
6638 	buf[cnt] = 0;
6639 
6640 	name = strim(buf);
6641 
6642 	err = tracing_set_tracer(tr, name);
6643 	if (err)
6644 		return err;
6645 
6646 	*ppos += ret;
6647 
6648 	return ret;
6649 }
6650 
6651 static ssize_t
6652 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6653 		   size_t cnt, loff_t *ppos)
6654 {
6655 	char buf[64];
6656 	int r;
6657 
6658 	r = snprintf(buf, sizeof(buf), "%ld\n",
6659 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6660 	if (r > sizeof(buf))
6661 		r = sizeof(buf);
6662 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6663 }
6664 
6665 static ssize_t
6666 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6667 		    size_t cnt, loff_t *ppos)
6668 {
6669 	unsigned long val;
6670 	int ret;
6671 
6672 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6673 	if (ret)
6674 		return ret;
6675 
6676 	*ptr = val * 1000;
6677 
6678 	return cnt;
6679 }
6680 
6681 static ssize_t
6682 tracing_thresh_read(struct file *filp, char __user *ubuf,
6683 		    size_t cnt, loff_t *ppos)
6684 {
6685 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6686 }
6687 
6688 static ssize_t
6689 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6690 		     size_t cnt, loff_t *ppos)
6691 {
6692 	struct trace_array *tr = filp->private_data;
6693 	int ret;
6694 
6695 	mutex_lock(&trace_types_lock);
6696 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6697 	if (ret < 0)
6698 		goto out;
6699 
6700 	if (tr->current_trace->update_thresh) {
6701 		ret = tr->current_trace->update_thresh(tr);
6702 		if (ret < 0)
6703 			goto out;
6704 	}
6705 
6706 	ret = cnt;
6707 out:
6708 	mutex_unlock(&trace_types_lock);
6709 
6710 	return ret;
6711 }
6712 
6713 #ifdef CONFIG_TRACER_MAX_TRACE
6714 
6715 static ssize_t
6716 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6717 		     size_t cnt, loff_t *ppos)
6718 {
6719 	struct trace_array *tr = filp->private_data;
6720 
6721 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6722 }
6723 
6724 static ssize_t
6725 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6726 		      size_t cnt, loff_t *ppos)
6727 {
6728 	struct trace_array *tr = filp->private_data;
6729 
6730 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6731 }
6732 
6733 #endif
6734 
6735 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6736 {
6737 	if (cpu == RING_BUFFER_ALL_CPUS) {
6738 		if (cpumask_empty(tr->pipe_cpumask)) {
6739 			cpumask_setall(tr->pipe_cpumask);
6740 			return 0;
6741 		}
6742 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6743 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6744 		return 0;
6745 	}
6746 	return -EBUSY;
6747 }
6748 
6749 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6750 {
6751 	if (cpu == RING_BUFFER_ALL_CPUS) {
6752 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6753 		cpumask_clear(tr->pipe_cpumask);
6754 	} else {
6755 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6756 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6757 	}
6758 }
6759 
6760 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6761 {
6762 	struct trace_array *tr = inode->i_private;
6763 	struct trace_iterator *iter;
6764 	int cpu;
6765 	int ret;
6766 
6767 	ret = tracing_check_open_get_tr(tr);
6768 	if (ret)
6769 		return ret;
6770 
6771 	mutex_lock(&trace_types_lock);
6772 	cpu = tracing_get_cpu(inode);
6773 	ret = open_pipe_on_cpu(tr, cpu);
6774 	if (ret)
6775 		goto fail_pipe_on_cpu;
6776 
6777 	/* create a buffer to store the information to pass to userspace */
6778 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6779 	if (!iter) {
6780 		ret = -ENOMEM;
6781 		goto fail_alloc_iter;
6782 	}
6783 
6784 	trace_seq_init(&iter->seq);
6785 	iter->trace = tr->current_trace;
6786 
6787 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6788 		ret = -ENOMEM;
6789 		goto fail;
6790 	}
6791 
6792 	/* trace pipe does not show start of buffer */
6793 	cpumask_setall(iter->started);
6794 
6795 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6796 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6797 
6798 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6799 	if (trace_clocks[tr->clock_id].in_ns)
6800 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6801 
6802 	iter->tr = tr;
6803 	iter->array_buffer = &tr->array_buffer;
6804 	iter->cpu_file = cpu;
6805 	mutex_init(&iter->mutex);
6806 	filp->private_data = iter;
6807 
6808 	if (iter->trace->pipe_open)
6809 		iter->trace->pipe_open(iter);
6810 
6811 	nonseekable_open(inode, filp);
6812 
6813 	tr->trace_ref++;
6814 
6815 	mutex_unlock(&trace_types_lock);
6816 	return ret;
6817 
6818 fail:
6819 	kfree(iter);
6820 fail_alloc_iter:
6821 	close_pipe_on_cpu(tr, cpu);
6822 fail_pipe_on_cpu:
6823 	__trace_array_put(tr);
6824 	mutex_unlock(&trace_types_lock);
6825 	return ret;
6826 }
6827 
6828 static int tracing_release_pipe(struct inode *inode, struct file *file)
6829 {
6830 	struct trace_iterator *iter = file->private_data;
6831 	struct trace_array *tr = inode->i_private;
6832 
6833 	mutex_lock(&trace_types_lock);
6834 
6835 	tr->trace_ref--;
6836 
6837 	if (iter->trace->pipe_close)
6838 		iter->trace->pipe_close(iter);
6839 	close_pipe_on_cpu(tr, iter->cpu_file);
6840 	mutex_unlock(&trace_types_lock);
6841 
6842 	free_trace_iter_content(iter);
6843 	kfree(iter);
6844 
6845 	trace_array_put(tr);
6846 
6847 	return 0;
6848 }
6849 
6850 static __poll_t
6851 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6852 {
6853 	struct trace_array *tr = iter->tr;
6854 
6855 	/* Iterators are static, they should be filled or empty */
6856 	if (trace_buffer_iter(iter, iter->cpu_file))
6857 		return EPOLLIN | EPOLLRDNORM;
6858 
6859 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6860 		/*
6861 		 * Always select as readable when in blocking mode
6862 		 */
6863 		return EPOLLIN | EPOLLRDNORM;
6864 	else
6865 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6866 					     filp, poll_table, iter->tr->buffer_percent);
6867 }
6868 
6869 static __poll_t
6870 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6871 {
6872 	struct trace_iterator *iter = filp->private_data;
6873 
6874 	return trace_poll(iter, filp, poll_table);
6875 }
6876 
6877 /* Must be called with iter->mutex held. */
6878 static int tracing_wait_pipe(struct file *filp)
6879 {
6880 	struct trace_iterator *iter = filp->private_data;
6881 	int ret;
6882 
6883 	while (trace_empty(iter)) {
6884 
6885 		if ((filp->f_flags & O_NONBLOCK)) {
6886 			return -EAGAIN;
6887 		}
6888 
6889 		/*
6890 		 * We block until we read something and tracing is disabled.
6891 		 * We still block if tracing is disabled, but we have never
6892 		 * read anything. This allows a user to cat this file, and
6893 		 * then enable tracing. But after we have read something,
6894 		 * we give an EOF when tracing is again disabled.
6895 		 *
6896 		 * iter->pos will be 0 if we haven't read anything.
6897 		 */
6898 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6899 			break;
6900 
6901 		mutex_unlock(&iter->mutex);
6902 
6903 		ret = wait_on_pipe(iter, 0);
6904 
6905 		mutex_lock(&iter->mutex);
6906 
6907 		if (ret)
6908 			return ret;
6909 	}
6910 
6911 	return 1;
6912 }
6913 
6914 /*
6915  * Consumer reader.
6916  */
6917 static ssize_t
6918 tracing_read_pipe(struct file *filp, char __user *ubuf,
6919 		  size_t cnt, loff_t *ppos)
6920 {
6921 	struct trace_iterator *iter = filp->private_data;
6922 	ssize_t sret;
6923 
6924 	/*
6925 	 * Avoid more than one consumer on a single file descriptor
6926 	 * This is just a matter of traces coherency, the ring buffer itself
6927 	 * is protected.
6928 	 */
6929 	mutex_lock(&iter->mutex);
6930 
6931 	/* return any leftover data */
6932 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6933 	if (sret != -EBUSY)
6934 		goto out;
6935 
6936 	trace_seq_init(&iter->seq);
6937 
6938 	if (iter->trace->read) {
6939 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6940 		if (sret)
6941 			goto out;
6942 	}
6943 
6944 waitagain:
6945 	sret = tracing_wait_pipe(filp);
6946 	if (sret <= 0)
6947 		goto out;
6948 
6949 	/* stop when tracing is finished */
6950 	if (trace_empty(iter)) {
6951 		sret = 0;
6952 		goto out;
6953 	}
6954 
6955 	if (cnt >= PAGE_SIZE)
6956 		cnt = PAGE_SIZE - 1;
6957 
6958 	/* reset all but tr, trace, and overruns */
6959 	trace_iterator_reset(iter);
6960 	cpumask_clear(iter->started);
6961 	trace_seq_init(&iter->seq);
6962 
6963 	trace_event_read_lock();
6964 	trace_access_lock(iter->cpu_file);
6965 	while (trace_find_next_entry_inc(iter) != NULL) {
6966 		enum print_line_t ret;
6967 		int save_len = iter->seq.seq.len;
6968 
6969 		ret = print_trace_line(iter);
6970 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6971 			/*
6972 			 * If one print_trace_line() fills entire trace_seq in one shot,
6973 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6974 			 * In this case, we need to consume it, otherwise, loop will peek
6975 			 * this event next time, resulting in an infinite loop.
6976 			 */
6977 			if (save_len == 0) {
6978 				iter->seq.full = 0;
6979 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6980 				trace_consume(iter);
6981 				break;
6982 			}
6983 
6984 			/* In other cases, don't print partial lines */
6985 			iter->seq.seq.len = save_len;
6986 			break;
6987 		}
6988 		if (ret != TRACE_TYPE_NO_CONSUME)
6989 			trace_consume(iter);
6990 
6991 		if (trace_seq_used(&iter->seq) >= cnt)
6992 			break;
6993 
6994 		/*
6995 		 * Setting the full flag means we reached the trace_seq buffer
6996 		 * size and we should leave by partial output condition above.
6997 		 * One of the trace_seq_* functions is not used properly.
6998 		 */
6999 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
7000 			  iter->ent->type);
7001 	}
7002 	trace_access_unlock(iter->cpu_file);
7003 	trace_event_read_unlock();
7004 
7005 	/* Now copy what we have to the user */
7006 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
7007 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
7008 		trace_seq_init(&iter->seq);
7009 
7010 	/*
7011 	 * If there was nothing to send to user, in spite of consuming trace
7012 	 * entries, go back to wait for more entries.
7013 	 */
7014 	if (sret == -EBUSY)
7015 		goto waitagain;
7016 
7017 out:
7018 	mutex_unlock(&iter->mutex);
7019 
7020 	return sret;
7021 }
7022 
7023 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7024 				     unsigned int idx)
7025 {
7026 	__free_page(spd->pages[idx]);
7027 }
7028 
7029 static size_t
7030 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7031 {
7032 	size_t count;
7033 	int save_len;
7034 	int ret;
7035 
7036 	/* Seq buffer is page-sized, exactly what we need. */
7037 	for (;;) {
7038 		save_len = iter->seq.seq.len;
7039 		ret = print_trace_line(iter);
7040 
7041 		if (trace_seq_has_overflowed(&iter->seq)) {
7042 			iter->seq.seq.len = save_len;
7043 			break;
7044 		}
7045 
7046 		/*
7047 		 * This should not be hit, because it should only
7048 		 * be set if the iter->seq overflowed. But check it
7049 		 * anyway to be safe.
7050 		 */
7051 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
7052 			iter->seq.seq.len = save_len;
7053 			break;
7054 		}
7055 
7056 		count = trace_seq_used(&iter->seq) - save_len;
7057 		if (rem < count) {
7058 			rem = 0;
7059 			iter->seq.seq.len = save_len;
7060 			break;
7061 		}
7062 
7063 		if (ret != TRACE_TYPE_NO_CONSUME)
7064 			trace_consume(iter);
7065 		rem -= count;
7066 		if (!trace_find_next_entry_inc(iter))	{
7067 			rem = 0;
7068 			iter->ent = NULL;
7069 			break;
7070 		}
7071 	}
7072 
7073 	return rem;
7074 }
7075 
7076 static ssize_t tracing_splice_read_pipe(struct file *filp,
7077 					loff_t *ppos,
7078 					struct pipe_inode_info *pipe,
7079 					size_t len,
7080 					unsigned int flags)
7081 {
7082 	struct page *pages_def[PIPE_DEF_BUFFERS];
7083 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7084 	struct trace_iterator *iter = filp->private_data;
7085 	struct splice_pipe_desc spd = {
7086 		.pages		= pages_def,
7087 		.partial	= partial_def,
7088 		.nr_pages	= 0, /* This gets updated below. */
7089 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7090 		.ops		= &default_pipe_buf_ops,
7091 		.spd_release	= tracing_spd_release_pipe,
7092 	};
7093 	ssize_t ret;
7094 	size_t rem;
7095 	unsigned int i;
7096 
7097 	if (splice_grow_spd(pipe, &spd))
7098 		return -ENOMEM;
7099 
7100 	mutex_lock(&iter->mutex);
7101 
7102 	if (iter->trace->splice_read) {
7103 		ret = iter->trace->splice_read(iter, filp,
7104 					       ppos, pipe, len, flags);
7105 		if (ret)
7106 			goto out_err;
7107 	}
7108 
7109 	ret = tracing_wait_pipe(filp);
7110 	if (ret <= 0)
7111 		goto out_err;
7112 
7113 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7114 		ret = -EFAULT;
7115 		goto out_err;
7116 	}
7117 
7118 	trace_event_read_lock();
7119 	trace_access_lock(iter->cpu_file);
7120 
7121 	/* Fill as many pages as possible. */
7122 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7123 		spd.pages[i] = alloc_page(GFP_KERNEL);
7124 		if (!spd.pages[i])
7125 			break;
7126 
7127 		rem = tracing_fill_pipe_page(rem, iter);
7128 
7129 		/* Copy the data into the page, so we can start over. */
7130 		ret = trace_seq_to_buffer(&iter->seq,
7131 					  page_address(spd.pages[i]),
7132 					  trace_seq_used(&iter->seq));
7133 		if (ret < 0) {
7134 			__free_page(spd.pages[i]);
7135 			break;
7136 		}
7137 		spd.partial[i].offset = 0;
7138 		spd.partial[i].len = trace_seq_used(&iter->seq);
7139 
7140 		trace_seq_init(&iter->seq);
7141 	}
7142 
7143 	trace_access_unlock(iter->cpu_file);
7144 	trace_event_read_unlock();
7145 	mutex_unlock(&iter->mutex);
7146 
7147 	spd.nr_pages = i;
7148 
7149 	if (i)
7150 		ret = splice_to_pipe(pipe, &spd);
7151 	else
7152 		ret = 0;
7153 out:
7154 	splice_shrink_spd(&spd);
7155 	return ret;
7156 
7157 out_err:
7158 	mutex_unlock(&iter->mutex);
7159 	goto out;
7160 }
7161 
7162 static ssize_t
7163 tracing_entries_read(struct file *filp, char __user *ubuf,
7164 		     size_t cnt, loff_t *ppos)
7165 {
7166 	struct inode *inode = file_inode(filp);
7167 	struct trace_array *tr = inode->i_private;
7168 	int cpu = tracing_get_cpu(inode);
7169 	char buf[64];
7170 	int r = 0;
7171 	ssize_t ret;
7172 
7173 	mutex_lock(&trace_types_lock);
7174 
7175 	if (cpu == RING_BUFFER_ALL_CPUS) {
7176 		int cpu, buf_size_same;
7177 		unsigned long size;
7178 
7179 		size = 0;
7180 		buf_size_same = 1;
7181 		/* check if all cpu sizes are same */
7182 		for_each_tracing_cpu(cpu) {
7183 			/* fill in the size from first enabled cpu */
7184 			if (size == 0)
7185 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7186 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7187 				buf_size_same = 0;
7188 				break;
7189 			}
7190 		}
7191 
7192 		if (buf_size_same) {
7193 			if (!ring_buffer_expanded)
7194 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7195 					    size >> 10,
7196 					    trace_buf_size >> 10);
7197 			else
7198 				r = sprintf(buf, "%lu\n", size >> 10);
7199 		} else
7200 			r = sprintf(buf, "X\n");
7201 	} else
7202 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7203 
7204 	mutex_unlock(&trace_types_lock);
7205 
7206 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7207 	return ret;
7208 }
7209 
7210 static ssize_t
7211 tracing_entries_write(struct file *filp, const char __user *ubuf,
7212 		      size_t cnt, loff_t *ppos)
7213 {
7214 	struct inode *inode = file_inode(filp);
7215 	struct trace_array *tr = inode->i_private;
7216 	unsigned long val;
7217 	int ret;
7218 
7219 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7220 	if (ret)
7221 		return ret;
7222 
7223 	/* must have at least 1 entry */
7224 	if (!val)
7225 		return -EINVAL;
7226 
7227 	/* value is in KB */
7228 	val <<= 10;
7229 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7230 	if (ret < 0)
7231 		return ret;
7232 
7233 	*ppos += cnt;
7234 
7235 	return cnt;
7236 }
7237 
7238 static ssize_t
7239 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7240 				size_t cnt, loff_t *ppos)
7241 {
7242 	struct trace_array *tr = filp->private_data;
7243 	char buf[64];
7244 	int r, cpu;
7245 	unsigned long size = 0, expanded_size = 0;
7246 
7247 	mutex_lock(&trace_types_lock);
7248 	for_each_tracing_cpu(cpu) {
7249 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7250 		if (!ring_buffer_expanded)
7251 			expanded_size += trace_buf_size >> 10;
7252 	}
7253 	if (ring_buffer_expanded)
7254 		r = sprintf(buf, "%lu\n", size);
7255 	else
7256 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7257 	mutex_unlock(&trace_types_lock);
7258 
7259 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7260 }
7261 
7262 static ssize_t
7263 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7264 			  size_t cnt, loff_t *ppos)
7265 {
7266 	/*
7267 	 * There is no need to read what the user has written, this function
7268 	 * is just to make sure that there is no error when "echo" is used
7269 	 */
7270 
7271 	*ppos += cnt;
7272 
7273 	return cnt;
7274 }
7275 
7276 static int
7277 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7278 {
7279 	struct trace_array *tr = inode->i_private;
7280 
7281 	/* disable tracing ? */
7282 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7283 		tracer_tracing_off(tr);
7284 	/* resize the ring buffer to 0 */
7285 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7286 
7287 	trace_array_put(tr);
7288 
7289 	return 0;
7290 }
7291 
7292 static ssize_t
7293 tracing_mark_write(struct file *filp, const char __user *ubuf,
7294 					size_t cnt, loff_t *fpos)
7295 {
7296 	struct trace_array *tr = filp->private_data;
7297 	struct ring_buffer_event *event;
7298 	enum event_trigger_type tt = ETT_NONE;
7299 	struct trace_buffer *buffer;
7300 	struct print_entry *entry;
7301 	ssize_t written;
7302 	int size;
7303 	int len;
7304 
7305 /* Used in tracing_mark_raw_write() as well */
7306 #define FAULTED_STR "<faulted>"
7307 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7308 
7309 	if (tracing_disabled)
7310 		return -EINVAL;
7311 
7312 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7313 		return -EINVAL;
7314 
7315 	if (cnt > TRACE_BUF_SIZE)
7316 		cnt = TRACE_BUF_SIZE;
7317 
7318 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7319 
7320 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7321 
7322 	/* If less than "<faulted>", then make sure we can still add that */
7323 	if (cnt < FAULTED_SIZE)
7324 		size += FAULTED_SIZE - cnt;
7325 
7326 	buffer = tr->array_buffer.buffer;
7327 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7328 					    tracing_gen_ctx());
7329 	if (unlikely(!event))
7330 		/* Ring buffer disabled, return as if not open for write */
7331 		return -EBADF;
7332 
7333 	entry = ring_buffer_event_data(event);
7334 	entry->ip = _THIS_IP_;
7335 
7336 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7337 	if (len) {
7338 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7339 		cnt = FAULTED_SIZE;
7340 		written = -EFAULT;
7341 	} else
7342 		written = cnt;
7343 
7344 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7345 		/* do not add \n before testing triggers, but add \0 */
7346 		entry->buf[cnt] = '\0';
7347 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7348 	}
7349 
7350 	if (entry->buf[cnt - 1] != '\n') {
7351 		entry->buf[cnt] = '\n';
7352 		entry->buf[cnt + 1] = '\0';
7353 	} else
7354 		entry->buf[cnt] = '\0';
7355 
7356 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7357 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7358 	__buffer_unlock_commit(buffer, event);
7359 
7360 	if (tt)
7361 		event_triggers_post_call(tr->trace_marker_file, tt);
7362 
7363 	return written;
7364 }
7365 
7366 /* Limit it for now to 3K (including tag) */
7367 #define RAW_DATA_MAX_SIZE (1024*3)
7368 
7369 static ssize_t
7370 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7371 					size_t cnt, loff_t *fpos)
7372 {
7373 	struct trace_array *tr = filp->private_data;
7374 	struct ring_buffer_event *event;
7375 	struct trace_buffer *buffer;
7376 	struct raw_data_entry *entry;
7377 	ssize_t written;
7378 	int size;
7379 	int len;
7380 
7381 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7382 
7383 	if (tracing_disabled)
7384 		return -EINVAL;
7385 
7386 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7387 		return -EINVAL;
7388 
7389 	/* The marker must at least have a tag id */
7390 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7391 		return -EINVAL;
7392 
7393 	if (cnt > TRACE_BUF_SIZE)
7394 		cnt = TRACE_BUF_SIZE;
7395 
7396 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7397 
7398 	size = sizeof(*entry) + cnt;
7399 	if (cnt < FAULT_SIZE_ID)
7400 		size += FAULT_SIZE_ID - cnt;
7401 
7402 	buffer = tr->array_buffer.buffer;
7403 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7404 					    tracing_gen_ctx());
7405 	if (!event)
7406 		/* Ring buffer disabled, return as if not open for write */
7407 		return -EBADF;
7408 
7409 	entry = ring_buffer_event_data(event);
7410 
7411 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7412 	if (len) {
7413 		entry->id = -1;
7414 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7415 		written = -EFAULT;
7416 	} else
7417 		written = cnt;
7418 
7419 	__buffer_unlock_commit(buffer, event);
7420 
7421 	return written;
7422 }
7423 
7424 static int tracing_clock_show(struct seq_file *m, void *v)
7425 {
7426 	struct trace_array *tr = m->private;
7427 	int i;
7428 
7429 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7430 		seq_printf(m,
7431 			"%s%s%s%s", i ? " " : "",
7432 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7433 			i == tr->clock_id ? "]" : "");
7434 	seq_putc(m, '\n');
7435 
7436 	return 0;
7437 }
7438 
7439 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7440 {
7441 	int i;
7442 
7443 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7444 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7445 			break;
7446 	}
7447 	if (i == ARRAY_SIZE(trace_clocks))
7448 		return -EINVAL;
7449 
7450 	mutex_lock(&trace_types_lock);
7451 
7452 	tr->clock_id = i;
7453 
7454 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7455 
7456 	/*
7457 	 * New clock may not be consistent with the previous clock.
7458 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7459 	 */
7460 	tracing_reset_online_cpus(&tr->array_buffer);
7461 
7462 #ifdef CONFIG_TRACER_MAX_TRACE
7463 	if (tr->max_buffer.buffer)
7464 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7465 	tracing_reset_online_cpus(&tr->max_buffer);
7466 #endif
7467 
7468 	mutex_unlock(&trace_types_lock);
7469 
7470 	return 0;
7471 }
7472 
7473 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7474 				   size_t cnt, loff_t *fpos)
7475 {
7476 	struct seq_file *m = filp->private_data;
7477 	struct trace_array *tr = m->private;
7478 	char buf[64];
7479 	const char *clockstr;
7480 	int ret;
7481 
7482 	if (cnt >= sizeof(buf))
7483 		return -EINVAL;
7484 
7485 	if (copy_from_user(buf, ubuf, cnt))
7486 		return -EFAULT;
7487 
7488 	buf[cnt] = 0;
7489 
7490 	clockstr = strstrip(buf);
7491 
7492 	ret = tracing_set_clock(tr, clockstr);
7493 	if (ret)
7494 		return ret;
7495 
7496 	*fpos += cnt;
7497 
7498 	return cnt;
7499 }
7500 
7501 static int tracing_clock_open(struct inode *inode, struct file *file)
7502 {
7503 	struct trace_array *tr = inode->i_private;
7504 	int ret;
7505 
7506 	ret = tracing_check_open_get_tr(tr);
7507 	if (ret)
7508 		return ret;
7509 
7510 	ret = single_open(file, tracing_clock_show, inode->i_private);
7511 	if (ret < 0)
7512 		trace_array_put(tr);
7513 
7514 	return ret;
7515 }
7516 
7517 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7518 {
7519 	struct trace_array *tr = m->private;
7520 
7521 	mutex_lock(&trace_types_lock);
7522 
7523 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7524 		seq_puts(m, "delta [absolute]\n");
7525 	else
7526 		seq_puts(m, "[delta] absolute\n");
7527 
7528 	mutex_unlock(&trace_types_lock);
7529 
7530 	return 0;
7531 }
7532 
7533 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7534 {
7535 	struct trace_array *tr = inode->i_private;
7536 	int ret;
7537 
7538 	ret = tracing_check_open_get_tr(tr);
7539 	if (ret)
7540 		return ret;
7541 
7542 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7543 	if (ret < 0)
7544 		trace_array_put(tr);
7545 
7546 	return ret;
7547 }
7548 
7549 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7550 {
7551 	if (rbe == this_cpu_read(trace_buffered_event))
7552 		return ring_buffer_time_stamp(buffer);
7553 
7554 	return ring_buffer_event_time_stamp(buffer, rbe);
7555 }
7556 
7557 /*
7558  * Set or disable using the per CPU trace_buffer_event when possible.
7559  */
7560 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7561 {
7562 	int ret = 0;
7563 
7564 	mutex_lock(&trace_types_lock);
7565 
7566 	if (set && tr->no_filter_buffering_ref++)
7567 		goto out;
7568 
7569 	if (!set) {
7570 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7571 			ret = -EINVAL;
7572 			goto out;
7573 		}
7574 
7575 		--tr->no_filter_buffering_ref;
7576 	}
7577  out:
7578 	mutex_unlock(&trace_types_lock);
7579 
7580 	return ret;
7581 }
7582 
7583 struct ftrace_buffer_info {
7584 	struct trace_iterator	iter;
7585 	void			*spare;
7586 	unsigned int		spare_cpu;
7587 	unsigned int		read;
7588 };
7589 
7590 #ifdef CONFIG_TRACER_SNAPSHOT
7591 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7592 {
7593 	struct trace_array *tr = inode->i_private;
7594 	struct trace_iterator *iter;
7595 	struct seq_file *m;
7596 	int ret;
7597 
7598 	ret = tracing_check_open_get_tr(tr);
7599 	if (ret)
7600 		return ret;
7601 
7602 	if (file->f_mode & FMODE_READ) {
7603 		iter = __tracing_open(inode, file, true);
7604 		if (IS_ERR(iter))
7605 			ret = PTR_ERR(iter);
7606 	} else {
7607 		/* Writes still need the seq_file to hold the private data */
7608 		ret = -ENOMEM;
7609 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7610 		if (!m)
7611 			goto out;
7612 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7613 		if (!iter) {
7614 			kfree(m);
7615 			goto out;
7616 		}
7617 		ret = 0;
7618 
7619 		iter->tr = tr;
7620 		iter->array_buffer = &tr->max_buffer;
7621 		iter->cpu_file = tracing_get_cpu(inode);
7622 		m->private = iter;
7623 		file->private_data = m;
7624 	}
7625 out:
7626 	if (ret < 0)
7627 		trace_array_put(tr);
7628 
7629 	return ret;
7630 }
7631 
7632 static void tracing_swap_cpu_buffer(void *tr)
7633 {
7634 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7635 }
7636 
7637 static ssize_t
7638 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7639 		       loff_t *ppos)
7640 {
7641 	struct seq_file *m = filp->private_data;
7642 	struct trace_iterator *iter = m->private;
7643 	struct trace_array *tr = iter->tr;
7644 	unsigned long val;
7645 	int ret;
7646 
7647 	ret = tracing_update_buffers();
7648 	if (ret < 0)
7649 		return ret;
7650 
7651 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7652 	if (ret)
7653 		return ret;
7654 
7655 	mutex_lock(&trace_types_lock);
7656 
7657 	if (tr->current_trace->use_max_tr) {
7658 		ret = -EBUSY;
7659 		goto out;
7660 	}
7661 
7662 	local_irq_disable();
7663 	arch_spin_lock(&tr->max_lock);
7664 	if (tr->cond_snapshot)
7665 		ret = -EBUSY;
7666 	arch_spin_unlock(&tr->max_lock);
7667 	local_irq_enable();
7668 	if (ret)
7669 		goto out;
7670 
7671 	switch (val) {
7672 	case 0:
7673 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7674 			ret = -EINVAL;
7675 			break;
7676 		}
7677 		if (tr->allocated_snapshot)
7678 			free_snapshot(tr);
7679 		break;
7680 	case 1:
7681 /* Only allow per-cpu swap if the ring buffer supports it */
7682 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7683 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7684 			ret = -EINVAL;
7685 			break;
7686 		}
7687 #endif
7688 		if (tr->allocated_snapshot)
7689 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7690 					&tr->array_buffer, iter->cpu_file);
7691 		else
7692 			ret = tracing_alloc_snapshot_instance(tr);
7693 		if (ret < 0)
7694 			break;
7695 		/* Now, we're going to swap */
7696 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7697 			local_irq_disable();
7698 			update_max_tr(tr, current, smp_processor_id(), NULL);
7699 			local_irq_enable();
7700 		} else {
7701 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7702 						 (void *)tr, 1);
7703 		}
7704 		break;
7705 	default:
7706 		if (tr->allocated_snapshot) {
7707 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7708 				tracing_reset_online_cpus(&tr->max_buffer);
7709 			else
7710 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7711 		}
7712 		break;
7713 	}
7714 
7715 	if (ret >= 0) {
7716 		*ppos += cnt;
7717 		ret = cnt;
7718 	}
7719 out:
7720 	mutex_unlock(&trace_types_lock);
7721 	return ret;
7722 }
7723 
7724 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7725 {
7726 	struct seq_file *m = file->private_data;
7727 	int ret;
7728 
7729 	ret = tracing_release(inode, file);
7730 
7731 	if (file->f_mode & FMODE_READ)
7732 		return ret;
7733 
7734 	/* If write only, the seq_file is just a stub */
7735 	if (m)
7736 		kfree(m->private);
7737 	kfree(m);
7738 
7739 	return 0;
7740 }
7741 
7742 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7743 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7744 				    size_t count, loff_t *ppos);
7745 static int tracing_buffers_release(struct inode *inode, struct file *file);
7746 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7747 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7748 
7749 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7750 {
7751 	struct ftrace_buffer_info *info;
7752 	int ret;
7753 
7754 	/* The following checks for tracefs lockdown */
7755 	ret = tracing_buffers_open(inode, filp);
7756 	if (ret < 0)
7757 		return ret;
7758 
7759 	info = filp->private_data;
7760 
7761 	if (info->iter.trace->use_max_tr) {
7762 		tracing_buffers_release(inode, filp);
7763 		return -EBUSY;
7764 	}
7765 
7766 	info->iter.snapshot = true;
7767 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7768 
7769 	return ret;
7770 }
7771 
7772 #endif /* CONFIG_TRACER_SNAPSHOT */
7773 
7774 
7775 static const struct file_operations tracing_thresh_fops = {
7776 	.open		= tracing_open_generic,
7777 	.read		= tracing_thresh_read,
7778 	.write		= tracing_thresh_write,
7779 	.llseek		= generic_file_llseek,
7780 };
7781 
7782 #ifdef CONFIG_TRACER_MAX_TRACE
7783 static const struct file_operations tracing_max_lat_fops = {
7784 	.open		= tracing_open_generic_tr,
7785 	.read		= tracing_max_lat_read,
7786 	.write		= tracing_max_lat_write,
7787 	.llseek		= generic_file_llseek,
7788 	.release	= tracing_release_generic_tr,
7789 };
7790 #endif
7791 
7792 static const struct file_operations set_tracer_fops = {
7793 	.open		= tracing_open_generic_tr,
7794 	.read		= tracing_set_trace_read,
7795 	.write		= tracing_set_trace_write,
7796 	.llseek		= generic_file_llseek,
7797 	.release	= tracing_release_generic_tr,
7798 };
7799 
7800 static const struct file_operations tracing_pipe_fops = {
7801 	.open		= tracing_open_pipe,
7802 	.poll		= tracing_poll_pipe,
7803 	.read		= tracing_read_pipe,
7804 	.splice_read	= tracing_splice_read_pipe,
7805 	.release	= tracing_release_pipe,
7806 	.llseek		= no_llseek,
7807 };
7808 
7809 static const struct file_operations tracing_entries_fops = {
7810 	.open		= tracing_open_generic_tr,
7811 	.read		= tracing_entries_read,
7812 	.write		= tracing_entries_write,
7813 	.llseek		= generic_file_llseek,
7814 	.release	= tracing_release_generic_tr,
7815 };
7816 
7817 static const struct file_operations tracing_total_entries_fops = {
7818 	.open		= tracing_open_generic_tr,
7819 	.read		= tracing_total_entries_read,
7820 	.llseek		= generic_file_llseek,
7821 	.release	= tracing_release_generic_tr,
7822 };
7823 
7824 static const struct file_operations tracing_free_buffer_fops = {
7825 	.open		= tracing_open_generic_tr,
7826 	.write		= tracing_free_buffer_write,
7827 	.release	= tracing_free_buffer_release,
7828 };
7829 
7830 static const struct file_operations tracing_mark_fops = {
7831 	.open		= tracing_mark_open,
7832 	.write		= tracing_mark_write,
7833 	.release	= tracing_release_generic_tr,
7834 };
7835 
7836 static const struct file_operations tracing_mark_raw_fops = {
7837 	.open		= tracing_mark_open,
7838 	.write		= tracing_mark_raw_write,
7839 	.release	= tracing_release_generic_tr,
7840 };
7841 
7842 static const struct file_operations trace_clock_fops = {
7843 	.open		= tracing_clock_open,
7844 	.read		= seq_read,
7845 	.llseek		= seq_lseek,
7846 	.release	= tracing_single_release_tr,
7847 	.write		= tracing_clock_write,
7848 };
7849 
7850 static const struct file_operations trace_time_stamp_mode_fops = {
7851 	.open		= tracing_time_stamp_mode_open,
7852 	.read		= seq_read,
7853 	.llseek		= seq_lseek,
7854 	.release	= tracing_single_release_tr,
7855 };
7856 
7857 #ifdef CONFIG_TRACER_SNAPSHOT
7858 static const struct file_operations snapshot_fops = {
7859 	.open		= tracing_snapshot_open,
7860 	.read		= seq_read,
7861 	.write		= tracing_snapshot_write,
7862 	.llseek		= tracing_lseek,
7863 	.release	= tracing_snapshot_release,
7864 };
7865 
7866 static const struct file_operations snapshot_raw_fops = {
7867 	.open		= snapshot_raw_open,
7868 	.read		= tracing_buffers_read,
7869 	.release	= tracing_buffers_release,
7870 	.splice_read	= tracing_buffers_splice_read,
7871 	.llseek		= no_llseek,
7872 };
7873 
7874 #endif /* CONFIG_TRACER_SNAPSHOT */
7875 
7876 /*
7877  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7878  * @filp: The active open file structure
7879  * @ubuf: The userspace provided buffer to read value into
7880  * @cnt: The maximum number of bytes to read
7881  * @ppos: The current "file" position
7882  *
7883  * This function implements the write interface for a struct trace_min_max_param.
7884  * The filp->private_data must point to a trace_min_max_param structure that
7885  * defines where to write the value, the min and the max acceptable values,
7886  * and a lock to protect the write.
7887  */
7888 static ssize_t
7889 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7890 {
7891 	struct trace_min_max_param *param = filp->private_data;
7892 	u64 val;
7893 	int err;
7894 
7895 	if (!param)
7896 		return -EFAULT;
7897 
7898 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7899 	if (err)
7900 		return err;
7901 
7902 	if (param->lock)
7903 		mutex_lock(param->lock);
7904 
7905 	if (param->min && val < *param->min)
7906 		err = -EINVAL;
7907 
7908 	if (param->max && val > *param->max)
7909 		err = -EINVAL;
7910 
7911 	if (!err)
7912 		*param->val = val;
7913 
7914 	if (param->lock)
7915 		mutex_unlock(param->lock);
7916 
7917 	if (err)
7918 		return err;
7919 
7920 	return cnt;
7921 }
7922 
7923 /*
7924  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7925  * @filp: The active open file structure
7926  * @ubuf: The userspace provided buffer to read value into
7927  * @cnt: The maximum number of bytes to read
7928  * @ppos: The current "file" position
7929  *
7930  * This function implements the read interface for a struct trace_min_max_param.
7931  * The filp->private_data must point to a trace_min_max_param struct with valid
7932  * data.
7933  */
7934 static ssize_t
7935 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7936 {
7937 	struct trace_min_max_param *param = filp->private_data;
7938 	char buf[U64_STR_SIZE];
7939 	int len;
7940 	u64 val;
7941 
7942 	if (!param)
7943 		return -EFAULT;
7944 
7945 	val = *param->val;
7946 
7947 	if (cnt > sizeof(buf))
7948 		cnt = sizeof(buf);
7949 
7950 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7951 
7952 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7953 }
7954 
7955 const struct file_operations trace_min_max_fops = {
7956 	.open		= tracing_open_generic,
7957 	.read		= trace_min_max_read,
7958 	.write		= trace_min_max_write,
7959 };
7960 
7961 #define TRACING_LOG_ERRS_MAX	8
7962 #define TRACING_LOG_LOC_MAX	128
7963 
7964 #define CMD_PREFIX "  Command: "
7965 
7966 struct err_info {
7967 	const char	**errs;	/* ptr to loc-specific array of err strings */
7968 	u8		type;	/* index into errs -> specific err string */
7969 	u16		pos;	/* caret position */
7970 	u64		ts;
7971 };
7972 
7973 struct tracing_log_err {
7974 	struct list_head	list;
7975 	struct err_info		info;
7976 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7977 	char			*cmd;                     /* what caused err */
7978 };
7979 
7980 static DEFINE_MUTEX(tracing_err_log_lock);
7981 
7982 static struct tracing_log_err *alloc_tracing_log_err(int len)
7983 {
7984 	struct tracing_log_err *err;
7985 
7986 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7987 	if (!err)
7988 		return ERR_PTR(-ENOMEM);
7989 
7990 	err->cmd = kzalloc(len, GFP_KERNEL);
7991 	if (!err->cmd) {
7992 		kfree(err);
7993 		return ERR_PTR(-ENOMEM);
7994 	}
7995 
7996 	return err;
7997 }
7998 
7999 static void free_tracing_log_err(struct tracing_log_err *err)
8000 {
8001 	kfree(err->cmd);
8002 	kfree(err);
8003 }
8004 
8005 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8006 						   int len)
8007 {
8008 	struct tracing_log_err *err;
8009 	char *cmd;
8010 
8011 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8012 		err = alloc_tracing_log_err(len);
8013 		if (PTR_ERR(err) != -ENOMEM)
8014 			tr->n_err_log_entries++;
8015 
8016 		return err;
8017 	}
8018 	cmd = kzalloc(len, GFP_KERNEL);
8019 	if (!cmd)
8020 		return ERR_PTR(-ENOMEM);
8021 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8022 	kfree(err->cmd);
8023 	err->cmd = cmd;
8024 	list_del(&err->list);
8025 
8026 	return err;
8027 }
8028 
8029 /**
8030  * err_pos - find the position of a string within a command for error careting
8031  * @cmd: The tracing command that caused the error
8032  * @str: The string to position the caret at within @cmd
8033  *
8034  * Finds the position of the first occurrence of @str within @cmd.  The
8035  * return value can be passed to tracing_log_err() for caret placement
8036  * within @cmd.
8037  *
8038  * Returns the index within @cmd of the first occurrence of @str or 0
8039  * if @str was not found.
8040  */
8041 unsigned int err_pos(char *cmd, const char *str)
8042 {
8043 	char *found;
8044 
8045 	if (WARN_ON(!strlen(cmd)))
8046 		return 0;
8047 
8048 	found = strstr(cmd, str);
8049 	if (found)
8050 		return found - cmd;
8051 
8052 	return 0;
8053 }
8054 
8055 /**
8056  * tracing_log_err - write an error to the tracing error log
8057  * @tr: The associated trace array for the error (NULL for top level array)
8058  * @loc: A string describing where the error occurred
8059  * @cmd: The tracing command that caused the error
8060  * @errs: The array of loc-specific static error strings
8061  * @type: The index into errs[], which produces the specific static err string
8062  * @pos: The position the caret should be placed in the cmd
8063  *
8064  * Writes an error into tracing/error_log of the form:
8065  *
8066  * <loc>: error: <text>
8067  *   Command: <cmd>
8068  *              ^
8069  *
8070  * tracing/error_log is a small log file containing the last
8071  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8072  * unless there has been a tracing error, and the error log can be
8073  * cleared and have its memory freed by writing the empty string in
8074  * truncation mode to it i.e. echo > tracing/error_log.
8075  *
8076  * NOTE: the @errs array along with the @type param are used to
8077  * produce a static error string - this string is not copied and saved
8078  * when the error is logged - only a pointer to it is saved.  See
8079  * existing callers for examples of how static strings are typically
8080  * defined for use with tracing_log_err().
8081  */
8082 void tracing_log_err(struct trace_array *tr,
8083 		     const char *loc, const char *cmd,
8084 		     const char **errs, u8 type, u16 pos)
8085 {
8086 	struct tracing_log_err *err;
8087 	int len = 0;
8088 
8089 	if (!tr)
8090 		tr = &global_trace;
8091 
8092 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8093 
8094 	mutex_lock(&tracing_err_log_lock);
8095 	err = get_tracing_log_err(tr, len);
8096 	if (PTR_ERR(err) == -ENOMEM) {
8097 		mutex_unlock(&tracing_err_log_lock);
8098 		return;
8099 	}
8100 
8101 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8102 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8103 
8104 	err->info.errs = errs;
8105 	err->info.type = type;
8106 	err->info.pos = pos;
8107 	err->info.ts = local_clock();
8108 
8109 	list_add_tail(&err->list, &tr->err_log);
8110 	mutex_unlock(&tracing_err_log_lock);
8111 }
8112 
8113 static void clear_tracing_err_log(struct trace_array *tr)
8114 {
8115 	struct tracing_log_err *err, *next;
8116 
8117 	mutex_lock(&tracing_err_log_lock);
8118 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8119 		list_del(&err->list);
8120 		free_tracing_log_err(err);
8121 	}
8122 
8123 	tr->n_err_log_entries = 0;
8124 	mutex_unlock(&tracing_err_log_lock);
8125 }
8126 
8127 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8128 {
8129 	struct trace_array *tr = m->private;
8130 
8131 	mutex_lock(&tracing_err_log_lock);
8132 
8133 	return seq_list_start(&tr->err_log, *pos);
8134 }
8135 
8136 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8137 {
8138 	struct trace_array *tr = m->private;
8139 
8140 	return seq_list_next(v, &tr->err_log, pos);
8141 }
8142 
8143 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8144 {
8145 	mutex_unlock(&tracing_err_log_lock);
8146 }
8147 
8148 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8149 {
8150 	u16 i;
8151 
8152 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8153 		seq_putc(m, ' ');
8154 	for (i = 0; i < pos; i++)
8155 		seq_putc(m, ' ');
8156 	seq_puts(m, "^\n");
8157 }
8158 
8159 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8160 {
8161 	struct tracing_log_err *err = v;
8162 
8163 	if (err) {
8164 		const char *err_text = err->info.errs[err->info.type];
8165 		u64 sec = err->info.ts;
8166 		u32 nsec;
8167 
8168 		nsec = do_div(sec, NSEC_PER_SEC);
8169 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8170 			   err->loc, err_text);
8171 		seq_printf(m, "%s", err->cmd);
8172 		tracing_err_log_show_pos(m, err->info.pos);
8173 	}
8174 
8175 	return 0;
8176 }
8177 
8178 static const struct seq_operations tracing_err_log_seq_ops = {
8179 	.start  = tracing_err_log_seq_start,
8180 	.next   = tracing_err_log_seq_next,
8181 	.stop   = tracing_err_log_seq_stop,
8182 	.show   = tracing_err_log_seq_show
8183 };
8184 
8185 static int tracing_err_log_open(struct inode *inode, struct file *file)
8186 {
8187 	struct trace_array *tr = inode->i_private;
8188 	int ret = 0;
8189 
8190 	ret = tracing_check_open_get_tr(tr);
8191 	if (ret)
8192 		return ret;
8193 
8194 	/* If this file was opened for write, then erase contents */
8195 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8196 		clear_tracing_err_log(tr);
8197 
8198 	if (file->f_mode & FMODE_READ) {
8199 		ret = seq_open(file, &tracing_err_log_seq_ops);
8200 		if (!ret) {
8201 			struct seq_file *m = file->private_data;
8202 			m->private = tr;
8203 		} else {
8204 			trace_array_put(tr);
8205 		}
8206 	}
8207 	return ret;
8208 }
8209 
8210 static ssize_t tracing_err_log_write(struct file *file,
8211 				     const char __user *buffer,
8212 				     size_t count, loff_t *ppos)
8213 {
8214 	return count;
8215 }
8216 
8217 static int tracing_err_log_release(struct inode *inode, struct file *file)
8218 {
8219 	struct trace_array *tr = inode->i_private;
8220 
8221 	trace_array_put(tr);
8222 
8223 	if (file->f_mode & FMODE_READ)
8224 		seq_release(inode, file);
8225 
8226 	return 0;
8227 }
8228 
8229 static const struct file_operations tracing_err_log_fops = {
8230 	.open           = tracing_err_log_open,
8231 	.write		= tracing_err_log_write,
8232 	.read           = seq_read,
8233 	.llseek         = tracing_lseek,
8234 	.release        = tracing_err_log_release,
8235 };
8236 
8237 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8238 {
8239 	struct trace_array *tr = inode->i_private;
8240 	struct ftrace_buffer_info *info;
8241 	int ret;
8242 
8243 	ret = tracing_check_open_get_tr(tr);
8244 	if (ret)
8245 		return ret;
8246 
8247 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8248 	if (!info) {
8249 		trace_array_put(tr);
8250 		return -ENOMEM;
8251 	}
8252 
8253 	mutex_lock(&trace_types_lock);
8254 
8255 	info->iter.tr		= tr;
8256 	info->iter.cpu_file	= tracing_get_cpu(inode);
8257 	info->iter.trace	= tr->current_trace;
8258 	info->iter.array_buffer = &tr->array_buffer;
8259 	info->spare		= NULL;
8260 	/* Force reading ring buffer for first read */
8261 	info->read		= (unsigned int)-1;
8262 
8263 	filp->private_data = info;
8264 
8265 	tr->trace_ref++;
8266 
8267 	mutex_unlock(&trace_types_lock);
8268 
8269 	ret = nonseekable_open(inode, filp);
8270 	if (ret < 0)
8271 		trace_array_put(tr);
8272 
8273 	return ret;
8274 }
8275 
8276 static __poll_t
8277 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8278 {
8279 	struct ftrace_buffer_info *info = filp->private_data;
8280 	struct trace_iterator *iter = &info->iter;
8281 
8282 	return trace_poll(iter, filp, poll_table);
8283 }
8284 
8285 static ssize_t
8286 tracing_buffers_read(struct file *filp, char __user *ubuf,
8287 		     size_t count, loff_t *ppos)
8288 {
8289 	struct ftrace_buffer_info *info = filp->private_data;
8290 	struct trace_iterator *iter = &info->iter;
8291 	ssize_t ret = 0;
8292 	ssize_t size;
8293 
8294 	if (!count)
8295 		return 0;
8296 
8297 #ifdef CONFIG_TRACER_MAX_TRACE
8298 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8299 		return -EBUSY;
8300 #endif
8301 
8302 	if (!info->spare) {
8303 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8304 							  iter->cpu_file);
8305 		if (IS_ERR(info->spare)) {
8306 			ret = PTR_ERR(info->spare);
8307 			info->spare = NULL;
8308 		} else {
8309 			info->spare_cpu = iter->cpu_file;
8310 		}
8311 	}
8312 	if (!info->spare)
8313 		return ret;
8314 
8315 	/* Do we have previous read data to read? */
8316 	if (info->read < PAGE_SIZE)
8317 		goto read;
8318 
8319  again:
8320 	trace_access_lock(iter->cpu_file);
8321 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8322 				    &info->spare,
8323 				    count,
8324 				    iter->cpu_file, 0);
8325 	trace_access_unlock(iter->cpu_file);
8326 
8327 	if (ret < 0) {
8328 		if (trace_empty(iter)) {
8329 			if ((filp->f_flags & O_NONBLOCK))
8330 				return -EAGAIN;
8331 
8332 			ret = wait_on_pipe(iter, 0);
8333 			if (ret)
8334 				return ret;
8335 
8336 			goto again;
8337 		}
8338 		return 0;
8339 	}
8340 
8341 	info->read = 0;
8342  read:
8343 	size = PAGE_SIZE - info->read;
8344 	if (size > count)
8345 		size = count;
8346 
8347 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8348 	if (ret == size)
8349 		return -EFAULT;
8350 
8351 	size -= ret;
8352 
8353 	*ppos += size;
8354 	info->read += size;
8355 
8356 	return size;
8357 }
8358 
8359 static int tracing_buffers_release(struct inode *inode, struct file *file)
8360 {
8361 	struct ftrace_buffer_info *info = file->private_data;
8362 	struct trace_iterator *iter = &info->iter;
8363 
8364 	mutex_lock(&trace_types_lock);
8365 
8366 	iter->tr->trace_ref--;
8367 
8368 	__trace_array_put(iter->tr);
8369 
8370 	iter->wait_index++;
8371 	/* Make sure the waiters see the new wait_index */
8372 	smp_wmb();
8373 
8374 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8375 
8376 	if (info->spare)
8377 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8378 					   info->spare_cpu, info->spare);
8379 	kvfree(info);
8380 
8381 	mutex_unlock(&trace_types_lock);
8382 
8383 	return 0;
8384 }
8385 
8386 struct buffer_ref {
8387 	struct trace_buffer	*buffer;
8388 	void			*page;
8389 	int			cpu;
8390 	refcount_t		refcount;
8391 };
8392 
8393 static void buffer_ref_release(struct buffer_ref *ref)
8394 {
8395 	if (!refcount_dec_and_test(&ref->refcount))
8396 		return;
8397 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8398 	kfree(ref);
8399 }
8400 
8401 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8402 				    struct pipe_buffer *buf)
8403 {
8404 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8405 
8406 	buffer_ref_release(ref);
8407 	buf->private = 0;
8408 }
8409 
8410 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8411 				struct pipe_buffer *buf)
8412 {
8413 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8414 
8415 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8416 		return false;
8417 
8418 	refcount_inc(&ref->refcount);
8419 	return true;
8420 }
8421 
8422 /* Pipe buffer operations for a buffer. */
8423 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8424 	.release		= buffer_pipe_buf_release,
8425 	.get			= buffer_pipe_buf_get,
8426 };
8427 
8428 /*
8429  * Callback from splice_to_pipe(), if we need to release some pages
8430  * at the end of the spd in case we error'ed out in filling the pipe.
8431  */
8432 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8433 {
8434 	struct buffer_ref *ref =
8435 		(struct buffer_ref *)spd->partial[i].private;
8436 
8437 	buffer_ref_release(ref);
8438 	spd->partial[i].private = 0;
8439 }
8440 
8441 static ssize_t
8442 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8443 			    struct pipe_inode_info *pipe, size_t len,
8444 			    unsigned int flags)
8445 {
8446 	struct ftrace_buffer_info *info = file->private_data;
8447 	struct trace_iterator *iter = &info->iter;
8448 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8449 	struct page *pages_def[PIPE_DEF_BUFFERS];
8450 	struct splice_pipe_desc spd = {
8451 		.pages		= pages_def,
8452 		.partial	= partial_def,
8453 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8454 		.ops		= &buffer_pipe_buf_ops,
8455 		.spd_release	= buffer_spd_release,
8456 	};
8457 	struct buffer_ref *ref;
8458 	int entries, i;
8459 	ssize_t ret = 0;
8460 
8461 #ifdef CONFIG_TRACER_MAX_TRACE
8462 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8463 		return -EBUSY;
8464 #endif
8465 
8466 	if (*ppos & (PAGE_SIZE - 1))
8467 		return -EINVAL;
8468 
8469 	if (len & (PAGE_SIZE - 1)) {
8470 		if (len < PAGE_SIZE)
8471 			return -EINVAL;
8472 		len &= PAGE_MASK;
8473 	}
8474 
8475 	if (splice_grow_spd(pipe, &spd))
8476 		return -ENOMEM;
8477 
8478  again:
8479 	trace_access_lock(iter->cpu_file);
8480 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8481 
8482 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8483 		struct page *page;
8484 		int r;
8485 
8486 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8487 		if (!ref) {
8488 			ret = -ENOMEM;
8489 			break;
8490 		}
8491 
8492 		refcount_set(&ref->refcount, 1);
8493 		ref->buffer = iter->array_buffer->buffer;
8494 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8495 		if (IS_ERR(ref->page)) {
8496 			ret = PTR_ERR(ref->page);
8497 			ref->page = NULL;
8498 			kfree(ref);
8499 			break;
8500 		}
8501 		ref->cpu = iter->cpu_file;
8502 
8503 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8504 					  len, iter->cpu_file, 1);
8505 		if (r < 0) {
8506 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8507 						   ref->page);
8508 			kfree(ref);
8509 			break;
8510 		}
8511 
8512 		page = virt_to_page(ref->page);
8513 
8514 		spd.pages[i] = page;
8515 		spd.partial[i].len = PAGE_SIZE;
8516 		spd.partial[i].offset = 0;
8517 		spd.partial[i].private = (unsigned long)ref;
8518 		spd.nr_pages++;
8519 		*ppos += PAGE_SIZE;
8520 
8521 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8522 	}
8523 
8524 	trace_access_unlock(iter->cpu_file);
8525 	spd.nr_pages = i;
8526 
8527 	/* did we read anything? */
8528 	if (!spd.nr_pages) {
8529 		long wait_index;
8530 
8531 		if (ret)
8532 			goto out;
8533 
8534 		ret = -EAGAIN;
8535 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8536 			goto out;
8537 
8538 		wait_index = READ_ONCE(iter->wait_index);
8539 
8540 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8541 		if (ret)
8542 			goto out;
8543 
8544 		/* No need to wait after waking up when tracing is off */
8545 		if (!tracer_tracing_is_on(iter->tr))
8546 			goto out;
8547 
8548 		/* Make sure we see the new wait_index */
8549 		smp_rmb();
8550 		if (wait_index != iter->wait_index)
8551 			goto out;
8552 
8553 		goto again;
8554 	}
8555 
8556 	ret = splice_to_pipe(pipe, &spd);
8557 out:
8558 	splice_shrink_spd(&spd);
8559 
8560 	return ret;
8561 }
8562 
8563 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8564 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8565 {
8566 	struct ftrace_buffer_info *info = file->private_data;
8567 	struct trace_iterator *iter = &info->iter;
8568 
8569 	if (cmd)
8570 		return -ENOIOCTLCMD;
8571 
8572 	mutex_lock(&trace_types_lock);
8573 
8574 	iter->wait_index++;
8575 	/* Make sure the waiters see the new wait_index */
8576 	smp_wmb();
8577 
8578 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8579 
8580 	mutex_unlock(&trace_types_lock);
8581 	return 0;
8582 }
8583 
8584 static const struct file_operations tracing_buffers_fops = {
8585 	.open		= tracing_buffers_open,
8586 	.read		= tracing_buffers_read,
8587 	.poll		= tracing_buffers_poll,
8588 	.release	= tracing_buffers_release,
8589 	.splice_read	= tracing_buffers_splice_read,
8590 	.unlocked_ioctl = tracing_buffers_ioctl,
8591 	.llseek		= no_llseek,
8592 };
8593 
8594 static ssize_t
8595 tracing_stats_read(struct file *filp, char __user *ubuf,
8596 		   size_t count, loff_t *ppos)
8597 {
8598 	struct inode *inode = file_inode(filp);
8599 	struct trace_array *tr = inode->i_private;
8600 	struct array_buffer *trace_buf = &tr->array_buffer;
8601 	int cpu = tracing_get_cpu(inode);
8602 	struct trace_seq *s;
8603 	unsigned long cnt;
8604 	unsigned long long t;
8605 	unsigned long usec_rem;
8606 
8607 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8608 	if (!s)
8609 		return -ENOMEM;
8610 
8611 	trace_seq_init(s);
8612 
8613 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8614 	trace_seq_printf(s, "entries: %ld\n", cnt);
8615 
8616 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8617 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8618 
8619 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8620 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8621 
8622 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8623 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8624 
8625 	if (trace_clocks[tr->clock_id].in_ns) {
8626 		/* local or global for trace_clock */
8627 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8628 		usec_rem = do_div(t, USEC_PER_SEC);
8629 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8630 								t, usec_rem);
8631 
8632 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8633 		usec_rem = do_div(t, USEC_PER_SEC);
8634 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8635 	} else {
8636 		/* counter or tsc mode for trace_clock */
8637 		trace_seq_printf(s, "oldest event ts: %llu\n",
8638 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8639 
8640 		trace_seq_printf(s, "now ts: %llu\n",
8641 				ring_buffer_time_stamp(trace_buf->buffer));
8642 	}
8643 
8644 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8645 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8646 
8647 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8648 	trace_seq_printf(s, "read events: %ld\n", cnt);
8649 
8650 	count = simple_read_from_buffer(ubuf, count, ppos,
8651 					s->buffer, trace_seq_used(s));
8652 
8653 	kfree(s);
8654 
8655 	return count;
8656 }
8657 
8658 static const struct file_operations tracing_stats_fops = {
8659 	.open		= tracing_open_generic_tr,
8660 	.read		= tracing_stats_read,
8661 	.llseek		= generic_file_llseek,
8662 	.release	= tracing_release_generic_tr,
8663 };
8664 
8665 #ifdef CONFIG_DYNAMIC_FTRACE
8666 
8667 static ssize_t
8668 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8669 		  size_t cnt, loff_t *ppos)
8670 {
8671 	ssize_t ret;
8672 	char *buf;
8673 	int r;
8674 
8675 	/* 256 should be plenty to hold the amount needed */
8676 	buf = kmalloc(256, GFP_KERNEL);
8677 	if (!buf)
8678 		return -ENOMEM;
8679 
8680 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8681 		      ftrace_update_tot_cnt,
8682 		      ftrace_number_of_pages,
8683 		      ftrace_number_of_groups);
8684 
8685 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8686 	kfree(buf);
8687 	return ret;
8688 }
8689 
8690 static const struct file_operations tracing_dyn_info_fops = {
8691 	.open		= tracing_open_generic,
8692 	.read		= tracing_read_dyn_info,
8693 	.llseek		= generic_file_llseek,
8694 };
8695 #endif /* CONFIG_DYNAMIC_FTRACE */
8696 
8697 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8698 static void
8699 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8700 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8701 		void *data)
8702 {
8703 	tracing_snapshot_instance(tr);
8704 }
8705 
8706 static void
8707 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8708 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8709 		      void *data)
8710 {
8711 	struct ftrace_func_mapper *mapper = data;
8712 	long *count = NULL;
8713 
8714 	if (mapper)
8715 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8716 
8717 	if (count) {
8718 
8719 		if (*count <= 0)
8720 			return;
8721 
8722 		(*count)--;
8723 	}
8724 
8725 	tracing_snapshot_instance(tr);
8726 }
8727 
8728 static int
8729 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8730 		      struct ftrace_probe_ops *ops, void *data)
8731 {
8732 	struct ftrace_func_mapper *mapper = data;
8733 	long *count = NULL;
8734 
8735 	seq_printf(m, "%ps:", (void *)ip);
8736 
8737 	seq_puts(m, "snapshot");
8738 
8739 	if (mapper)
8740 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8741 
8742 	if (count)
8743 		seq_printf(m, ":count=%ld\n", *count);
8744 	else
8745 		seq_puts(m, ":unlimited\n");
8746 
8747 	return 0;
8748 }
8749 
8750 static int
8751 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8752 		     unsigned long ip, void *init_data, void **data)
8753 {
8754 	struct ftrace_func_mapper *mapper = *data;
8755 
8756 	if (!mapper) {
8757 		mapper = allocate_ftrace_func_mapper();
8758 		if (!mapper)
8759 			return -ENOMEM;
8760 		*data = mapper;
8761 	}
8762 
8763 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8764 }
8765 
8766 static void
8767 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8768 		     unsigned long ip, void *data)
8769 {
8770 	struct ftrace_func_mapper *mapper = data;
8771 
8772 	if (!ip) {
8773 		if (!mapper)
8774 			return;
8775 		free_ftrace_func_mapper(mapper, NULL);
8776 		return;
8777 	}
8778 
8779 	ftrace_func_mapper_remove_ip(mapper, ip);
8780 }
8781 
8782 static struct ftrace_probe_ops snapshot_probe_ops = {
8783 	.func			= ftrace_snapshot,
8784 	.print			= ftrace_snapshot_print,
8785 };
8786 
8787 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8788 	.func			= ftrace_count_snapshot,
8789 	.print			= ftrace_snapshot_print,
8790 	.init			= ftrace_snapshot_init,
8791 	.free			= ftrace_snapshot_free,
8792 };
8793 
8794 static int
8795 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8796 			       char *glob, char *cmd, char *param, int enable)
8797 {
8798 	struct ftrace_probe_ops *ops;
8799 	void *count = (void *)-1;
8800 	char *number;
8801 	int ret;
8802 
8803 	if (!tr)
8804 		return -ENODEV;
8805 
8806 	/* hash funcs only work with set_ftrace_filter */
8807 	if (!enable)
8808 		return -EINVAL;
8809 
8810 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8811 
8812 	if (glob[0] == '!')
8813 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8814 
8815 	if (!param)
8816 		goto out_reg;
8817 
8818 	number = strsep(&param, ":");
8819 
8820 	if (!strlen(number))
8821 		goto out_reg;
8822 
8823 	/*
8824 	 * We use the callback data field (which is a pointer)
8825 	 * as our counter.
8826 	 */
8827 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8828 	if (ret)
8829 		return ret;
8830 
8831  out_reg:
8832 	ret = tracing_alloc_snapshot_instance(tr);
8833 	if (ret < 0)
8834 		goto out;
8835 
8836 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8837 
8838  out:
8839 	return ret < 0 ? ret : 0;
8840 }
8841 
8842 static struct ftrace_func_command ftrace_snapshot_cmd = {
8843 	.name			= "snapshot",
8844 	.func			= ftrace_trace_snapshot_callback,
8845 };
8846 
8847 static __init int register_snapshot_cmd(void)
8848 {
8849 	return register_ftrace_command(&ftrace_snapshot_cmd);
8850 }
8851 #else
8852 static inline __init int register_snapshot_cmd(void) { return 0; }
8853 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8854 
8855 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8856 {
8857 	if (WARN_ON(!tr->dir))
8858 		return ERR_PTR(-ENODEV);
8859 
8860 	/* Top directory uses NULL as the parent */
8861 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8862 		return NULL;
8863 
8864 	/* All sub buffers have a descriptor */
8865 	return tr->dir;
8866 }
8867 
8868 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8869 {
8870 	struct dentry *d_tracer;
8871 
8872 	if (tr->percpu_dir)
8873 		return tr->percpu_dir;
8874 
8875 	d_tracer = tracing_get_dentry(tr);
8876 	if (IS_ERR(d_tracer))
8877 		return NULL;
8878 
8879 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8880 
8881 	MEM_FAIL(!tr->percpu_dir,
8882 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8883 
8884 	return tr->percpu_dir;
8885 }
8886 
8887 static struct dentry *
8888 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8889 		      void *data, long cpu, const struct file_operations *fops)
8890 {
8891 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8892 
8893 	if (ret) /* See tracing_get_cpu() */
8894 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8895 	return ret;
8896 }
8897 
8898 static void
8899 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8900 {
8901 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8902 	struct dentry *d_cpu;
8903 	char cpu_dir[30]; /* 30 characters should be more than enough */
8904 
8905 	if (!d_percpu)
8906 		return;
8907 
8908 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8909 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8910 	if (!d_cpu) {
8911 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8912 		return;
8913 	}
8914 
8915 	/* per cpu trace_pipe */
8916 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8917 				tr, cpu, &tracing_pipe_fops);
8918 
8919 	/* per cpu trace */
8920 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8921 				tr, cpu, &tracing_fops);
8922 
8923 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8924 				tr, cpu, &tracing_buffers_fops);
8925 
8926 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8927 				tr, cpu, &tracing_stats_fops);
8928 
8929 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8930 				tr, cpu, &tracing_entries_fops);
8931 
8932 #ifdef CONFIG_TRACER_SNAPSHOT
8933 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8934 				tr, cpu, &snapshot_fops);
8935 
8936 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8937 				tr, cpu, &snapshot_raw_fops);
8938 #endif
8939 }
8940 
8941 #ifdef CONFIG_FTRACE_SELFTEST
8942 /* Let selftest have access to static functions in this file */
8943 #include "trace_selftest.c"
8944 #endif
8945 
8946 static ssize_t
8947 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8948 			loff_t *ppos)
8949 {
8950 	struct trace_option_dentry *topt = filp->private_data;
8951 	char *buf;
8952 
8953 	if (topt->flags->val & topt->opt->bit)
8954 		buf = "1\n";
8955 	else
8956 		buf = "0\n";
8957 
8958 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8959 }
8960 
8961 static ssize_t
8962 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8963 			 loff_t *ppos)
8964 {
8965 	struct trace_option_dentry *topt = filp->private_data;
8966 	unsigned long val;
8967 	int ret;
8968 
8969 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8970 	if (ret)
8971 		return ret;
8972 
8973 	if (val != 0 && val != 1)
8974 		return -EINVAL;
8975 
8976 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8977 		mutex_lock(&trace_types_lock);
8978 		ret = __set_tracer_option(topt->tr, topt->flags,
8979 					  topt->opt, !val);
8980 		mutex_unlock(&trace_types_lock);
8981 		if (ret)
8982 			return ret;
8983 	}
8984 
8985 	*ppos += cnt;
8986 
8987 	return cnt;
8988 }
8989 
8990 static int tracing_open_options(struct inode *inode, struct file *filp)
8991 {
8992 	struct trace_option_dentry *topt = inode->i_private;
8993 	int ret;
8994 
8995 	ret = tracing_check_open_get_tr(topt->tr);
8996 	if (ret)
8997 		return ret;
8998 
8999 	filp->private_data = inode->i_private;
9000 	return 0;
9001 }
9002 
9003 static int tracing_release_options(struct inode *inode, struct file *file)
9004 {
9005 	struct trace_option_dentry *topt = file->private_data;
9006 
9007 	trace_array_put(topt->tr);
9008 	return 0;
9009 }
9010 
9011 static const struct file_operations trace_options_fops = {
9012 	.open = tracing_open_options,
9013 	.read = trace_options_read,
9014 	.write = trace_options_write,
9015 	.llseek	= generic_file_llseek,
9016 	.release = tracing_release_options,
9017 };
9018 
9019 /*
9020  * In order to pass in both the trace_array descriptor as well as the index
9021  * to the flag that the trace option file represents, the trace_array
9022  * has a character array of trace_flags_index[], which holds the index
9023  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9024  * The address of this character array is passed to the flag option file
9025  * read/write callbacks.
9026  *
9027  * In order to extract both the index and the trace_array descriptor,
9028  * get_tr_index() uses the following algorithm.
9029  *
9030  *   idx = *ptr;
9031  *
9032  * As the pointer itself contains the address of the index (remember
9033  * index[1] == 1).
9034  *
9035  * Then to get the trace_array descriptor, by subtracting that index
9036  * from the ptr, we get to the start of the index itself.
9037  *
9038  *   ptr - idx == &index[0]
9039  *
9040  * Then a simple container_of() from that pointer gets us to the
9041  * trace_array descriptor.
9042  */
9043 static void get_tr_index(void *data, struct trace_array **ptr,
9044 			 unsigned int *pindex)
9045 {
9046 	*pindex = *(unsigned char *)data;
9047 
9048 	*ptr = container_of(data - *pindex, struct trace_array,
9049 			    trace_flags_index);
9050 }
9051 
9052 static ssize_t
9053 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9054 			loff_t *ppos)
9055 {
9056 	void *tr_index = filp->private_data;
9057 	struct trace_array *tr;
9058 	unsigned int index;
9059 	char *buf;
9060 
9061 	get_tr_index(tr_index, &tr, &index);
9062 
9063 	if (tr->trace_flags & (1 << index))
9064 		buf = "1\n";
9065 	else
9066 		buf = "0\n";
9067 
9068 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9069 }
9070 
9071 static ssize_t
9072 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9073 			 loff_t *ppos)
9074 {
9075 	void *tr_index = filp->private_data;
9076 	struct trace_array *tr;
9077 	unsigned int index;
9078 	unsigned long val;
9079 	int ret;
9080 
9081 	get_tr_index(tr_index, &tr, &index);
9082 
9083 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9084 	if (ret)
9085 		return ret;
9086 
9087 	if (val != 0 && val != 1)
9088 		return -EINVAL;
9089 
9090 	mutex_lock(&event_mutex);
9091 	mutex_lock(&trace_types_lock);
9092 	ret = set_tracer_flag(tr, 1 << index, val);
9093 	mutex_unlock(&trace_types_lock);
9094 	mutex_unlock(&event_mutex);
9095 
9096 	if (ret < 0)
9097 		return ret;
9098 
9099 	*ppos += cnt;
9100 
9101 	return cnt;
9102 }
9103 
9104 static const struct file_operations trace_options_core_fops = {
9105 	.open = tracing_open_generic,
9106 	.read = trace_options_core_read,
9107 	.write = trace_options_core_write,
9108 	.llseek = generic_file_llseek,
9109 };
9110 
9111 struct dentry *trace_create_file(const char *name,
9112 				 umode_t mode,
9113 				 struct dentry *parent,
9114 				 void *data,
9115 				 const struct file_operations *fops)
9116 {
9117 	struct dentry *ret;
9118 
9119 	ret = tracefs_create_file(name, mode, parent, data, fops);
9120 	if (!ret)
9121 		pr_warn("Could not create tracefs '%s' entry\n", name);
9122 
9123 	return ret;
9124 }
9125 
9126 
9127 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9128 {
9129 	struct dentry *d_tracer;
9130 
9131 	if (tr->options)
9132 		return tr->options;
9133 
9134 	d_tracer = tracing_get_dentry(tr);
9135 	if (IS_ERR(d_tracer))
9136 		return NULL;
9137 
9138 	tr->options = tracefs_create_dir("options", d_tracer);
9139 	if (!tr->options) {
9140 		pr_warn("Could not create tracefs directory 'options'\n");
9141 		return NULL;
9142 	}
9143 
9144 	return tr->options;
9145 }
9146 
9147 static void
9148 create_trace_option_file(struct trace_array *tr,
9149 			 struct trace_option_dentry *topt,
9150 			 struct tracer_flags *flags,
9151 			 struct tracer_opt *opt)
9152 {
9153 	struct dentry *t_options;
9154 
9155 	t_options = trace_options_init_dentry(tr);
9156 	if (!t_options)
9157 		return;
9158 
9159 	topt->flags = flags;
9160 	topt->opt = opt;
9161 	topt->tr = tr;
9162 
9163 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9164 					t_options, topt, &trace_options_fops);
9165 
9166 }
9167 
9168 static void
9169 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9170 {
9171 	struct trace_option_dentry *topts;
9172 	struct trace_options *tr_topts;
9173 	struct tracer_flags *flags;
9174 	struct tracer_opt *opts;
9175 	int cnt;
9176 	int i;
9177 
9178 	if (!tracer)
9179 		return;
9180 
9181 	flags = tracer->flags;
9182 
9183 	if (!flags || !flags->opts)
9184 		return;
9185 
9186 	/*
9187 	 * If this is an instance, only create flags for tracers
9188 	 * the instance may have.
9189 	 */
9190 	if (!trace_ok_for_array(tracer, tr))
9191 		return;
9192 
9193 	for (i = 0; i < tr->nr_topts; i++) {
9194 		/* Make sure there's no duplicate flags. */
9195 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9196 			return;
9197 	}
9198 
9199 	opts = flags->opts;
9200 
9201 	for (cnt = 0; opts[cnt].name; cnt++)
9202 		;
9203 
9204 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9205 	if (!topts)
9206 		return;
9207 
9208 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9209 			    GFP_KERNEL);
9210 	if (!tr_topts) {
9211 		kfree(topts);
9212 		return;
9213 	}
9214 
9215 	tr->topts = tr_topts;
9216 	tr->topts[tr->nr_topts].tracer = tracer;
9217 	tr->topts[tr->nr_topts].topts = topts;
9218 	tr->nr_topts++;
9219 
9220 	for (cnt = 0; opts[cnt].name; cnt++) {
9221 		create_trace_option_file(tr, &topts[cnt], flags,
9222 					 &opts[cnt]);
9223 		MEM_FAIL(topts[cnt].entry == NULL,
9224 			  "Failed to create trace option: %s",
9225 			  opts[cnt].name);
9226 	}
9227 }
9228 
9229 static struct dentry *
9230 create_trace_option_core_file(struct trace_array *tr,
9231 			      const char *option, long index)
9232 {
9233 	struct dentry *t_options;
9234 
9235 	t_options = trace_options_init_dentry(tr);
9236 	if (!t_options)
9237 		return NULL;
9238 
9239 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9240 				 (void *)&tr->trace_flags_index[index],
9241 				 &trace_options_core_fops);
9242 }
9243 
9244 static void create_trace_options_dir(struct trace_array *tr)
9245 {
9246 	struct dentry *t_options;
9247 	bool top_level = tr == &global_trace;
9248 	int i;
9249 
9250 	t_options = trace_options_init_dentry(tr);
9251 	if (!t_options)
9252 		return;
9253 
9254 	for (i = 0; trace_options[i]; i++) {
9255 		if (top_level ||
9256 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9257 			create_trace_option_core_file(tr, trace_options[i], i);
9258 	}
9259 }
9260 
9261 static ssize_t
9262 rb_simple_read(struct file *filp, char __user *ubuf,
9263 	       size_t cnt, loff_t *ppos)
9264 {
9265 	struct trace_array *tr = filp->private_data;
9266 	char buf[64];
9267 	int r;
9268 
9269 	r = tracer_tracing_is_on(tr);
9270 	r = sprintf(buf, "%d\n", r);
9271 
9272 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9273 }
9274 
9275 static ssize_t
9276 rb_simple_write(struct file *filp, const char __user *ubuf,
9277 		size_t cnt, loff_t *ppos)
9278 {
9279 	struct trace_array *tr = filp->private_data;
9280 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9281 	unsigned long val;
9282 	int ret;
9283 
9284 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9285 	if (ret)
9286 		return ret;
9287 
9288 	if (buffer) {
9289 		mutex_lock(&trace_types_lock);
9290 		if (!!val == tracer_tracing_is_on(tr)) {
9291 			val = 0; /* do nothing */
9292 		} else if (val) {
9293 			tracer_tracing_on(tr);
9294 			if (tr->current_trace->start)
9295 				tr->current_trace->start(tr);
9296 		} else {
9297 			tracer_tracing_off(tr);
9298 			if (tr->current_trace->stop)
9299 				tr->current_trace->stop(tr);
9300 			/* Wake up any waiters */
9301 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9302 		}
9303 		mutex_unlock(&trace_types_lock);
9304 	}
9305 
9306 	(*ppos)++;
9307 
9308 	return cnt;
9309 }
9310 
9311 static const struct file_operations rb_simple_fops = {
9312 	.open		= tracing_open_generic_tr,
9313 	.read		= rb_simple_read,
9314 	.write		= rb_simple_write,
9315 	.release	= tracing_release_generic_tr,
9316 	.llseek		= default_llseek,
9317 };
9318 
9319 static ssize_t
9320 buffer_percent_read(struct file *filp, char __user *ubuf,
9321 		    size_t cnt, loff_t *ppos)
9322 {
9323 	struct trace_array *tr = filp->private_data;
9324 	char buf[64];
9325 	int r;
9326 
9327 	r = tr->buffer_percent;
9328 	r = sprintf(buf, "%d\n", r);
9329 
9330 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9331 }
9332 
9333 static ssize_t
9334 buffer_percent_write(struct file *filp, const char __user *ubuf,
9335 		     size_t cnt, loff_t *ppos)
9336 {
9337 	struct trace_array *tr = filp->private_data;
9338 	unsigned long val;
9339 	int ret;
9340 
9341 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9342 	if (ret)
9343 		return ret;
9344 
9345 	if (val > 100)
9346 		return -EINVAL;
9347 
9348 	tr->buffer_percent = val;
9349 
9350 	(*ppos)++;
9351 
9352 	return cnt;
9353 }
9354 
9355 static const struct file_operations buffer_percent_fops = {
9356 	.open		= tracing_open_generic_tr,
9357 	.read		= buffer_percent_read,
9358 	.write		= buffer_percent_write,
9359 	.release	= tracing_release_generic_tr,
9360 	.llseek		= default_llseek,
9361 };
9362 
9363 static struct dentry *trace_instance_dir;
9364 
9365 static void
9366 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9367 
9368 static int
9369 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9370 {
9371 	enum ring_buffer_flags rb_flags;
9372 
9373 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9374 
9375 	buf->tr = tr;
9376 
9377 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9378 	if (!buf->buffer)
9379 		return -ENOMEM;
9380 
9381 	buf->data = alloc_percpu(struct trace_array_cpu);
9382 	if (!buf->data) {
9383 		ring_buffer_free(buf->buffer);
9384 		buf->buffer = NULL;
9385 		return -ENOMEM;
9386 	}
9387 
9388 	/* Allocate the first page for all buffers */
9389 	set_buffer_entries(&tr->array_buffer,
9390 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9391 
9392 	return 0;
9393 }
9394 
9395 static void free_trace_buffer(struct array_buffer *buf)
9396 {
9397 	if (buf->buffer) {
9398 		ring_buffer_free(buf->buffer);
9399 		buf->buffer = NULL;
9400 		free_percpu(buf->data);
9401 		buf->data = NULL;
9402 	}
9403 }
9404 
9405 static int allocate_trace_buffers(struct trace_array *tr, int size)
9406 {
9407 	int ret;
9408 
9409 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9410 	if (ret)
9411 		return ret;
9412 
9413 #ifdef CONFIG_TRACER_MAX_TRACE
9414 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9415 				    allocate_snapshot ? size : 1);
9416 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9417 		free_trace_buffer(&tr->array_buffer);
9418 		return -ENOMEM;
9419 	}
9420 	tr->allocated_snapshot = allocate_snapshot;
9421 
9422 	allocate_snapshot = false;
9423 #endif
9424 
9425 	return 0;
9426 }
9427 
9428 static void free_trace_buffers(struct trace_array *tr)
9429 {
9430 	if (!tr)
9431 		return;
9432 
9433 	free_trace_buffer(&tr->array_buffer);
9434 
9435 #ifdef CONFIG_TRACER_MAX_TRACE
9436 	free_trace_buffer(&tr->max_buffer);
9437 #endif
9438 }
9439 
9440 static void init_trace_flags_index(struct trace_array *tr)
9441 {
9442 	int i;
9443 
9444 	/* Used by the trace options files */
9445 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9446 		tr->trace_flags_index[i] = i;
9447 }
9448 
9449 static void __update_tracer_options(struct trace_array *tr)
9450 {
9451 	struct tracer *t;
9452 
9453 	for (t = trace_types; t; t = t->next)
9454 		add_tracer_options(tr, t);
9455 }
9456 
9457 static void update_tracer_options(struct trace_array *tr)
9458 {
9459 	mutex_lock(&trace_types_lock);
9460 	tracer_options_updated = true;
9461 	__update_tracer_options(tr);
9462 	mutex_unlock(&trace_types_lock);
9463 }
9464 
9465 /* Must have trace_types_lock held */
9466 struct trace_array *trace_array_find(const char *instance)
9467 {
9468 	struct trace_array *tr, *found = NULL;
9469 
9470 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9471 		if (tr->name && strcmp(tr->name, instance) == 0) {
9472 			found = tr;
9473 			break;
9474 		}
9475 	}
9476 
9477 	return found;
9478 }
9479 
9480 struct trace_array *trace_array_find_get(const char *instance)
9481 {
9482 	struct trace_array *tr;
9483 
9484 	mutex_lock(&trace_types_lock);
9485 	tr = trace_array_find(instance);
9486 	if (tr)
9487 		tr->ref++;
9488 	mutex_unlock(&trace_types_lock);
9489 
9490 	return tr;
9491 }
9492 
9493 static int trace_array_create_dir(struct trace_array *tr)
9494 {
9495 	int ret;
9496 
9497 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9498 	if (!tr->dir)
9499 		return -EINVAL;
9500 
9501 	ret = event_trace_add_tracer(tr->dir, tr);
9502 	if (ret) {
9503 		tracefs_remove(tr->dir);
9504 		return ret;
9505 	}
9506 
9507 	init_tracer_tracefs(tr, tr->dir);
9508 	__update_tracer_options(tr);
9509 
9510 	return ret;
9511 }
9512 
9513 static struct trace_array *trace_array_create(const char *name)
9514 {
9515 	struct trace_array *tr;
9516 	int ret;
9517 
9518 	ret = -ENOMEM;
9519 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9520 	if (!tr)
9521 		return ERR_PTR(ret);
9522 
9523 	tr->name = kstrdup(name, GFP_KERNEL);
9524 	if (!tr->name)
9525 		goto out_free_tr;
9526 
9527 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9528 		goto out_free_tr;
9529 
9530 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9531 		goto out_free_tr;
9532 
9533 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9534 
9535 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9536 
9537 	raw_spin_lock_init(&tr->start_lock);
9538 
9539 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9540 
9541 	tr->current_trace = &nop_trace;
9542 
9543 	INIT_LIST_HEAD(&tr->systems);
9544 	INIT_LIST_HEAD(&tr->events);
9545 	INIT_LIST_HEAD(&tr->hist_vars);
9546 	INIT_LIST_HEAD(&tr->err_log);
9547 
9548 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9549 		goto out_free_tr;
9550 
9551 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9552 		goto out_free_tr;
9553 
9554 	ftrace_init_trace_array(tr);
9555 
9556 	init_trace_flags_index(tr);
9557 
9558 	if (trace_instance_dir) {
9559 		ret = trace_array_create_dir(tr);
9560 		if (ret)
9561 			goto out_free_tr;
9562 	} else
9563 		__trace_early_add_events(tr);
9564 
9565 	list_add(&tr->list, &ftrace_trace_arrays);
9566 
9567 	tr->ref++;
9568 
9569 	return tr;
9570 
9571  out_free_tr:
9572 	ftrace_free_ftrace_ops(tr);
9573 	free_trace_buffers(tr);
9574 	free_cpumask_var(tr->pipe_cpumask);
9575 	free_cpumask_var(tr->tracing_cpumask);
9576 	kfree(tr->name);
9577 	kfree(tr);
9578 
9579 	return ERR_PTR(ret);
9580 }
9581 
9582 static int instance_mkdir(const char *name)
9583 {
9584 	struct trace_array *tr;
9585 	int ret;
9586 
9587 	mutex_lock(&event_mutex);
9588 	mutex_lock(&trace_types_lock);
9589 
9590 	ret = -EEXIST;
9591 	if (trace_array_find(name))
9592 		goto out_unlock;
9593 
9594 	tr = trace_array_create(name);
9595 
9596 	ret = PTR_ERR_OR_ZERO(tr);
9597 
9598 out_unlock:
9599 	mutex_unlock(&trace_types_lock);
9600 	mutex_unlock(&event_mutex);
9601 	return ret;
9602 }
9603 
9604 /**
9605  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9606  * @name: The name of the trace array to be looked up/created.
9607  *
9608  * Returns pointer to trace array with given name.
9609  * NULL, if it cannot be created.
9610  *
9611  * NOTE: This function increments the reference counter associated with the
9612  * trace array returned. This makes sure it cannot be freed while in use.
9613  * Use trace_array_put() once the trace array is no longer needed.
9614  * If the trace_array is to be freed, trace_array_destroy() needs to
9615  * be called after the trace_array_put(), or simply let user space delete
9616  * it from the tracefs instances directory. But until the
9617  * trace_array_put() is called, user space can not delete it.
9618  *
9619  */
9620 struct trace_array *trace_array_get_by_name(const char *name)
9621 {
9622 	struct trace_array *tr;
9623 
9624 	mutex_lock(&event_mutex);
9625 	mutex_lock(&trace_types_lock);
9626 
9627 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9628 		if (tr->name && strcmp(tr->name, name) == 0)
9629 			goto out_unlock;
9630 	}
9631 
9632 	tr = trace_array_create(name);
9633 
9634 	if (IS_ERR(tr))
9635 		tr = NULL;
9636 out_unlock:
9637 	if (tr)
9638 		tr->ref++;
9639 
9640 	mutex_unlock(&trace_types_lock);
9641 	mutex_unlock(&event_mutex);
9642 	return tr;
9643 }
9644 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9645 
9646 static int __remove_instance(struct trace_array *tr)
9647 {
9648 	int i;
9649 
9650 	/* Reference counter for a newly created trace array = 1. */
9651 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9652 		return -EBUSY;
9653 
9654 	list_del(&tr->list);
9655 
9656 	/* Disable all the flags that were enabled coming in */
9657 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9658 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9659 			set_tracer_flag(tr, 1 << i, 0);
9660 	}
9661 
9662 	tracing_set_nop(tr);
9663 	clear_ftrace_function_probes(tr);
9664 	event_trace_del_tracer(tr);
9665 	ftrace_clear_pids(tr);
9666 	ftrace_destroy_function_files(tr);
9667 	tracefs_remove(tr->dir);
9668 	free_percpu(tr->last_func_repeats);
9669 	free_trace_buffers(tr);
9670 	clear_tracing_err_log(tr);
9671 
9672 	for (i = 0; i < tr->nr_topts; i++) {
9673 		kfree(tr->topts[i].topts);
9674 	}
9675 	kfree(tr->topts);
9676 
9677 	free_cpumask_var(tr->pipe_cpumask);
9678 	free_cpumask_var(tr->tracing_cpumask);
9679 	kfree(tr->name);
9680 	kfree(tr);
9681 
9682 	return 0;
9683 }
9684 
9685 int trace_array_destroy(struct trace_array *this_tr)
9686 {
9687 	struct trace_array *tr;
9688 	int ret;
9689 
9690 	if (!this_tr)
9691 		return -EINVAL;
9692 
9693 	mutex_lock(&event_mutex);
9694 	mutex_lock(&trace_types_lock);
9695 
9696 	ret = -ENODEV;
9697 
9698 	/* Making sure trace array exists before destroying it. */
9699 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9700 		if (tr == this_tr) {
9701 			ret = __remove_instance(tr);
9702 			break;
9703 		}
9704 	}
9705 
9706 	mutex_unlock(&trace_types_lock);
9707 	mutex_unlock(&event_mutex);
9708 
9709 	return ret;
9710 }
9711 EXPORT_SYMBOL_GPL(trace_array_destroy);
9712 
9713 static int instance_rmdir(const char *name)
9714 {
9715 	struct trace_array *tr;
9716 	int ret;
9717 
9718 	mutex_lock(&event_mutex);
9719 	mutex_lock(&trace_types_lock);
9720 
9721 	ret = -ENODEV;
9722 	tr = trace_array_find(name);
9723 	if (tr)
9724 		ret = __remove_instance(tr);
9725 
9726 	mutex_unlock(&trace_types_lock);
9727 	mutex_unlock(&event_mutex);
9728 
9729 	return ret;
9730 }
9731 
9732 static __init void create_trace_instances(struct dentry *d_tracer)
9733 {
9734 	struct trace_array *tr;
9735 
9736 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9737 							 instance_mkdir,
9738 							 instance_rmdir);
9739 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9740 		return;
9741 
9742 	mutex_lock(&event_mutex);
9743 	mutex_lock(&trace_types_lock);
9744 
9745 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9746 		if (!tr->name)
9747 			continue;
9748 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9749 			     "Failed to create instance directory\n"))
9750 			break;
9751 	}
9752 
9753 	mutex_unlock(&trace_types_lock);
9754 	mutex_unlock(&event_mutex);
9755 }
9756 
9757 static void
9758 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9759 {
9760 	struct trace_event_file *file;
9761 	int cpu;
9762 
9763 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9764 			tr, &show_traces_fops);
9765 
9766 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9767 			tr, &set_tracer_fops);
9768 
9769 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9770 			  tr, &tracing_cpumask_fops);
9771 
9772 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9773 			  tr, &tracing_iter_fops);
9774 
9775 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9776 			  tr, &tracing_fops);
9777 
9778 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9779 			  tr, &tracing_pipe_fops);
9780 
9781 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9782 			  tr, &tracing_entries_fops);
9783 
9784 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9785 			  tr, &tracing_total_entries_fops);
9786 
9787 	trace_create_file("free_buffer", 0200, d_tracer,
9788 			  tr, &tracing_free_buffer_fops);
9789 
9790 	trace_create_file("trace_marker", 0220, d_tracer,
9791 			  tr, &tracing_mark_fops);
9792 
9793 	file = __find_event_file(tr, "ftrace", "print");
9794 	if (file && file->ef)
9795 		eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
9796 				  file, &event_trigger_fops);
9797 	tr->trace_marker_file = file;
9798 
9799 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9800 			  tr, &tracing_mark_raw_fops);
9801 
9802 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9803 			  &trace_clock_fops);
9804 
9805 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9806 			  tr, &rb_simple_fops);
9807 
9808 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9809 			  &trace_time_stamp_mode_fops);
9810 
9811 	tr->buffer_percent = 50;
9812 
9813 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9814 			tr, &buffer_percent_fops);
9815 
9816 	create_trace_options_dir(tr);
9817 
9818 #ifdef CONFIG_TRACER_MAX_TRACE
9819 	trace_create_maxlat_file(tr, d_tracer);
9820 #endif
9821 
9822 	if (ftrace_create_function_files(tr, d_tracer))
9823 		MEM_FAIL(1, "Could not allocate function filter files");
9824 
9825 #ifdef CONFIG_TRACER_SNAPSHOT
9826 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9827 			  tr, &snapshot_fops);
9828 #endif
9829 
9830 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9831 			  tr, &tracing_err_log_fops);
9832 
9833 	for_each_tracing_cpu(cpu)
9834 		tracing_init_tracefs_percpu(tr, cpu);
9835 
9836 	ftrace_init_tracefs(tr, d_tracer);
9837 }
9838 
9839 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9840 {
9841 	struct vfsmount *mnt;
9842 	struct file_system_type *type;
9843 
9844 	/*
9845 	 * To maintain backward compatibility for tools that mount
9846 	 * debugfs to get to the tracing facility, tracefs is automatically
9847 	 * mounted to the debugfs/tracing directory.
9848 	 */
9849 	type = get_fs_type("tracefs");
9850 	if (!type)
9851 		return NULL;
9852 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9853 	put_filesystem(type);
9854 	if (IS_ERR(mnt))
9855 		return NULL;
9856 	mntget(mnt);
9857 
9858 	return mnt;
9859 }
9860 
9861 /**
9862  * tracing_init_dentry - initialize top level trace array
9863  *
9864  * This is called when creating files or directories in the tracing
9865  * directory. It is called via fs_initcall() by any of the boot up code
9866  * and expects to return the dentry of the top level tracing directory.
9867  */
9868 int tracing_init_dentry(void)
9869 {
9870 	struct trace_array *tr = &global_trace;
9871 
9872 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9873 		pr_warn("Tracing disabled due to lockdown\n");
9874 		return -EPERM;
9875 	}
9876 
9877 	/* The top level trace array uses  NULL as parent */
9878 	if (tr->dir)
9879 		return 0;
9880 
9881 	if (WARN_ON(!tracefs_initialized()))
9882 		return -ENODEV;
9883 
9884 	/*
9885 	 * As there may still be users that expect the tracing
9886 	 * files to exist in debugfs/tracing, we must automount
9887 	 * the tracefs file system there, so older tools still
9888 	 * work with the newer kernel.
9889 	 */
9890 	tr->dir = debugfs_create_automount("tracing", NULL,
9891 					   trace_automount, NULL);
9892 
9893 	return 0;
9894 }
9895 
9896 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9897 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9898 
9899 static struct workqueue_struct *eval_map_wq __initdata;
9900 static struct work_struct eval_map_work __initdata;
9901 static struct work_struct tracerfs_init_work __initdata;
9902 
9903 static void __init eval_map_work_func(struct work_struct *work)
9904 {
9905 	int len;
9906 
9907 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9908 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9909 }
9910 
9911 static int __init trace_eval_init(void)
9912 {
9913 	INIT_WORK(&eval_map_work, eval_map_work_func);
9914 
9915 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9916 	if (!eval_map_wq) {
9917 		pr_err("Unable to allocate eval_map_wq\n");
9918 		/* Do work here */
9919 		eval_map_work_func(&eval_map_work);
9920 		return -ENOMEM;
9921 	}
9922 
9923 	queue_work(eval_map_wq, &eval_map_work);
9924 	return 0;
9925 }
9926 
9927 subsys_initcall(trace_eval_init);
9928 
9929 static int __init trace_eval_sync(void)
9930 {
9931 	/* Make sure the eval map updates are finished */
9932 	if (eval_map_wq)
9933 		destroy_workqueue(eval_map_wq);
9934 	return 0;
9935 }
9936 
9937 late_initcall_sync(trace_eval_sync);
9938 
9939 
9940 #ifdef CONFIG_MODULES
9941 static void trace_module_add_evals(struct module *mod)
9942 {
9943 	if (!mod->num_trace_evals)
9944 		return;
9945 
9946 	/*
9947 	 * Modules with bad taint do not have events created, do
9948 	 * not bother with enums either.
9949 	 */
9950 	if (trace_module_has_bad_taint(mod))
9951 		return;
9952 
9953 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9954 }
9955 
9956 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9957 static void trace_module_remove_evals(struct module *mod)
9958 {
9959 	union trace_eval_map_item *map;
9960 	union trace_eval_map_item **last = &trace_eval_maps;
9961 
9962 	if (!mod->num_trace_evals)
9963 		return;
9964 
9965 	mutex_lock(&trace_eval_mutex);
9966 
9967 	map = trace_eval_maps;
9968 
9969 	while (map) {
9970 		if (map->head.mod == mod)
9971 			break;
9972 		map = trace_eval_jmp_to_tail(map);
9973 		last = &map->tail.next;
9974 		map = map->tail.next;
9975 	}
9976 	if (!map)
9977 		goto out;
9978 
9979 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9980 	kfree(map);
9981  out:
9982 	mutex_unlock(&trace_eval_mutex);
9983 }
9984 #else
9985 static inline void trace_module_remove_evals(struct module *mod) { }
9986 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9987 
9988 static int trace_module_notify(struct notifier_block *self,
9989 			       unsigned long val, void *data)
9990 {
9991 	struct module *mod = data;
9992 
9993 	switch (val) {
9994 	case MODULE_STATE_COMING:
9995 		trace_module_add_evals(mod);
9996 		break;
9997 	case MODULE_STATE_GOING:
9998 		trace_module_remove_evals(mod);
9999 		break;
10000 	}
10001 
10002 	return NOTIFY_OK;
10003 }
10004 
10005 static struct notifier_block trace_module_nb = {
10006 	.notifier_call = trace_module_notify,
10007 	.priority = 0,
10008 };
10009 #endif /* CONFIG_MODULES */
10010 
10011 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10012 {
10013 
10014 	event_trace_init();
10015 
10016 	init_tracer_tracefs(&global_trace, NULL);
10017 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10018 
10019 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10020 			&global_trace, &tracing_thresh_fops);
10021 
10022 	trace_create_file("README", TRACE_MODE_READ, NULL,
10023 			NULL, &tracing_readme_fops);
10024 
10025 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10026 			NULL, &tracing_saved_cmdlines_fops);
10027 
10028 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10029 			  NULL, &tracing_saved_cmdlines_size_fops);
10030 
10031 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10032 			NULL, &tracing_saved_tgids_fops);
10033 
10034 	trace_create_eval_file(NULL);
10035 
10036 #ifdef CONFIG_MODULES
10037 	register_module_notifier(&trace_module_nb);
10038 #endif
10039 
10040 #ifdef CONFIG_DYNAMIC_FTRACE
10041 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10042 			NULL, &tracing_dyn_info_fops);
10043 #endif
10044 
10045 	create_trace_instances(NULL);
10046 
10047 	update_tracer_options(&global_trace);
10048 }
10049 
10050 static __init int tracer_init_tracefs(void)
10051 {
10052 	int ret;
10053 
10054 	trace_access_lock_init();
10055 
10056 	ret = tracing_init_dentry();
10057 	if (ret)
10058 		return 0;
10059 
10060 	if (eval_map_wq) {
10061 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10062 		queue_work(eval_map_wq, &tracerfs_init_work);
10063 	} else {
10064 		tracer_init_tracefs_work_func(NULL);
10065 	}
10066 
10067 	rv_init_interface();
10068 
10069 	return 0;
10070 }
10071 
10072 fs_initcall(tracer_init_tracefs);
10073 
10074 static int trace_die_panic_handler(struct notifier_block *self,
10075 				unsigned long ev, void *unused);
10076 
10077 static struct notifier_block trace_panic_notifier = {
10078 	.notifier_call = trace_die_panic_handler,
10079 	.priority = INT_MAX - 1,
10080 };
10081 
10082 static struct notifier_block trace_die_notifier = {
10083 	.notifier_call = trace_die_panic_handler,
10084 	.priority = INT_MAX - 1,
10085 };
10086 
10087 /*
10088  * The idea is to execute the following die/panic callback early, in order
10089  * to avoid showing irrelevant information in the trace (like other panic
10090  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10091  * warnings get disabled (to prevent potential log flooding).
10092  */
10093 static int trace_die_panic_handler(struct notifier_block *self,
10094 				unsigned long ev, void *unused)
10095 {
10096 	if (!ftrace_dump_on_oops)
10097 		return NOTIFY_DONE;
10098 
10099 	/* The die notifier requires DIE_OOPS to trigger */
10100 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10101 		return NOTIFY_DONE;
10102 
10103 	ftrace_dump(ftrace_dump_on_oops);
10104 
10105 	return NOTIFY_DONE;
10106 }
10107 
10108 /*
10109  * printk is set to max of 1024, we really don't need it that big.
10110  * Nothing should be printing 1000 characters anyway.
10111  */
10112 #define TRACE_MAX_PRINT		1000
10113 
10114 /*
10115  * Define here KERN_TRACE so that we have one place to modify
10116  * it if we decide to change what log level the ftrace dump
10117  * should be at.
10118  */
10119 #define KERN_TRACE		KERN_EMERG
10120 
10121 void
10122 trace_printk_seq(struct trace_seq *s)
10123 {
10124 	/* Probably should print a warning here. */
10125 	if (s->seq.len >= TRACE_MAX_PRINT)
10126 		s->seq.len = TRACE_MAX_PRINT;
10127 
10128 	/*
10129 	 * More paranoid code. Although the buffer size is set to
10130 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10131 	 * an extra layer of protection.
10132 	 */
10133 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10134 		s->seq.len = s->seq.size - 1;
10135 
10136 	/* should be zero ended, but we are paranoid. */
10137 	s->buffer[s->seq.len] = 0;
10138 
10139 	printk(KERN_TRACE "%s", s->buffer);
10140 
10141 	trace_seq_init(s);
10142 }
10143 
10144 void trace_init_global_iter(struct trace_iterator *iter)
10145 {
10146 	iter->tr = &global_trace;
10147 	iter->trace = iter->tr->current_trace;
10148 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10149 	iter->array_buffer = &global_trace.array_buffer;
10150 
10151 	if (iter->trace && iter->trace->open)
10152 		iter->trace->open(iter);
10153 
10154 	/* Annotate start of buffers if we had overruns */
10155 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10156 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10157 
10158 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10159 	if (trace_clocks[iter->tr->clock_id].in_ns)
10160 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10161 
10162 	/* Can not use kmalloc for iter.temp and iter.fmt */
10163 	iter->temp = static_temp_buf;
10164 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10165 	iter->fmt = static_fmt_buf;
10166 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10167 }
10168 
10169 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10170 {
10171 	/* use static because iter can be a bit big for the stack */
10172 	static struct trace_iterator iter;
10173 	static atomic_t dump_running;
10174 	struct trace_array *tr = &global_trace;
10175 	unsigned int old_userobj;
10176 	unsigned long flags;
10177 	int cnt = 0, cpu;
10178 
10179 	/* Only allow one dump user at a time. */
10180 	if (atomic_inc_return(&dump_running) != 1) {
10181 		atomic_dec(&dump_running);
10182 		return;
10183 	}
10184 
10185 	/*
10186 	 * Always turn off tracing when we dump.
10187 	 * We don't need to show trace output of what happens
10188 	 * between multiple crashes.
10189 	 *
10190 	 * If the user does a sysrq-z, then they can re-enable
10191 	 * tracing with echo 1 > tracing_on.
10192 	 */
10193 	tracing_off();
10194 
10195 	local_irq_save(flags);
10196 
10197 	/* Simulate the iterator */
10198 	trace_init_global_iter(&iter);
10199 
10200 	for_each_tracing_cpu(cpu) {
10201 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10202 	}
10203 
10204 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10205 
10206 	/* don't look at user memory in panic mode */
10207 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10208 
10209 	switch (oops_dump_mode) {
10210 	case DUMP_ALL:
10211 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10212 		break;
10213 	case DUMP_ORIG:
10214 		iter.cpu_file = raw_smp_processor_id();
10215 		break;
10216 	case DUMP_NONE:
10217 		goto out_enable;
10218 	default:
10219 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10220 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10221 	}
10222 
10223 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
10224 
10225 	/* Did function tracer already get disabled? */
10226 	if (ftrace_is_dead()) {
10227 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10228 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10229 	}
10230 
10231 	/*
10232 	 * We need to stop all tracing on all CPUS to read
10233 	 * the next buffer. This is a bit expensive, but is
10234 	 * not done often. We fill all what we can read,
10235 	 * and then release the locks again.
10236 	 */
10237 
10238 	while (!trace_empty(&iter)) {
10239 
10240 		if (!cnt)
10241 			printk(KERN_TRACE "---------------------------------\n");
10242 
10243 		cnt++;
10244 
10245 		trace_iterator_reset(&iter);
10246 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10247 
10248 		if (trace_find_next_entry_inc(&iter) != NULL) {
10249 			int ret;
10250 
10251 			ret = print_trace_line(&iter);
10252 			if (ret != TRACE_TYPE_NO_CONSUME)
10253 				trace_consume(&iter);
10254 		}
10255 		touch_nmi_watchdog();
10256 
10257 		trace_printk_seq(&iter.seq);
10258 	}
10259 
10260 	if (!cnt)
10261 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10262 	else
10263 		printk(KERN_TRACE "---------------------------------\n");
10264 
10265  out_enable:
10266 	tr->trace_flags |= old_userobj;
10267 
10268 	for_each_tracing_cpu(cpu) {
10269 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10270 	}
10271 	atomic_dec(&dump_running);
10272 	local_irq_restore(flags);
10273 }
10274 EXPORT_SYMBOL_GPL(ftrace_dump);
10275 
10276 #define WRITE_BUFSIZE  4096
10277 
10278 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10279 				size_t count, loff_t *ppos,
10280 				int (*createfn)(const char *))
10281 {
10282 	char *kbuf, *buf, *tmp;
10283 	int ret = 0;
10284 	size_t done = 0;
10285 	size_t size;
10286 
10287 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10288 	if (!kbuf)
10289 		return -ENOMEM;
10290 
10291 	while (done < count) {
10292 		size = count - done;
10293 
10294 		if (size >= WRITE_BUFSIZE)
10295 			size = WRITE_BUFSIZE - 1;
10296 
10297 		if (copy_from_user(kbuf, buffer + done, size)) {
10298 			ret = -EFAULT;
10299 			goto out;
10300 		}
10301 		kbuf[size] = '\0';
10302 		buf = kbuf;
10303 		do {
10304 			tmp = strchr(buf, '\n');
10305 			if (tmp) {
10306 				*tmp = '\0';
10307 				size = tmp - buf + 1;
10308 			} else {
10309 				size = strlen(buf);
10310 				if (done + size < count) {
10311 					if (buf != kbuf)
10312 						break;
10313 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10314 					pr_warn("Line length is too long: Should be less than %d\n",
10315 						WRITE_BUFSIZE - 2);
10316 					ret = -EINVAL;
10317 					goto out;
10318 				}
10319 			}
10320 			done += size;
10321 
10322 			/* Remove comments */
10323 			tmp = strchr(buf, '#');
10324 
10325 			if (tmp)
10326 				*tmp = '\0';
10327 
10328 			ret = createfn(buf);
10329 			if (ret)
10330 				goto out;
10331 			buf += size;
10332 
10333 		} while (done < count);
10334 	}
10335 	ret = done;
10336 
10337 out:
10338 	kfree(kbuf);
10339 
10340 	return ret;
10341 }
10342 
10343 #ifdef CONFIG_TRACER_MAX_TRACE
10344 __init static bool tr_needs_alloc_snapshot(const char *name)
10345 {
10346 	char *test;
10347 	int len = strlen(name);
10348 	bool ret;
10349 
10350 	if (!boot_snapshot_index)
10351 		return false;
10352 
10353 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10354 	    boot_snapshot_info[len] == '\t')
10355 		return true;
10356 
10357 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10358 	if (!test)
10359 		return false;
10360 
10361 	sprintf(test, "\t%s\t", name);
10362 	ret = strstr(boot_snapshot_info, test) == NULL;
10363 	kfree(test);
10364 	return ret;
10365 }
10366 
10367 __init static void do_allocate_snapshot(const char *name)
10368 {
10369 	if (!tr_needs_alloc_snapshot(name))
10370 		return;
10371 
10372 	/*
10373 	 * When allocate_snapshot is set, the next call to
10374 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10375 	 * will allocate the snapshot buffer. That will alse clear
10376 	 * this flag.
10377 	 */
10378 	allocate_snapshot = true;
10379 }
10380 #else
10381 static inline void do_allocate_snapshot(const char *name) { }
10382 #endif
10383 
10384 __init static void enable_instances(void)
10385 {
10386 	struct trace_array *tr;
10387 	char *curr_str;
10388 	char *str;
10389 	char *tok;
10390 
10391 	/* A tab is always appended */
10392 	boot_instance_info[boot_instance_index - 1] = '\0';
10393 	str = boot_instance_info;
10394 
10395 	while ((curr_str = strsep(&str, "\t"))) {
10396 
10397 		tok = strsep(&curr_str, ",");
10398 
10399 		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10400 			do_allocate_snapshot(tok);
10401 
10402 		tr = trace_array_get_by_name(tok);
10403 		if (!tr) {
10404 			pr_warn("Failed to create instance buffer %s\n", curr_str);
10405 			continue;
10406 		}
10407 		/* Allow user space to delete it */
10408 		trace_array_put(tr);
10409 
10410 		while ((tok = strsep(&curr_str, ","))) {
10411 			early_enable_events(tr, tok, true);
10412 		}
10413 	}
10414 }
10415 
10416 __init static int tracer_alloc_buffers(void)
10417 {
10418 	int ring_buf_size;
10419 	int ret = -ENOMEM;
10420 
10421 
10422 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10423 		pr_warn("Tracing disabled due to lockdown\n");
10424 		return -EPERM;
10425 	}
10426 
10427 	/*
10428 	 * Make sure we don't accidentally add more trace options
10429 	 * than we have bits for.
10430 	 */
10431 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10432 
10433 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10434 		goto out;
10435 
10436 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10437 		goto out_free_buffer_mask;
10438 
10439 	/* Only allocate trace_printk buffers if a trace_printk exists */
10440 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10441 		/* Must be called before global_trace.buffer is allocated */
10442 		trace_printk_init_buffers();
10443 
10444 	/* To save memory, keep the ring buffer size to its minimum */
10445 	if (ring_buffer_expanded)
10446 		ring_buf_size = trace_buf_size;
10447 	else
10448 		ring_buf_size = 1;
10449 
10450 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10451 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10452 
10453 	raw_spin_lock_init(&global_trace.start_lock);
10454 
10455 	/*
10456 	 * The prepare callbacks allocates some memory for the ring buffer. We
10457 	 * don't free the buffer if the CPU goes down. If we were to free
10458 	 * the buffer, then the user would lose any trace that was in the
10459 	 * buffer. The memory will be removed once the "instance" is removed.
10460 	 */
10461 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10462 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10463 				      NULL);
10464 	if (ret < 0)
10465 		goto out_free_cpumask;
10466 	/* Used for event triggers */
10467 	ret = -ENOMEM;
10468 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10469 	if (!temp_buffer)
10470 		goto out_rm_hp_state;
10471 
10472 	if (trace_create_savedcmd() < 0)
10473 		goto out_free_temp_buffer;
10474 
10475 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10476 		goto out_free_savedcmd;
10477 
10478 	/* TODO: make the number of buffers hot pluggable with CPUS */
10479 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10480 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10481 		goto out_free_pipe_cpumask;
10482 	}
10483 	if (global_trace.buffer_disabled)
10484 		tracing_off();
10485 
10486 	if (trace_boot_clock) {
10487 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10488 		if (ret < 0)
10489 			pr_warn("Trace clock %s not defined, going back to default\n",
10490 				trace_boot_clock);
10491 	}
10492 
10493 	/*
10494 	 * register_tracer() might reference current_trace, so it
10495 	 * needs to be set before we register anything. This is
10496 	 * just a bootstrap of current_trace anyway.
10497 	 */
10498 	global_trace.current_trace = &nop_trace;
10499 
10500 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10501 
10502 	ftrace_init_global_array_ops(&global_trace);
10503 
10504 	init_trace_flags_index(&global_trace);
10505 
10506 	register_tracer(&nop_trace);
10507 
10508 	/* Function tracing may start here (via kernel command line) */
10509 	init_function_trace();
10510 
10511 	/* All seems OK, enable tracing */
10512 	tracing_disabled = 0;
10513 
10514 	atomic_notifier_chain_register(&panic_notifier_list,
10515 				       &trace_panic_notifier);
10516 
10517 	register_die_notifier(&trace_die_notifier);
10518 
10519 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10520 
10521 	INIT_LIST_HEAD(&global_trace.systems);
10522 	INIT_LIST_HEAD(&global_trace.events);
10523 	INIT_LIST_HEAD(&global_trace.hist_vars);
10524 	INIT_LIST_HEAD(&global_trace.err_log);
10525 	list_add(&global_trace.list, &ftrace_trace_arrays);
10526 
10527 	apply_trace_boot_options();
10528 
10529 	register_snapshot_cmd();
10530 
10531 	test_can_verify();
10532 
10533 	return 0;
10534 
10535 out_free_pipe_cpumask:
10536 	free_cpumask_var(global_trace.pipe_cpumask);
10537 out_free_savedcmd:
10538 	free_saved_cmdlines_buffer(savedcmd);
10539 out_free_temp_buffer:
10540 	ring_buffer_free(temp_buffer);
10541 out_rm_hp_state:
10542 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10543 out_free_cpumask:
10544 	free_cpumask_var(global_trace.tracing_cpumask);
10545 out_free_buffer_mask:
10546 	free_cpumask_var(tracing_buffer_mask);
10547 out:
10548 	return ret;
10549 }
10550 
10551 void __init ftrace_boot_snapshot(void)
10552 {
10553 #ifdef CONFIG_TRACER_MAX_TRACE
10554 	struct trace_array *tr;
10555 
10556 	if (!snapshot_at_boot)
10557 		return;
10558 
10559 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10560 		if (!tr->allocated_snapshot)
10561 			continue;
10562 
10563 		tracing_snapshot_instance(tr);
10564 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10565 	}
10566 #endif
10567 }
10568 
10569 void __init early_trace_init(void)
10570 {
10571 	if (tracepoint_printk) {
10572 		tracepoint_print_iter =
10573 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10574 		if (MEM_FAIL(!tracepoint_print_iter,
10575 			     "Failed to allocate trace iterator\n"))
10576 			tracepoint_printk = 0;
10577 		else
10578 			static_key_enable(&tracepoint_printk_key.key);
10579 	}
10580 	tracer_alloc_buffers();
10581 
10582 	init_events();
10583 }
10584 
10585 void __init trace_init(void)
10586 {
10587 	trace_event_init();
10588 
10589 	if (boot_instance_index)
10590 		enable_instances();
10591 }
10592 
10593 __init static void clear_boot_tracer(void)
10594 {
10595 	/*
10596 	 * The default tracer at boot buffer is an init section.
10597 	 * This function is called in lateinit. If we did not
10598 	 * find the boot tracer, then clear it out, to prevent
10599 	 * later registration from accessing the buffer that is
10600 	 * about to be freed.
10601 	 */
10602 	if (!default_bootup_tracer)
10603 		return;
10604 
10605 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10606 	       default_bootup_tracer);
10607 	default_bootup_tracer = NULL;
10608 }
10609 
10610 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10611 __init static void tracing_set_default_clock(void)
10612 {
10613 	/* sched_clock_stable() is determined in late_initcall */
10614 	if (!trace_boot_clock && !sched_clock_stable()) {
10615 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10616 			pr_warn("Can not set tracing clock due to lockdown\n");
10617 			return;
10618 		}
10619 
10620 		printk(KERN_WARNING
10621 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10622 		       "If you want to keep using the local clock, then add:\n"
10623 		       "  \"trace_clock=local\"\n"
10624 		       "on the kernel command line\n");
10625 		tracing_set_clock(&global_trace, "global");
10626 	}
10627 }
10628 #else
10629 static inline void tracing_set_default_clock(void) { }
10630 #endif
10631 
10632 __init static int late_trace_init(void)
10633 {
10634 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10635 		static_key_disable(&tracepoint_printk_key.key);
10636 		tracepoint_printk = 0;
10637 	}
10638 
10639 	tracing_set_default_clock();
10640 	clear_boot_tracer();
10641 	return 0;
10642 }
10643 
10644 late_initcall_sync(late_trace_init);
10645