xref: /openbmc/linux/kernel/trace/trace.c (revision 39f555fb)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53 
54 #include "trace.h"
55 #include "trace_output.h"
56 
57 /*
58  * On boot up, the ring buffer is set to the minimum size, so that
59  * we do not waste memory on systems that are not using tracing.
60  */
61 bool ring_buffer_expanded;
62 
63 #ifdef CONFIG_FTRACE_STARTUP_TEST
64 /*
65  * We need to change this state when a selftest is running.
 * A selftest will peek into the ring-buffer to count the
 * entries inserted during the selftest, although some concurrent
 * insertions into the ring-buffer such as trace_printk could have occurred
 * at the same time, giving false positive or negative results.
70  */
71 static bool __read_mostly tracing_selftest_running;
72 
73 /*
74  * If boot-time tracing including tracers/events via kernel cmdline
75  * is running, we do not want to run SELFTEST.
76  */
77 bool __read_mostly tracing_selftest_disabled;
78 
79 void __init disable_tracing_selftest(const char *reason)
80 {
81 	if (!tracing_selftest_disabled) {
82 		tracing_selftest_disabled = true;
83 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
84 	}
85 }
86 #else
87 #define tracing_selftest_running	0
88 #define tracing_selftest_disabled	0
89 #endif
90 
91 /* Pipe tracepoints to printk */
92 static struct trace_iterator *tracepoint_print_iter;
93 int tracepoint_printk;
94 static bool tracepoint_printk_stop_on_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96 
97 /* For tracers that don't implement custom flags */
98 static struct tracer_opt dummy_tracer_opt[] = {
99 	{ }
100 };
101 
102 static int
103 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
104 {
105 	return 0;
106 }
107 
108 /*
109  * To prevent the comm cache from being overwritten when no
110  * tracing is active, only save the comm when a trace event
111  * occurred.
112  */
113 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
114 
115 /*
116  * Kill all tracing for good (never come back).
117  * It is initialized to 1 but will turn to zero if the initialization
118  * of the tracer is successful. But that is the only place that sets
119  * this back to zero.
120  */
121 static int tracing_disabled = 1;
122 
123 cpumask_var_t __read_mostly	tracing_buffer_mask;
124 
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129  * is set, then ftrace_dump is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
133  *
134  * It is default off, but you can enable it with either specifying
135  * "ftrace_dump_on_oops" in the kernel command line, or setting
136  * /proc/sys/kernel/ftrace_dump_on_oops
137  * Set 1 if you want to dump buffers of all CPUs
138  * Set 2 if you want to dump the buffer of the CPU that triggered oops
139  */
140 
141 enum ftrace_dump_mode ftrace_dump_on_oops;
142 
143 /* When set, tracing will stop when a WARN*() is hit */
144 int __disable_trace_on_warning;
145 
146 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
147 /* Map of enums to their values, for "eval_map" file */
148 struct trace_eval_map_head {
149 	struct module			*mod;
150 	unsigned long			length;
151 };
152 
153 union trace_eval_map_item;
154 
155 struct trace_eval_map_tail {
156 	/*
157 	 * "end" is first and points to NULL as it must be different
158 	 * than "mod" or "eval_string"
159 	 */
160 	union trace_eval_map_item	*next;
161 	const char			*end;	/* points to NULL */
162 };
163 
164 static DEFINE_MUTEX(trace_eval_mutex);
165 
166 /*
167  * The trace_eval_maps are saved in an array with two extra elements,
168  * one at the beginning, and one at the end. The beginning item contains
169  * the count of the saved maps (head.length), and the module they
170  * belong to if not built in (head.mod). The ending item contains a
171  * pointer to the next array of saved eval_map items.
172  */
173 union trace_eval_map_item {
174 	struct trace_eval_map		map;
175 	struct trace_eval_map_head	head;
176 	struct trace_eval_map_tail	tail;
177 };
178 
179 static union trace_eval_map_item *trace_eval_maps;
180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
181 
182 int tracing_set_tracer(struct trace_array *tr, const char *buf);
183 static void ftrace_trace_userstack(struct trace_array *tr,
184 				   struct trace_buffer *buffer,
185 				   unsigned int trace_ctx);
186 
187 #define MAX_TRACER_SIZE		100
188 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
189 static char *default_bootup_tracer;
190 
191 static bool allocate_snapshot;
192 static bool snapshot_at_boot;
193 
194 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
195 static int boot_instance_index;
196 
197 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
198 static int boot_snapshot_index;
199 
200 static int __init set_cmdline_ftrace(char *str)
201 {
202 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
203 	default_bootup_tracer = bootup_tracer_buf;
204 	/* We are using ftrace early, expand it */
205 	ring_buffer_expanded = true;
206 	return 1;
207 }
208 __setup("ftrace=", set_cmdline_ftrace);
209 
210 static int __init set_ftrace_dump_on_oops(char *str)
211 {
212 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
213 		ftrace_dump_on_oops = DUMP_ALL;
214 		return 1;
215 	}
216 
217 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
218 		ftrace_dump_on_oops = DUMP_ORIG;
219                 return 1;
220         }
221 
222         return 0;
223 }
224 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
225 
226 static int __init stop_trace_on_warning(char *str)
227 {
228 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
229 		__disable_trace_on_warning = 1;
230 	return 1;
231 }
232 __setup("traceoff_on_warning", stop_trace_on_warning);
233 
234 static int __init boot_alloc_snapshot(char *str)
235 {
236 	char *slot = boot_snapshot_info + boot_snapshot_index;
237 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
238 	int ret;
239 
240 	if (str[0] == '=') {
241 		str++;
242 		if (strlen(str) >= left)
243 			return -1;
244 
245 		ret = snprintf(slot, left, "%s\t", str);
246 		boot_snapshot_index += ret;
247 	} else {
248 		allocate_snapshot = true;
249 		/* We also need the main ring buffer expanded */
250 		ring_buffer_expanded = true;
251 	}
252 	return 1;
253 }
254 __setup("alloc_snapshot", boot_alloc_snapshot);
255 
256 
257 static int __init boot_snapshot(char *str)
258 {
259 	snapshot_at_boot = true;
260 	boot_alloc_snapshot(str);
261 	return 1;
262 }
263 __setup("ftrace_boot_snapshot", boot_snapshot);
264 
265 
266 static int __init boot_instance(char *str)
267 {
268 	char *slot = boot_instance_info + boot_instance_index;
269 	int left = sizeof(boot_instance_info) - boot_instance_index;
270 	int ret;
271 
272 	if (strlen(str) >= left)
273 		return -1;
274 
275 	ret = snprintf(slot, left, "%s\t", str);
276 	boot_instance_index += ret;
277 
278 	return 1;
279 }
280 __setup("trace_instance=", boot_instance);
281 
282 
283 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
284 
285 static int __init set_trace_boot_options(char *str)
286 {
287 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
288 	return 1;
289 }
290 __setup("trace_options=", set_trace_boot_options);
291 
292 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
293 static char *trace_boot_clock __initdata;
294 
295 static int __init set_trace_boot_clock(char *str)
296 {
297 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
298 	trace_boot_clock = trace_boot_clock_buf;
299 	return 1;
300 }
301 __setup("trace_clock=", set_trace_boot_clock);
302 
303 static int __init set_tracepoint_printk(char *str)
304 {
305 	/* Ignore the "tp_printk_stop_on_boot" param */
306 	if (*str == '_')
307 		return 0;
308 
309 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
310 		tracepoint_printk = 1;
311 	return 1;
312 }
313 __setup("tp_printk", set_tracepoint_printk);
314 
315 static int __init set_tracepoint_printk_stop(char *str)
316 {
317 	tracepoint_printk_stop_on_boot = true;
318 	return 1;
319 }
320 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
321 
322 unsigned long long ns2usecs(u64 nsec)
323 {
324 	nsec += 500;
325 	do_div(nsec, 1000);
326 	return nsec;
327 }
328 
329 static void
330 trace_process_export(struct trace_export *export,
331 	       struct ring_buffer_event *event, int flag)
332 {
333 	struct trace_entry *entry;
334 	unsigned int size = 0;
335 
336 	if (export->flags & flag) {
337 		entry = ring_buffer_event_data(event);
338 		size = ring_buffer_event_length(event);
339 		export->write(export, entry, size);
340 	}
341 }
342 
343 static DEFINE_MUTEX(ftrace_export_lock);
344 
345 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
346 
347 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
348 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
350 
351 static inline void ftrace_exports_enable(struct trace_export *export)
352 {
353 	if (export->flags & TRACE_EXPORT_FUNCTION)
354 		static_branch_inc(&trace_function_exports_enabled);
355 
356 	if (export->flags & TRACE_EXPORT_EVENT)
357 		static_branch_inc(&trace_event_exports_enabled);
358 
359 	if (export->flags & TRACE_EXPORT_MARKER)
360 		static_branch_inc(&trace_marker_exports_enabled);
361 }
362 
363 static inline void ftrace_exports_disable(struct trace_export *export)
364 {
365 	if (export->flags & TRACE_EXPORT_FUNCTION)
366 		static_branch_dec(&trace_function_exports_enabled);
367 
368 	if (export->flags & TRACE_EXPORT_EVENT)
369 		static_branch_dec(&trace_event_exports_enabled);
370 
371 	if (export->flags & TRACE_EXPORT_MARKER)
372 		static_branch_dec(&trace_marker_exports_enabled);
373 }
374 
375 static void ftrace_exports(struct ring_buffer_event *event, int flag)
376 {
377 	struct trace_export *export;
378 
379 	preempt_disable_notrace();
380 
381 	export = rcu_dereference_raw_check(ftrace_exports_list);
382 	while (export) {
383 		trace_process_export(export, event, flag);
384 		export = rcu_dereference_raw_check(export->next);
385 	}
386 
387 	preempt_enable_notrace();
388 }
389 
390 static inline void
391 add_trace_export(struct trace_export **list, struct trace_export *export)
392 {
393 	rcu_assign_pointer(export->next, *list);
394 	/*
395 	 * We are entering export into the list but another
396 	 * CPU might be walking that list. We need to make sure
397 	 * the export->next pointer is valid before another CPU sees
398 	 * the export pointer included into the list.
399 	 */
400 	rcu_assign_pointer(*list, export);
401 }
402 
403 static inline int
404 rm_trace_export(struct trace_export **list, struct trace_export *export)
405 {
406 	struct trace_export **p;
407 
408 	for (p = list; *p != NULL; p = &(*p)->next)
409 		if (*p == export)
410 			break;
411 
412 	if (*p != export)
413 		return -1;
414 
415 	rcu_assign_pointer(*p, (*p)->next);
416 
417 	return 0;
418 }
419 
/* Bump the static keys for @export's types, then link it into @list. */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);
	add_trace_export(list, export);
}
427 
/*
 * Unlink @export from @list and drop the static-key counts that
 * ftrace_exports_enable() took for it.
 *
 * The keys are only decremented when @export was actually found on the
 * list; otherwise unregistering an export that was never registered would
 * unbalance the counts.
 *
 * Returns 0 on success, -1 if @export is not on @list.
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	if (ret)
		return ret;

	ftrace_exports_disable(export);

	return 0;
}
438 
439 int register_ftrace_export(struct trace_export *export)
440 {
441 	if (WARN_ON_ONCE(!export->write))
442 		return -1;
443 
444 	mutex_lock(&ftrace_export_lock);
445 
446 	add_ftrace_export(&ftrace_exports_list, export);
447 
448 	mutex_unlock(&ftrace_export_lock);
449 
450 	return 0;
451 }
452 EXPORT_SYMBOL_GPL(register_ftrace_export);
453 
454 int unregister_ftrace_export(struct trace_export *export)
455 {
456 	int ret;
457 
458 	mutex_lock(&ftrace_export_lock);
459 
460 	ret = rm_ftrace_export(&ftrace_exports_list, export);
461 
462 	mutex_unlock(&ftrace_export_lock);
463 
464 	return ret;
465 }
466 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
467 
/* trace_flags holds trace_options default values */
#define TRACE_DEFAULT_FLAGS						\
	(FUNCTION_DEFAULT_FLAGS |					\
	 TRACE_ITER_PRINT_PARENT |					\
	 TRACE_ITER_PRINTK |						\
	 TRACE_ITER_ANNOTATE |						\
	 TRACE_ITER_CONTEXT_INFO |					\
	 TRACE_ITER_RECORD_CMD |					\
	 TRACE_ITER_OVERWRITE |						\
	 TRACE_ITER_IRQ_INFO |						\
	 TRACE_ITER_MARKERS |						\
	 TRACE_ITER_HASH_PTR)

/* trace_options that are only supported by global_trace */
#define TOP_LEVEL_TRACE_FLAGS						\
	(TRACE_ITER_PRINTK |						\
	 TRACE_ITER_PRINTK_MSGONLY |					\
	 TRACE_ITER_RECORD_CMD)

/* trace_flags that are default zero for instances */
#define ZEROED_TRACE_FLAGS						\
	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
484 
485 /*
486  * The global_trace is the descriptor that holds the top-level tracing
487  * buffers for the live tracing.
488  */
489 static struct trace_array global_trace = {
490 	.trace_flags = TRACE_DEFAULT_FLAGS,
491 };
492 
493 LIST_HEAD(ftrace_trace_arrays);
494 
495 int trace_array_get(struct trace_array *this_tr)
496 {
497 	struct trace_array *tr;
498 	int ret = -ENODEV;
499 
500 	mutex_lock(&trace_types_lock);
501 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
502 		if (tr == this_tr) {
503 			tr->ref++;
504 			ret = 0;
505 			break;
506 		}
507 	}
508 	mutex_unlock(&trace_types_lock);
509 
510 	return ret;
511 }
512 
513 static void __trace_array_put(struct trace_array *this_tr)
514 {
515 	WARN_ON(!this_tr->ref);
516 	this_tr->ref--;
517 }
518 
519 /**
520  * trace_array_put - Decrement the reference counter for this trace array.
521  * @this_tr : pointer to the trace array
522  *
523  * NOTE: Use this when we no longer need the trace array returned by
524  * trace_array_get_by_name(). This ensures the trace array can be later
525  * destroyed.
526  *
527  */
528 void trace_array_put(struct trace_array *this_tr)
529 {
530 	if (!this_tr)
531 		return;
532 
533 	mutex_lock(&trace_types_lock);
534 	__trace_array_put(this_tr);
535 	mutex_unlock(&trace_types_lock);
536 }
537 EXPORT_SYMBOL_GPL(trace_array_put);
538 
539 int tracing_check_open_get_tr(struct trace_array *tr)
540 {
541 	int ret;
542 
543 	ret = security_locked_down(LOCKDOWN_TRACEFS);
544 	if (ret)
545 		return ret;
546 
547 	if (tracing_disabled)
548 		return -ENODEV;
549 
550 	if (tr && trace_array_get(tr) < 0)
551 		return -ENODEV;
552 
553 	return 0;
554 }
555 
556 int call_filter_check_discard(struct trace_event_call *call, void *rec,
557 			      struct trace_buffer *buffer,
558 			      struct ring_buffer_event *event)
559 {
560 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
561 	    !filter_match_preds(call->filter, rec)) {
562 		__trace_event_discard_commit(buffer, event);
563 		return 1;
564 	}
565 
566 	return 0;
567 }
568 
569 /**
570  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
571  * @filtered_pids: The list of pids to check
572  * @search_pid: The PID to find in @filtered_pids
573  *
574  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
575  */
576 bool
577 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
578 {
579 	return trace_pid_list_is_set(filtered_pids, search_pid);
580 }
581 
582 /**
583  * trace_ignore_this_task - should a task be ignored for tracing
584  * @filtered_pids: The list of pids to check
585  * @filtered_no_pids: The list of pids not to be traced
586  * @task: The task that should be ignored if not filtered
587  *
588  * Checks if @task should be traced or not from @filtered_pids.
589  * Returns true if @task should *NOT* be traced.
590  * Returns false if @task should be traced.
591  */
592 bool
593 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
594 		       struct trace_pid_list *filtered_no_pids,
595 		       struct task_struct *task)
596 {
597 	/*
598 	 * If filtered_no_pids is not empty, and the task's pid is listed
599 	 * in filtered_no_pids, then return true.
600 	 * Otherwise, if filtered_pids is empty, that means we can
601 	 * trace all tasks. If it has content, then only trace pids
602 	 * within filtered_pids.
603 	 */
604 
605 	return (filtered_pids &&
606 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
607 		(filtered_no_pids &&
608 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
609 }
610 
611 /**
612  * trace_filter_add_remove_task - Add or remove a task from a pid_list
613  * @pid_list: The list to modify
614  * @self: The current task for fork or NULL for exit
615  * @task: The task to add or remove
616  *
617  * If adding a task, if @self is defined, the task is only added if @self
618  * is also included in @pid_list. This happens on fork and tasks should
619  * only be added when the parent is listed. If @self is NULL, then the
620  * @task pid will be removed from the list, which would happen on exit
621  * of a task.
622  */
623 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
624 				  struct task_struct *self,
625 				  struct task_struct *task)
626 {
627 	if (!pid_list)
628 		return;
629 
630 	/* For forks, we only add if the forking task is listed */
631 	if (self) {
632 		if (!trace_find_filtered_pid(pid_list, self->pid))
633 			return;
634 	}
635 
636 	/* "self" is set for forks, and NULL for exits */
637 	if (self)
638 		trace_pid_list_set(pid_list, task->pid);
639 	else
640 		trace_pid_list_clear(pid_list, task->pid);
641 }
642 
643 /**
644  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
645  * @pid_list: The pid list to show
646  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
647  * @pos: The position of the file
648  *
649  * This is used by the seq_file "next" operation to iterate the pids
650  * listed in a trace_pid_list structure.
651  *
652  * Returns the pid+1 as we want to display pid of zero, but NULL would
653  * stop the iteration.
654  */
655 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
656 {
657 	long pid = (unsigned long)v;
658 	unsigned int next;
659 
660 	(*pos)++;
661 
662 	/* pid already is +1 of the actual previous bit */
663 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
664 		return NULL;
665 
666 	pid = next;
667 
668 	/* Return pid + 1 to allow zero to be represented */
669 	return (void *)(pid + 1);
670 }
671 
672 /**
673  * trace_pid_start - Used for seq_file to start reading pid lists
674  * @pid_list: The pid list to show
675  * @pos: The position of the file
676  *
677  * This is used by seq_file "start" operation to start the iteration
678  * of listing pids.
679  *
680  * Returns the pid+1 as we want to display pid of zero, but NULL would
681  * stop the iteration.
682  */
683 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
684 {
685 	unsigned long pid;
686 	unsigned int first;
687 	loff_t l = 0;
688 
689 	if (trace_pid_list_first(pid_list, &first) < 0)
690 		return NULL;
691 
692 	pid = first;
693 
694 	/* Return pid + 1 so that zero can be the exit value */
695 	for (pid++; pid && l < *pos;
696 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
697 		;
698 	return (void *)pid;
699 }
700 
/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value. Always returns 0.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	/* Undo the +1 that the start/next operations applied */
	seq_printf(m, "%lu\n", (unsigned long)v - 1);

	return 0;
}
716 
/* 128 bytes (PID_BUF_SIZE + 1, the parser buffer size) should be much more than enough */
718 #define PID_BUF_SIZE		127
719 
720 int trace_pid_write(struct trace_pid_list *filtered_pids,
721 		    struct trace_pid_list **new_pid_list,
722 		    const char __user *ubuf, size_t cnt)
723 {
724 	struct trace_pid_list *pid_list;
725 	struct trace_parser parser;
726 	unsigned long val;
727 	int nr_pids = 0;
728 	ssize_t read = 0;
729 	ssize_t ret;
730 	loff_t pos;
731 	pid_t pid;
732 
733 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
734 		return -ENOMEM;
735 
736 	/*
737 	 * Always recreate a new array. The write is an all or nothing
738 	 * operation. Always create a new array when adding new pids by
739 	 * the user. If the operation fails, then the current list is
740 	 * not modified.
741 	 */
742 	pid_list = trace_pid_list_alloc();
743 	if (!pid_list) {
744 		trace_parser_put(&parser);
745 		return -ENOMEM;
746 	}
747 
748 	if (filtered_pids) {
749 		/* copy the current bits to the new max */
750 		ret = trace_pid_list_first(filtered_pids, &pid);
751 		while (!ret) {
752 			trace_pid_list_set(pid_list, pid);
753 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
754 			nr_pids++;
755 		}
756 	}
757 
758 	ret = 0;
759 	while (cnt > 0) {
760 
761 		pos = 0;
762 
763 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
764 		if (ret < 0)
765 			break;
766 
767 		read += ret;
768 		ubuf += ret;
769 		cnt -= ret;
770 
771 		if (!trace_parser_loaded(&parser))
772 			break;
773 
774 		ret = -EINVAL;
775 		if (kstrtoul(parser.buffer, 0, &val))
776 			break;
777 
778 		pid = (pid_t)val;
779 
780 		if (trace_pid_list_set(pid_list, pid) < 0) {
781 			ret = -1;
782 			break;
783 		}
784 		nr_pids++;
785 
786 		trace_parser_clear(&parser);
787 		ret = 0;
788 	}
789 	trace_parser_put(&parser);
790 
791 	if (ret < 0) {
792 		trace_pid_list_free(pid_list);
793 		return ret;
794 	}
795 
796 	if (!nr_pids) {
797 		/* Cleared the list of pids */
798 		trace_pid_list_free(pid_list);
799 		pid_list = NULL;
800 	}
801 
802 	*new_pid_list = pid_list;
803 
804 	return read;
805 }
806 
807 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
808 {
809 	u64 ts;
810 
811 	/* Early boot up does not have a buffer yet */
812 	if (!buf->buffer)
813 		return trace_clock_local();
814 
815 	ts = ring_buffer_time_stamp(buf->buffer);
816 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
817 
818 	return ts;
819 }
820 
821 u64 ftrace_now(int cpu)
822 {
823 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
824 }
825 
826 /**
827  * tracing_is_enabled - Show if global_trace has been enabled
828  *
829  * Shows if the global trace has been enabled or not. It uses the
830  * mirror flag "buffer_disabled" to be used in fast paths such as for
831  * the irqsoff tracer. But it may be inaccurate due to races. If you
832  * need to know the accurate state, use tracing_is_on() which is a little
833  * slower, but accurate.
834  */
835 int tracing_is_enabled(void)
836 {
837 	/*
838 	 * For quick access (irqsoff uses this in fast path), just
839 	 * return the mirror variable of the state of the ring buffer.
840 	 * It's a little racy, but we don't really care.
841 	 */
842 	smp_rmb();
843 	return !global_trace.buffer_disabled;
844 }
845 
846 /*
847  * trace_buf_size is the size in bytes that is allocated
848  * for a buffer. Note, the number of bytes is always rounded
849  * to page size.
850  *
851  * This number is purposely set to a low number of 16384.
852  * If the dump on oops happens, it will be much appreciated
853  * to not have to wait for all that output. Anyway this can be
854  * boot time and run time configurable.
855  */
856 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
857 
858 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
859 
860 /* trace_types holds a link list of available tracers. */
861 static struct tracer		*trace_types __read_mostly;
862 
863 /*
864  * trace_types_lock is used to protect the trace_types list.
865  */
866 DEFINE_MUTEX(trace_types_lock);
867 
868 /*
869  * serialize the access of the ring buffer
870  *
871  * ring buffer serializes readers, but it is low level protection.
872  * The validity of the events (which returns by ring_buffer_peek() ..etc)
873  * are not protected by ring buffer.
874  *
875  * The content of events may become garbage if we allow other process consumes
876  * these events concurrently:
877  *   A) the page of the consumed events may become a normal page
878  *      (not reader page) in ring buffer, and this page will be rewritten
879  *      by events producer.
880  *   B) The page of the consumed events may become a page for splice_read,
881  *      and this page will be returned to system.
882  *
883  * These primitives allow multi process access to different cpu ring buffer
884  * concurrently.
885  *
886  * These primitives don't distinguish read-only and read-consume access.
887  * Multi read-only access are also serialized.
888  */
889 
890 #ifdef CONFIG_SMP
891 static DECLARE_RWSEM(all_cpu_access_lock);
892 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
893 
894 static inline void trace_access_lock(int cpu)
895 {
896 	if (cpu == RING_BUFFER_ALL_CPUS) {
897 		/* gain it for accessing the whole ring buffer. */
898 		down_write(&all_cpu_access_lock);
899 	} else {
900 		/* gain it for accessing a cpu ring buffer. */
901 
902 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
903 		down_read(&all_cpu_access_lock);
904 
905 		/* Secondly block other access to this @cpu ring buffer. */
906 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
907 	}
908 }
909 
910 static inline void trace_access_unlock(int cpu)
911 {
912 	if (cpu == RING_BUFFER_ALL_CPUS) {
913 		up_write(&all_cpu_access_lock);
914 	} else {
915 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
916 		up_read(&all_cpu_access_lock);
917 	}
918 }
919 
920 static inline void trace_access_lock_init(void)
921 {
922 	int cpu;
923 
924 	for_each_possible_cpu(cpu)
925 		mutex_init(&per_cpu(cpu_access_lock, cpu));
926 }
927 
928 #else
929 
930 static DEFINE_MUTEX(access_lock);
931 
932 static inline void trace_access_lock(int cpu)
933 {
934 	(void)cpu;
935 	mutex_lock(&access_lock);
936 }
937 
938 static inline void trace_access_unlock(int cpu)
939 {
940 	(void)cpu;
941 	mutex_unlock(&access_lock);
942 }
943 
/* !CONFIG_SMP: access_lock is statically defined, nothing to set up. */
static inline void trace_access_lock_init(void)
{
}
947 
948 #endif
949 
950 #ifdef CONFIG_STACKTRACE
951 static void __ftrace_trace_stack(struct trace_buffer *buffer,
952 				 unsigned int trace_ctx,
953 				 int skip, struct pt_regs *regs);
954 static inline void ftrace_trace_stack(struct trace_array *tr,
955 				      struct trace_buffer *buffer,
956 				      unsigned int trace_ctx,
957 				      int skip, struct pt_regs *regs);
958 
959 #else
/* !CONFIG_STACKTRACE: empty stub, no stack trace is recorded. */
static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
					unsigned int trace_ctx,
					int skip, struct pt_regs *regs)
{
}
/*
 * !CONFIG_STACKTRACE: empty stub, no stack trace is recorded.
 *
 * @trace_ctx is an unsigned int so that the stub has the same signature
 * as the CONFIG_STACKTRACE declaration of ftrace_trace_stack().
 */
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs)
{
}
971 
972 #endif
973 
974 static __always_inline void
975 trace_event_setup(struct ring_buffer_event *event,
976 		  int type, unsigned int trace_ctx)
977 {
978 	struct trace_entry *ent = ring_buffer_event_data(event);
979 
980 	tracing_generic_entry_update(ent, type, trace_ctx);
981 }
982 
983 static __always_inline struct ring_buffer_event *
984 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
985 			  int type,
986 			  unsigned long len,
987 			  unsigned int trace_ctx)
988 {
989 	struct ring_buffer_event *event;
990 
991 	event = ring_buffer_lock_reserve(buffer, len);
992 	if (event != NULL)
993 		trace_event_setup(event, type, trace_ctx);
994 
995 	return event;
996 }
997 
998 void tracer_tracing_on(struct trace_array *tr)
999 {
1000 	if (tr->array_buffer.buffer)
1001 		ring_buffer_record_on(tr->array_buffer.buffer);
1002 	/*
1003 	 * This flag is looked at when buffers haven't been allocated
1004 	 * yet, or by some tracers (like irqsoff), that just want to
1005 	 * know if the ring buffer has been disabled, but it can handle
1006 	 * races of where it gets disabled but we still do a record.
1007 	 * As the check is in the fast path of the tracers, it is more
1008 	 * important to be fast than accurate.
1009 	 */
1010 	tr->buffer_disabled = 0;
1011 	/* Make the flag seen by readers */
1012 	smp_wmb();
1013 }
1014 
1015 /**
1016  * tracing_on - enable tracing buffers
1017  *
1018  * This function enables tracing buffers that may have been
1019  * disabled with tracing_off.
1020  */
1021 void tracing_on(void)
1022 {
1023 	tracer_tracing_on(&global_trace);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_on);
1026 
1027 
1028 static __always_inline void
1029 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1030 {
1031 	__this_cpu_write(trace_taskinfo_save, true);
1032 
1033 	/* If this is the temp buffer, we need to commit fully */
1034 	if (this_cpu_read(trace_buffered_event) == event) {
1035 		/* Length is in event->array[0] */
1036 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1037 		/* Release the temp buffer */
1038 		this_cpu_dec(trace_buffered_event_cnt);
1039 		/* ring_buffer_unlock_commit() enables preemption */
1040 		preempt_enable_notrace();
1041 	} else
1042 		ring_buffer_unlock_commit(buffer);
1043 }
1044 
1045 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1046 		       const char *str, int size)
1047 {
1048 	struct ring_buffer_event *event;
1049 	struct trace_buffer *buffer;
1050 	struct print_entry *entry;
1051 	unsigned int trace_ctx;
1052 	int alloc;
1053 
1054 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1055 		return 0;
1056 
1057 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1058 		return 0;
1059 
1060 	if (unlikely(tracing_disabled))
1061 		return 0;
1062 
1063 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1064 
1065 	trace_ctx = tracing_gen_ctx();
1066 	buffer = tr->array_buffer.buffer;
1067 	ring_buffer_nest_start(buffer);
1068 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1069 					    trace_ctx);
1070 	if (!event) {
1071 		size = 0;
1072 		goto out;
1073 	}
1074 
1075 	entry = ring_buffer_event_data(event);
1076 	entry->ip = ip;
1077 
1078 	memcpy(&entry->buf, str, size);
1079 
1080 	/* Add a newline if necessary */
1081 	if (entry->buf[size - 1] != '\n') {
1082 		entry->buf[size] = '\n';
1083 		entry->buf[size + 1] = '\0';
1084 	} else
1085 		entry->buf[size] = '\0';
1086 
1087 	__buffer_unlock_commit(buffer, event);
1088 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1089  out:
1090 	ring_buffer_nest_end(buffer);
1091 	return size;
1092 }
1093 EXPORT_SYMBOL_GPL(__trace_array_puts);
1094 
1095 /**
1096  * __trace_puts - write a constant string into the trace buffer.
1097  * @ip:	   The address of the caller
1098  * @str:   The constant string to write
1099  * @size:  The size of the string.
1100  */
1101 int __trace_puts(unsigned long ip, const char *str, int size)
1102 {
1103 	return __trace_array_puts(&global_trace, ip, str, size);
1104 }
1105 EXPORT_SYMBOL_GPL(__trace_puts);
1106 
1107 /**
1108  * __trace_bputs - write the pointer to a constant string into trace buffer
1109  * @ip:	   The address of the caller
1110  * @str:   The constant string to write to the buffer to
1111  */
1112 int __trace_bputs(unsigned long ip, const char *str)
1113 {
1114 	struct ring_buffer_event *event;
1115 	struct trace_buffer *buffer;
1116 	struct bputs_entry *entry;
1117 	unsigned int trace_ctx;
1118 	int size = sizeof(struct bputs_entry);
1119 	int ret = 0;
1120 
1121 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1122 		return 0;
1123 
1124 	if (unlikely(tracing_selftest_running || tracing_disabled))
1125 		return 0;
1126 
1127 	trace_ctx = tracing_gen_ctx();
1128 	buffer = global_trace.array_buffer.buffer;
1129 
1130 	ring_buffer_nest_start(buffer);
1131 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1132 					    trace_ctx);
1133 	if (!event)
1134 		goto out;
1135 
1136 	entry = ring_buffer_event_data(event);
1137 	entry->ip			= ip;
1138 	entry->str			= str;
1139 
1140 	__buffer_unlock_commit(buffer, event);
1141 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1142 
1143 	ret = 1;
1144  out:
1145 	ring_buffer_nest_end(buffer);
1146 	return ret;
1147 }
1148 EXPORT_SYMBOL_GPL(__trace_bputs);
1149 
1150 #ifdef CONFIG_TRACER_SNAPSHOT
1151 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1152 					   void *cond_data)
1153 {
1154 	struct tracer *tracer = tr->current_trace;
1155 	unsigned long flags;
1156 
1157 	if (in_nmi()) {
1158 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1159 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1160 		return;
1161 	}
1162 
1163 	if (!tr->allocated_snapshot) {
1164 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1165 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1166 		tracer_tracing_off(tr);
1167 		return;
1168 	}
1169 
1170 	/* Note, snapshot can not be used when the tracer uses it */
1171 	if (tracer->use_max_tr) {
1172 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1173 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1174 		return;
1175 	}
1176 
1177 	local_irq_save(flags);
1178 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1179 	local_irq_restore(flags);
1180 }
1181 
1182 void tracing_snapshot_instance(struct trace_array *tr)
1183 {
1184 	tracing_snapshot_instance_cond(tr, NULL);
1185 }
1186 
1187 /**
1188  * tracing_snapshot - take a snapshot of the current buffer.
1189  *
1190  * This causes a swap between the snapshot buffer and the current live
1191  * tracing buffer. You can use this to take snapshots of the live
1192  * trace when some condition is triggered, but continue to trace.
1193  *
1194  * Note, make sure to allocate the snapshot with either
1195  * a tracing_snapshot_alloc(), or by doing it manually
1196  * with: echo 1 > /sys/kernel/tracing/snapshot
1197  *
1198  * If the snapshot buffer is not allocated, it will stop tracing.
1199  * Basically making a permanent snapshot.
1200  */
1201 void tracing_snapshot(void)
1202 {
1203 	struct trace_array *tr = &global_trace;
1204 
1205 	tracing_snapshot_instance(tr);
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot);
1208 
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * Behaves like tracing_snapshot(), with one difference: when @tr has a
 * cond_snapshot installed, its update() callback is handed @cond_data
 * and the snapshot is only taken if that callback returns true. The
 * callback decides, based on @cond_data, whether the snapshot should
 * happen, and presumably saves the data along with the snapshot when
 * it does.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1227 
1228 /**
1229  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1230  * @tr:		The tracing instance
1231  *
1232  * When the user enables a conditional snapshot using
1233  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1234  * with the snapshot.  This accessor is used to retrieve it.
1235  *
1236  * Should not be called from cond_snapshot.update(), since it takes
1237  * the tr->max_lock lock, which the code calling
1238  * cond_snapshot.update() has already done.
1239  *
1240  * Returns the cond_data associated with the trace array's snapshot.
1241  */
1242 void *tracing_cond_snapshot_data(struct trace_array *tr)
1243 {
1244 	void *cond_data = NULL;
1245 
1246 	local_irq_disable();
1247 	arch_spin_lock(&tr->max_lock);
1248 
1249 	if (tr->cond_snapshot)
1250 		cond_data = tr->cond_snapshot->cond_data;
1251 
1252 	arch_spin_unlock(&tr->max_lock);
1253 	local_irq_enable();
1254 
1255 	return cond_data;
1256 }
1257 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1258 
1259 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1260 					struct array_buffer *size_buf, int cpu_id);
1261 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1262 
1263 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1264 {
1265 	int ret;
1266 
1267 	if (!tr->allocated_snapshot) {
1268 
1269 		/* allocate spare buffer */
1270 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1271 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1272 		if (ret < 0)
1273 			return ret;
1274 
1275 		tr->allocated_snapshot = true;
1276 	}
1277 
1278 	return 0;
1279 }
1280 
1281 static void free_snapshot(struct trace_array *tr)
1282 {
1283 	/*
1284 	 * We don't free the ring buffer. instead, resize it because
1285 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1286 	 * we want preserve it.
1287 	 */
1288 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1289 	set_buffer_entries(&tr->max_buffer, 1);
1290 	tracing_reset_online_cpus(&tr->max_buffer);
1291 	tr->allocated_snapshot = false;
1292 }
1293 
1294 /**
1295  * tracing_alloc_snapshot - allocate snapshot buffer.
1296  *
1297  * This only allocates the snapshot buffer if it isn't already
1298  * allocated - it doesn't also take a snapshot.
1299  *
1300  * This is meant to be used in cases where the snapshot buffer needs
1301  * to be set up for events that can't sleep but need to be able to
1302  * trigger a snapshot.
1303  */
1304 int tracing_alloc_snapshot(void)
1305 {
1306 	struct trace_array *tr = &global_trace;
1307 	int ret;
1308 
1309 	ret = tracing_alloc_snapshot_instance(tr);
1310 	WARN_ON(ret < 0);
1311 
1312 	return ret;
1313 }
1314 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1315 
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * Like tracing_snapshot(), except that the snapshot buffer is allocated
 * first if needed. Only use this where sleeping is allowed, as the
 * allocation may sleep.
 *
 * The snapshot buffer and the live tracing buffer are swapped, so the
 * live trace can be captured when some condition triggers while tracing
 * continues. Nothing is snapshotted if the allocation fails.
 */
void tracing_snapshot_alloc(void)
{
	if (tracing_alloc_snapshot() < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1338 
1339 /**
1340  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1341  * @tr:		The tracing instance
1342  * @cond_data:	User data to associate with the snapshot
1343  * @update:	Implementation of the cond_snapshot update function
1344  *
1345  * Check whether the conditional snapshot for the given instance has
1346  * already been enabled, or if the current tracer is already using a
1347  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1348  * save the cond_data and update function inside.
1349  *
1350  * Returns 0 if successful, error otherwise.
1351  */
1352 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1353 				 cond_update_fn_t update)
1354 {
1355 	struct cond_snapshot *cond_snapshot;
1356 	int ret = 0;
1357 
1358 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1359 	if (!cond_snapshot)
1360 		return -ENOMEM;
1361 
1362 	cond_snapshot->cond_data = cond_data;
1363 	cond_snapshot->update = update;
1364 
1365 	mutex_lock(&trace_types_lock);
1366 
1367 	ret = tracing_alloc_snapshot_instance(tr);
1368 	if (ret)
1369 		goto fail_unlock;
1370 
1371 	if (tr->current_trace->use_max_tr) {
1372 		ret = -EBUSY;
1373 		goto fail_unlock;
1374 	}
1375 
1376 	/*
1377 	 * The cond_snapshot can only change to NULL without the
1378 	 * trace_types_lock. We don't care if we race with it going
1379 	 * to NULL, but we want to make sure that it's not set to
1380 	 * something other than NULL when we get here, which we can
1381 	 * do safely with only holding the trace_types_lock and not
1382 	 * having to take the max_lock.
1383 	 */
1384 	if (tr->cond_snapshot) {
1385 		ret = -EBUSY;
1386 		goto fail_unlock;
1387 	}
1388 
1389 	local_irq_disable();
1390 	arch_spin_lock(&tr->max_lock);
1391 	tr->cond_snapshot = cond_snapshot;
1392 	arch_spin_unlock(&tr->max_lock);
1393 	local_irq_enable();
1394 
1395 	mutex_unlock(&trace_types_lock);
1396 
1397 	return ret;
1398 
1399  fail_unlock:
1400 	mutex_unlock(&trace_types_lock);
1401 	kfree(cond_snapshot);
1402 	return ret;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1405 
1406 /**
1407  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1408  * @tr:		The tracing instance
1409  *
1410  * Check whether the conditional snapshot for the given instance is
1411  * enabled; if so, free the cond_snapshot associated with it,
1412  * otherwise return -EINVAL.
1413  *
1414  * Returns 0 if successful, error otherwise.
1415  */
1416 int tracing_snapshot_cond_disable(struct trace_array *tr)
1417 {
1418 	int ret = 0;
1419 
1420 	local_irq_disable();
1421 	arch_spin_lock(&tr->max_lock);
1422 
1423 	if (!tr->cond_snapshot)
1424 		ret = -EINVAL;
1425 	else {
1426 		kfree(tr->cond_snapshot);
1427 		tr->cond_snapshot = NULL;
1428 	}
1429 
1430 	arch_spin_unlock(&tr->max_lock);
1431 	local_irq_enable();
1432 
1433 	return ret;
1434 }
1435 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1436 #else
/* Stub used when CONFIG_TRACER_SNAPSHOT is not set: only warns (once). */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
/*
 * Stub used when CONFIG_TRACER_SNAPSHOT is not set: both arguments are
 * ignored and a warning is issued (once).
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1447 int tracing_alloc_snapshot(void)
1448 {
1449 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1450 	return -ENODEV;
1451 }
1452 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
/* Stub used when CONFIG_TRACER_SNAPSHOT is not set. */
void tracing_snapshot_alloc(void)
{
	/* Nothing can be allocated; calling this is only good for the warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1459 void *tracing_cond_snapshot_data(struct trace_array *tr)
1460 {
1461 	return NULL;
1462 }
1463 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1464 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1465 {
1466 	return -ENODEV;
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
/*
 * Stub used when CONFIG_TRACER_SNAPSHOT is not set: there is nothing
 * to disable, and 0 is returned.
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
/* Without CONFIG_TRACER_SNAPSHOT, free_snapshot() expands to an empty statement. */
#define free_snapshot(tr)	do { } while (0)
1475 #endif /* CONFIG_TRACER_SNAPSHOT */
1476 
1477 void tracer_tracing_off(struct trace_array *tr)
1478 {
1479 	if (tr->array_buffer.buffer)
1480 		ring_buffer_record_off(tr->array_buffer.buffer);
1481 	/*
1482 	 * This flag is looked at when buffers haven't been allocated
1483 	 * yet, or by some tracers (like irqsoff), that just want to
1484 	 * know if the ring buffer has been disabled, but it can handle
1485 	 * races of where it gets disabled but we still do a record.
1486 	 * As the check is in the fast path of the tracers, it is more
1487 	 * important to be fast than accurate.
1488 	 */
1489 	tr->buffer_disabled = 1;
1490 	/* Make the flag seen by readers */
1491 	smp_wmb();
1492 }
1493 
1494 /**
1495  * tracing_off - turn off tracing buffers
1496  *
1497  * This function stops the tracing buffers from recording data.
1498  * It does not disable any overhead the tracers themselves may
1499  * be causing. This function simply causes all recording to
1500  * the ring buffers to fail.
1501  */
1502 void tracing_off(void)
1503 {
1504 	tracer_tracing_off(&global_trace);
1505 }
1506 EXPORT_SYMBOL_GPL(tracing_off);
1507 
1508 void disable_trace_on_warning(void)
1509 {
1510 	if (__disable_trace_on_warning) {
1511 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1512 			"Disabling tracing due to warning\n");
1513 		tracing_off();
1514 	}
1515 }
1516 
1517 /**
1518  * tracer_tracing_is_on - show real state of ring buffer enabled
1519  * @tr : the trace array to know if ring buffer is enabled
1520  *
1521  * Shows real state of the ring buffer if it is enabled or not.
1522  */
1523 bool tracer_tracing_is_on(struct trace_array *tr)
1524 {
1525 	if (tr->array_buffer.buffer)
1526 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1527 	return !tr->buffer_disabled;
1528 }
1529 
1530 /**
1531  * tracing_is_on - show state of ring buffers enabled
1532  */
1533 int tracing_is_on(void)
1534 {
1535 	return tracer_tracing_is_on(&global_trace);
1536 }
1537 EXPORT_SYMBOL_GPL(tracing_is_on);
1538 
1539 static int __init set_buf_size(char *str)
1540 {
1541 	unsigned long buf_size;
1542 
1543 	if (!str)
1544 		return 0;
1545 	buf_size = memparse(str, &str);
1546 	/*
1547 	 * nr_entries can not be zero and the startup
1548 	 * tests require some buffer space. Therefore
1549 	 * ensure we have at least 4096 bytes of buffer.
1550 	 */
1551 	trace_buf_size = max(4096UL, buf_size);
1552 	return 1;
1553 }
1554 __setup("trace_buf_size=", set_buf_size);
1555 
1556 static int __init set_tracing_thresh(char *str)
1557 {
1558 	unsigned long threshold;
1559 	int ret;
1560 
1561 	if (!str)
1562 		return 0;
1563 	ret = kstrtoul(str, 0, &threshold);
1564 	if (ret < 0)
1565 		return 0;
1566 	tracing_thresh = threshold * 1000;
1567 	return 1;
1568 }
1569 __setup("tracing_thresh=", set_tracing_thresh);
1570 
/* Convert @nsecs nanoseconds to whole microseconds, rounding down. */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	const unsigned long nsecs_per_usec = 1000;

	return nsecs / nsecs_per_usec;
}
1575 
/*
 * TRACE_FLAGS is defined as a list of tuples pairing bit masks with strings.
 * Each tuple is written as C(a, b), where 'a' is the eval (enum) name and
 * 'b' is the string that matches it. By defining "C(a, b) b", TRACE_FLAGS
 * expands to just the strings, in the order that the evals (enum) were
 * defined.
 */
#undef C
#define C(a, b) b

/*
 * Option names; these must match the bit positions in trace_iterator_flags.
 * The list is terminated by a NULL entry.
 */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};
1590 
1591 static struct {
1592 	u64 (*func)(void);
1593 	const char *name;
1594 	int in_ns;		/* is this clock in nanoseconds? */
1595 } trace_clocks[] = {
1596 	{ trace_clock_local,		"local",	1 },
1597 	{ trace_clock_global,		"global",	1 },
1598 	{ trace_clock_counter,		"counter",	0 },
1599 	{ trace_clock_jiffies,		"uptime",	0 },
1600 	{ trace_clock,			"perf",		1 },
1601 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1602 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1603 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1604 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1605 	ARCH_TRACE_CLOCKS
1606 };
1607 
1608 bool trace_clock_in_ns(struct trace_array *tr)
1609 {
1610 	if (trace_clocks[tr->clock_id].in_ns)
1611 		return true;
1612 
1613 	return false;
1614 }
1615 
1616 /*
1617  * trace_parser_get_init - gets the buffer for trace parser
1618  */
1619 int trace_parser_get_init(struct trace_parser *parser, int size)
1620 {
1621 	memset(parser, 0, sizeof(*parser));
1622 
1623 	parser->buffer = kmalloc(size, GFP_KERNEL);
1624 	if (!parser->buffer)
1625 		return 1;
1626 
1627 	parser->size = size;
1628 	return 0;
1629 }
1630 
1631 /*
1632  * trace_parser_put - frees the buffer for trace parser
1633  */
1634 void trace_parser_put(struct trace_parser *parser)
1635 {
1636 	kfree(parser->buffer);
1637 	parser->buffer = NULL;
1638 }
1639 
1640 /*
1641  * trace_get_user - reads the user input string separated by  space
1642  * (matched by isspace(ch))
1643  *
1644  * For each string found the 'struct trace_parser' is updated,
1645  * and the function returns.
1646  *
1647  * Returns number of bytes read.
1648  *
1649  * See kernel/trace/trace.h for 'struct trace_parser' details.
1650  */
1651 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1652 	size_t cnt, loff_t *ppos)
1653 {
1654 	char ch;
1655 	size_t read = 0;
1656 	ssize_t ret;
1657 
1658 	if (!*ppos)
1659 		trace_parser_clear(parser);
1660 
1661 	ret = get_user(ch, ubuf++);
1662 	if (ret)
1663 		goto out;
1664 
1665 	read++;
1666 	cnt--;
1667 
1668 	/*
1669 	 * The parser is not finished with the last write,
1670 	 * continue reading the user input without skipping spaces.
1671 	 */
1672 	if (!parser->cont) {
1673 		/* skip white space */
1674 		while (cnt && isspace(ch)) {
1675 			ret = get_user(ch, ubuf++);
1676 			if (ret)
1677 				goto out;
1678 			read++;
1679 			cnt--;
1680 		}
1681 
1682 		parser->idx = 0;
1683 
1684 		/* only spaces were written */
1685 		if (isspace(ch) || !ch) {
1686 			*ppos += read;
1687 			ret = read;
1688 			goto out;
1689 		}
1690 	}
1691 
1692 	/* read the non-space input */
1693 	while (cnt && !isspace(ch) && ch) {
1694 		if (parser->idx < parser->size - 1)
1695 			parser->buffer[parser->idx++] = ch;
1696 		else {
1697 			ret = -EINVAL;
1698 			goto out;
1699 		}
1700 		ret = get_user(ch, ubuf++);
1701 		if (ret)
1702 			goto out;
1703 		read++;
1704 		cnt--;
1705 	}
1706 
1707 	/* We either got finished input or we have to wait for another call. */
1708 	if (isspace(ch) || !ch) {
1709 		parser->buffer[parser->idx] = 0;
1710 		parser->cont = false;
1711 	} else if (parser->idx < parser->size - 1) {
1712 		parser->cont = true;
1713 		parser->buffer[parser->idx++] = ch;
1714 		/* Make sure the parsed string always terminates with '\0'. */
1715 		parser->buffer[parser->idx] = 0;
1716 	} else {
1717 		ret = -EINVAL;
1718 		goto out;
1719 	}
1720 
1721 	*ppos += read;
1722 	ret = read;
1723 
1724 out:
1725 	return ret;
1726 }
1727 
1728 /* TODO add a seq_buf_to_buffer() */
1729 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1730 {
1731 	int len;
1732 
1733 	if (trace_seq_used(s) <= s->seq.readpos)
1734 		return -EBUSY;
1735 
1736 	len = trace_seq_used(s) - s->seq.readpos;
1737 	if (cnt > len)
1738 		cnt = len;
1739 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1740 
1741 	s->seq.readpos += cnt;
1742 	return cnt;
1743 }
1744 
1745 unsigned long __read_mostly	tracing_thresh;
1746 
1747 #ifdef CONFIG_TRACER_MAX_TRACE
1748 static const struct file_operations tracing_max_lat_fops;
1749 
1750 #ifdef LATENCY_FS_NOTIFY
1751 
1752 static struct workqueue_struct *fsnotify_wq;
1753 
1754 static void latency_fsnotify_workfn(struct work_struct *work)
1755 {
1756 	struct trace_array *tr = container_of(work, struct trace_array,
1757 					      fsnotify_work);
1758 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1759 }
1760 
1761 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1762 {
1763 	struct trace_array *tr = container_of(iwork, struct trace_array,
1764 					      fsnotify_irqwork);
1765 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1766 }
1767 
1768 static void trace_create_maxlat_file(struct trace_array *tr,
1769 				     struct dentry *d_tracer)
1770 {
1771 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1772 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1773 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1774 					      TRACE_MODE_WRITE,
1775 					      d_tracer, tr,
1776 					      &tracing_max_lat_fops);
1777 }
1778 
1779 __init static int latency_fsnotify_init(void)
1780 {
1781 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1782 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1783 	if (!fsnotify_wq) {
1784 		pr_err("Unable to allocate tr_max_lat_wq\n");
1785 		return -ENOMEM;
1786 	}
1787 	return 0;
1788 }
1789 
1790 late_initcall_sync(latency_fsnotify_init);
1791 
1792 void latency_fsnotify(struct trace_array *tr)
1793 {
1794 	if (!fsnotify_wq)
1795 		return;
1796 	/*
1797 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1798 	 * possible that we are called from __schedule() or do_idle(), which
1799 	 * could cause a deadlock.
1800 	 */
1801 	irq_work_queue(&tr->fsnotify_irqwork);
1802 }
1803 
1804 #else /* !LATENCY_FS_NOTIFY */
1805 
/*
 * Without LATENCY_FS_NOTIFY there are no work items to set up: creating
 * the "tracing_max_latency" file is all that is left to do.
 */
#define trace_create_maxlat_file(tr, d_tracer)				\
	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
			  d_tracer, tr, &tracing_max_lat_fops)
1809 
1810 #endif
1811 
1812 /*
1813  * Copy the new maximum trace into the separate maximum-trace
1814  * structure. (this way the maximum trace is permanently saved,
1815  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1816  */
1817 static void
1818 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1819 {
1820 	struct array_buffer *trace_buf = &tr->array_buffer;
1821 	struct array_buffer *max_buf = &tr->max_buffer;
1822 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1823 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1824 
1825 	max_buf->cpu = cpu;
1826 	max_buf->time_start = data->preempt_timestamp;
1827 
1828 	max_data->saved_latency = tr->max_latency;
1829 	max_data->critical_start = data->critical_start;
1830 	max_data->critical_end = data->critical_end;
1831 
1832 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1833 	max_data->pid = tsk->pid;
1834 	/*
1835 	 * If tsk == current, then use current_uid(), as that does not use
1836 	 * RCU. The irq tracer can be called out of RCU scope.
1837 	 */
1838 	if (tsk == current)
1839 		max_data->uid = current_uid();
1840 	else
1841 		max_data->uid = task_uid(tsk);
1842 
1843 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1844 	max_data->policy = tsk->policy;
1845 	max_data->rt_priority = tsk->rt_priority;
1846 
1847 	/* record this tasks comm */
1848 	tracing_record_cmdline(tsk);
1849 	latency_fsnotify(tr);
1850 }
1851 
1852 /**
1853  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1854  * @tr: tracer
1855  * @tsk: the task with the latency
1856  * @cpu: The cpu that initiated the trace.
1857  * @cond_data: User data associated with a conditional snapshot
1858  *
1859  * Flip the buffers between the @tr and the max_tr and record information
1860  * about which task was the cause of this latency.
1861  */
1862 void
1863 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1864 	      void *cond_data)
1865 {
1866 	if (tr->stop_count)
1867 		return;
1868 
1869 	WARN_ON_ONCE(!irqs_disabled());
1870 
1871 	if (!tr->allocated_snapshot) {
1872 		/* Only the nop tracer should hit this when disabling */
1873 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1874 		return;
1875 	}
1876 
1877 	arch_spin_lock(&tr->max_lock);
1878 
1879 	/* Inherit the recordable setting from array_buffer */
1880 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1881 		ring_buffer_record_on(tr->max_buffer.buffer);
1882 	else
1883 		ring_buffer_record_off(tr->max_buffer.buffer);
1884 
1885 #ifdef CONFIG_TRACER_SNAPSHOT
1886 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1887 		arch_spin_unlock(&tr->max_lock);
1888 		return;
1889 	}
1890 #endif
1891 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1892 
1893 	__update_max_tr(tr, tsk, cpu);
1894 
1895 	arch_spin_unlock(&tr->max_lock);
1896 }
1897 
1898 /**
1899  * update_max_tr_single - only copy one trace over, and reset the rest
1900  * @tr: tracer
1901  * @tsk: task with the latency
1902  * @cpu: the cpu of the buffer to copy.
1903  *
1904  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1905  */
1906 void
1907 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1908 {
1909 	int ret;
1910 
1911 	if (tr->stop_count)
1912 		return;
1913 
1914 	WARN_ON_ONCE(!irqs_disabled());
1915 	if (!tr->allocated_snapshot) {
1916 		/* Only the nop tracer should hit this when disabling */
1917 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1918 		return;
1919 	}
1920 
1921 	arch_spin_lock(&tr->max_lock);
1922 
1923 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1924 
1925 	if (ret == -EBUSY) {
1926 		/*
1927 		 * We failed to swap the buffer due to a commit taking
1928 		 * place on this CPU. We fail to record, but we reset
1929 		 * the max trace buffer (no one writes directly to it)
1930 		 * and flag that it failed.
1931 		 * Another reason is resize is in progress.
1932 		 */
1933 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1934 			"Failed to swap buffers due to commit or resize in progress\n");
1935 	}
1936 
1937 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1938 
1939 	__update_max_tr(tr, tsk, cpu);
1940 	arch_spin_unlock(&tr->max_lock);
1941 }
1942 
1943 #endif /* CONFIG_TRACER_MAX_TRACE */
1944 
1945 static int wait_on_pipe(struct trace_iterator *iter, int full)
1946 {
1947 	/* Iterators are static, they should be filled or empty */
1948 	if (trace_buffer_iter(iter, iter->cpu_file))
1949 		return 0;
1950 
1951 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1952 				full);
1953 }
1954 
1955 #ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * Set to true by init_trace_selftests(). While it is false,
 * run_tracer_selftest() postpones the selftest instead of running it.
 */
static bool selftests_can_run;

/* One postponed selftest, queued on postponed_selftests */
struct trace_selftests {
	struct list_head		list;	/* link in postponed_selftests */
	struct tracer			*type;	/* tracer whose selftest is pending */
};

/* Selftests saved by save_selftest(), run later by init_trace_selftests() */
static LIST_HEAD(postponed_selftests);
1964 
1965 static int save_selftest(struct tracer *type)
1966 {
1967 	struct trace_selftests *selftest;
1968 
1969 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1970 	if (!selftest)
1971 		return -ENOMEM;
1972 
1973 	selftest->type = type;
1974 	list_add(&selftest->list, &postponed_selftests);
1975 	return 0;
1976 }
1977 
1978 static int run_tracer_selftest(struct tracer *type)
1979 {
1980 	struct trace_array *tr = &global_trace;
1981 	struct tracer *saved_tracer = tr->current_trace;
1982 	int ret;
1983 
1984 	if (!type->selftest || tracing_selftest_disabled)
1985 		return 0;
1986 
1987 	/*
1988 	 * If a tracer registers early in boot up (before scheduling is
1989 	 * initialized and such), then do not run its selftests yet.
1990 	 * Instead, run it a little later in the boot process.
1991 	 */
1992 	if (!selftests_can_run)
1993 		return save_selftest(type);
1994 
1995 	if (!tracing_is_on()) {
1996 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1997 			type->name);
1998 		return 0;
1999 	}
2000 
2001 	/*
2002 	 * Run a selftest on this tracer.
2003 	 * Here we reset the trace buffer, and set the current
2004 	 * tracer to be this tracer. The tracer can then run some
2005 	 * internal tracing to verify that everything is in order.
2006 	 * If we fail, we do not register this tracer.
2007 	 */
2008 	tracing_reset_online_cpus(&tr->array_buffer);
2009 
2010 	tr->current_trace = type;
2011 
2012 #ifdef CONFIG_TRACER_MAX_TRACE
2013 	if (type->use_max_tr) {
2014 		/* If we expanded the buffers, make sure the max is expanded too */
2015 		if (ring_buffer_expanded)
2016 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2017 					   RING_BUFFER_ALL_CPUS);
2018 		tr->allocated_snapshot = true;
2019 	}
2020 #endif
2021 
2022 	/* the test is responsible for initializing and enabling */
2023 	pr_info("Testing tracer %s: ", type->name);
2024 	ret = type->selftest(type, tr);
2025 	/* the test is responsible for resetting too */
2026 	tr->current_trace = saved_tracer;
2027 	if (ret) {
2028 		printk(KERN_CONT "FAILED!\n");
2029 		/* Add the warning after printing 'FAILED' */
2030 		WARN_ON(1);
2031 		return -1;
2032 	}
2033 	/* Only reset on passing, to avoid touching corrupted buffers */
2034 	tracing_reset_online_cpus(&tr->array_buffer);
2035 
2036 #ifdef CONFIG_TRACER_MAX_TRACE
2037 	if (type->use_max_tr) {
2038 		tr->allocated_snapshot = false;
2039 
2040 		/* Shrink the max buffer again */
2041 		if (ring_buffer_expanded)
2042 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2043 					   RING_BUFFER_ALL_CPUS);
2044 	}
2045 #endif
2046 
2047 	printk(KERN_CONT "PASSED\n");
2048 	return 0;
2049 }
2050 
2051 static int do_run_tracer_selftest(struct tracer *type)
2052 {
2053 	int ret;
2054 
2055 	/*
2056 	 * Tests can take a long time, especially if they are run one after the
2057 	 * other, as does happen during bootup when all the tracers are
2058 	 * registered. This could cause the soft lockup watchdog to trigger.
2059 	 */
2060 	cond_resched();
2061 
2062 	tracing_selftest_running = true;
2063 	ret = run_tracer_selftest(type);
2064 	tracing_selftest_running = false;
2065 
2066 	return ret;
2067 }
2068 
2069 static __init int init_trace_selftests(void)
2070 {
2071 	struct trace_selftests *p, *n;
2072 	struct tracer *t, **last;
2073 	int ret;
2074 
2075 	selftests_can_run = true;
2076 
2077 	mutex_lock(&trace_types_lock);
2078 
2079 	if (list_empty(&postponed_selftests))
2080 		goto out;
2081 
2082 	pr_info("Running postponed tracer tests:\n");
2083 
2084 	tracing_selftest_running = true;
2085 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2086 		/* This loop can take minutes when sanitizers are enabled, so
2087 		 * lets make sure we allow RCU processing.
2088 		 */
2089 		cond_resched();
2090 		ret = run_tracer_selftest(p->type);
2091 		/* If the test fails, then warn and remove from available_tracers */
2092 		if (ret < 0) {
2093 			WARN(1, "tracer: %s failed selftest, disabling\n",
2094 			     p->type->name);
2095 			last = &trace_types;
2096 			for (t = trace_types; t; t = t->next) {
2097 				if (t == p->type) {
2098 					*last = t->next;
2099 					break;
2100 				}
2101 				last = &t->next;
2102 			}
2103 		}
2104 		list_del(&p->list);
2105 		kfree(p);
2106 	}
2107 	tracing_selftest_running = false;
2108 
2109  out:
2110 	mutex_unlock(&trace_types_lock);
2111 
2112 	return 0;
2113 }
2114 core_initcall(init_trace_selftests);
2115 #else
/* Stub used when CONFIG_FTRACE_STARTUP_TEST is not set: always reports success. */
static inline int run_tracer_selftest(struct tracer *type)
{
	return 0;
}
/* Stub used when CONFIG_FTRACE_STARTUP_TEST is not set: always reports success. */
static inline int do_run_tracer_selftest(struct tracer *type)
{
	return 0;
}
2124 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2125 
2126 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2127 
2128 static void __init apply_trace_boot_options(void);
2129 
2130 /**
2131  * register_tracer - register a tracer with the ftrace system.
2132  * @type: the plugin for the tracer
2133  *
2134  * Register a new plugin tracer.
2135  */
2136 int __init register_tracer(struct tracer *type)
2137 {
2138 	struct tracer *t;
2139 	int ret = 0;
2140 
2141 	if (!type->name) {
2142 		pr_info("Tracer must have a name\n");
2143 		return -1;
2144 	}
2145 
2146 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2147 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2148 		return -1;
2149 	}
2150 
2151 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2152 		pr_warn("Can not register tracer %s due to lockdown\n",
2153 			   type->name);
2154 		return -EPERM;
2155 	}
2156 
2157 	mutex_lock(&trace_types_lock);
2158 
2159 	for (t = trace_types; t; t = t->next) {
2160 		if (strcmp(type->name, t->name) == 0) {
2161 			/* already found */
2162 			pr_info("Tracer %s already registered\n",
2163 				type->name);
2164 			ret = -1;
2165 			goto out;
2166 		}
2167 	}
2168 
2169 	if (!type->set_flag)
2170 		type->set_flag = &dummy_set_flag;
2171 	if (!type->flags) {
2172 		/*allocate a dummy tracer_flags*/
2173 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2174 		if (!type->flags) {
2175 			ret = -ENOMEM;
2176 			goto out;
2177 		}
2178 		type->flags->val = 0;
2179 		type->flags->opts = dummy_tracer_opt;
2180 	} else
2181 		if (!type->flags->opts)
2182 			type->flags->opts = dummy_tracer_opt;
2183 
2184 	/* store the tracer for __set_tracer_option */
2185 	type->flags->trace = type;
2186 
2187 	ret = do_run_tracer_selftest(type);
2188 	if (ret < 0)
2189 		goto out;
2190 
2191 	type->next = trace_types;
2192 	trace_types = type;
2193 	add_tracer_options(&global_trace, type);
2194 
2195  out:
2196 	mutex_unlock(&trace_types_lock);
2197 
2198 	if (ret || !default_bootup_tracer)
2199 		goto out_unlock;
2200 
2201 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2202 		goto out_unlock;
2203 
2204 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2205 	/* Do we want this tracer to start on bootup? */
2206 	tracing_set_tracer(&global_trace, type->name);
2207 	default_bootup_tracer = NULL;
2208 
2209 	apply_trace_boot_options();
2210 
2211 	/* disable other selftests, since this will break it. */
2212 	disable_tracing_selftest("running a tracer");
2213 
2214  out_unlock:
2215 	return ret;
2216 }
2217 
2218 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2219 {
2220 	struct trace_buffer *buffer = buf->buffer;
2221 
2222 	if (!buffer)
2223 		return;
2224 
2225 	ring_buffer_record_disable(buffer);
2226 
2227 	/* Make sure all commits have finished */
2228 	synchronize_rcu();
2229 	ring_buffer_reset_cpu(buffer, cpu);
2230 
2231 	ring_buffer_record_enable(buffer);
2232 }
2233 
2234 void tracing_reset_online_cpus(struct array_buffer *buf)
2235 {
2236 	struct trace_buffer *buffer = buf->buffer;
2237 
2238 	if (!buffer)
2239 		return;
2240 
2241 	ring_buffer_record_disable(buffer);
2242 
2243 	/* Make sure all commits have finished */
2244 	synchronize_rcu();
2245 
2246 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2247 
2248 	ring_buffer_reset_online_cpus(buffer);
2249 
2250 	ring_buffer_record_enable(buffer);
2251 }
2252 
2253 /* Must have trace_types_lock held */
2254 void tracing_reset_all_online_cpus_unlocked(void)
2255 {
2256 	struct trace_array *tr;
2257 
2258 	lockdep_assert_held(&trace_types_lock);
2259 
2260 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2261 		if (!tr->clear_trace)
2262 			continue;
2263 		tr->clear_trace = false;
2264 		tracing_reset_online_cpus(&tr->array_buffer);
2265 #ifdef CONFIG_TRACER_MAX_TRACE
2266 		tracing_reset_online_cpus(&tr->max_buffer);
2267 #endif
2268 	}
2269 }
2270 
2271 void tracing_reset_all_online_cpus(void)
2272 {
2273 	mutex_lock(&trace_types_lock);
2274 	tracing_reset_all_online_cpus_unlocked();
2275 	mutex_unlock(&trace_types_lock);
2276 }
2277 
2278 /*
2279  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2280  * is the tgid last observed corresponding to pid=i.
2281  */
2282 static int *tgid_map;
2283 
2284 /* The maximum valid index into tgid_map. */
2285 static size_t tgid_map_max;
2286 
2287 #define SAVED_CMDLINES_DEFAULT 128
2288 #define NO_CMDLINE_MAP UINT_MAX
2289 /*
2290  * Preemption must be disabled before acquiring trace_cmdline_lock.
2291  * The various trace_arrays' max_lock must be acquired in a context
2292  * where interrupt is disabled.
2293  */
2294 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
/*
 * Cache of recently seen task command names.
 *
 * A pid is folded to an index with (pid & (PID_MAX_DEFAULT - 1)) and looked
 * up in map_pid_to_cmdline to find a slot; the slot's real owner is stored
 * in map_cmdline_to_pid so a folded-pid collision can be detected.
 */
struct saved_cmdlines_buffer {
	/* folded pid -> slot number, or NO_CMDLINE_MAP when unassigned */
	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
	/* slot number -> pid whose comm is stored there (cmdline_num entries) */
	unsigned *map_cmdline_to_pid;
	/* number of slots */
	unsigned cmdline_num;
	/* most recently assigned slot; the next new pid takes the following one */
	int cmdline_idx;
	/* cmdline_num slots of TASK_COMM_LEN bytes each */
	char *saved_cmdlines;
};
2302 static struct saved_cmdlines_buffer *savedcmd;
2303 
2304 static inline char *get_saved_cmdlines(int idx)
2305 {
2306 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2307 }
2308 
2309 static inline void set_cmdline(int idx, const char *cmdline)
2310 {
2311 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2312 }
2313 
2314 static int allocate_cmdlines_buffer(unsigned int val,
2315 				    struct saved_cmdlines_buffer *s)
2316 {
2317 	s->map_cmdline_to_pid = kmalloc_array(val,
2318 					      sizeof(*s->map_cmdline_to_pid),
2319 					      GFP_KERNEL);
2320 	if (!s->map_cmdline_to_pid)
2321 		return -ENOMEM;
2322 
2323 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2324 	if (!s->saved_cmdlines) {
2325 		kfree(s->map_cmdline_to_pid);
2326 		return -ENOMEM;
2327 	}
2328 
2329 	s->cmdline_idx = 0;
2330 	s->cmdline_num = val;
2331 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2332 	       sizeof(s->map_pid_to_cmdline));
2333 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2334 	       val * sizeof(*s->map_cmdline_to_pid));
2335 
2336 	return 0;
2337 }
2338 
2339 static int trace_create_savedcmd(void)
2340 {
2341 	int ret;
2342 
2343 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2344 	if (!savedcmd)
2345 		return -ENOMEM;
2346 
2347 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2348 	if (ret < 0) {
2349 		kfree(savedcmd);
2350 		savedcmd = NULL;
2351 		return -ENOMEM;
2352 	}
2353 
2354 	return 0;
2355 }
2356 
2357 int is_tracing_stopped(void)
2358 {
2359 	return global_trace.stop_count;
2360 }
2361 
2362 /**
2363  * tracing_start - quick start of the tracer
2364  *
2365  * If tracing is enabled but was stopped by tracing_stop,
2366  * this will start the tracer back up.
2367  */
2368 void tracing_start(void)
2369 {
2370 	struct trace_buffer *buffer;
2371 	unsigned long flags;
2372 
2373 	if (tracing_disabled)
2374 		return;
2375 
2376 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2377 	if (--global_trace.stop_count) {
2378 		if (global_trace.stop_count < 0) {
2379 			/* Someone screwed up their debugging */
2380 			WARN_ON_ONCE(1);
2381 			global_trace.stop_count = 0;
2382 		}
2383 		goto out;
2384 	}
2385 
2386 	/* Prevent the buffers from switching */
2387 	arch_spin_lock(&global_trace.max_lock);
2388 
2389 	buffer = global_trace.array_buffer.buffer;
2390 	if (buffer)
2391 		ring_buffer_record_enable(buffer);
2392 
2393 #ifdef CONFIG_TRACER_MAX_TRACE
2394 	buffer = global_trace.max_buffer.buffer;
2395 	if (buffer)
2396 		ring_buffer_record_enable(buffer);
2397 #endif
2398 
2399 	arch_spin_unlock(&global_trace.max_lock);
2400 
2401  out:
2402 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2403 }
2404 
2405 static void tracing_start_tr(struct trace_array *tr)
2406 {
2407 	struct trace_buffer *buffer;
2408 	unsigned long flags;
2409 
2410 	if (tracing_disabled)
2411 		return;
2412 
2413 	/* If global, we need to also start the max tracer */
2414 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2415 		return tracing_start();
2416 
2417 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2418 
2419 	if (--tr->stop_count) {
2420 		if (tr->stop_count < 0) {
2421 			/* Someone screwed up their debugging */
2422 			WARN_ON_ONCE(1);
2423 			tr->stop_count = 0;
2424 		}
2425 		goto out;
2426 	}
2427 
2428 	buffer = tr->array_buffer.buffer;
2429 	if (buffer)
2430 		ring_buffer_record_enable(buffer);
2431 
2432  out:
2433 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2434 }
2435 
2436 /**
2437  * tracing_stop - quick stop of the tracer
2438  *
2439  * Light weight way to stop tracing. Use in conjunction with
2440  * tracing_start.
2441  */
2442 void tracing_stop(void)
2443 {
2444 	struct trace_buffer *buffer;
2445 	unsigned long flags;
2446 
2447 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2448 	if (global_trace.stop_count++)
2449 		goto out;
2450 
2451 	/* Prevent the buffers from switching */
2452 	arch_spin_lock(&global_trace.max_lock);
2453 
2454 	buffer = global_trace.array_buffer.buffer;
2455 	if (buffer)
2456 		ring_buffer_record_disable(buffer);
2457 
2458 #ifdef CONFIG_TRACER_MAX_TRACE
2459 	buffer = global_trace.max_buffer.buffer;
2460 	if (buffer)
2461 		ring_buffer_record_disable(buffer);
2462 #endif
2463 
2464 	arch_spin_unlock(&global_trace.max_lock);
2465 
2466  out:
2467 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2468 }
2469 
2470 static void tracing_stop_tr(struct trace_array *tr)
2471 {
2472 	struct trace_buffer *buffer;
2473 	unsigned long flags;
2474 
2475 	/* If global, we need to also stop the max tracer */
2476 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2477 		return tracing_stop();
2478 
2479 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2480 	if (tr->stop_count++)
2481 		goto out;
2482 
2483 	buffer = tr->array_buffer.buffer;
2484 	if (buffer)
2485 		ring_buffer_record_disable(buffer);
2486 
2487  out:
2488 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2489 }
2490 
2491 static int trace_save_cmdline(struct task_struct *tsk)
2492 {
2493 	unsigned tpid, idx;
2494 
2495 	/* treat recording of idle task as a success */
2496 	if (!tsk->pid)
2497 		return 1;
2498 
2499 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2500 
2501 	/*
2502 	 * It's not the end of the world if we don't get
2503 	 * the lock, but we also don't want to spin
2504 	 * nor do we want to disable interrupts,
2505 	 * so if we miss here, then better luck next time.
2506 	 *
2507 	 * This is called within the scheduler and wake up, so interrupts
2508 	 * had better been disabled and run queue lock been held.
2509 	 */
2510 	lockdep_assert_preemption_disabled();
2511 	if (!arch_spin_trylock(&trace_cmdline_lock))
2512 		return 0;
2513 
2514 	idx = savedcmd->map_pid_to_cmdline[tpid];
2515 	if (idx == NO_CMDLINE_MAP) {
2516 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2517 
2518 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2519 		savedcmd->cmdline_idx = idx;
2520 	}
2521 
2522 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2523 	set_cmdline(idx, tsk->comm);
2524 
2525 	arch_spin_unlock(&trace_cmdline_lock);
2526 
2527 	return 1;
2528 }
2529 
2530 static void __trace_find_cmdline(int pid, char comm[])
2531 {
2532 	unsigned map;
2533 	int tpid;
2534 
2535 	if (!pid) {
2536 		strcpy(comm, "<idle>");
2537 		return;
2538 	}
2539 
2540 	if (WARN_ON_ONCE(pid < 0)) {
2541 		strcpy(comm, "<XXX>");
2542 		return;
2543 	}
2544 
2545 	tpid = pid & (PID_MAX_DEFAULT - 1);
2546 	map = savedcmd->map_pid_to_cmdline[tpid];
2547 	if (map != NO_CMDLINE_MAP) {
2548 		tpid = savedcmd->map_cmdline_to_pid[map];
2549 		if (tpid == pid) {
2550 			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2551 			return;
2552 		}
2553 	}
2554 	strcpy(comm, "<...>");
2555 }
2556 
2557 void trace_find_cmdline(int pid, char comm[])
2558 {
2559 	preempt_disable();
2560 	arch_spin_lock(&trace_cmdline_lock);
2561 
2562 	__trace_find_cmdline(pid, comm);
2563 
2564 	arch_spin_unlock(&trace_cmdline_lock);
2565 	preempt_enable();
2566 }
2567 
2568 static int *trace_find_tgid_ptr(int pid)
2569 {
2570 	/*
2571 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2572 	 * if we observe a non-NULL tgid_map then we also observe the correct
2573 	 * tgid_map_max.
2574 	 */
2575 	int *map = smp_load_acquire(&tgid_map);
2576 
2577 	if (unlikely(!map || pid > tgid_map_max))
2578 		return NULL;
2579 
2580 	return &map[pid];
2581 }
2582 
/* Return the tgid recorded for @pid, or 0 when there is no map entry for it. */
int trace_find_tgid(int pid)
{
	int *slot = trace_find_tgid_ptr(pid);

	if (!slot)
		return 0;

	return *slot;
}
2589 
2590 static int trace_save_tgid(struct task_struct *tsk)
2591 {
2592 	int *ptr;
2593 
2594 	/* treat recording of idle task as a success */
2595 	if (!tsk->pid)
2596 		return 1;
2597 
2598 	ptr = trace_find_tgid_ptr(tsk->pid);
2599 	if (!ptr)
2600 		return 0;
2601 
2602 	*ptr = tsk->tgid;
2603 	return 1;
2604 }
2605 
2606 static bool tracing_record_taskinfo_skip(int flags)
2607 {
2608 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2609 		return true;
2610 	if (!__this_cpu_read(trace_taskinfo_save))
2611 		return true;
2612 	return false;
2613 }
2614 
2615 /**
2616  * tracing_record_taskinfo - record the task info of a task
2617  *
2618  * @task:  task to record
2619  * @flags: TRACE_RECORD_CMDLINE for recording comm
2620  *         TRACE_RECORD_TGID for recording tgid
2621  */
2622 void tracing_record_taskinfo(struct task_struct *task, int flags)
2623 {
2624 	bool done;
2625 
2626 	if (tracing_record_taskinfo_skip(flags))
2627 		return;
2628 
2629 	/*
2630 	 * Record as much task information as possible. If some fail, continue
2631 	 * to try to record the others.
2632 	 */
2633 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2634 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2635 
2636 	/* If recording any information failed, retry again soon. */
2637 	if (!done)
2638 		return;
2639 
2640 	__this_cpu_write(trace_taskinfo_save, false);
2641 }
2642 
2643 /**
2644  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2645  *
2646  * @prev: previous task during sched_switch
2647  * @next: next task during sched_switch
2648  * @flags: TRACE_RECORD_CMDLINE for recording comm
2649  *         TRACE_RECORD_TGID for recording tgid
2650  */
2651 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2652 					  struct task_struct *next, int flags)
2653 {
2654 	bool done;
2655 
2656 	if (tracing_record_taskinfo_skip(flags))
2657 		return;
2658 
2659 	/*
2660 	 * Record as much task information as possible. If some fail, continue
2661 	 * to try to record the others.
2662 	 */
2663 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2664 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2665 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2666 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2667 
2668 	/* If recording any information failed, retry again soon. */
2669 	if (!done)
2670 		return;
2671 
2672 	__this_cpu_write(trace_taskinfo_save, false);
2673 }
2674 
2675 /* Helpers to record a specific task information */
2676 void tracing_record_cmdline(struct task_struct *task)
2677 {
2678 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2679 }
2680 
2681 void tracing_record_tgid(struct task_struct *task)
2682 {
2683 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2684 }
2685 
2686 /*
2687  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2688  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2689  * simplifies those functions and keeps them in sync.
2690  */
2691 enum print_line_t trace_handle_return(struct trace_seq *s)
2692 {
2693 	return trace_seq_has_overflowed(s) ?
2694 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2695 }
2696 EXPORT_SYMBOL_GPL(trace_handle_return);
2697 
/*
 * Return current's migration_disabled value on CONFIG_SMP builds, and 0
 * on builds without CONFIG_SMP.
 */
static unsigned short migration_disable_value(void)
{
#ifdef CONFIG_SMP
	return current->migration_disabled;
#else
	return 0;
#endif
}
2706 
2707 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2708 {
2709 	unsigned int trace_flags = irqs_status;
2710 	unsigned int pc;
2711 
2712 	pc = preempt_count();
2713 
2714 	if (pc & NMI_MASK)
2715 		trace_flags |= TRACE_FLAG_NMI;
2716 	if (pc & HARDIRQ_MASK)
2717 		trace_flags |= TRACE_FLAG_HARDIRQ;
2718 	if (in_serving_softirq())
2719 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2720 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2721 		trace_flags |= TRACE_FLAG_BH_OFF;
2722 
2723 	if (tif_need_resched())
2724 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2725 	if (test_preempt_need_resched())
2726 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2727 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2728 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2729 }
2730 
/* Non-static wrapper that forwards to __trace_buffer_lock_reserve(). */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned int trace_ctx)
{
	struct ring_buffer_event *event;

	event = __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);

	return event;
}
2739 
2740 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2741 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2742 static int trace_buffered_event_ref;
2743 
2744 /**
2745  * trace_buffered_event_enable - enable buffering events
2746  *
2747  * When events are being filtered, it is quicker to use a temporary
2748  * buffer to write the event data into if there's a likely chance
2749  * that it will not be committed. The discard of the ring buffer
2750  * is not as fast as committing, and is much slower than copying
2751  * a commit.
2752  *
2753  * When an event is to be filtered, allocate per cpu buffers to
2754  * write the event data into, and if the event is filtered and discarded
2755  * it is simply dropped, otherwise, the entire data is to be committed
2756  * in one shot.
2757  */
2758 void trace_buffered_event_enable(void)
2759 {
2760 	struct ring_buffer_event *event;
2761 	struct page *page;
2762 	int cpu;
2763 
2764 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2765 
2766 	if (trace_buffered_event_ref++)
2767 		return;
2768 
2769 	for_each_tracing_cpu(cpu) {
2770 		page = alloc_pages_node(cpu_to_node(cpu),
2771 					GFP_KERNEL | __GFP_NORETRY, 0);
2772 		if (!page)
2773 			goto failed;
2774 
2775 		event = page_address(page);
2776 		memset(event, 0, sizeof(*event));
2777 
2778 		per_cpu(trace_buffered_event, cpu) = event;
2779 
2780 		preempt_disable();
2781 		if (cpu == smp_processor_id() &&
2782 		    __this_cpu_read(trace_buffered_event) !=
2783 		    per_cpu(trace_buffered_event, cpu))
2784 			WARN_ON_ONCE(1);
2785 		preempt_enable();
2786 	}
2787 
2788 	return;
2789  failed:
2790 	trace_buffered_event_disable();
2791 }
2792 
2793 static void enable_trace_buffered_event(void *data)
2794 {
2795 	/* Probably not needed, but do it anyway */
2796 	smp_rmb();
2797 	this_cpu_dec(trace_buffered_event_cnt);
2798 }
2799 
2800 static void disable_trace_buffered_event(void *data)
2801 {
2802 	this_cpu_inc(trace_buffered_event_cnt);
2803 }
2804 
2805 /**
2806  * trace_buffered_event_disable - disable buffering events
2807  *
2808  * When a filter is removed, it is faster to not use the buffered
2809  * events, and to commit directly into the ring buffer. Free up
2810  * the temp buffers when there are no more users. This requires
2811  * special synchronization with current events.
2812  */
2813 void trace_buffered_event_disable(void)
2814 {
2815 	int cpu;
2816 
2817 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2818 
2819 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2820 		return;
2821 
2822 	if (--trace_buffered_event_ref)
2823 		return;
2824 
2825 	preempt_disable();
2826 	/* For each CPU, set the buffer as used. */
2827 	smp_call_function_many(tracing_buffer_mask,
2828 			       disable_trace_buffered_event, NULL, 1);
2829 	preempt_enable();
2830 
2831 	/* Wait for all current users to finish */
2832 	synchronize_rcu();
2833 
2834 	for_each_tracing_cpu(cpu) {
2835 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2836 		per_cpu(trace_buffered_event, cpu) = NULL;
2837 	}
2838 	/*
2839 	 * Make sure trace_buffered_event is NULL before clearing
2840 	 * trace_buffered_event_cnt.
2841 	 */
2842 	smp_wmb();
2843 
2844 	preempt_disable();
2845 	/* Do the work on each cpu */
2846 	smp_call_function_many(tracing_buffer_mask,
2847 			       enable_trace_buffered_event, NULL, 1);
2848 	preempt_enable();
2849 }
2850 
2851 static struct trace_buffer *temp_buffer;
2852 
2853 struct ring_buffer_event *
2854 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2855 			  struct trace_event_file *trace_file,
2856 			  int type, unsigned long len,
2857 			  unsigned int trace_ctx)
2858 {
2859 	struct ring_buffer_event *entry;
2860 	struct trace_array *tr = trace_file->tr;
2861 	int val;
2862 
2863 	*current_rb = tr->array_buffer.buffer;
2864 
2865 	if (!tr->no_filter_buffering_ref &&
2866 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2867 		preempt_disable_notrace();
2868 		/*
2869 		 * Filtering is on, so try to use the per cpu buffer first.
2870 		 * This buffer will simulate a ring_buffer_event,
2871 		 * where the type_len is zero and the array[0] will
2872 		 * hold the full length.
2873 		 * (see include/linux/ring-buffer.h for details on
2874 		 *  how the ring_buffer_event is structured).
2875 		 *
2876 		 * Using a temp buffer during filtering and copying it
2877 		 * on a matched filter is quicker than writing directly
2878 		 * into the ring buffer and then discarding it when
2879 		 * it doesn't match. That is because the discard
2880 		 * requires several atomic operations to get right.
2881 		 * Copying on match and doing nothing on a failed match
2882 		 * is still quicker than no copy on match, but having
2883 		 * to discard out of the ring buffer on a failed match.
2884 		 */
2885 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2886 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2887 
2888 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2889 
2890 			/*
2891 			 * Preemption is disabled, but interrupts and NMIs
2892 			 * can still come in now. If that happens after
2893 			 * the above increment, then it will have to go
2894 			 * back to the old method of allocating the event
2895 			 * on the ring buffer, and if the filter fails, it
2896 			 * will have to call ring_buffer_discard_commit()
2897 			 * to remove it.
2898 			 *
2899 			 * Need to also check the unlikely case that the
2900 			 * length is bigger than the temp buffer size.
2901 			 * If that happens, then the reserve is pretty much
2902 			 * guaranteed to fail, as the ring buffer currently
2903 			 * only allows events less than a page. But that may
2904 			 * change in the future, so let the ring buffer reserve
2905 			 * handle the failure in that case.
2906 			 */
2907 			if (val == 1 && likely(len <= max_len)) {
2908 				trace_event_setup(entry, type, trace_ctx);
2909 				entry->array[0] = len;
2910 				/* Return with preemption disabled */
2911 				return entry;
2912 			}
2913 			this_cpu_dec(trace_buffered_event_cnt);
2914 		}
2915 		/* __trace_buffer_lock_reserve() disables preemption */
2916 		preempt_enable_notrace();
2917 	}
2918 
2919 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2920 					    trace_ctx);
2921 	/*
2922 	 * If tracing is off, but we have triggers enabled
2923 	 * we still need to look at the event data. Use the temp_buffer
2924 	 * to store the trace event for the trigger to use. It's recursive
2925 	 * safe and will not be recorded anywhere.
2926 	 */
2927 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2928 		*current_rb = temp_buffer;
2929 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2930 						    trace_ctx);
2931 	}
2932 	return entry;
2933 }
2934 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2935 
2936 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2937 static DEFINE_MUTEX(tracepoint_printk_mutex);
2938 
2939 static void output_printk(struct trace_event_buffer *fbuffer)
2940 {
2941 	struct trace_event_call *event_call;
2942 	struct trace_event_file *file;
2943 	struct trace_event *event;
2944 	unsigned long flags;
2945 	struct trace_iterator *iter = tracepoint_print_iter;
2946 
2947 	/* We should never get here if iter is NULL */
2948 	if (WARN_ON_ONCE(!iter))
2949 		return;
2950 
2951 	event_call = fbuffer->trace_file->event_call;
2952 	if (!event_call || !event_call->event.funcs ||
2953 	    !event_call->event.funcs->trace)
2954 		return;
2955 
2956 	file = fbuffer->trace_file;
2957 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2958 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2959 	     !filter_match_preds(file->filter, fbuffer->entry)))
2960 		return;
2961 
2962 	event = &fbuffer->trace_file->event_call->event;
2963 
2964 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2965 	trace_seq_init(&iter->seq);
2966 	iter->ent = fbuffer->entry;
2967 	event_call->event.funcs->trace(iter, 0, event);
2968 	trace_seq_putc(&iter->seq, 0);
2969 	printk("%s", iter->seq.buffer);
2970 
2971 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2972 }
2973 
2974 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2975 			     void *buffer, size_t *lenp,
2976 			     loff_t *ppos)
2977 {
2978 	int save_tracepoint_printk;
2979 	int ret;
2980 
2981 	mutex_lock(&tracepoint_printk_mutex);
2982 	save_tracepoint_printk = tracepoint_printk;
2983 
2984 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2985 
2986 	/*
2987 	 * This will force exiting early, as tracepoint_printk
2988 	 * is always zero when tracepoint_printk_iter is not allocated
2989 	 */
2990 	if (!tracepoint_print_iter)
2991 		tracepoint_printk = 0;
2992 
2993 	if (save_tracepoint_printk == tracepoint_printk)
2994 		goto out;
2995 
2996 	if (tracepoint_printk)
2997 		static_key_enable(&tracepoint_printk_key.key);
2998 	else
2999 		static_key_disable(&tracepoint_printk_key.key);
3000 
3001  out:
3002 	mutex_unlock(&tracepoint_printk_mutex);
3003 
3004 	return ret;
3005 }
3006 
3007 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
3008 {
3009 	enum event_trigger_type tt = ETT_NONE;
3010 	struct trace_event_file *file = fbuffer->trace_file;
3011 
3012 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
3013 			fbuffer->entry, &tt))
3014 		goto discard;
3015 
3016 	if (static_key_false(&tracepoint_printk_key.key))
3017 		output_printk(fbuffer);
3018 
3019 	if (static_branch_unlikely(&trace_event_exports_enabled))
3020 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3021 
3022 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3023 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3024 
3025 discard:
3026 	if (tt)
3027 		event_triggers_post_call(file, tt);
3028 
3029 }
3030 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3031 
3032 /*
3033  * Skip 3:
3034  *
3035  *   trace_buffer_unlock_commit_regs()
3036  *   trace_event_buffer_commit()
3037  *   trace_event_raw_event_xxx()
3038  */
3039 # define STACK_SKIP 3
3040 
3041 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3042 				     struct trace_buffer *buffer,
3043 				     struct ring_buffer_event *event,
3044 				     unsigned int trace_ctx,
3045 				     struct pt_regs *regs)
3046 {
3047 	__buffer_unlock_commit(buffer, event);
3048 
3049 	/*
3050 	 * If regs is not set, then skip the necessary functions.
3051 	 * Note, we can still get here via blktrace, wakeup tracer
3052 	 * and mmiotrace, but that's ok if they lose a function or
3053 	 * two. They are not that meaningful.
3054 	 */
3055 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3056 	ftrace_trace_userstack(tr, buffer, trace_ctx);
3057 }
3058 
/*
 * Commit @event to @buffer without recording any stack trace; the
 * stack-less counterpart of trace_buffer_unlock_commit_regs().
 */
void
trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
				   struct ring_buffer_event *event)
{
	/* Commit only, no stack dump */
	__buffer_unlock_commit(buffer, event);
}
3068 
3069 void
3070 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3071 	       parent_ip, unsigned int trace_ctx)
3072 {
3073 	struct trace_event_call *call = &event_function;
3074 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3075 	struct ring_buffer_event *event;
3076 	struct ftrace_entry *entry;
3077 
3078 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3079 					    trace_ctx);
3080 	if (!event)
3081 		return;
3082 	entry	= ring_buffer_event_data(event);
3083 	entry->ip			= ip;
3084 	entry->parent_ip		= parent_ip;
3085 
3086 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3087 		if (static_branch_unlikely(&trace_function_exports_enabled))
3088 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3089 		__buffer_unlock_commit(buffer, event);
3090 	}
3091 }
3092 
3093 #ifdef CONFIG_STACKTRACE
3094 
3095 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3096 #define FTRACE_KSTACK_NESTING	4
3097 
3098 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
3099 
/* Scratch array that one stack trace is saved into before being copied out. */
struct ftrace_stack {
	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
};


/* One scratch stack per nesting level, FTRACE_KSTACK_NESTING in total. */
struct ftrace_stacks {
	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
};
3108 
3109 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3110 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3111 
3112 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3113 				 unsigned int trace_ctx,
3114 				 int skip, struct pt_regs *regs)
3115 {
3116 	struct trace_event_call *call = &event_kernel_stack;
3117 	struct ring_buffer_event *event;
3118 	unsigned int size, nr_entries;
3119 	struct ftrace_stack *fstack;
3120 	struct stack_entry *entry;
3121 	int stackidx;
3122 
3123 	/*
3124 	 * Add one, for this function and the call to save_stack_trace()
3125 	 * If regs is set, then these functions will not be in the way.
3126 	 */
3127 #ifndef CONFIG_UNWINDER_ORC
3128 	if (!regs)
3129 		skip++;
3130 #endif
3131 
3132 	preempt_disable_notrace();
3133 
3134 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3135 
3136 	/* This should never happen. If it does, yell once and skip */
3137 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3138 		goto out;
3139 
3140 	/*
3141 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3142 	 * interrupt will either see the value pre increment or post
3143 	 * increment. If the interrupt happens pre increment it will have
3144 	 * restored the counter when it returns.  We just need a barrier to
3145 	 * keep gcc from moving things around.
3146 	 */
3147 	barrier();
3148 
3149 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3150 	size = ARRAY_SIZE(fstack->calls);
3151 
3152 	if (regs) {
3153 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3154 						   size, skip);
3155 	} else {
3156 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3157 	}
3158 
3159 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3160 				    struct_size(entry, caller, nr_entries),
3161 				    trace_ctx);
3162 	if (!event)
3163 		goto out;
3164 	entry = ring_buffer_event_data(event);
3165 
3166 	entry->size = nr_entries;
3167 	memcpy(&entry->caller, fstack->calls,
3168 	       flex_array_size(entry, caller, nr_entries));
3169 
3170 	if (!call_filter_check_discard(call, entry, buffer, event))
3171 		__buffer_unlock_commit(buffer, event);
3172 
3173  out:
3174 	/* Again, don't let gcc optimize things here */
3175 	barrier();
3176 	__this_cpu_dec(ftrace_stack_reserve);
3177 	preempt_enable_notrace();
3178 
3179 }
3180 
3181 static inline void ftrace_trace_stack(struct trace_array *tr,
3182 				      struct trace_buffer *buffer,
3183 				      unsigned int trace_ctx,
3184 				      int skip, struct pt_regs *regs)
3185 {
3186 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3187 		return;
3188 
3189 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3190 }
3191 
3192 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3193 		   int skip)
3194 {
3195 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3196 
3197 	if (rcu_is_watching()) {
3198 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3199 		return;
3200 	}
3201 
3202 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3203 		return;
3204 
3205 	/*
3206 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3207 	 * but if the above rcu_is_watching() failed, then the NMI
3208 	 * triggered someplace critical, and ct_irq_enter() should
3209 	 * not be called from NMI.
3210 	 */
3211 	if (unlikely(in_nmi()))
3212 		return;
3213 
3214 	ct_irq_enter_irqson();
3215 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3216 	ct_irq_exit_irqson();
3217 }
3218 
3219 /**
3220  * trace_dump_stack - record a stack back trace in the trace buffer
3221  * @skip: Number of functions to skip (helper handlers)
3222  */
3223 void trace_dump_stack(int skip)
3224 {
3225 	if (tracing_disabled || tracing_selftest_running)
3226 		return;
3227 
3228 #ifndef CONFIG_UNWINDER_ORC
3229 	/* Skip 1 to skip this function. */
3230 	skip++;
3231 #endif
3232 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3233 			     tracing_gen_ctx(), skip, NULL);
3234 }
3235 EXPORT_SYMBOL_GPL(trace_dump_stack);
3236 
3237 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3238 static DEFINE_PER_CPU(int, user_stack_count);
3239 
3240 static void
3241 ftrace_trace_userstack(struct trace_array *tr,
3242 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3243 {
3244 	struct trace_event_call *call = &event_user_stack;
3245 	struct ring_buffer_event *event;
3246 	struct userstack_entry *entry;
3247 
3248 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3249 		return;
3250 
3251 	/*
3252 	 * NMIs can not handle page faults, even with fix ups.
3253 	 * The save user stack can (and often does) fault.
3254 	 */
3255 	if (unlikely(in_nmi()))
3256 		return;
3257 
3258 	/*
3259 	 * prevent recursion, since the user stack tracing may
3260 	 * trigger other kernel events.
3261 	 */
3262 	preempt_disable();
3263 	if (__this_cpu_read(user_stack_count))
3264 		goto out;
3265 
3266 	__this_cpu_inc(user_stack_count);
3267 
3268 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3269 					    sizeof(*entry), trace_ctx);
3270 	if (!event)
3271 		goto out_drop_count;
3272 	entry	= ring_buffer_event_data(event);
3273 
3274 	entry->tgid		= current->tgid;
3275 	memset(&entry->caller, 0, sizeof(entry->caller));
3276 
3277 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3278 	if (!call_filter_check_discard(call, entry, buffer, event))
3279 		__buffer_unlock_commit(buffer, event);
3280 
3281  out_drop_count:
3282 	__this_cpu_dec(user_stack_count);
3283  out:
3284 	preempt_enable();
3285 }
3286 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* Stub used when CONFIG_USER_STACKTRACE_SUPPORT is not set: records nothing. */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
3292 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3293 
3294 #endif /* CONFIG_STACKTRACE */
3295 
3296 static inline void
3297 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3298 			  unsigned long long delta)
3299 {
3300 	entry->bottom_delta_ts = delta & U32_MAX;
3301 	entry->top_delta_ts = (delta >> 32);
3302 }
3303 
3304 void trace_last_func_repeats(struct trace_array *tr,
3305 			     struct trace_func_repeats *last_info,
3306 			     unsigned int trace_ctx)
3307 {
3308 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3309 	struct func_repeats_entry *entry;
3310 	struct ring_buffer_event *event;
3311 	u64 delta;
3312 
3313 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3314 					    sizeof(*entry), trace_ctx);
3315 	if (!event)
3316 		return;
3317 
3318 	delta = ring_buffer_event_time_stamp(buffer, event) -
3319 		last_info->ts_last_call;
3320 
3321 	entry = ring_buffer_event_data(event);
3322 	entry->ip = last_info->ip;
3323 	entry->parent_ip = last_info->parent_ip;
3324 	entry->count = last_info->count;
3325 	func_repeats_set_delta_ts(entry, delta);
3326 
3327 	__buffer_unlock_commit(buffer, event);
3328 }
3329 
3330 /* created for use with alloc_percpu */
3331 struct trace_buffer_struct {
3332 	int nesting;
3333 	char buffer[4][TRACE_BUF_SIZE];
3334 };
3335 
3336 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3337 
3338 /*
3339  * This allows for lockless recording.  If we're nested too deeply, then
3340  * this returns NULL.
3341  */
3342 static char *get_trace_buf(void)
3343 {
3344 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3345 
3346 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3347 		return NULL;
3348 
3349 	buffer->nesting++;
3350 
3351 	/* Interrupts must see nesting incremented before we use the buffer */
3352 	barrier();
3353 	return &buffer->buffer[buffer->nesting - 1][0];
3354 }
3355 
3356 static void put_trace_buf(void)
3357 {
3358 	/* Don't let the decrement of nesting leak before this */
3359 	barrier();
3360 	this_cpu_dec(trace_percpu_buffer->nesting);
3361 }
3362 
3363 static int alloc_percpu_trace_buffer(void)
3364 {
3365 	struct trace_buffer_struct __percpu *buffers;
3366 
3367 	if (trace_percpu_buffer)
3368 		return 0;
3369 
3370 	buffers = alloc_percpu(struct trace_buffer_struct);
3371 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3372 		return -ENOMEM;
3373 
3374 	trace_percpu_buffer = buffers;
3375 	return 0;
3376 }
3377 
3378 static int buffers_allocated;
3379 
3380 void trace_printk_init_buffers(void)
3381 {
3382 	if (buffers_allocated)
3383 		return;
3384 
3385 	if (alloc_percpu_trace_buffer())
3386 		return;
3387 
3388 	/* trace_printk() is for debug use only. Don't use it in production. */
3389 
3390 	pr_warn("\n");
3391 	pr_warn("**********************************************************\n");
3392 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3393 	pr_warn("**                                                      **\n");
3394 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3395 	pr_warn("**                                                      **\n");
3396 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3397 	pr_warn("** unsafe for production use.                           **\n");
3398 	pr_warn("**                                                      **\n");
3399 	pr_warn("** If you see this message and you are not debugging    **\n");
3400 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3401 	pr_warn("**                                                      **\n");
3402 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3403 	pr_warn("**********************************************************\n");
3404 
3405 	/* Expand the buffers to set size */
3406 	tracing_update_buffers();
3407 
3408 	buffers_allocated = 1;
3409 
3410 	/*
3411 	 * trace_printk_init_buffers() can be called by modules.
3412 	 * If that happens, then we need to start cmdline recording
3413 	 * directly here. If the global_trace.buffer is already
3414 	 * allocated here, then this was called by module code.
3415 	 */
3416 	if (global_trace.array_buffer.buffer)
3417 		tracing_start_cmdline_record();
3418 }
3419 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3420 
3421 void trace_printk_start_comm(void)
3422 {
3423 	/* Start tracing comms if trace printk is set */
3424 	if (!buffers_allocated)
3425 		return;
3426 	tracing_start_cmdline_record();
3427 }
3428 
3429 static void trace_printk_start_stop_comm(int enabled)
3430 {
3431 	if (!buffers_allocated)
3432 		return;
3433 
3434 	if (enabled)
3435 		tracing_start_cmdline_record();
3436 	else
3437 		tracing_stop_cmdline_record();
3438 }
3439 
3440 /**
3441  * trace_vbprintk - write binary msg to tracing buffer
3442  * @ip:    The address of the caller
3443  * @fmt:   The string format to write to the buffer
3444  * @args:  Arguments for @fmt
3445  */
3446 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3447 {
3448 	struct trace_event_call *call = &event_bprint;
3449 	struct ring_buffer_event *event;
3450 	struct trace_buffer *buffer;
3451 	struct trace_array *tr = &global_trace;
3452 	struct bprint_entry *entry;
3453 	unsigned int trace_ctx;
3454 	char *tbuffer;
3455 	int len = 0, size;
3456 
3457 	if (unlikely(tracing_selftest_running || tracing_disabled))
3458 		return 0;
3459 
3460 	/* Don't pollute graph traces with trace_vprintk internals */
3461 	pause_graph_tracing();
3462 
3463 	trace_ctx = tracing_gen_ctx();
3464 	preempt_disable_notrace();
3465 
3466 	tbuffer = get_trace_buf();
3467 	if (!tbuffer) {
3468 		len = 0;
3469 		goto out_nobuffer;
3470 	}
3471 
3472 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3473 
3474 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3475 		goto out_put;
3476 
3477 	size = sizeof(*entry) + sizeof(u32) * len;
3478 	buffer = tr->array_buffer.buffer;
3479 	ring_buffer_nest_start(buffer);
3480 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3481 					    trace_ctx);
3482 	if (!event)
3483 		goto out;
3484 	entry = ring_buffer_event_data(event);
3485 	entry->ip			= ip;
3486 	entry->fmt			= fmt;
3487 
3488 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3489 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3490 		__buffer_unlock_commit(buffer, event);
3491 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3492 	}
3493 
3494 out:
3495 	ring_buffer_nest_end(buffer);
3496 out_put:
3497 	put_trace_buf();
3498 
3499 out_nobuffer:
3500 	preempt_enable_notrace();
3501 	unpause_graph_tracing();
3502 
3503 	return len;
3504 }
3505 EXPORT_SYMBOL_GPL(trace_vbprintk);
3506 
3507 __printf(3, 0)
3508 static int
3509 __trace_array_vprintk(struct trace_buffer *buffer,
3510 		      unsigned long ip, const char *fmt, va_list args)
3511 {
3512 	struct trace_event_call *call = &event_print;
3513 	struct ring_buffer_event *event;
3514 	int len = 0, size;
3515 	struct print_entry *entry;
3516 	unsigned int trace_ctx;
3517 	char *tbuffer;
3518 
3519 	if (tracing_disabled)
3520 		return 0;
3521 
3522 	/* Don't pollute graph traces with trace_vprintk internals */
3523 	pause_graph_tracing();
3524 
3525 	trace_ctx = tracing_gen_ctx();
3526 	preempt_disable_notrace();
3527 
3528 
3529 	tbuffer = get_trace_buf();
3530 	if (!tbuffer) {
3531 		len = 0;
3532 		goto out_nobuffer;
3533 	}
3534 
3535 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3536 
3537 	size = sizeof(*entry) + len + 1;
3538 	ring_buffer_nest_start(buffer);
3539 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3540 					    trace_ctx);
3541 	if (!event)
3542 		goto out;
3543 	entry = ring_buffer_event_data(event);
3544 	entry->ip = ip;
3545 
3546 	memcpy(&entry->buf, tbuffer, len + 1);
3547 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3548 		__buffer_unlock_commit(buffer, event);
3549 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3550 	}
3551 
3552 out:
3553 	ring_buffer_nest_end(buffer);
3554 	put_trace_buf();
3555 
3556 out_nobuffer:
3557 	preempt_enable_notrace();
3558 	unpause_graph_tracing();
3559 
3560 	return len;
3561 }
3562 
3563 __printf(3, 0)
3564 int trace_array_vprintk(struct trace_array *tr,
3565 			unsigned long ip, const char *fmt, va_list args)
3566 {
3567 	if (tracing_selftest_running && tr == &global_trace)
3568 		return 0;
3569 
3570 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3571 }
3572 
3573 /**
3574  * trace_array_printk - Print a message to a specific instance
3575  * @tr: The instance trace_array descriptor
3576  * @ip: The instruction pointer that this is called from.
3577  * @fmt: The format to print (printf format)
3578  *
3579  * If a subsystem sets up its own instance, they have the right to
3580  * printk strings into their tracing instance buffer using this
3581  * function. Note, this function will not write into the top level
3582  * buffer (use trace_printk() for that), as writing into the top level
3583  * buffer should only have events that can be individually disabled.
3584  * trace_printk() is only used for debugging a kernel, and should not
3585  * be ever incorporated in normal use.
3586  *
3587  * trace_array_printk() can be used, as it will not add noise to the
3588  * top level tracing buffer.
3589  *
3590  * Note, trace_array_init_printk() must be called on @tr before this
3591  * can be used.
3592  */
3593 __printf(3, 0)
3594 int trace_array_printk(struct trace_array *tr,
3595 		       unsigned long ip, const char *fmt, ...)
3596 {
3597 	int ret;
3598 	va_list ap;
3599 
3600 	if (!tr)
3601 		return -ENOENT;
3602 
3603 	/* This is only allowed for created instances */
3604 	if (tr == &global_trace)
3605 		return 0;
3606 
3607 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3608 		return 0;
3609 
3610 	va_start(ap, fmt);
3611 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3612 	va_end(ap);
3613 	return ret;
3614 }
3615 EXPORT_SYMBOL_GPL(trace_array_printk);
3616 
3617 /**
3618  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3619  * @tr: The trace array to initialize the buffers for
3620  *
3621  * As trace_array_printk() only writes into instances, they are OK to
3622  * have in the kernel (unlike trace_printk()). This needs to be called
3623  * before trace_array_printk() can be used on a trace_array.
3624  */
3625 int trace_array_init_printk(struct trace_array *tr)
3626 {
3627 	if (!tr)
3628 		return -ENOENT;
3629 
3630 	/* This is only allowed for created instances */
3631 	if (tr == &global_trace)
3632 		return -EINVAL;
3633 
3634 	return alloc_percpu_trace_buffer();
3635 }
3636 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3637 
3638 __printf(3, 4)
3639 int trace_array_printk_buf(struct trace_buffer *buffer,
3640 			   unsigned long ip, const char *fmt, ...)
3641 {
3642 	int ret;
3643 	va_list ap;
3644 
3645 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3646 		return 0;
3647 
3648 	va_start(ap, fmt);
3649 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3650 	va_end(ap);
3651 	return ret;
3652 }
3653 
3654 __printf(2, 0)
3655 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3656 {
3657 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3658 }
3659 EXPORT_SYMBOL_GPL(trace_vprintk);
3660 
3661 static void trace_iterator_increment(struct trace_iterator *iter)
3662 {
3663 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3664 
3665 	iter->idx++;
3666 	if (buf_iter)
3667 		ring_buffer_iter_advance(buf_iter);
3668 }
3669 
3670 static struct trace_entry *
3671 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3672 		unsigned long *lost_events)
3673 {
3674 	struct ring_buffer_event *event;
3675 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3676 
3677 	if (buf_iter) {
3678 		event = ring_buffer_iter_peek(buf_iter, ts);
3679 		if (lost_events)
3680 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3681 				(unsigned long)-1 : 0;
3682 	} else {
3683 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3684 					 lost_events);
3685 	}
3686 
3687 	if (event) {
3688 		iter->ent_size = ring_buffer_event_length(event);
3689 		return ring_buffer_event_data(event);
3690 	}
3691 	iter->ent_size = 0;
3692 	return NULL;
3693 }
3694 
3695 static struct trace_entry *
3696 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3697 		  unsigned long *missing_events, u64 *ent_ts)
3698 {
3699 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3700 	struct trace_entry *ent, *next = NULL;
3701 	unsigned long lost_events = 0, next_lost = 0;
3702 	int cpu_file = iter->cpu_file;
3703 	u64 next_ts = 0, ts;
3704 	int next_cpu = -1;
3705 	int next_size = 0;
3706 	int cpu;
3707 
3708 	/*
3709 	 * If we are in a per_cpu trace file, don't bother by iterating over
3710 	 * all cpu and peek directly.
3711 	 */
3712 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3713 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3714 			return NULL;
3715 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3716 		if (ent_cpu)
3717 			*ent_cpu = cpu_file;
3718 
3719 		return ent;
3720 	}
3721 
3722 	for_each_tracing_cpu(cpu) {
3723 
3724 		if (ring_buffer_empty_cpu(buffer, cpu))
3725 			continue;
3726 
3727 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3728 
3729 		/*
3730 		 * Pick the entry with the smallest timestamp:
3731 		 */
3732 		if (ent && (!next || ts < next_ts)) {
3733 			next = ent;
3734 			next_cpu = cpu;
3735 			next_ts = ts;
3736 			next_lost = lost_events;
3737 			next_size = iter->ent_size;
3738 		}
3739 	}
3740 
3741 	iter->ent_size = next_size;
3742 
3743 	if (ent_cpu)
3744 		*ent_cpu = next_cpu;
3745 
3746 	if (ent_ts)
3747 		*ent_ts = next_ts;
3748 
3749 	if (missing_events)
3750 		*missing_events = next_lost;
3751 
3752 	return next;
3753 }
3754 
3755 #define STATIC_FMT_BUF_SIZE	128
3756 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3757 
3758 char *trace_iter_expand_format(struct trace_iterator *iter)
3759 {
3760 	char *tmp;
3761 
3762 	/*
3763 	 * iter->tr is NULL when used with tp_printk, which makes
3764 	 * this get called where it is not safe to call krealloc().
3765 	 */
3766 	if (!iter->tr || iter->fmt == static_fmt_buf)
3767 		return NULL;
3768 
3769 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3770 		       GFP_KERNEL);
3771 	if (tmp) {
3772 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3773 		iter->fmt = tmp;
3774 	}
3775 
3776 	return tmp;
3777 }
3778 
3779 /* Returns true if the string is safe to dereference from an event */
3780 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3781 			   bool star, int len)
3782 {
3783 	unsigned long addr = (unsigned long)str;
3784 	struct trace_event *trace_event;
3785 	struct trace_event_call *event;
3786 
3787 	/* Ignore strings with no length */
3788 	if (star && !len)
3789 		return true;
3790 
3791 	/* OK if part of the event data */
3792 	if ((addr >= (unsigned long)iter->ent) &&
3793 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3794 		return true;
3795 
3796 	/* OK if part of the temp seq buffer */
3797 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3798 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3799 		return true;
3800 
3801 	/* Core rodata can not be freed */
3802 	if (is_kernel_rodata(addr))
3803 		return true;
3804 
3805 	if (trace_is_tracepoint_string(str))
3806 		return true;
3807 
3808 	/*
3809 	 * Now this could be a module event, referencing core module
3810 	 * data, which is OK.
3811 	 */
3812 	if (!iter->ent)
3813 		return false;
3814 
3815 	trace_event = ftrace_find_event(iter->ent->type);
3816 	if (!trace_event)
3817 		return false;
3818 
3819 	event = container_of(trace_event, struct trace_event_call, event);
3820 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3821 		return false;
3822 
3823 	/* Would rather have rodata, but this will suffice */
3824 	if (within_module_core(addr, event->module))
3825 		return true;
3826 
3827 	return false;
3828 }
3829 
3830 static const char *show_buffer(struct trace_seq *s)
3831 {
3832 	struct seq_buf *seq = &s->seq;
3833 
3834 	seq_buf_terminate(seq);
3835 
3836 	return seq->buffer;
3837 }
3838 
3839 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3840 
/*
 * Probe how va_list is handed to vsnprintf() on this architecture.
 *
 * One "%d" is formatted from the variable arguments and then the next
 * int is fetched from the same va_list and returned. The caller learns
 * from the returned value whether vsnprintf() advanced the list.
 */
static int test_can_verify_check(const char *fmt, ...)
{
	char scratch[16];
	va_list args;
	int next;

	/*
	 * The verifier depends on vsnprintf() modifying the va_list that
	 * is passed to it, which requires the list to be passed by
	 * reference. Some architectures (like x86_32) pass it by value,
	 * so vsnprintf() leaves the caller's va_list untouched, and the
	 * verifier would have to understand every conversion vsnprintf()
	 * supports. When it is passed by value the verifier is disabled.
	 */
	va_start(args, fmt);
	vsnprintf(scratch, sizeof(scratch), "%d", args);
	next = va_arg(args, int);
	va_end(args);

	return next;
}
3863 
3864 static void test_can_verify(void)
3865 {
3866 	if (!test_can_verify_check("%d %d", 0, 1)) {
3867 		pr_info("trace event string verifier disabled\n");
3868 		static_branch_inc(&trace_no_verify);
3869 	}
3870 }
3871 
3872 /**
3873  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3874  * @iter: The iterator that holds the seq buffer and the event being printed
3875  * @fmt: The format used to print the event
3876  * @ap: The va_list holding the data to print from @fmt.
3877  *
3878  * This writes the data into the @iter->seq buffer using the data from
3879  * @fmt and @ap. If the format has a %s, then the source of the string
3880  * is examined to make sure it is safe to print, otherwise it will
3881  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3882  * pointer.
3883  */
3884 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3885 			 va_list ap)
3886 {
3887 	const char *p = fmt;
3888 	const char *str;
3889 	int i, j;
3890 
3891 	if (WARN_ON_ONCE(!fmt))
3892 		return;
3893 
3894 	if (static_branch_unlikely(&trace_no_verify))
3895 		goto print;
3896 
3897 	/* Don't bother checking when doing a ftrace_dump() */
3898 	if (iter->fmt == static_fmt_buf)
3899 		goto print;
3900 
3901 	while (*p) {
3902 		bool star = false;
3903 		int len = 0;
3904 
3905 		j = 0;
3906 
3907 		/* We only care about %s and variants */
3908 		for (i = 0; p[i]; i++) {
3909 			if (i + 1 >= iter->fmt_size) {
3910 				/*
3911 				 * If we can't expand the copy buffer,
3912 				 * just print it.
3913 				 */
3914 				if (!trace_iter_expand_format(iter))
3915 					goto print;
3916 			}
3917 
3918 			if (p[i] == '\\' && p[i+1]) {
3919 				i++;
3920 				continue;
3921 			}
3922 			if (p[i] == '%') {
3923 				/* Need to test cases like %08.*s */
3924 				for (j = 1; p[i+j]; j++) {
3925 					if (isdigit(p[i+j]) ||
3926 					    p[i+j] == '.')
3927 						continue;
3928 					if (p[i+j] == '*') {
3929 						star = true;
3930 						continue;
3931 					}
3932 					break;
3933 				}
3934 				if (p[i+j] == 's')
3935 					break;
3936 				star = false;
3937 			}
3938 			j = 0;
3939 		}
3940 		/* If no %s found then just print normally */
3941 		if (!p[i])
3942 			break;
3943 
3944 		/* Copy up to the %s, and print that */
3945 		strncpy(iter->fmt, p, i);
3946 		iter->fmt[i] = '\0';
3947 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3948 
3949 		/*
3950 		 * If iter->seq is full, the above call no longer guarantees
3951 		 * that ap is in sync with fmt processing, and further calls
3952 		 * to va_arg() can return wrong positional arguments.
3953 		 *
3954 		 * Ensure that ap is no longer used in this case.
3955 		 */
3956 		if (iter->seq.full) {
3957 			p = "";
3958 			break;
3959 		}
3960 
3961 		if (star)
3962 			len = va_arg(ap, int);
3963 
3964 		/* The ap now points to the string data of the %s */
3965 		str = va_arg(ap, const char *);
3966 
3967 		/*
3968 		 * If you hit this warning, it is likely that the
3969 		 * trace event in question used %s on a string that
3970 		 * was saved at the time of the event, but may not be
3971 		 * around when the trace is read. Use __string(),
3972 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3973 		 * instead. See samples/trace_events/trace-events-sample.h
3974 		 * for reference.
3975 		 */
3976 		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3977 			      "fmt: '%s' current_buffer: '%s'",
3978 			      fmt, show_buffer(&iter->seq))) {
3979 			int ret;
3980 
3981 			/* Try to safely read the string */
3982 			if (star) {
3983 				if (len + 1 > iter->fmt_size)
3984 					len = iter->fmt_size - 1;
3985 				if (len < 0)
3986 					len = 0;
3987 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3988 				iter->fmt[len] = 0;
3989 				star = false;
3990 			} else {
3991 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3992 								  iter->fmt_size);
3993 			}
3994 			if (ret < 0)
3995 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3996 			else
3997 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3998 						 str, iter->fmt);
3999 			str = "[UNSAFE-MEMORY]";
4000 			strcpy(iter->fmt, "%s");
4001 		} else {
4002 			strncpy(iter->fmt, p + i, j + 1);
4003 			iter->fmt[j+1] = '\0';
4004 		}
4005 		if (star)
4006 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
4007 		else
4008 			trace_seq_printf(&iter->seq, iter->fmt, str);
4009 
4010 		p += i + j + 1;
4011 	}
4012  print:
4013 	if (*p)
4014 		trace_seq_vprintf(&iter->seq, p, ap);
4015 }
4016 
4017 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4018 {
4019 	const char *p, *new_fmt;
4020 	char *q;
4021 
4022 	if (WARN_ON_ONCE(!fmt))
4023 		return fmt;
4024 
4025 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4026 		return fmt;
4027 
4028 	p = fmt;
4029 	new_fmt = q = iter->fmt;
4030 	while (*p) {
4031 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4032 			if (!trace_iter_expand_format(iter))
4033 				return fmt;
4034 
4035 			q += iter->fmt - new_fmt;
4036 			new_fmt = iter->fmt;
4037 		}
4038 
4039 		*q++ = *p++;
4040 
4041 		/* Replace %p with %px */
4042 		if (p[-1] == '%') {
4043 			if (p[0] == '%') {
4044 				*q++ = *p++;
4045 			} else if (p[0] == 'p' && !isalnum(p[1])) {
4046 				*q++ = *p++;
4047 				*q++ = 'x';
4048 			}
4049 		}
4050 	}
4051 	*q = '\0';
4052 
4053 	return new_fmt;
4054 }
4055 
4056 #define STATIC_TEMP_BUF_SIZE	128
4057 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4058 
4059 /* Find the next real entry, without updating the iterator itself */
4060 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4061 					  int *ent_cpu, u64 *ent_ts)
4062 {
4063 	/* __find_next_entry will reset ent_size */
4064 	int ent_size = iter->ent_size;
4065 	struct trace_entry *entry;
4066 
4067 	/*
4068 	 * If called from ftrace_dump(), then the iter->temp buffer
4069 	 * will be the static_temp_buf and not created from kmalloc.
4070 	 * If the entry size is greater than the buffer, we can
4071 	 * not save it. Just return NULL in that case. This is only
4072 	 * used to add markers when two consecutive events' time
4073 	 * stamps have a large delta. See trace_print_lat_context()
4074 	 */
4075 	if (iter->temp == static_temp_buf &&
4076 	    STATIC_TEMP_BUF_SIZE < ent_size)
4077 		return NULL;
4078 
4079 	/*
4080 	 * The __find_next_entry() may call peek_next_entry(), which may
4081 	 * call ring_buffer_peek() that may make the contents of iter->ent
4082 	 * undefined. Need to copy iter->ent now.
4083 	 */
4084 	if (iter->ent && iter->ent != iter->temp) {
4085 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4086 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4087 			void *temp;
4088 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
4089 			if (!temp)
4090 				return NULL;
4091 			kfree(iter->temp);
4092 			iter->temp = temp;
4093 			iter->temp_size = iter->ent_size;
4094 		}
4095 		memcpy(iter->temp, iter->ent, iter->ent_size);
4096 		iter->ent = iter->temp;
4097 	}
4098 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4099 	/* Put back the original ent_size */
4100 	iter->ent_size = ent_size;
4101 
4102 	return entry;
4103 }
4104 
4105 /* Find the next real entry, and increment the iterator to the next entry */
4106 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4107 {
4108 	iter->ent = __find_next_entry(iter, &iter->cpu,
4109 				      &iter->lost_events, &iter->ts);
4110 
4111 	if (iter->ent)
4112 		trace_iterator_increment(iter);
4113 
4114 	return iter->ent ? iter : NULL;
4115 }
4116 
4117 static void trace_consume(struct trace_iterator *iter)
4118 {
4119 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4120 			    &iter->lost_events);
4121 }
4122 
4123 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4124 {
4125 	struct trace_iterator *iter = m->private;
4126 	int i = (int)*pos;
4127 	void *ent;
4128 
4129 	WARN_ON_ONCE(iter->leftover);
4130 
4131 	(*pos)++;
4132 
4133 	/* can't go backwards */
4134 	if (iter->idx > i)
4135 		return NULL;
4136 
4137 	if (iter->idx < 0)
4138 		ent = trace_find_next_entry_inc(iter);
4139 	else
4140 		ent = iter;
4141 
4142 	while (ent && iter->idx < i)
4143 		ent = trace_find_next_entry_inc(iter);
4144 
4145 	iter->pos = *pos;
4146 
4147 	return ent;
4148 }
4149 
4150 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4151 {
4152 	struct ring_buffer_iter *buf_iter;
4153 	unsigned long entries = 0;
4154 	u64 ts;
4155 
4156 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4157 
4158 	buf_iter = trace_buffer_iter(iter, cpu);
4159 	if (!buf_iter)
4160 		return;
4161 
4162 	ring_buffer_iter_reset(buf_iter);
4163 
4164 	/*
4165 	 * We could have the case with the max latency tracers
4166 	 * that a reset never took place on a cpu. This is evident
4167 	 * by the timestamp being before the start of the buffer.
4168 	 */
4169 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4170 		if (ts >= iter->array_buffer->time_start)
4171 			break;
4172 		entries++;
4173 		ring_buffer_iter_advance(buf_iter);
4174 	}
4175 
4176 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4177 }
4178 
4179 /*
4180  * The current tracer is copied to avoid a global locking
4181  * all around.
4182  */
4183 static void *s_start(struct seq_file *m, loff_t *pos)
4184 {
4185 	struct trace_iterator *iter = m->private;
4186 	struct trace_array *tr = iter->tr;
4187 	int cpu_file = iter->cpu_file;
4188 	void *p = NULL;
4189 	loff_t l = 0;
4190 	int cpu;
4191 
4192 	mutex_lock(&trace_types_lock);
4193 	if (unlikely(tr->current_trace != iter->trace)) {
4194 		/* Close iter->trace before switching to the new current tracer */
4195 		if (iter->trace->close)
4196 			iter->trace->close(iter);
4197 		iter->trace = tr->current_trace;
4198 		/* Reopen the new current tracer */
4199 		if (iter->trace->open)
4200 			iter->trace->open(iter);
4201 	}
4202 	mutex_unlock(&trace_types_lock);
4203 
4204 #ifdef CONFIG_TRACER_MAX_TRACE
4205 	if (iter->snapshot && iter->trace->use_max_tr)
4206 		return ERR_PTR(-EBUSY);
4207 #endif
4208 
4209 	if (*pos != iter->pos) {
4210 		iter->ent = NULL;
4211 		iter->cpu = 0;
4212 		iter->idx = -1;
4213 
4214 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4215 			for_each_tracing_cpu(cpu)
4216 				tracing_iter_reset(iter, cpu);
4217 		} else
4218 			tracing_iter_reset(iter, cpu_file);
4219 
4220 		iter->leftover = 0;
4221 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4222 			;
4223 
4224 	} else {
4225 		/*
4226 		 * If we overflowed the seq_file before, then we want
4227 		 * to just reuse the trace_seq buffer again.
4228 		 */
4229 		if (iter->leftover)
4230 			p = iter;
4231 		else {
4232 			l = *pos - 1;
4233 			p = s_next(m, p, &l);
4234 		}
4235 	}
4236 
4237 	trace_event_read_lock();
4238 	trace_access_lock(cpu_file);
4239 	return p;
4240 }
4241 
4242 static void s_stop(struct seq_file *m, void *p)
4243 {
4244 	struct trace_iterator *iter = m->private;
4245 
4246 #ifdef CONFIG_TRACER_MAX_TRACE
4247 	if (iter->snapshot && iter->trace->use_max_tr)
4248 		return;
4249 #endif
4250 
4251 	trace_access_unlock(iter->cpu_file);
4252 	trace_event_read_unlock();
4253 }
4254 
4255 static void
4256 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4257 		      unsigned long *entries, int cpu)
4258 {
4259 	unsigned long count;
4260 
4261 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4262 	/*
4263 	 * If this buffer has skipped entries, then we hold all
4264 	 * entries for the trace and we need to ignore the
4265 	 * ones before the time stamp.
4266 	 */
4267 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4268 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4269 		/* total is the same as the entries */
4270 		*total = count;
4271 	} else
4272 		*total = count +
4273 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4274 	*entries = count;
4275 }
4276 
4277 static void
4278 get_total_entries(struct array_buffer *buf,
4279 		  unsigned long *total, unsigned long *entries)
4280 {
4281 	unsigned long t, e;
4282 	int cpu;
4283 
4284 	*total = 0;
4285 	*entries = 0;
4286 
4287 	for_each_tracing_cpu(cpu) {
4288 		get_total_entries_cpu(buf, &t, &e, cpu);
4289 		*total += t;
4290 		*entries += e;
4291 	}
4292 }
4293 
4294 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4295 {
4296 	unsigned long total, entries;
4297 
4298 	if (!tr)
4299 		tr = &global_trace;
4300 
4301 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4302 
4303 	return entries;
4304 }
4305 
4306 unsigned long trace_total_entries(struct trace_array *tr)
4307 {
4308 	unsigned long total, entries;
4309 
4310 	if (!tr)
4311 		tr = &global_trace;
4312 
4313 	get_total_entries(&tr->array_buffer, &total, &entries);
4314 
4315 	return entries;
4316 }
4317 
/* Emit the column legend used above latency-format trace output. */
static void print_lat_help_header(struct seq_file *m)
{
	static const char legend[] =
		"#                    _------=> CPU#            \n"
		"#                   / _-----=> irqs-off/BH-disabled\n"
		"#                  | / _----=> need-resched    \n"
		"#                  || / _---=> hardirq/softirq \n"
		"#                  ||| / _--=> preempt-depth   \n"
		"#                  |||| / _-=> migrate-disable \n"
		"#                  ||||| /     delay           \n"
		"#  cmd     pid     |||||| time  |   caller     \n"
		"#     \\   /        ||||||  \\    |    /       \n";

	seq_puts(m, legend);
}
4330 
/*
 * Print the "entries-in-buffer/entries-written" summary line for @buf,
 * together with the number of online CPUs, followed by a "#" line.
 */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long written, held;

	get_total_entries(buf, &written, &held);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   held, written, num_online_cpus());
	seq_puts(m, "#\n");
}
4341 
4342 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4343 				   unsigned int flags)
4344 {
4345 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4346 
4347 	print_event_info(buf, m);
4348 
4349 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4350 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4351 }
4352 
4353 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4354 				       unsigned int flags)
4355 {
4356 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4357 	static const char space[] = "            ";
4358 	int prec = tgid ? 12 : 2;
4359 
4360 	print_event_info(buf, m);
4361 
4362 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4363 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4364 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4365 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4366 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4367 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4368 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4369 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4370 }
4371 
4372 void
4373 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4374 {
4375 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4376 	struct array_buffer *buf = iter->array_buffer;
4377 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4378 	struct tracer *type = iter->trace;
4379 	unsigned long entries;
4380 	unsigned long total;
4381 	const char *name = type->name;
4382 
4383 	get_total_entries(buf, &total, &entries);
4384 
4385 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4386 		   name, UTS_RELEASE);
4387 	seq_puts(m, "# -----------------------------------"
4388 		 "---------------------------------\n");
4389 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4390 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4391 		   nsecs_to_usecs(data->saved_latency),
4392 		   entries,
4393 		   total,
4394 		   buf->cpu,
4395 		   preempt_model_none()      ? "server" :
4396 		   preempt_model_voluntary() ? "desktop" :
4397 		   preempt_model_full()      ? "preempt" :
4398 		   preempt_model_rt()        ? "preempt_rt" :
4399 		   "unknown",
4400 		   /* These are reserved for later use */
4401 		   0, 0, 0, 0);
4402 #ifdef CONFIG_SMP
4403 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4404 #else
4405 	seq_puts(m, ")\n");
4406 #endif
4407 	seq_puts(m, "#    -----------------\n");
4408 	seq_printf(m, "#    | task: %.16s-%d "
4409 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4410 		   data->comm, data->pid,
4411 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4412 		   data->policy, data->rt_priority);
4413 	seq_puts(m, "#    -----------------\n");
4414 
4415 	if (data->critical_start) {
4416 		seq_puts(m, "#  => started at: ");
4417 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4418 		trace_print_seq(m, &iter->seq);
4419 		seq_puts(m, "\n#  => ended at:   ");
4420 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4421 		trace_print_seq(m, &iter->seq);
4422 		seq_puts(m, "\n#\n");
4423 	}
4424 
4425 	seq_puts(m, "#\n");
4426 }
4427 
4428 static void test_cpu_buff_start(struct trace_iterator *iter)
4429 {
4430 	struct trace_seq *s = &iter->seq;
4431 	struct trace_array *tr = iter->tr;
4432 
4433 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4434 		return;
4435 
4436 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4437 		return;
4438 
4439 	if (cpumask_available(iter->started) &&
4440 	    cpumask_test_cpu(iter->cpu, iter->started))
4441 		return;
4442 
4443 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4444 		return;
4445 
4446 	if (cpumask_available(iter->started))
4447 		cpumask_set_cpu(iter->cpu, iter->started);
4448 
4449 	/* Don't print started cpu buffer for the first entry of the trace */
4450 	if (iter->idx > 1)
4451 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4452 				iter->cpu);
4453 }
4454 
4455 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4456 {
4457 	struct trace_array *tr = iter->tr;
4458 	struct trace_seq *s = &iter->seq;
4459 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4460 	struct trace_entry *entry;
4461 	struct trace_event *event;
4462 
4463 	entry = iter->ent;
4464 
4465 	test_cpu_buff_start(iter);
4466 
4467 	event = ftrace_find_event(entry->type);
4468 
4469 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4470 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4471 			trace_print_lat_context(iter);
4472 		else
4473 			trace_print_context(iter);
4474 	}
4475 
4476 	if (trace_seq_has_overflowed(s))
4477 		return TRACE_TYPE_PARTIAL_LINE;
4478 
4479 	if (event) {
4480 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4481 			return print_event_fields(iter, event);
4482 		return event->funcs->trace(iter, sym_flags, event);
4483 	}
4484 
4485 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4486 
4487 	return trace_handle_return(s);
4488 }
4489 
4490 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4491 {
4492 	struct trace_array *tr = iter->tr;
4493 	struct trace_seq *s = &iter->seq;
4494 	struct trace_entry *entry;
4495 	struct trace_event *event;
4496 
4497 	entry = iter->ent;
4498 
4499 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4500 		trace_seq_printf(s, "%d %d %llu ",
4501 				 entry->pid, iter->cpu, iter->ts);
4502 
4503 	if (trace_seq_has_overflowed(s))
4504 		return TRACE_TYPE_PARTIAL_LINE;
4505 
4506 	event = ftrace_find_event(entry->type);
4507 	if (event)
4508 		return event->funcs->raw(iter, 0, event);
4509 
4510 	trace_seq_printf(s, "%d ?\n", entry->type);
4511 
4512 	return trace_handle_return(s);
4513 }
4514 
4515 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4516 {
4517 	struct trace_array *tr = iter->tr;
4518 	struct trace_seq *s = &iter->seq;
4519 	unsigned char newline = '\n';
4520 	struct trace_entry *entry;
4521 	struct trace_event *event;
4522 
4523 	entry = iter->ent;
4524 
4525 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4526 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4527 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4528 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4529 		if (trace_seq_has_overflowed(s))
4530 			return TRACE_TYPE_PARTIAL_LINE;
4531 	}
4532 
4533 	event = ftrace_find_event(entry->type);
4534 	if (event) {
4535 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4536 		if (ret != TRACE_TYPE_HANDLED)
4537 			return ret;
4538 	}
4539 
4540 	SEQ_PUT_FIELD(s, newline);
4541 
4542 	return trace_handle_return(s);
4543 }
4544 
4545 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4546 {
4547 	struct trace_array *tr = iter->tr;
4548 	struct trace_seq *s = &iter->seq;
4549 	struct trace_entry *entry;
4550 	struct trace_event *event;
4551 
4552 	entry = iter->ent;
4553 
4554 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4555 		SEQ_PUT_FIELD(s, entry->pid);
4556 		SEQ_PUT_FIELD(s, iter->cpu);
4557 		SEQ_PUT_FIELD(s, iter->ts);
4558 		if (trace_seq_has_overflowed(s))
4559 			return TRACE_TYPE_PARTIAL_LINE;
4560 	}
4561 
4562 	event = ftrace_find_event(entry->type);
4563 	return event ? event->funcs->binary(iter, 0, event) :
4564 		TRACE_TYPE_HANDLED;
4565 }
4566 
4567 int trace_empty(struct trace_iterator *iter)
4568 {
4569 	struct ring_buffer_iter *buf_iter;
4570 	int cpu;
4571 
4572 	/* If we are looking at one CPU buffer, only check that one */
4573 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4574 		cpu = iter->cpu_file;
4575 		buf_iter = trace_buffer_iter(iter, cpu);
4576 		if (buf_iter) {
4577 			if (!ring_buffer_iter_empty(buf_iter))
4578 				return 0;
4579 		} else {
4580 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4581 				return 0;
4582 		}
4583 		return 1;
4584 	}
4585 
4586 	for_each_tracing_cpu(cpu) {
4587 		buf_iter = trace_buffer_iter(iter, cpu);
4588 		if (buf_iter) {
4589 			if (!ring_buffer_iter_empty(buf_iter))
4590 				return 0;
4591 		} else {
4592 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4593 				return 0;
4594 		}
4595 	}
4596 
4597 	return 1;
4598 }
4599 
4600 /*  Called with trace_event_read_lock() held. */
4601 enum print_line_t print_trace_line(struct trace_iterator *iter)
4602 {
4603 	struct trace_array *tr = iter->tr;
4604 	unsigned long trace_flags = tr->trace_flags;
4605 	enum print_line_t ret;
4606 
4607 	if (iter->lost_events) {
4608 		if (iter->lost_events == (unsigned long)-1)
4609 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4610 					 iter->cpu);
4611 		else
4612 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4613 					 iter->cpu, iter->lost_events);
4614 		if (trace_seq_has_overflowed(&iter->seq))
4615 			return TRACE_TYPE_PARTIAL_LINE;
4616 	}
4617 
4618 	if (iter->trace && iter->trace->print_line) {
4619 		ret = iter->trace->print_line(iter);
4620 		if (ret != TRACE_TYPE_UNHANDLED)
4621 			return ret;
4622 	}
4623 
4624 	if (iter->ent->type == TRACE_BPUTS &&
4625 			trace_flags & TRACE_ITER_PRINTK &&
4626 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4627 		return trace_print_bputs_msg_only(iter);
4628 
4629 	if (iter->ent->type == TRACE_BPRINT &&
4630 			trace_flags & TRACE_ITER_PRINTK &&
4631 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4632 		return trace_print_bprintk_msg_only(iter);
4633 
4634 	if (iter->ent->type == TRACE_PRINT &&
4635 			trace_flags & TRACE_ITER_PRINTK &&
4636 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4637 		return trace_print_printk_msg_only(iter);
4638 
4639 	if (trace_flags & TRACE_ITER_BIN)
4640 		return print_bin_fmt(iter);
4641 
4642 	if (trace_flags & TRACE_ITER_HEX)
4643 		return print_hex_fmt(iter);
4644 
4645 	if (trace_flags & TRACE_ITER_RAW)
4646 		return print_raw_fmt(iter);
4647 
4648 	return print_trace_fmt(iter);
4649 }
4650 
4651 void trace_latency_header(struct seq_file *m)
4652 {
4653 	struct trace_iterator *iter = m->private;
4654 	struct trace_array *tr = iter->tr;
4655 
4656 	/* print nothing if the buffers are empty */
4657 	if (trace_empty(iter))
4658 		return;
4659 
4660 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4661 		print_trace_header(m, iter);
4662 
4663 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4664 		print_lat_help_header(m);
4665 }
4666 
4667 void trace_default_header(struct seq_file *m)
4668 {
4669 	struct trace_iterator *iter = m->private;
4670 	struct trace_array *tr = iter->tr;
4671 	unsigned long trace_flags = tr->trace_flags;
4672 
4673 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4674 		return;
4675 
4676 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4677 		/* print nothing if the buffers are empty */
4678 		if (trace_empty(iter))
4679 			return;
4680 		print_trace_header(m, iter);
4681 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4682 			print_lat_help_header(m);
4683 	} else {
4684 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4685 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4686 				print_func_help_header_irq(iter->array_buffer,
4687 							   m, trace_flags);
4688 			else
4689 				print_func_help_header(iter->array_buffer, m,
4690 						       trace_flags);
4691 		}
4692 	}
4693 }
4694 
/* Print a warning banner when ftrace_is_dead() reports true. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead()) {
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "#          MAY BE MISSING FUNCTION EVENTS\n");
	}
}
4702 
4703 #ifdef CONFIG_TRACER_MAX_TRACE
/* Print the usage text for the top-level "snapshot" file. */
static void show_snapshot_main_help(struct seq_file *m)
{
	static const char usage[] =
		"# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer.\n"
		"# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		"#                      (Doesn't have to be '2' works with any number that\n"
		"#                       is not a '0' or '1')\n";

	seq_puts(m, usage);
}
4713 
/*
 * Print the usage text for a per_cpu "snapshot" file.  The "echo 1"
 * text depends on CONFIG_RING_BUFFER_ALLOW_SWAP.
 */
static void show_snapshot_percpu_help(struct seq_file *m)
{
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	const char *take_help =
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer for this cpu.\n";
#else
	const char *take_help =
		"# echo 1 > snapshot : Not supported with this kernel.\n"
		"#                     Must use main snapshot file to allocate.\n";
#endif
	const char *clear_help =
		"# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		"#                      (Doesn't have to be '2' works with any number that\n"
		"#                       is not a '0' or '1')\n";

	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
	seq_puts(m, take_help);
	seq_puts(m, clear_help);
}
4728 
4729 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4730 {
4731 	if (iter->tr->allocated_snapshot)
4732 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4733 	else
4734 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4735 
4736 	seq_puts(m, "# Snapshot commands:\n");
4737 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4738 		show_snapshot_main_help(m);
4739 	else
4740 		show_snapshot_percpu_help(m);
4741 }
4742 #else
/*
 * Empty stand-in used when CONFIG_TRACER_MAX_TRACE is not set.
 * Should never be called.
 */
static inline void
print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
}
4745 #endif
4746 
4747 static int s_show(struct seq_file *m, void *v)
4748 {
4749 	struct trace_iterator *iter = v;
4750 	int ret;
4751 
4752 	if (iter->ent == NULL) {
4753 		if (iter->tr) {
4754 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4755 			seq_puts(m, "#\n");
4756 			test_ftrace_alive(m);
4757 		}
4758 		if (iter->snapshot && trace_empty(iter))
4759 			print_snapshot_help(m, iter);
4760 		else if (iter->trace && iter->trace->print_header)
4761 			iter->trace->print_header(m);
4762 		else
4763 			trace_default_header(m);
4764 
4765 	} else if (iter->leftover) {
4766 		/*
4767 		 * If we filled the seq_file buffer earlier, we
4768 		 * want to just show it now.
4769 		 */
4770 		ret = trace_print_seq(m, &iter->seq);
4771 
4772 		/* ret should this time be zero, but you never know */
4773 		iter->leftover = ret;
4774 
4775 	} else {
4776 		print_trace_line(iter);
4777 		ret = trace_print_seq(m, &iter->seq);
4778 		/*
4779 		 * If we overflow the seq_file buffer, then it will
4780 		 * ask us for this data again at start up.
4781 		 * Use that instead.
4782 		 *  ret is 0 if seq_file write succeeded.
4783 		 *        -1 otherwise.
4784 		 */
4785 		iter->leftover = ret;
4786 	}
4787 
4788 	return 0;
4789 }
4790 
4791 /*
4792  * Should be used after trace_array_get(), trace_types_lock
4793  * ensures that i_cdev was already initialized.
4794  */
4795 static inline int tracing_get_cpu(struct inode *inode)
4796 {
4797 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4798 		return (long)inode->i_cdev - 1;
4799 	return RING_BUFFER_ALL_CPUS;
4800 }
4801 
4802 static const struct seq_operations tracer_seq_ops = {
4803 	.start		= s_start,
4804 	.next		= s_next,
4805 	.stop		= s_stop,
4806 	.show		= s_show,
4807 };
4808 
4809 /*
4810  * Note, as iter itself can be allocated and freed in different
4811  * ways, this function is only used to free its content, and not
4812  * the iterator itself. The only requirement to all the allocations
4813  * is that it must zero all fields (kzalloc), as freeing works with
4814  * ethier allocated content or NULL.
4815  */
4816 static void free_trace_iter_content(struct trace_iterator *iter)
4817 {
4818 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4819 	if (iter->fmt != static_fmt_buf)
4820 		kfree(iter->fmt);
4821 
4822 	kfree(iter->temp);
4823 	kfree(iter->buffer_iter);
4824 	mutex_destroy(&iter->mutex);
4825 	free_cpumask_var(iter->started);
4826 }
4827 
4828 static struct trace_iterator *
4829 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4830 {
4831 	struct trace_array *tr = inode->i_private;
4832 	struct trace_iterator *iter;
4833 	int cpu;
4834 
4835 	if (tracing_disabled)
4836 		return ERR_PTR(-ENODEV);
4837 
4838 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4839 	if (!iter)
4840 		return ERR_PTR(-ENOMEM);
4841 
4842 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4843 				    GFP_KERNEL);
4844 	if (!iter->buffer_iter)
4845 		goto release;
4846 
4847 	/*
4848 	 * trace_find_next_entry() may need to save off iter->ent.
4849 	 * It will place it into the iter->temp buffer. As most
4850 	 * events are less than 128, allocate a buffer of that size.
4851 	 * If one is greater, then trace_find_next_entry() will
4852 	 * allocate a new buffer to adjust for the bigger iter->ent.
4853 	 * It's not critical if it fails to get allocated here.
4854 	 */
4855 	iter->temp = kmalloc(128, GFP_KERNEL);
4856 	if (iter->temp)
4857 		iter->temp_size = 128;
4858 
4859 	/*
4860 	 * trace_event_printf() may need to modify given format
4861 	 * string to replace %p with %px so that it shows real address
4862 	 * instead of hash value. However, that is only for the event
4863 	 * tracing, other tracer may not need. Defer the allocation
4864 	 * until it is needed.
4865 	 */
4866 	iter->fmt = NULL;
4867 	iter->fmt_size = 0;
4868 
4869 	mutex_lock(&trace_types_lock);
4870 	iter->trace = tr->current_trace;
4871 
4872 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4873 		goto fail;
4874 
4875 	iter->tr = tr;
4876 
4877 #ifdef CONFIG_TRACER_MAX_TRACE
4878 	/* Currently only the top directory has a snapshot */
4879 	if (tr->current_trace->print_max || snapshot)
4880 		iter->array_buffer = &tr->max_buffer;
4881 	else
4882 #endif
4883 		iter->array_buffer = &tr->array_buffer;
4884 	iter->snapshot = snapshot;
4885 	iter->pos = -1;
4886 	iter->cpu_file = tracing_get_cpu(inode);
4887 	mutex_init(&iter->mutex);
4888 
4889 	/* Notify the tracer early; before we stop tracing. */
4890 	if (iter->trace->open)
4891 		iter->trace->open(iter);
4892 
4893 	/* Annotate start of buffers if we had overruns */
4894 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4895 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4896 
4897 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4898 	if (trace_clocks[tr->clock_id].in_ns)
4899 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4900 
4901 	/*
4902 	 * If pause-on-trace is enabled, then stop the trace while
4903 	 * dumping, unless this is the "snapshot" file
4904 	 */
4905 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4906 		tracing_stop_tr(tr);
4907 
4908 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4909 		for_each_tracing_cpu(cpu) {
4910 			iter->buffer_iter[cpu] =
4911 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4912 							 cpu, GFP_KERNEL);
4913 		}
4914 		ring_buffer_read_prepare_sync();
4915 		for_each_tracing_cpu(cpu) {
4916 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4917 			tracing_iter_reset(iter, cpu);
4918 		}
4919 	} else {
4920 		cpu = iter->cpu_file;
4921 		iter->buffer_iter[cpu] =
4922 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4923 						 cpu, GFP_KERNEL);
4924 		ring_buffer_read_prepare_sync();
4925 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4926 		tracing_iter_reset(iter, cpu);
4927 	}
4928 
4929 	mutex_unlock(&trace_types_lock);
4930 
4931 	return iter;
4932 
4933  fail:
4934 	mutex_unlock(&trace_types_lock);
4935 	free_trace_iter_content(iter);
4936 release:
4937 	seq_release_private(inode, file);
4938 	return ERR_PTR(-ENOMEM);
4939 }
4940 
4941 int tracing_open_generic(struct inode *inode, struct file *filp)
4942 {
4943 	int ret;
4944 
4945 	ret = tracing_check_open_get_tr(NULL);
4946 	if (ret)
4947 		return ret;
4948 
4949 	filp->private_data = inode->i_private;
4950 	return 0;
4951 }
4952 
4953 bool tracing_is_disabled(void)
4954 {
4955 	return (tracing_disabled) ? true: false;
4956 }
4957 
4958 /*
4959  * Open and update trace_array ref count.
4960  * Must have the current trace_array passed to it.
4961  */
4962 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4963 {
4964 	struct trace_array *tr = inode->i_private;
4965 	int ret;
4966 
4967 	ret = tracing_check_open_get_tr(tr);
4968 	if (ret)
4969 		return ret;
4970 
4971 	filp->private_data = inode->i_private;
4972 
4973 	return 0;
4974 }
4975 
4976 /*
4977  * The private pointer of the inode is the trace_event_file.
4978  * Update the tr ref count associated to it.
4979  */
4980 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4981 {
4982 	struct trace_event_file *file = inode->i_private;
4983 	int ret;
4984 
4985 	ret = tracing_check_open_get_tr(file->tr);
4986 	if (ret)
4987 		return ret;
4988 
4989 	mutex_lock(&event_mutex);
4990 
4991 	/* Fail if the file is marked for removal */
4992 	if (file->flags & EVENT_FILE_FL_FREED) {
4993 		trace_array_put(file->tr);
4994 		ret = -ENODEV;
4995 	} else {
4996 		event_file_get(file);
4997 	}
4998 
4999 	mutex_unlock(&event_mutex);
5000 	if (ret)
5001 		return ret;
5002 
5003 	filp->private_data = inode->i_private;
5004 
5005 	return 0;
5006 }
5007 
5008 int tracing_release_file_tr(struct inode *inode, struct file *filp)
5009 {
5010 	struct trace_event_file *file = inode->i_private;
5011 
5012 	trace_array_put(file->tr);
5013 	event_file_put(file);
5014 
5015 	return 0;
5016 }
5017 
/*
 * Open handler that calls stream_open() on the file and then performs
 * the generic trace_array open.
 */
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
	int ret;

	stream_open(inode, filp);
	ret = tracing_open_generic_tr(inode, filp);

	return ret;
}
5023 
5024 static int tracing_release(struct inode *inode, struct file *file)
5025 {
5026 	struct trace_array *tr = inode->i_private;
5027 	struct seq_file *m = file->private_data;
5028 	struct trace_iterator *iter;
5029 	int cpu;
5030 
5031 	if (!(file->f_mode & FMODE_READ)) {
5032 		trace_array_put(tr);
5033 		return 0;
5034 	}
5035 
5036 	/* Writes do not use seq_file */
5037 	iter = m->private;
5038 	mutex_lock(&trace_types_lock);
5039 
5040 	for_each_tracing_cpu(cpu) {
5041 		if (iter->buffer_iter[cpu])
5042 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
5043 	}
5044 
5045 	if (iter->trace && iter->trace->close)
5046 		iter->trace->close(iter);
5047 
5048 	if (!iter->snapshot && tr->stop_count)
5049 		/* reenable tracing if it was previously enabled */
5050 		tracing_start_tr(tr);
5051 
5052 	__trace_array_put(tr);
5053 
5054 	mutex_unlock(&trace_types_lock);
5055 
5056 	free_trace_iter_content(iter);
5057 	seq_release_private(inode, file);
5058 
5059 	return 0;
5060 }
5061 
5062 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5063 {
5064 	struct trace_array *tr = inode->i_private;
5065 
5066 	trace_array_put(tr);
5067 	return 0;
5068 }
5069 
5070 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5071 {
5072 	struct trace_array *tr = inode->i_private;
5073 
5074 	trace_array_put(tr);
5075 
5076 	return single_release(inode, file);
5077 }
5078 
5079 static int tracing_open(struct inode *inode, struct file *file)
5080 {
5081 	struct trace_array *tr = inode->i_private;
5082 	struct trace_iterator *iter;
5083 	int ret;
5084 
5085 	ret = tracing_check_open_get_tr(tr);
5086 	if (ret)
5087 		return ret;
5088 
5089 	/* If this file was open for write, then erase contents */
5090 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5091 		int cpu = tracing_get_cpu(inode);
5092 		struct array_buffer *trace_buf = &tr->array_buffer;
5093 
5094 #ifdef CONFIG_TRACER_MAX_TRACE
5095 		if (tr->current_trace->print_max)
5096 			trace_buf = &tr->max_buffer;
5097 #endif
5098 
5099 		if (cpu == RING_BUFFER_ALL_CPUS)
5100 			tracing_reset_online_cpus(trace_buf);
5101 		else
5102 			tracing_reset_cpu(trace_buf, cpu);
5103 	}
5104 
5105 	if (file->f_mode & FMODE_READ) {
5106 		iter = __tracing_open(inode, file, false);
5107 		if (IS_ERR(iter))
5108 			ret = PTR_ERR(iter);
5109 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5110 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
5111 	}
5112 
5113 	if (ret < 0)
5114 		trace_array_put(tr);
5115 
5116 	return ret;
5117 }
5118 
5119 /*
5120  * Some tracers are not suitable for instance buffers.
5121  * A tracer is always available for the global array (toplevel)
5122  * or if it explicitly states that it is.
5123  */
5124 static bool
5125 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5126 {
5127 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5128 }
5129 
5130 /* Find the next tracer that this trace array may use */
5131 static struct tracer *
5132 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5133 {
5134 	while (t && !trace_ok_for_array(t, tr))
5135 		t = t->next;
5136 
5137 	return t;
5138 }
5139 
5140 static void *
5141 t_next(struct seq_file *m, void *v, loff_t *pos)
5142 {
5143 	struct trace_array *tr = m->private;
5144 	struct tracer *t = v;
5145 
5146 	(*pos)++;
5147 
5148 	if (t)
5149 		t = get_tracer_for_array(tr, t->next);
5150 
5151 	return t;
5152 }
5153 
5154 static void *t_start(struct seq_file *m, loff_t *pos)
5155 {
5156 	struct trace_array *tr = m->private;
5157 	struct tracer *t;
5158 	loff_t l = 0;
5159 
5160 	mutex_lock(&trace_types_lock);
5161 
5162 	t = get_tracer_for_array(tr, trace_types);
5163 	for (; t && l < *pos; t = t_next(m, t, &l))
5164 			;
5165 
5166 	return t;
5167 }
5168 
5169 static void t_stop(struct seq_file *m, void *p)
5170 {
5171 	mutex_unlock(&trace_types_lock);
5172 }
5173 
5174 static int t_show(struct seq_file *m, void *v)
5175 {
5176 	struct tracer *t = v;
5177 
5178 	if (!t)
5179 		return 0;
5180 
5181 	seq_puts(m, t->name);
5182 	if (t->next)
5183 		seq_putc(m, ' ');
5184 	else
5185 		seq_putc(m, '\n');
5186 
5187 	return 0;
5188 }
5189 
5190 static const struct seq_operations show_traces_seq_ops = {
5191 	.start		= t_start,
5192 	.next		= t_next,
5193 	.stop		= t_stop,
5194 	.show		= t_show,
5195 };
5196 
5197 static int show_traces_open(struct inode *inode, struct file *file)
5198 {
5199 	struct trace_array *tr = inode->i_private;
5200 	struct seq_file *m;
5201 	int ret;
5202 
5203 	ret = tracing_check_open_get_tr(tr);
5204 	if (ret)
5205 		return ret;
5206 
5207 	ret = seq_open(file, &show_traces_seq_ops);
5208 	if (ret) {
5209 		trace_array_put(tr);
5210 		return ret;
5211 	}
5212 
5213 	m = file->private_data;
5214 	m->private = tr;
5215 
5216 	return 0;
5217 }
5218 
5219 static int show_traces_release(struct inode *inode, struct file *file)
5220 {
5221 	struct trace_array *tr = inode->i_private;
5222 
5223 	trace_array_put(tr);
5224 	return seq_release(inode, file);
5225 }
5226 
5227 static ssize_t
5228 tracing_write_stub(struct file *filp, const char __user *ubuf,
5229 		   size_t count, loff_t *ppos)
5230 {
5231 	return count;
5232 }
5233 
5234 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5235 {
5236 	int ret;
5237 
5238 	if (file->f_mode & FMODE_READ)
5239 		ret = seq_lseek(file, offset, whence);
5240 	else
5241 		file->f_pos = ret = 0;
5242 
5243 	return ret;
5244 }
5245 
5246 static const struct file_operations tracing_fops = {
5247 	.open		= tracing_open,
5248 	.read		= seq_read,
5249 	.read_iter	= seq_read_iter,
5250 	.splice_read	= copy_splice_read,
5251 	.write		= tracing_write_stub,
5252 	.llseek		= tracing_lseek,
5253 	.release	= tracing_release,
5254 };
5255 
5256 static const struct file_operations show_traces_fops = {
5257 	.open		= show_traces_open,
5258 	.read		= seq_read,
5259 	.llseek		= seq_lseek,
5260 	.release	= show_traces_release,
5261 };
5262 
5263 static ssize_t
5264 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5265 		     size_t count, loff_t *ppos)
5266 {
5267 	struct trace_array *tr = file_inode(filp)->i_private;
5268 	char *mask_str;
5269 	int len;
5270 
5271 	len = snprintf(NULL, 0, "%*pb\n",
5272 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5273 	mask_str = kmalloc(len, GFP_KERNEL);
5274 	if (!mask_str)
5275 		return -ENOMEM;
5276 
5277 	len = snprintf(mask_str, len, "%*pb\n",
5278 		       cpumask_pr_args(tr->tracing_cpumask));
5279 	if (len >= count) {
5280 		count = -EINVAL;
5281 		goto out_err;
5282 	}
5283 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5284 
5285 out_err:
5286 	kfree(mask_str);
5287 
5288 	return count;
5289 }
5290 
5291 int tracing_set_cpumask(struct trace_array *tr,
5292 			cpumask_var_t tracing_cpumask_new)
5293 {
5294 	int cpu;
5295 
5296 	if (!tr)
5297 		return -EINVAL;
5298 
5299 	local_irq_disable();
5300 	arch_spin_lock(&tr->max_lock);
5301 	for_each_tracing_cpu(cpu) {
5302 		/*
5303 		 * Increase/decrease the disabled counter if we are
5304 		 * about to flip a bit in the cpumask:
5305 		 */
5306 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5307 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5308 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5309 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5310 #ifdef CONFIG_TRACER_MAX_TRACE
5311 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5312 #endif
5313 		}
5314 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5315 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5316 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5317 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5318 #ifdef CONFIG_TRACER_MAX_TRACE
5319 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5320 #endif
5321 		}
5322 	}
5323 	arch_spin_unlock(&tr->max_lock);
5324 	local_irq_enable();
5325 
5326 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5327 
5328 	return 0;
5329 }
5330 
5331 static ssize_t
5332 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5333 		      size_t count, loff_t *ppos)
5334 {
5335 	struct trace_array *tr = file_inode(filp)->i_private;
5336 	cpumask_var_t tracing_cpumask_new;
5337 	int err;
5338 
5339 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5340 		return -ENOMEM;
5341 
5342 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5343 	if (err)
5344 		goto err_free;
5345 
5346 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5347 	if (err)
5348 		goto err_free;
5349 
5350 	free_cpumask_var(tracing_cpumask_new);
5351 
5352 	return count;
5353 
5354 err_free:
5355 	free_cpumask_var(tracing_cpumask_new);
5356 
5357 	return err;
5358 }
5359 
5360 static const struct file_operations tracing_cpumask_fops = {
5361 	.open		= tracing_open_generic_tr,
5362 	.read		= tracing_cpumask_read,
5363 	.write		= tracing_cpumask_write,
5364 	.release	= tracing_release_generic_tr,
5365 	.llseek		= generic_file_llseek,
5366 };
5367 
5368 static int tracing_trace_options_show(struct seq_file *m, void *v)
5369 {
5370 	struct tracer_opt *trace_opts;
5371 	struct trace_array *tr = m->private;
5372 	u32 tracer_flags;
5373 	int i;
5374 
5375 	mutex_lock(&trace_types_lock);
5376 	tracer_flags = tr->current_trace->flags->val;
5377 	trace_opts = tr->current_trace->flags->opts;
5378 
5379 	for (i = 0; trace_options[i]; i++) {
5380 		if (tr->trace_flags & (1 << i))
5381 			seq_printf(m, "%s\n", trace_options[i]);
5382 		else
5383 			seq_printf(m, "no%s\n", trace_options[i]);
5384 	}
5385 
5386 	for (i = 0; trace_opts[i].name; i++) {
5387 		if (tracer_flags & trace_opts[i].bit)
5388 			seq_printf(m, "%s\n", trace_opts[i].name);
5389 		else
5390 			seq_printf(m, "no%s\n", trace_opts[i].name);
5391 	}
5392 	mutex_unlock(&trace_types_lock);
5393 
5394 	return 0;
5395 }
5396 
5397 static int __set_tracer_option(struct trace_array *tr,
5398 			       struct tracer_flags *tracer_flags,
5399 			       struct tracer_opt *opts, int neg)
5400 {
5401 	struct tracer *trace = tracer_flags->trace;
5402 	int ret;
5403 
5404 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5405 	if (ret)
5406 		return ret;
5407 
5408 	if (neg)
5409 		tracer_flags->val &= ~opts->bit;
5410 	else
5411 		tracer_flags->val |= opts->bit;
5412 	return 0;
5413 }
5414 
5415 /* Try to assign a tracer specific option */
5416 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5417 {
5418 	struct tracer *trace = tr->current_trace;
5419 	struct tracer_flags *tracer_flags = trace->flags;
5420 	struct tracer_opt *opts = NULL;
5421 	int i;
5422 
5423 	for (i = 0; tracer_flags->opts[i].name; i++) {
5424 		opts = &tracer_flags->opts[i];
5425 
5426 		if (strcmp(cmp, opts->name) == 0)
5427 			return __set_tracer_option(tr, trace->flags, opts, neg);
5428 	}
5429 
5430 	return -EINVAL;
5431 }
5432 
5433 /* Some tracers require overwrite to stay enabled */
5434 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5435 {
5436 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5437 		return -1;
5438 
5439 	return 0;
5440 }
5441 
5442 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5443 {
5444 	int *map;
5445 
5446 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5447 	    (mask == TRACE_ITER_RECORD_CMD))
5448 		lockdep_assert_held(&event_mutex);
5449 
5450 	/* do nothing if flag is already set */
5451 	if (!!(tr->trace_flags & mask) == !!enabled)
5452 		return 0;
5453 
5454 	/* Give the tracer a chance to approve the change */
5455 	if (tr->current_trace->flag_changed)
5456 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5457 			return -EINVAL;
5458 
5459 	if (enabled)
5460 		tr->trace_flags |= mask;
5461 	else
5462 		tr->trace_flags &= ~mask;
5463 
5464 	if (mask == TRACE_ITER_RECORD_CMD)
5465 		trace_event_enable_cmd_record(enabled);
5466 
5467 	if (mask == TRACE_ITER_RECORD_TGID) {
5468 		if (!tgid_map) {
5469 			tgid_map_max = pid_max;
5470 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5471 				       GFP_KERNEL);
5472 
5473 			/*
5474 			 * Pairs with smp_load_acquire() in
5475 			 * trace_find_tgid_ptr() to ensure that if it observes
5476 			 * the tgid_map we just allocated then it also observes
5477 			 * the corresponding tgid_map_max value.
5478 			 */
5479 			smp_store_release(&tgid_map, map);
5480 		}
5481 		if (!tgid_map) {
5482 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5483 			return -ENOMEM;
5484 		}
5485 
5486 		trace_event_enable_tgid_record(enabled);
5487 	}
5488 
5489 	if (mask == TRACE_ITER_EVENT_FORK)
5490 		trace_event_follow_fork(tr, enabled);
5491 
5492 	if (mask == TRACE_ITER_FUNC_FORK)
5493 		ftrace_pid_follow_fork(tr, enabled);
5494 
5495 	if (mask == TRACE_ITER_OVERWRITE) {
5496 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5497 #ifdef CONFIG_TRACER_MAX_TRACE
5498 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5499 #endif
5500 	}
5501 
5502 	if (mask == TRACE_ITER_PRINTK) {
5503 		trace_printk_start_stop_comm(enabled);
5504 		trace_printk_control(enabled);
5505 	}
5506 
5507 	return 0;
5508 }
5509 
5510 int trace_set_options(struct trace_array *tr, char *option)
5511 {
5512 	char *cmp;
5513 	int neg = 0;
5514 	int ret;
5515 	size_t orig_len = strlen(option);
5516 	int len;
5517 
5518 	cmp = strstrip(option);
5519 
5520 	len = str_has_prefix(cmp, "no");
5521 	if (len)
5522 		neg = 1;
5523 
5524 	cmp += len;
5525 
5526 	mutex_lock(&event_mutex);
5527 	mutex_lock(&trace_types_lock);
5528 
5529 	ret = match_string(trace_options, -1, cmp);
5530 	/* If no option could be set, test the specific tracer options */
5531 	if (ret < 0)
5532 		ret = set_tracer_option(tr, cmp, neg);
5533 	else
5534 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5535 
5536 	mutex_unlock(&trace_types_lock);
5537 	mutex_unlock(&event_mutex);
5538 
5539 	/*
5540 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5541 	 * turn it back into a space.
5542 	 */
5543 	if (orig_len > strlen(option))
5544 		option[strlen(option)] = ' ';
5545 
5546 	return ret;
5547 }
5548 
5549 static void __init apply_trace_boot_options(void)
5550 {
5551 	char *buf = trace_boot_options_buf;
5552 	char *option;
5553 
5554 	while (true) {
5555 		option = strsep(&buf, ",");
5556 
5557 		if (!option)
5558 			break;
5559 
5560 		if (*option)
5561 			trace_set_options(&global_trace, option);
5562 
5563 		/* Put back the comma to allow this to be called again */
5564 		if (buf)
5565 			*(buf - 1) = ',';
5566 	}
5567 }
5568 
5569 static ssize_t
5570 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5571 			size_t cnt, loff_t *ppos)
5572 {
5573 	struct seq_file *m = filp->private_data;
5574 	struct trace_array *tr = m->private;
5575 	char buf[64];
5576 	int ret;
5577 
5578 	if (cnt >= sizeof(buf))
5579 		return -EINVAL;
5580 
5581 	if (copy_from_user(buf, ubuf, cnt))
5582 		return -EFAULT;
5583 
5584 	buf[cnt] = 0;
5585 
5586 	ret = trace_set_options(tr, buf);
5587 	if (ret < 0)
5588 		return ret;
5589 
5590 	*ppos += cnt;
5591 
5592 	return cnt;
5593 }
5594 
5595 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5596 {
5597 	struct trace_array *tr = inode->i_private;
5598 	int ret;
5599 
5600 	ret = tracing_check_open_get_tr(tr);
5601 	if (ret)
5602 		return ret;
5603 
5604 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5605 	if (ret < 0)
5606 		trace_array_put(tr);
5607 
5608 	return ret;
5609 }
5610 
5611 static const struct file_operations tracing_iter_fops = {
5612 	.open		= tracing_trace_options_open,
5613 	.read		= seq_read,
5614 	.llseek		= seq_lseek,
5615 	.release	= tracing_single_release_tr,
5616 	.write		= tracing_trace_options_write,
5617 };
5618 
/*
 * Help text handed out verbatim by tracing_readme_read().
 *
 * The message is assembled at compile time from adjacent string literals;
 * sections are included or left out depending on the CONFIG_* options
 * tested below. Keep every fragment terminated with "\n" so the pieces
 * concatenate into well-formed lines.
 */
static const char readme_msg[] =
	"tracing mini-HOWTO:\n\n"
	"# echo 0 > tracing_on : quick way to disable tracing\n"
	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
	" Important files:\n"
	"  trace\t\t\t- The static contents of the buffer\n"
	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
	"  current_tracer\t- function and latency tracers\n"
	"  available_tracers\t- list of configured tracers for current_tracer\n"
	"  error_log\t- error log for failed commands (that support it)\n"
	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
	"  trace_clock\t\t- change the clock used to order events\n"
	"       local:   Per cpu clock but may not be synced across CPUs\n"
	"      global:   Synced across CPUs but slows tracing down.\n"
	"     counter:   Not a clock, but just an increment\n"
	"      uptime:   Jiffy counter from time of boot\n"
	"        perf:   Same clock that perf events use\n"
#ifdef CONFIG_X86_64
	"     x86-tsc:   TSC cycle counter\n"
#endif
	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
	"       delta:   Delta difference against a buffer-wide timestamp\n"
	"    absolute:   Absolute (standalone) timestamp\n"
	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
	"  tracing_cpumask\t- Limit which CPUs to trace\n"
	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
	"\t\t\t  Remove sub-buffer with rmdir\n"
	"  trace_options\t\t- Set format or modify how tracing happens\n"
	"\t\t\t  Disable an option by prefixing 'no' to the\n"
	"\t\t\t  option name\n"
	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"\n  available_filter_functions - list of functions that can be filtered on\n"
	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
	"\t\t\t  functions\n"
	"\t     accepts: func_full_name or glob-matching-pattern\n"
	"\t     modules: Can select a group via module\n"
	"\t      Format: :mod:<module-name>\n"
	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
	"\t    triggers: a command to perform when function is hit\n"
	"\t      Format: <function>:<trigger>[:count]\n"
	"\t     trigger: traceon, traceoff\n"
	"\t\t      enable_event:<system>:<event>\n"
	"\t\t      disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
	"\t\t      stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t      snapshot\n"
#endif
	"\t\t      dump\n"
	"\t\t      cpudump\n"
	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
	"\t     The first one will disable tracing every time do_fault is hit\n"
	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
	"\t       The first time do_trap is hit and it disables tracing, the\n"
	"\t       counter will decrement to 2. If tracing is already disabled,\n"
	"\t       the counter will not decrement. It only decrements when the\n"
	"\t       trigger did work\n"
	"\t     To remove trigger without count:\n"
	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
	"\t     To remove trigger with a count:\n"
	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"\t    modules: Can select a group via module command :mod:\n"
	"\t    Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
	"\t\t    (function)\n"
	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
	"\t\t    (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
	"\t\t\t  snapshot buffer. Read the contents for more\n"
	"\t\t\t  information\n"
#endif
#ifdef CONFIG_STACK_TRACER
	"  stack_trace\t\t- Shows the max stack trace when active\n"
	"  stack_max_size\t- Shows current max stack size that was traced\n"
	"\t\t\t  Write into this file to reset the max size (trigger a\n"
	"\t\t\t  new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
	"\t\t\t  traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
#ifdef CONFIG_DYNAMIC_EVENTS
	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_KPROBE_EVENTS
	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
    defined(CONFIG_FPROBE_EVENTS)
	"\t  accepts: event-definitions (one definition per line)\n"
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
#endif
#ifdef CONFIG_FPROBE_EVENTS
	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t           s:[synthetic/]<event> <field> [<field>]\n"
#endif
	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
	"\t           -:[<group>/][<event>]\n"
#ifdef CONFIG_KPROBE_EVENTS
	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
  "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
  "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
#endif
	"\t     args: <name>=fetcharg[:type]\n"
	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
	"\t           <argname>[->field[->field|.field...]],\n"
#else
	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#endif
#else
	"\t           $stack<index>, $stack, $retval, $comm,\n"
#endif
	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
	"\t           symstr, <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t    field: <stype> <name>;\n"
	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
	"\t           [unsigned] char/int/long\n"
#endif
	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
	"\t            of the <attached-group>/<attached-event>.\n"
#endif
	"  events/\t\t- Directory containing all trace event subsystems:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
	"\t\t\t  events\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"  events/<system>/<event>/\t- Directory containing control files for\n"
	"\t\t\t  <event>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"      trigger\t\t- If set, a command to perform when event is hit\n"
	"\t    Format: <trigger>[:count][if <filter>]\n"
	"\t   trigger: traceon, traceoff\n"
	"\t            enable_event:<system>:<event>\n"
	"\t            disable_event:<system>:<event>\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t            enable_hist:<system>:<event>\n"
	"\t            disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_STACKTRACE
	"\t\t    stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t    snapshot\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t\t    hist (see below)\n"
#endif
	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
	"\t                  events/block/block_unplug/trigger\n"
	"\t   The first disables tracing every time block_unplug is hit.\n"
	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
	"\t   Like function triggers, the counter is only decremented if it\n"
	"\t    enabled or disabled tracing.\n"
	"\t   To remove a trigger without a count:\n"
	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
	"\t   To remove a trigger with a count:\n"
	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
	"\t   Filters can be ignored when removing a trigger.\n"
#ifdef CONFIG_HIST_TRIGGERS
	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
	"\t    Format: hist:keys=<field1[,field2,...]>\n"
	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
	"\t            [:values=<field1[,field2,...]>]\n"
	"\t            [:sort=<field1[,field2,...]>]\n"
	"\t            [:size=#entries]\n"
	"\t            [:pause][:continue][:clear]\n"
	"\t            [:name=histname1]\n"
	"\t            [:nohitcount]\n"
	"\t            [:<handler>.<action>]\n"
	"\t            [if <filter>]\n\n"
	"\t    Note, special fields can be used as well:\n"
	"\t            common_timestamp - to record current timestamp\n"
	"\t            common_cpu - to record the CPU the event happened on\n"
	"\n"
	"\t    A hist trigger variable can be:\n"
	"\t        - a reference to a field e.g. x=current_timestamp,\n"
	"\t        - a reference to another variable e.g. y=$x,\n"
	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
	"\n"
	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
	"\t    variable reference, field or numeric literal.\n"
	"\n"
	"\t    When a matching event is hit, an entry is added to a hash\n"
	"\t    table using the key(s) and value(s) named, and the value of a\n"
	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
	"\t    correspond to fields in the event's format description.  Keys\n"
	"\t    can be any field, or the special string 'common_stacktrace'.\n"
	"\t    Compound keys consisting of up to two fields can be specified\n"
	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
	"\t    fields.  Sort keys consisting of up to two fields can be\n"
	"\t    specified using the 'sort' keyword.  The sort direction can\n"
	"\t    be modified by appending '.descending' or '.ascending' to a\n"
	"\t    sort field.  The 'size' parameter can be used to specify more\n"
	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
	"\t    its histogram data will be shared with other triggers of the\n"
	"\t    same name, and trigger hits will update this common data.\n\n"
	"\t    Reading the 'hist' file for the event will dump the hash\n"
	"\t    table in its entirety to stdout.  If there are multiple hist\n"
	"\t    triggers attached to an event, there will be a table for each\n"
	"\t    trigger in the output.  The table displayed for a named\n"
	"\t    trigger will be the same as any other instance having the\n"
	"\t    same name.  The default format used to display a given field\n"
	"\t    can be modified by appending any of the following modifiers\n"
	"\t    to the field name, as applicable:\n\n"
	"\t            .hex        display a number as a hex value\n"
	"\t            .sym        display an address as a symbol\n"
	"\t            .sym-offset display an address as a symbol and offset\n"
	"\t            .execname   display a common_pid as a program name\n"
	"\t            .syscall    display a syscall id as a syscall name\n"
	"\t            .log2       display log2 value rather than raw number\n"
	"\t            .buckets=size  display values in groups of size rather than raw number\n"
	"\t            .usecs      display a common_timestamp in microseconds\n"
	"\t            .percent    display a number as a percentage value\n"
	"\t            .graph      display a bar-graph of a value\n\n"
	"\t    The 'pause' parameter can be used to pause an existing hist\n"
	"\t    trigger or to start a hist trigger but not log any events\n"
	"\t    until told to do so.  'continue' can be used to start or\n"
	"\t    restart a paused hist trigger.\n\n"
	"\t    The 'clear' parameter will clear the contents of a running\n"
	"\t    hist trigger and leave its current paused/active state\n"
	"\t    unchanged.\n\n"
	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
	"\t    raw hitcount in the histogram.\n\n"
	"\t    The enable_hist and disable_hist triggers can be used to\n"
	"\t    have one event conditionally start and stop another event's\n"
	"\t    already-attached hist trigger.  The syntax is analogous to\n"
	"\t    the enable_event and disable_event triggers.\n\n"
	"\t    Hist trigger handlers and actions are executed whenever a\n"
	"\t    histogram entry is added or updated.  They take the form:\n\n"
	"\t        <handler>.<action>\n\n"
	"\t    The available handlers are:\n\n"
	"\t        onmatch(matching.event)  - invoke on addition or update\n"
	"\t        onmax(var)               - invoke if var exceeds current max\n"
	"\t        onchange(var)            - invoke action if var changes\n\n"
	"\t    The available actions are:\n\n"
	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
	"\t        save(field,...)                      - save current event fields\n"
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t        snapshot()                           - snapshot the trace buffer\n\n"
#endif
#ifdef CONFIG_SYNTH_EVENTS
	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
	"\t  Write into this file to define/undefine new synthetic events.\n"
	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
#endif
#endif
;
5911 
5912 static ssize_t
5913 tracing_readme_read(struct file *filp, char __user *ubuf,
5914 		       size_t cnt, loff_t *ppos)
5915 {
5916 	return simple_read_from_buffer(ubuf, cnt, ppos,
5917 					readme_msg, strlen(readme_msg));
5918 }
5919 
5920 static const struct file_operations tracing_readme_fops = {
5921 	.open		= tracing_open_generic,
5922 	.read		= tracing_readme_read,
5923 	.llseek		= generic_file_llseek,
5924 };
5925 
5926 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5927 {
5928 	int pid = ++(*pos);
5929 
5930 	return trace_find_tgid_ptr(pid);
5931 }
5932 
5933 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5934 {
5935 	int pid = *pos;
5936 
5937 	return trace_find_tgid_ptr(pid);
5938 }
5939 
/* seq_file stop callback: start/next acquire nothing, so nothing to undo. */
static void saved_tgids_stop(struct seq_file *m, void *v)
{
	/* intentionally empty */
}
5943 
5944 static int saved_tgids_show(struct seq_file *m, void *v)
5945 {
5946 	int *entry = (int *)v;
5947 	int pid = entry - tgid_map;
5948 	int tgid = *entry;
5949 
5950 	if (tgid == 0)
5951 		return SEQ_SKIP;
5952 
5953 	seq_printf(m, "%d %d\n", pid, tgid);
5954 	return 0;
5955 }
5956 
5957 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5958 	.start		= saved_tgids_start,
5959 	.stop		= saved_tgids_stop,
5960 	.next		= saved_tgids_next,
5961 	.show		= saved_tgids_show,
5962 };
5963 
5964 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5965 {
5966 	int ret;
5967 
5968 	ret = tracing_check_open_get_tr(NULL);
5969 	if (ret)
5970 		return ret;
5971 
5972 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5973 }
5974 
5975 
5976 static const struct file_operations tracing_saved_tgids_fops = {
5977 	.open		= tracing_saved_tgids_open,
5978 	.read		= seq_read,
5979 	.llseek		= seq_lseek,
5980 	.release	= seq_release,
5981 };
5982 
5983 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5984 {
5985 	unsigned int *ptr = v;
5986 
5987 	if (*pos || m->count)
5988 		ptr++;
5989 
5990 	(*pos)++;
5991 
5992 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5993 	     ptr++) {
5994 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5995 			continue;
5996 
5997 		return ptr;
5998 	}
5999 
6000 	return NULL;
6001 }
6002 
6003 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
6004 {
6005 	void *v;
6006 	loff_t l = 0;
6007 
6008 	preempt_disable();
6009 	arch_spin_lock(&trace_cmdline_lock);
6010 
6011 	v = &savedcmd->map_cmdline_to_pid[0];
6012 	while (l <= *pos) {
6013 		v = saved_cmdlines_next(m, v, &l);
6014 		if (!v)
6015 			return NULL;
6016 	}
6017 
6018 	return v;
6019 }
6020 
6021 static void saved_cmdlines_stop(struct seq_file *m, void *v)
6022 {
6023 	arch_spin_unlock(&trace_cmdline_lock);
6024 	preempt_enable();
6025 }
6026 
6027 static int saved_cmdlines_show(struct seq_file *m, void *v)
6028 {
6029 	char buf[TASK_COMM_LEN];
6030 	unsigned int *pid = v;
6031 
6032 	__trace_find_cmdline(*pid, buf);
6033 	seq_printf(m, "%d %s\n", *pid, buf);
6034 	return 0;
6035 }
6036 
6037 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6038 	.start		= saved_cmdlines_start,
6039 	.next		= saved_cmdlines_next,
6040 	.stop		= saved_cmdlines_stop,
6041 	.show		= saved_cmdlines_show,
6042 };
6043 
6044 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6045 {
6046 	int ret;
6047 
6048 	ret = tracing_check_open_get_tr(NULL);
6049 	if (ret)
6050 		return ret;
6051 
6052 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6053 }
6054 
6055 static const struct file_operations tracing_saved_cmdlines_fops = {
6056 	.open		= tracing_saved_cmdlines_open,
6057 	.read		= seq_read,
6058 	.llseek		= seq_lseek,
6059 	.release	= seq_release,
6060 };
6061 
6062 static ssize_t
6063 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6064 				 size_t cnt, loff_t *ppos)
6065 {
6066 	char buf[64];
6067 	int r;
6068 
6069 	preempt_disable();
6070 	arch_spin_lock(&trace_cmdline_lock);
6071 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6072 	arch_spin_unlock(&trace_cmdline_lock);
6073 	preempt_enable();
6074 
6075 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6076 }
6077 
6078 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6079 {
6080 	kfree(s->saved_cmdlines);
6081 	kfree(s->map_cmdline_to_pid);
6082 	kfree(s);
6083 }
6084 
6085 static int tracing_resize_saved_cmdlines(unsigned int val)
6086 {
6087 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
6088 
6089 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6090 	if (!s)
6091 		return -ENOMEM;
6092 
6093 	if (allocate_cmdlines_buffer(val, s) < 0) {
6094 		kfree(s);
6095 		return -ENOMEM;
6096 	}
6097 
6098 	preempt_disable();
6099 	arch_spin_lock(&trace_cmdline_lock);
6100 	savedcmd_temp = savedcmd;
6101 	savedcmd = s;
6102 	arch_spin_unlock(&trace_cmdline_lock);
6103 	preempt_enable();
6104 	free_saved_cmdlines_buffer(savedcmd_temp);
6105 
6106 	return 0;
6107 }
6108 
6109 static ssize_t
6110 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6111 				  size_t cnt, loff_t *ppos)
6112 {
6113 	unsigned long val;
6114 	int ret;
6115 
6116 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6117 	if (ret)
6118 		return ret;
6119 
6120 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
6121 	if (!val || val > PID_MAX_DEFAULT)
6122 		return -EINVAL;
6123 
6124 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
6125 	if (ret < 0)
6126 		return ret;
6127 
6128 	*ppos += cnt;
6129 
6130 	return cnt;
6131 }
6132 
6133 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6134 	.open		= tracing_open_generic,
6135 	.read		= tracing_saved_cmdlines_size_read,
6136 	.write		= tracing_saved_cmdlines_size_write,
6137 };
6138 
6139 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6140 static union trace_eval_map_item *
6141 update_eval_map(union trace_eval_map_item *ptr)
6142 {
6143 	if (!ptr->map.eval_string) {
6144 		if (ptr->tail.next) {
6145 			ptr = ptr->tail.next;
6146 			/* Set ptr to the next real item (skip head) */
6147 			ptr++;
6148 		} else
6149 			return NULL;
6150 	}
6151 	return ptr;
6152 }
6153 
6154 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6155 {
6156 	union trace_eval_map_item *ptr = v;
6157 
6158 	/*
6159 	 * Paranoid! If ptr points to end, we don't want to increment past it.
6160 	 * This really should never happen.
6161 	 */
6162 	(*pos)++;
6163 	ptr = update_eval_map(ptr);
6164 	if (WARN_ON_ONCE(!ptr))
6165 		return NULL;
6166 
6167 	ptr++;
6168 	ptr = update_eval_map(ptr);
6169 
6170 	return ptr;
6171 }
6172 
6173 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6174 {
6175 	union trace_eval_map_item *v;
6176 	loff_t l = 0;
6177 
6178 	mutex_lock(&trace_eval_mutex);
6179 
6180 	v = trace_eval_maps;
6181 	if (v)
6182 		v++;
6183 
6184 	while (v && l < *pos) {
6185 		v = eval_map_next(m, v, &l);
6186 	}
6187 
6188 	return v;
6189 }
6190 
6191 static void eval_map_stop(struct seq_file *m, void *v)
6192 {
6193 	mutex_unlock(&trace_eval_mutex);
6194 }
6195 
6196 static int eval_map_show(struct seq_file *m, void *v)
6197 {
6198 	union trace_eval_map_item *ptr = v;
6199 
6200 	seq_printf(m, "%s %ld (%s)\n",
6201 		   ptr->map.eval_string, ptr->map.eval_value,
6202 		   ptr->map.system);
6203 
6204 	return 0;
6205 }
6206 
/*
 * seq_file iterator over trace_eval_maps. ->start takes trace_eval_mutex
 * and ->stop releases it.
 */
static const struct seq_operations tracing_eval_map_seq_ops = {
	.start		= eval_map_start,
	.next		= eval_map_next,
	.stop		= eval_map_stop,
	.show		= eval_map_show,
};
6213 
6214 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6215 {
6216 	int ret;
6217 
6218 	ret = tracing_check_open_get_tr(NULL);
6219 	if (ret)
6220 		return ret;
6221 
6222 	return seq_open(filp, &tracing_eval_map_seq_ops);
6223 }
6224 
/*
 * File operations for the eval map file: a plain seq_file backed by
 * tracing_eval_map_seq_ops (installed by tracing_eval_map_open()).
 */
static const struct file_operations tracing_eval_map_fops = {
	.open		= tracing_eval_map_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
6231 
6232 static inline union trace_eval_map_item *
6233 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6234 {
6235 	/* Return tail of array given the head */
6236 	return ptr + ptr->head.length + 1;
6237 }
6238 
6239 static void
6240 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6241 			   int len)
6242 {
6243 	struct trace_eval_map **stop;
6244 	struct trace_eval_map **map;
6245 	union trace_eval_map_item *map_array;
6246 	union trace_eval_map_item *ptr;
6247 
6248 	stop = start + len;
6249 
6250 	/*
6251 	 * The trace_eval_maps contains the map plus a head and tail item,
6252 	 * where the head holds the module and length of array, and the
6253 	 * tail holds a pointer to the next list.
6254 	 */
6255 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6256 	if (!map_array) {
6257 		pr_warn("Unable to allocate trace eval mapping\n");
6258 		return;
6259 	}
6260 
6261 	mutex_lock(&trace_eval_mutex);
6262 
6263 	if (!trace_eval_maps)
6264 		trace_eval_maps = map_array;
6265 	else {
6266 		ptr = trace_eval_maps;
6267 		for (;;) {
6268 			ptr = trace_eval_jmp_to_tail(ptr);
6269 			if (!ptr->tail.next)
6270 				break;
6271 			ptr = ptr->tail.next;
6272 
6273 		}
6274 		ptr->tail.next = map_array;
6275 	}
6276 	map_array->head.mod = mod;
6277 	map_array->head.length = len;
6278 	map_array++;
6279 
6280 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6281 		map_array->map = **map;
6282 		map_array++;
6283 	}
6284 	memset(map_array, 0, sizeof(*map_array));
6285 
6286 	mutex_unlock(&trace_eval_mutex);
6287 }
6288 
/*
 * Create the "eval_map" file (mode TRACE_MODE_READ) under @d_tracer, served
 * by tracing_eval_map_fops with no private data.
 */
static void trace_create_eval_file(struct dentry *d_tracer)
{
	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
			  NULL, &tracing_eval_map_fops);
}
6294 
6295 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* Without CONFIG_TRACE_EVAL_MAP_FILE both eval map file helpers are no-ops. */
static inline void trace_create_eval_file(struct dentry *d_tracer) { }
static inline void trace_insert_eval_map_file(struct module *mod,
			      struct trace_eval_map **start, int len) { }
6299 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6300 
/*
 * Register @len eval maps starting at @start: update the trace events with
 * them, then add them to the eval map file list on behalf of @mod.
 */
static void trace_insert_eval_map(struct module *mod,
				  struct trace_eval_map **start, int len)
{
	/* Nothing to register. */
	if (len <= 0)
		return;

	trace_event_eval_update(start, len);
	trace_insert_eval_map_file(mod, start, len);
}
6315 
6316 static ssize_t
6317 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6318 		       size_t cnt, loff_t *ppos)
6319 {
6320 	struct trace_array *tr = filp->private_data;
6321 	char buf[MAX_TRACER_SIZE+2];
6322 	int r;
6323 
6324 	mutex_lock(&trace_types_lock);
6325 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6326 	mutex_unlock(&trace_types_lock);
6327 
6328 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6329 }
6330 
/*
 * Reset @tr's main buffer for the online CPUs, then run tracer @t's init
 * callback on @tr and return its result.
 *
 * t->init is called unconditionally, so the caller must make sure it is set
 * (tracing_set_tracer() checks before calling).
 */
int tracer_init(struct tracer *t, struct trace_array *tr)
{
	tracing_reset_online_cpus(&tr->array_buffer);
	return t->init(tr);
}
6336 
/* Record @val as the entries value of @buf for every tracing CPU. */
static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
{
	int cpu;

	for_each_tracing_cpu(cpu)
		per_cpu_ptr(buf->data, cpu)->entries = val;
}
6344 
6345 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6346 {
6347 	if (cpu == RING_BUFFER_ALL_CPUS) {
6348 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6349 	} else {
6350 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6351 	}
6352 }
6353 
6354 #ifdef CONFIG_TRACER_MAX_TRACE
/*
 * Resize @trace_buf's ring buffer to the per-CPU entries recorded in
 * @size_buf, for @cpu_id or for every tracing CPU when @cpu_id is
 * RING_BUFFER_ALL_CPUS.
 *
 * After each successful resize the recorded entries of @trace_buf are set to
 * those of @size_buf. In the all-CPU case the loop stops at the first
 * failure, leaving the CPUs already handled resized.
 *
 * Returns the result of the last ring_buffer_resize() call.
 */
static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id)
{
	int cpu, ret = 0;

	if (cpu_id == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
			if (ret < 0)
				break;
			per_cpu_ptr(trace_buf->data, cpu)->entries =
				per_cpu_ptr(size_buf->data, cpu)->entries;
		}
	} else {
		ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
		if (ret == 0)
			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
				per_cpu_ptr(size_buf->data, cpu_id)->entries;
	}

	return ret;
}
6380 #endif /* CONFIG_TRACER_MAX_TRACE */
6381 
6382 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6383 					unsigned long size, int cpu)
6384 {
6385 	int ret;
6386 
6387 	/*
6388 	 * If kernel or user changes the size of the ring buffer
6389 	 * we use the size that was given, and we can forget about
6390 	 * expanding it later.
6391 	 */
6392 	ring_buffer_expanded = true;
6393 
6394 	/* May be called before buffers are initialized */
6395 	if (!tr->array_buffer.buffer)
6396 		return 0;
6397 
6398 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6399 	if (ret < 0)
6400 		return ret;
6401 
6402 #ifdef CONFIG_TRACER_MAX_TRACE
6403 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6404 	    !tr->current_trace->use_max_tr)
6405 		goto out;
6406 
6407 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6408 	if (ret < 0) {
6409 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6410 						     &tr->array_buffer, cpu);
6411 		if (r < 0) {
6412 			/*
6413 			 * AARGH! We are left with different
6414 			 * size max buffer!!!!
6415 			 * The max buffer is our "snapshot" buffer.
6416 			 * When a tracer needs a snapshot (one of the
6417 			 * latency tracers), it swaps the max buffer
6418 			 * with the saved snap shot. We succeeded to
6419 			 * update the size of the main buffer, but failed to
6420 			 * update the size of the max buffer. But when we tried
6421 			 * to reset the main buffer to the original size, we
6422 			 * failed there too. This is very unlikely to
6423 			 * happen, but if it does, warn and kill all
6424 			 * tracing.
6425 			 */
6426 			WARN_ON(1);
6427 			tracing_disabled = 1;
6428 		}
6429 		return ret;
6430 	}
6431 
6432 	update_buffer_entries(&tr->max_buffer, cpu);
6433 
6434  out:
6435 #endif /* CONFIG_TRACER_MAX_TRACE */
6436 
6437 	update_buffer_entries(&tr->array_buffer, cpu);
6438 
6439 	return ret;
6440 }
6441 
6442 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6443 				  unsigned long size, int cpu_id)
6444 {
6445 	int ret;
6446 
6447 	mutex_lock(&trace_types_lock);
6448 
6449 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6450 		/* make sure, this cpu is enabled in the mask */
6451 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6452 			ret = -EINVAL;
6453 			goto out;
6454 		}
6455 	}
6456 
6457 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6458 	if (ret < 0)
6459 		ret = -ENOMEM;
6460 
6461 out:
6462 	mutex_unlock(&trace_types_lock);
6463 
6464 	return ret;
6465 }
6466 
6467 
/**
 * tracing_update_buffers - used by tracing facility to expand ring buffers
 *
 * To save memory when tracing is configured in but never used, the ring
 * buffers are set to a minimum size at boot. Once a user starts to use the
 * tracing facility, they need to grow to their default size.
 *
 * This function is to be called when a tracer is about to be used. It
 * resizes the global trace array to trace_buf_size on all CPUs, under
 * trace_types_lock, unless the buffers have already been expanded.
 *
 * Return: 0 if the buffers were already expanded, otherwise the result of
 * __tracing_resize_ring_buffer().
 */
int tracing_update_buffers(void)
{
	int ret = 0;

	mutex_lock(&trace_types_lock);
	if (!ring_buffer_expanded)
		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
	mutex_unlock(&trace_types_lock);

	return ret;
}
6490 
6491 struct trace_option_dentry;
6492 
6493 static void
6494 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6495 
6496 /*
6497  * Used to clear out the tracer before deletion of an instance.
6498  * Must have trace_types_lock held.
6499  */
6500 static void tracing_set_nop(struct trace_array *tr)
6501 {
6502 	if (tr->current_trace == &nop_trace)
6503 		return;
6504 
6505 	tr->current_trace->enabled--;
6506 
6507 	if (tr->current_trace->reset)
6508 		tr->current_trace->reset(tr);
6509 
6510 	tr->current_trace = &nop_trace;
6511 }
6512 
6513 static bool tracer_options_updated;
6514 
6515 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6516 {
6517 	/* Only enable if the directory has been created already. */
6518 	if (!tr->dir)
6519 		return;
6520 
6521 	/* Only create trace option files after update_tracer_options finish */
6522 	if (!tracer_options_updated)
6523 		return;
6524 
6525 	create_trace_option_files(tr, t);
6526 }
6527 
/*
 * Switch @tr to the tracer registered under the name @buf.
 *
 * The whole operation runs under trace_types_lock.
 *
 * Returns 0 on success, which includes the named tracer already being
 * current and a noboot tracer being skipped (with a warning) before the
 * system is running. Returns -EINVAL if no tracer has that name or it is
 * not allowed for @tr, -EBUSY if the new tracer needs the snapshot buffer
 * while a conditional snapshot is in use or if trace pipe readers hold a
 * reference, or the error from expanding the ring buffer, allocating the
 * snapshot buffer, or the tracer's init callback.
 */
int tracing_set_tracer(struct trace_array *tr, const char *buf)
{
	struct tracer *t;
#ifdef CONFIG_TRACER_MAX_TRACE
	bool had_max_tr;
#endif
	int ret = 0;

	mutex_lock(&trace_types_lock);

	/* First real use of tracing: grow the boot-time minimal buffers. */
	if (!ring_buffer_expanded) {
		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			goto out;
		ret = 0;
	}

	/* Look the tracer up by name in the list of registered tracers. */
	for (t = trace_types; t; t = t->next) {
		if (strcmp(t->name, buf) == 0)
			break;
	}
	if (!t) {
		ret = -EINVAL;
		goto out;
	}
	if (t == tr->current_trace)
		goto out;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* A tracer using the max buffer cannot coexist with a cond_snapshot. */
	if (t->use_max_tr) {
		local_irq_disable();
		arch_spin_lock(&tr->max_lock);
		if (tr->cond_snapshot)
			ret = -EBUSY;
		arch_spin_unlock(&tr->max_lock);
		local_irq_enable();
		if (ret)
			goto out;
	}
#endif
	/* Some tracers won't work on kernel command line */
	if (system_state < SYSTEM_RUNNING && t->noboot) {
		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
			t->name);
		goto out;
	}

	/* Some tracers are only allowed for the top level buffer */
	if (!trace_ok_for_array(t, tr)) {
		ret = -EINVAL;
		goto out;
	}

	/* If trace pipe files are being read, we can't change the tracer */
	if (tr->trace_ref) {
		ret = -EBUSY;
		goto out;
	}

	trace_branch_disable();

	/* Tear down the old tracer. */
	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

#ifdef CONFIG_TRACER_MAX_TRACE
	had_max_tr = tr->current_trace->use_max_tr;

	/* Current trace needs to be nop_trace before synchronize_rcu */
	tr->current_trace = &nop_trace;

	if (had_max_tr && !t->use_max_tr) {
		/*
		 * We need to make sure that the update_max_tr sees that
		 * current_trace changed to nop_trace to keep it from
		 * swapping the buffers after we resize it.
		 * The update_max_tr is called from interrupts disabled
		 * so waiting in synchronize_rcu() is sufficient.
		 */
		synchronize_rcu();
		free_snapshot(tr);
	}

	if (t->use_max_tr && !tr->allocated_snapshot) {
		ret = tracing_alloc_snapshot_instance(tr);
		/* On failure @tr is left running the nop tracer. */
		if (ret < 0)
			goto out;
	}
#else
	tr->current_trace = &nop_trace;
#endif

	if (t->init) {
		ret = tracer_init(t, tr);
		/* On failure @tr is left running the nop tracer. */
		if (ret)
			goto out;
	}

	tr->current_trace = t;
	tr->current_trace->enabled++;
	trace_branch_enable(tr);
 out:
	mutex_unlock(&trace_types_lock);

	return ret;
}
6636 
6637 static ssize_t
6638 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6639 			size_t cnt, loff_t *ppos)
6640 {
6641 	struct trace_array *tr = filp->private_data;
6642 	char buf[MAX_TRACER_SIZE+1];
6643 	char *name;
6644 	size_t ret;
6645 	int err;
6646 
6647 	ret = cnt;
6648 
6649 	if (cnt > MAX_TRACER_SIZE)
6650 		cnt = MAX_TRACER_SIZE;
6651 
6652 	if (copy_from_user(buf, ubuf, cnt))
6653 		return -EFAULT;
6654 
6655 	buf[cnt] = 0;
6656 
6657 	name = strim(buf);
6658 
6659 	err = tracing_set_tracer(tr, name);
6660 	if (err)
6661 		return err;
6662 
6663 	*ppos += ret;
6664 
6665 	return ret;
6666 }
6667 
/*
 * Common read helper for values kept in *@ptr: the value is passed through
 * nsecs_to_usecs() and printed as a decimal followed by '\n', except that an
 * all-ones value is printed as -1.
 */
static ssize_t
tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	char buf[64];
	int r;

	r = snprintf(buf, sizeof(buf), "%ld\n",
		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
	/*
	 * NOTE(review): snprintf() returns the length that would have been
	 * written, so r == sizeof(buf) also means truncation; the output of
	 * one long cannot get near 64 bytes, so this clamp is not expected
	 * to trigger.
	 */
	if (r > sizeof(buf))
		r = sizeof(buf);
	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
6681 
/*
 * Common write helper: parse a decimal number from user space and store it,
 * multiplied by 1000, in *@ptr.
 *
 * Returns @cnt on success or the error from kstrtoul_from_user(). @ppos is
 * not advanced here.
 */
static ssize_t
tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	*ptr = val * 1000;

	return cnt;
}
6697 
/* Read handler for the global tracing_thresh value, via tracing_nsecs_read(). */
static ssize_t
tracing_thresh_read(struct file *filp, char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
}
6704 
6705 static ssize_t
6706 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6707 		     size_t cnt, loff_t *ppos)
6708 {
6709 	struct trace_array *tr = filp->private_data;
6710 	int ret;
6711 
6712 	mutex_lock(&trace_types_lock);
6713 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6714 	if (ret < 0)
6715 		goto out;
6716 
6717 	if (tr->current_trace->update_thresh) {
6718 		ret = tr->current_trace->update_thresh(tr);
6719 		if (ret < 0)
6720 			goto out;
6721 	}
6722 
6723 	ret = cnt;
6724 out:
6725 	mutex_unlock(&trace_types_lock);
6726 
6727 	return ret;
6728 }
6729 
6730 #ifdef CONFIG_TRACER_MAX_TRACE
6731 
/* Read handler for tr->max_latency, via tracing_nsecs_read(). */
static ssize_t
tracing_max_lat_read(struct file *filp, char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;

	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
}
6740 
/* Write handler for tr->max_latency, via tracing_nsecs_write(). */
static ssize_t
tracing_max_lat_write(struct file *filp, const char __user *ubuf,
		      size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;

	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
}
6749 
6750 #endif
6751 
6752 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6753 {
6754 	if (cpu == RING_BUFFER_ALL_CPUS) {
6755 		if (cpumask_empty(tr->pipe_cpumask)) {
6756 			cpumask_setall(tr->pipe_cpumask);
6757 			return 0;
6758 		}
6759 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6760 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6761 		return 0;
6762 	}
6763 	return -EBUSY;
6764 }
6765 
6766 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6767 {
6768 	if (cpu == RING_BUFFER_ALL_CPUS) {
6769 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6770 		cpumask_clear(tr->pipe_cpumask);
6771 	} else {
6772 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6773 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6774 	}
6775 }
6776 
/*
 * Open handler for a trace pipe file.
 *
 * After tracing_check_open_get_tr() passes, the CPU of the file (or all
 * CPUs) is claimed in tr->pipe_cpumask and a trace_iterator is allocated,
 * initialized and stored in filp->private_data. On success tr->trace_ref is
 * incremented, which keeps the tracer from being changed while the pipe is
 * open.
 *
 * On failure the steps taken so far are undone in reverse order and the
 * error is returned: -EBUSY from open_pipe_on_cpu() or -ENOMEM for a failed
 * allocation.
 */
static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	mutex_lock(&trace_types_lock);
	cpu = tracing_get_cpu(inode);
	ret = open_pipe_on_cpu(tr, cpu);
	if (ret)
		goto fail_pipe_on_cpu;

	/* create a buffer to store the information to pass to userspace */
	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter) {
		ret = -ENOMEM;
		goto fail_alloc_iter;
	}

	trace_seq_init(&iter->seq);
	iter->trace = tr->current_trace;

	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto fail;
	}

	/* trace pipe does not show start of buffer */
	cpumask_setall(iter->started);

	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
		iter->iter_flags |= TRACE_FILE_LAT_FMT;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	iter->tr = tr;
	iter->array_buffer = &tr->array_buffer;
	iter->cpu_file = cpu;
	mutex_init(&iter->mutex);
	filp->private_data = iter;

	if (iter->trace->pipe_open)
		iter->trace->pipe_open(iter);

	nonseekable_open(inode, filp);

	/* Checked by tracing_set_tracer() before it switches tracers. */
	tr->trace_ref++;

	mutex_unlock(&trace_types_lock);
	return ret;

fail:
	kfree(iter);
fail_alloc_iter:
	close_pipe_on_cpu(tr, cpu);
fail_pipe_on_cpu:
	__trace_array_put(tr);
	mutex_unlock(&trace_types_lock);
	return ret;
}
6844 
/*
 * Release handler for a trace pipe file: undoes tracing_open_pipe().
 *
 * Under trace_types_lock, drops tr->trace_ref, runs the tracer's pipe_close
 * callback if there is one and releases the CPU claim. The iterator is then
 * freed and the trace_array put.
 */
static int tracing_release_pipe(struct inode *inode, struct file *file)
{
	struct trace_iterator *iter = file->private_data;
	struct trace_array *tr = inode->i_private;

	mutex_lock(&trace_types_lock);

	tr->trace_ref--;

	if (iter->trace->pipe_close)
		iter->trace->pipe_close(iter);
	close_pipe_on_cpu(tr, iter->cpu_file);
	mutex_unlock(&trace_types_lock);

	free_trace_iter_content(iter);
	kfree(iter);

	trace_array_put(tr);

	return 0;
}
6866 
6867 static __poll_t
6868 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6869 {
6870 	struct trace_array *tr = iter->tr;
6871 
6872 	/* Iterators are static, they should be filled or empty */
6873 	if (trace_buffer_iter(iter, iter->cpu_file))
6874 		return EPOLLIN | EPOLLRDNORM;
6875 
6876 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6877 		/*
6878 		 * Always select as readable when in blocking mode
6879 		 */
6880 		return EPOLLIN | EPOLLRDNORM;
6881 	else
6882 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6883 					     filp, poll_table, iter->tr->buffer_percent);
6884 }
6885 
6886 static __poll_t
6887 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6888 {
6889 	struct trace_iterator *iter = filp->private_data;
6890 
6891 	return trace_poll(iter, filp, poll_table);
6892 }
6893 
/*
 * Wait until the trace pipe has something to read.
 *
 * Must be called with iter->mutex held. The mutex is dropped while sleeping
 * in wait_on_pipe() and taken again before returning.
 *
 * Returns 1 when the caller should go on and read (the buffer can still be
 * empty if tracing was turned off after something had been read), -EAGAIN
 * for an empty pipe on an O_NONBLOCK file, or the non-zero result of
 * wait_on_pipe().
 */
static int tracing_wait_pipe(struct file *filp)
{
	struct trace_iterator *iter = filp->private_data;
	int ret;

	while (trace_empty(iter)) {

		if ((filp->f_flags & O_NONBLOCK)) {
			return -EAGAIN;
		}

		/*
		 * We block until we read something and tracing is disabled.
		 * We still block if tracing is disabled, but we have never
		 * read anything. This allows a user to cat this file, and
		 * then enable tracing. But after we have read something,
		 * we give an EOF when tracing is again disabled.
		 *
		 * iter->pos will be 0 if we haven't read anything.
		 */
		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
			break;

		/* Do not hold the iterator mutex while sleeping. */
		mutex_unlock(&iter->mutex);

		ret = wait_on_pipe(iter, 0);

		mutex_lock(&iter->mutex);

		if (ret)
			return ret;
	}

	return 1;
}
6930 
/*
 * Consumer reader.
 *
 * Read handler for a trace pipe file: entries are formatted into iter->seq,
 * consumed from the ring buffer and copied to user space. Output left over
 * in iter->seq from an earlier call is returned first. If the tracer has
 * its own read callback and it returns non-zero, that result is returned.
 *
 * Returns the number of bytes copied, 0 when there is nothing more to read,
 * or a negative error.
 */
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	struct trace_iterator *iter = filp->private_data;
	ssize_t sret;

	/*
	 * Avoid more than one consumer on a single file descriptor
	 * This is just a matter of traces coherency, the ring buffer itself
	 * is protected.
	 */
	mutex_lock(&iter->mutex);

	/* return any leftover data */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (sret != -EBUSY)
		goto out;

	/* -EBUSY: nothing was left over, start from an empty seq buffer. */
	trace_seq_init(&iter->seq);

	if (iter->trace->read) {
		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
		if (sret)
			goto out;
	}

waitagain:
	sret = tracing_wait_pipe(filp);
	if (sret <= 0)
		goto out;

	/* stop when tracing is finished */
	if (trace_empty(iter)) {
		sret = 0;
		goto out;
	}

	/* Never ask for a full page or more in one go. */
	if (cnt >= PAGE_SIZE)
		cnt = PAGE_SIZE - 1;

	/* reset all but tr, trace, and overruns */
	trace_iterator_reset(iter);
	cpumask_clear(iter->started);
	trace_seq_init(&iter->seq);

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);
	while (trace_find_next_entry_inc(iter) != NULL) {
		enum print_line_t ret;
		/* Length before this entry, to roll back a partial line. */
		int save_len = iter->seq.seq.len;

		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/*
			 * If one print_trace_line() fills the entire
			 * trace_seq in one shot, trace_seq_to_user() will
			 * return -EBUSY because save_len == 0. In this case
			 * we need to consume the event, otherwise the loop
			 * will peek at it again next time, resulting in an
			 * infinite loop.
			 */
			if (save_len == 0) {
				iter->seq.full = 0;
				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
				trace_consume(iter);
				break;
			}

			/* In other cases, don't print partial lines */
			iter->seq.seq.len = save_len;
			break;
		}
		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);

		/* Enough output gathered for this request. */
		if (trace_seq_used(&iter->seq) >= cnt)
			break;

		/*
		 * Setting the full flag means we reached the trace_seq buffer
		 * size and we should leave by partial output condition above.
		 * One of the trace_seq_* functions is not used properly.
		 */
		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
			  iter->ent->type);
	}
	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();

	/* Now copy what we have to the user */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	/* Everything was handed out: start with a clean seq next time. */
	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
		trace_seq_init(&iter->seq);

	/*
	 * If there was nothing to send to user, in spite of consuming trace
	 * entries, go back to wait for more entries.
	 */
	if (sret == -EBUSY)
		goto waitagain;

out:
	mutex_unlock(&iter->mutex);

	return sret;
}
7039 
/* splice_pipe_desc release callback: free the page at index @idx of @spd. */
static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
				     unsigned int idx)
{
	__free_page(spd->pages[idx]);
}
7045 
/*
 * Format entries into iter->seq until the seq buffer is full, the remaining
 * byte budget @rem is used up, or there are no more entries.
 *
 * The first entry printed is the one @iter is currently positioned on. An
 * entry that does not fit (seq overflow, partial line, or more bytes than
 * @rem) is rolled back and not consumed.
 *
 * Returns the byte budget that is left; 0 tells the caller to stop filling
 * pages.
 */
static size_t
tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
{
	size_t count;
	int save_len;
	int ret;

	/* Seq buffer is page-sized, exactly what we need. */
	for (;;) {
		save_len = iter->seq.seq.len;
		ret = print_trace_line(iter);

		if (trace_seq_has_overflowed(&iter->seq)) {
			iter->seq.seq.len = save_len;
			break;
		}

		/*
		 * This should not be hit, because it should only
		 * be set if the iter->seq overflowed. But check it
		 * anyway to be safe.
		 */
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			iter->seq.seq.len = save_len;
			break;
		}

		/* Bytes this entry added; drop the entry if over budget. */
		count = trace_seq_used(&iter->seq) - save_len;
		if (rem < count) {
			rem = 0;
			iter->seq.seq.len = save_len;
			break;
		}

		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);
		rem -= count;
		if (!trace_find_next_entry_inc(iter))	{
			/* Out of entries: nothing more to fill. */
			rem = 0;
			iter->ent = NULL;
			break;
		}
	}

	return rem;
}
7092 
/*
 * splice_read handler for a trace pipe file.
 *
 * Formats entries one page at a time through iter->seq, copies each filled
 * seq buffer into a freshly allocated page, and hands the pages to @pipe
 * with splice_to_pipe(). At most @len bytes and spd.nr_pages_max pages are
 * produced. If the tracer has its own splice_read callback and it returns
 * non-zero, that result is returned.
 *
 * Returns the result of splice_to_pipe(), 0 if no page was filled, or a
 * negative error.
 */
static ssize_t tracing_splice_read_pipe(struct file *filp,
					loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len,
					unsigned int flags)
{
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct trace_iterator *iter = filp->private_data;
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages	= 0, /* This gets updated below. */
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &default_pipe_buf_ops,
		.spd_release	= tracing_spd_release_pipe,
	};
	ssize_t ret;
	size_t rem;
	unsigned int i;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	mutex_lock(&iter->mutex);

	if (iter->trace->splice_read) {
		ret = iter->trace->splice_read(iter, filp,
					       ppos, pipe, len, flags);
		if (ret)
			goto out_err;
	}

	ret = tracing_wait_pipe(filp);
	if (ret <= 0)
		goto out_err;

	/* Position the iterator on an entry unless it already is on one. */
	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
		ret = -EFAULT;
		goto out_err;
	}

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);

	/* Fill as many pages as possible. */
	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
		spd.pages[i] = alloc_page(GFP_KERNEL);
		if (!spd.pages[i])
			break;

		rem = tracing_fill_pipe_page(rem, iter);

		/* Copy the data into the page, so we can start over. */
		ret = trace_seq_to_buffer(&iter->seq,
					  page_address(spd.pages[i]),
					  trace_seq_used(&iter->seq));
		if (ret < 0) {
			/* This page is not counted in i, so free it here. */
			__free_page(spd.pages[i]);
			break;
		}
		spd.partial[i].offset = 0;
		spd.partial[i].len = trace_seq_used(&iter->seq);

		trace_seq_init(&iter->seq);
	}

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
	mutex_unlock(&iter->mutex);

	spd.nr_pages = i;

	if (i)
		ret = splice_to_pipe(pipe, &spd);
	else
		ret = 0;
out:
	splice_shrink_spd(&spd);
	return ret;

out_err:
	mutex_unlock(&iter->mutex);
	goto out;
}
7178 
7179 static ssize_t
7180 tracing_entries_read(struct file *filp, char __user *ubuf,
7181 		     size_t cnt, loff_t *ppos)
7182 {
7183 	struct inode *inode = file_inode(filp);
7184 	struct trace_array *tr = inode->i_private;
7185 	int cpu = tracing_get_cpu(inode);
7186 	char buf[64];
7187 	int r = 0;
7188 	ssize_t ret;
7189 
7190 	mutex_lock(&trace_types_lock);
7191 
7192 	if (cpu == RING_BUFFER_ALL_CPUS) {
7193 		int cpu, buf_size_same;
7194 		unsigned long size;
7195 
7196 		size = 0;
7197 		buf_size_same = 1;
7198 		/* check if all cpu sizes are same */
7199 		for_each_tracing_cpu(cpu) {
7200 			/* fill in the size from first enabled cpu */
7201 			if (size == 0)
7202 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7203 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7204 				buf_size_same = 0;
7205 				break;
7206 			}
7207 		}
7208 
7209 		if (buf_size_same) {
7210 			if (!ring_buffer_expanded)
7211 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7212 					    size >> 10,
7213 					    trace_buf_size >> 10);
7214 			else
7215 				r = sprintf(buf, "%lu\n", size >> 10);
7216 		} else
7217 			r = sprintf(buf, "X\n");
7218 	} else
7219 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7220 
7221 	mutex_unlock(&trace_types_lock);
7222 
7223 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7224 	return ret;
7225 }
7226 
7227 static ssize_t
7228 tracing_entries_write(struct file *filp, const char __user *ubuf,
7229 		      size_t cnt, loff_t *ppos)
7230 {
7231 	struct inode *inode = file_inode(filp);
7232 	struct trace_array *tr = inode->i_private;
7233 	unsigned long val;
7234 	int ret;
7235 
7236 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7237 	if (ret)
7238 		return ret;
7239 
7240 	/* must have at least 1 entry */
7241 	if (!val)
7242 		return -EINVAL;
7243 
7244 	/* value is in KB */
7245 	val <<= 10;
7246 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7247 	if (ret < 0)
7248 		return ret;
7249 
7250 	*ppos += cnt;
7251 
7252 	return cnt;
7253 }
7254 
7255 static ssize_t
7256 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7257 				size_t cnt, loff_t *ppos)
7258 {
7259 	struct trace_array *tr = filp->private_data;
7260 	char buf[64];
7261 	int r, cpu;
7262 	unsigned long size = 0, expanded_size = 0;
7263 
7264 	mutex_lock(&trace_types_lock);
7265 	for_each_tracing_cpu(cpu) {
7266 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7267 		if (!ring_buffer_expanded)
7268 			expanded_size += trace_buf_size >> 10;
7269 	}
7270 	if (ring_buffer_expanded)
7271 		r = sprintf(buf, "%lu\n", size);
7272 	else
7273 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7274 	mutex_unlock(&trace_types_lock);
7275 
7276 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7277 }
7278 
/*
 * Write handler that accepts and discards any data: the whole request is
 * reported as written and @ppos is advanced by @cnt.
 */
static ssize_t
tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
			  size_t cnt, loff_t *ppos)
{
	/*
	 * There is no need to read what the user has written, this function
	 * is just to make sure that there is no error when "echo" is used
	 */

	*ppos += cnt;

	return cnt;
}
7292 
/*
 * Release handler that shrinks the ring buffer of the trace array to size 0
 * on all CPUs, turning tracing off first if TRACE_ITER_STOP_ON_FREE is set.
 *
 * The result of the resize is not checked; 0 is always returned.
 */
static int
tracing_free_buffer_release(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;

	/* disable tracing ? */
	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
		tracer_tracing_off(tr);
	/* resize the ring buffer to 0 */
	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);

	trace_array_put(tr);

	return 0;
}
7308 
7309 static ssize_t
7310 tracing_mark_write(struct file *filp, const char __user *ubuf,
7311 					size_t cnt, loff_t *fpos)
7312 {
7313 	struct trace_array *tr = filp->private_data;
7314 	struct ring_buffer_event *event;
7315 	enum event_trigger_type tt = ETT_NONE;
7316 	struct trace_buffer *buffer;
7317 	struct print_entry *entry;
7318 	ssize_t written;
7319 	int size;
7320 	int len;
7321 
7322 /* Used in tracing_mark_raw_write() as well */
7323 #define FAULTED_STR "<faulted>"
7324 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7325 
7326 	if (tracing_disabled)
7327 		return -EINVAL;
7328 
7329 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7330 		return -EINVAL;
7331 
7332 	if (cnt > TRACE_BUF_SIZE)
7333 		cnt = TRACE_BUF_SIZE;
7334 
7335 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7336 
7337 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7338 
7339 	/* If less than "<faulted>", then make sure we can still add that */
7340 	if (cnt < FAULTED_SIZE)
7341 		size += FAULTED_SIZE - cnt;
7342 
7343 	buffer = tr->array_buffer.buffer;
7344 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7345 					    tracing_gen_ctx());
7346 	if (unlikely(!event))
7347 		/* Ring buffer disabled, return as if not open for write */
7348 		return -EBADF;
7349 
7350 	entry = ring_buffer_event_data(event);
7351 	entry->ip = _THIS_IP_;
7352 
7353 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7354 	if (len) {
7355 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7356 		cnt = FAULTED_SIZE;
7357 		written = -EFAULT;
7358 	} else
7359 		written = cnt;
7360 
7361 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7362 		/* do not add \n before testing triggers, but add \0 */
7363 		entry->buf[cnt] = '\0';
7364 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7365 	}
7366 
7367 	if (entry->buf[cnt - 1] != '\n') {
7368 		entry->buf[cnt] = '\n';
7369 		entry->buf[cnt + 1] = '\0';
7370 	} else
7371 		entry->buf[cnt] = '\0';
7372 
7373 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7374 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7375 	__buffer_unlock_commit(buffer, event);
7376 
7377 	if (tt)
7378 		event_triggers_post_call(tr->trace_marker_file, tt);
7379 
7380 	return written;
7381 }
7382 
7383 /* Limit it for now to 3K (including tag) */
7384 #define RAW_DATA_MAX_SIZE (1024*3)
7385 
7386 static ssize_t
7387 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7388 					size_t cnt, loff_t *fpos)
7389 {
7390 	struct trace_array *tr = filp->private_data;
7391 	struct ring_buffer_event *event;
7392 	struct trace_buffer *buffer;
7393 	struct raw_data_entry *entry;
7394 	ssize_t written;
7395 	int size;
7396 	int len;
7397 
7398 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7399 
7400 	if (tracing_disabled)
7401 		return -EINVAL;
7402 
7403 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7404 		return -EINVAL;
7405 
7406 	/* The marker must at least have a tag id */
7407 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7408 		return -EINVAL;
7409 
7410 	if (cnt > TRACE_BUF_SIZE)
7411 		cnt = TRACE_BUF_SIZE;
7412 
7413 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7414 
7415 	size = sizeof(*entry) + cnt;
7416 	if (cnt < FAULT_SIZE_ID)
7417 		size += FAULT_SIZE_ID - cnt;
7418 
7419 	buffer = tr->array_buffer.buffer;
7420 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7421 					    tracing_gen_ctx());
7422 	if (!event)
7423 		/* Ring buffer disabled, return as if not open for write */
7424 		return -EBADF;
7425 
7426 	entry = ring_buffer_event_data(event);
7427 
7428 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7429 	if (len) {
7430 		entry->id = -1;
7431 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7432 		written = -EFAULT;
7433 	} else
7434 		written = cnt;
7435 
7436 	__buffer_unlock_commit(buffer, event);
7437 
7438 	return written;
7439 }
7440 
7441 static int tracing_clock_show(struct seq_file *m, void *v)
7442 {
7443 	struct trace_array *tr = m->private;
7444 	int i;
7445 
7446 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7447 		seq_printf(m,
7448 			"%s%s%s%s", i ? " " : "",
7449 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7450 			i == tr->clock_id ? "]" : "");
7451 	seq_putc(m, '\n');
7452 
7453 	return 0;
7454 }
7455 
7456 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7457 {
7458 	int i;
7459 
7460 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7461 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7462 			break;
7463 	}
7464 	if (i == ARRAY_SIZE(trace_clocks))
7465 		return -EINVAL;
7466 
7467 	mutex_lock(&trace_types_lock);
7468 
7469 	tr->clock_id = i;
7470 
7471 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7472 
7473 	/*
7474 	 * New clock may not be consistent with the previous clock.
7475 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7476 	 */
7477 	tracing_reset_online_cpus(&tr->array_buffer);
7478 
7479 #ifdef CONFIG_TRACER_MAX_TRACE
7480 	if (tr->max_buffer.buffer)
7481 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7482 	tracing_reset_online_cpus(&tr->max_buffer);
7483 #endif
7484 
7485 	mutex_unlock(&trace_types_lock);
7486 
7487 	return 0;
7488 }
7489 
7490 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7491 				   size_t cnt, loff_t *fpos)
7492 {
7493 	struct seq_file *m = filp->private_data;
7494 	struct trace_array *tr = m->private;
7495 	char buf[64];
7496 	const char *clockstr;
7497 	int ret;
7498 
7499 	if (cnt >= sizeof(buf))
7500 		return -EINVAL;
7501 
7502 	if (copy_from_user(buf, ubuf, cnt))
7503 		return -EFAULT;
7504 
7505 	buf[cnt] = 0;
7506 
7507 	clockstr = strstrip(buf);
7508 
7509 	ret = tracing_set_clock(tr, clockstr);
7510 	if (ret)
7511 		return ret;
7512 
7513 	*fpos += cnt;
7514 
7515 	return cnt;
7516 }
7517 
7518 static int tracing_clock_open(struct inode *inode, struct file *file)
7519 {
7520 	struct trace_array *tr = inode->i_private;
7521 	int ret;
7522 
7523 	ret = tracing_check_open_get_tr(tr);
7524 	if (ret)
7525 		return ret;
7526 
7527 	ret = single_open(file, tracing_clock_show, inode->i_private);
7528 	if (ret < 0)
7529 		trace_array_put(tr);
7530 
7531 	return ret;
7532 }
7533 
7534 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7535 {
7536 	struct trace_array *tr = m->private;
7537 
7538 	mutex_lock(&trace_types_lock);
7539 
7540 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7541 		seq_puts(m, "delta [absolute]\n");
7542 	else
7543 		seq_puts(m, "[delta] absolute\n");
7544 
7545 	mutex_unlock(&trace_types_lock);
7546 
7547 	return 0;
7548 }
7549 
7550 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7551 {
7552 	struct trace_array *tr = inode->i_private;
7553 	int ret;
7554 
7555 	ret = tracing_check_open_get_tr(tr);
7556 	if (ret)
7557 		return ret;
7558 
7559 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7560 	if (ret < 0)
7561 		trace_array_put(tr);
7562 
7563 	return ret;
7564 }
7565 
7566 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7567 {
7568 	if (rbe == this_cpu_read(trace_buffered_event))
7569 		return ring_buffer_time_stamp(buffer);
7570 
7571 	return ring_buffer_event_time_stamp(buffer, rbe);
7572 }
7573 
7574 /*
7575  * Set or disable using the per CPU trace_buffer_event when possible.
7576  */
7577 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7578 {
7579 	int ret = 0;
7580 
7581 	mutex_lock(&trace_types_lock);
7582 
7583 	if (set && tr->no_filter_buffering_ref++)
7584 		goto out;
7585 
7586 	if (!set) {
7587 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7588 			ret = -EINVAL;
7589 			goto out;
7590 		}
7591 
7592 		--tr->no_filter_buffering_ref;
7593 	}
7594  out:
7595 	mutex_unlock(&trace_types_lock);
7596 
7597 	return ret;
7598 }
7599 
/*
 * Per-open state allocated by tracing_buffers_open() and stored in
 * filp->private_data.
 */
struct ftrace_buffer_info {
	/* trace_array, cpu_file, tracer and array_buffer this open refers to */
	struct trace_iterator	iter;
	/* page from ring_buffer_alloc_read_page(); NULL until the first read */
	void			*spare;
	/* cpu_file value @spare was allocated with, needed to free it */
	unsigned int		spare_cpu;
	/*
	 * Offset into @spare that has already been copied to user space;
	 * starts as (unsigned int)-1 to force a ring buffer read first.
	 */
	unsigned int		read;
};
7606 
7607 #ifdef CONFIG_TRACER_SNAPSHOT
7608 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7609 {
7610 	struct trace_array *tr = inode->i_private;
7611 	struct trace_iterator *iter;
7612 	struct seq_file *m;
7613 	int ret;
7614 
7615 	ret = tracing_check_open_get_tr(tr);
7616 	if (ret)
7617 		return ret;
7618 
7619 	if (file->f_mode & FMODE_READ) {
7620 		iter = __tracing_open(inode, file, true);
7621 		if (IS_ERR(iter))
7622 			ret = PTR_ERR(iter);
7623 	} else {
7624 		/* Writes still need the seq_file to hold the private data */
7625 		ret = -ENOMEM;
7626 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7627 		if (!m)
7628 			goto out;
7629 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7630 		if (!iter) {
7631 			kfree(m);
7632 			goto out;
7633 		}
7634 		ret = 0;
7635 
7636 		iter->tr = tr;
7637 		iter->array_buffer = &tr->max_buffer;
7638 		iter->cpu_file = tracing_get_cpu(inode);
7639 		m->private = iter;
7640 		file->private_data = m;
7641 	}
7642 out:
7643 	if (ret < 0)
7644 		trace_array_put(tr);
7645 
7646 	return ret;
7647 }
7648 
7649 static void tracing_swap_cpu_buffer(void *tr)
7650 {
7651 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7652 }
7653 
7654 static ssize_t
7655 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7656 		       loff_t *ppos)
7657 {
7658 	struct seq_file *m = filp->private_data;
7659 	struct trace_iterator *iter = m->private;
7660 	struct trace_array *tr = iter->tr;
7661 	unsigned long val;
7662 	int ret;
7663 
7664 	ret = tracing_update_buffers();
7665 	if (ret < 0)
7666 		return ret;
7667 
7668 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7669 	if (ret)
7670 		return ret;
7671 
7672 	mutex_lock(&trace_types_lock);
7673 
7674 	if (tr->current_trace->use_max_tr) {
7675 		ret = -EBUSY;
7676 		goto out;
7677 	}
7678 
7679 	local_irq_disable();
7680 	arch_spin_lock(&tr->max_lock);
7681 	if (tr->cond_snapshot)
7682 		ret = -EBUSY;
7683 	arch_spin_unlock(&tr->max_lock);
7684 	local_irq_enable();
7685 	if (ret)
7686 		goto out;
7687 
7688 	switch (val) {
7689 	case 0:
7690 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7691 			ret = -EINVAL;
7692 			break;
7693 		}
7694 		if (tr->allocated_snapshot)
7695 			free_snapshot(tr);
7696 		break;
7697 	case 1:
7698 /* Only allow per-cpu swap if the ring buffer supports it */
7699 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7700 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7701 			ret = -EINVAL;
7702 			break;
7703 		}
7704 #endif
7705 		if (tr->allocated_snapshot)
7706 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7707 					&tr->array_buffer, iter->cpu_file);
7708 		else
7709 			ret = tracing_alloc_snapshot_instance(tr);
7710 		if (ret < 0)
7711 			break;
7712 		/* Now, we're going to swap */
7713 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7714 			local_irq_disable();
7715 			update_max_tr(tr, current, smp_processor_id(), NULL);
7716 			local_irq_enable();
7717 		} else {
7718 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7719 						 (void *)tr, 1);
7720 		}
7721 		break;
7722 	default:
7723 		if (tr->allocated_snapshot) {
7724 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7725 				tracing_reset_online_cpus(&tr->max_buffer);
7726 			else
7727 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7728 		}
7729 		break;
7730 	}
7731 
7732 	if (ret >= 0) {
7733 		*ppos += cnt;
7734 		ret = cnt;
7735 	}
7736 out:
7737 	mutex_unlock(&trace_types_lock);
7738 	return ret;
7739 }
7740 
7741 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7742 {
7743 	struct seq_file *m = file->private_data;
7744 	int ret;
7745 
7746 	ret = tracing_release(inode, file);
7747 
7748 	if (file->f_mode & FMODE_READ)
7749 		return ret;
7750 
7751 	/* If write only, the seq_file is just a stub */
7752 	if (m)
7753 		kfree(m->private);
7754 	kfree(m);
7755 
7756 	return 0;
7757 }
7758 
7759 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7760 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7761 				    size_t count, loff_t *ppos);
7762 static int tracing_buffers_release(struct inode *inode, struct file *file);
7763 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7764 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7765 
7766 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7767 {
7768 	struct ftrace_buffer_info *info;
7769 	int ret;
7770 
7771 	/* The following checks for tracefs lockdown */
7772 	ret = tracing_buffers_open(inode, filp);
7773 	if (ret < 0)
7774 		return ret;
7775 
7776 	info = filp->private_data;
7777 
7778 	if (info->iter.trace->use_max_tr) {
7779 		tracing_buffers_release(inode, filp);
7780 		return -EBUSY;
7781 	}
7782 
7783 	info->iter.snapshot = true;
7784 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7785 
7786 	return ret;
7787 }
7788 
7789 #endif /* CONFIG_TRACER_SNAPSHOT */
7790 
7791 
7792 static const struct file_operations tracing_thresh_fops = {
7793 	.open		= tracing_open_generic,
7794 	.read		= tracing_thresh_read,
7795 	.write		= tracing_thresh_write,
7796 	.llseek		= generic_file_llseek,
7797 };
7798 
7799 #ifdef CONFIG_TRACER_MAX_TRACE
7800 static const struct file_operations tracing_max_lat_fops = {
7801 	.open		= tracing_open_generic_tr,
7802 	.read		= tracing_max_lat_read,
7803 	.write		= tracing_max_lat_write,
7804 	.llseek		= generic_file_llseek,
7805 	.release	= tracing_release_generic_tr,
7806 };
7807 #endif
7808 
7809 static const struct file_operations set_tracer_fops = {
7810 	.open		= tracing_open_generic_tr,
7811 	.read		= tracing_set_trace_read,
7812 	.write		= tracing_set_trace_write,
7813 	.llseek		= generic_file_llseek,
7814 	.release	= tracing_release_generic_tr,
7815 };
7816 
7817 static const struct file_operations tracing_pipe_fops = {
7818 	.open		= tracing_open_pipe,
7819 	.poll		= tracing_poll_pipe,
7820 	.read		= tracing_read_pipe,
7821 	.splice_read	= tracing_splice_read_pipe,
7822 	.release	= tracing_release_pipe,
7823 	.llseek		= no_llseek,
7824 };
7825 
7826 static const struct file_operations tracing_entries_fops = {
7827 	.open		= tracing_open_generic_tr,
7828 	.read		= tracing_entries_read,
7829 	.write		= tracing_entries_write,
7830 	.llseek		= generic_file_llseek,
7831 	.release	= tracing_release_generic_tr,
7832 };
7833 
7834 static const struct file_operations tracing_total_entries_fops = {
7835 	.open		= tracing_open_generic_tr,
7836 	.read		= tracing_total_entries_read,
7837 	.llseek		= generic_file_llseek,
7838 	.release	= tracing_release_generic_tr,
7839 };
7840 
7841 static const struct file_operations tracing_free_buffer_fops = {
7842 	.open		= tracing_open_generic_tr,
7843 	.write		= tracing_free_buffer_write,
7844 	.release	= tracing_free_buffer_release,
7845 };
7846 
7847 static const struct file_operations tracing_mark_fops = {
7848 	.open		= tracing_mark_open,
7849 	.write		= tracing_mark_write,
7850 	.release	= tracing_release_generic_tr,
7851 };
7852 
7853 static const struct file_operations tracing_mark_raw_fops = {
7854 	.open		= tracing_mark_open,
7855 	.write		= tracing_mark_raw_write,
7856 	.release	= tracing_release_generic_tr,
7857 };
7858 
7859 static const struct file_operations trace_clock_fops = {
7860 	.open		= tracing_clock_open,
7861 	.read		= seq_read,
7862 	.llseek		= seq_lseek,
7863 	.release	= tracing_single_release_tr,
7864 	.write		= tracing_clock_write,
7865 };
7866 
7867 static const struct file_operations trace_time_stamp_mode_fops = {
7868 	.open		= tracing_time_stamp_mode_open,
7869 	.read		= seq_read,
7870 	.llseek		= seq_lseek,
7871 	.release	= tracing_single_release_tr,
7872 };
7873 
7874 #ifdef CONFIG_TRACER_SNAPSHOT
7875 static const struct file_operations snapshot_fops = {
7876 	.open		= tracing_snapshot_open,
7877 	.read		= seq_read,
7878 	.write		= tracing_snapshot_write,
7879 	.llseek		= tracing_lseek,
7880 	.release	= tracing_snapshot_release,
7881 };
7882 
7883 static const struct file_operations snapshot_raw_fops = {
7884 	.open		= snapshot_raw_open,
7885 	.read		= tracing_buffers_read,
7886 	.release	= tracing_buffers_release,
7887 	.splice_read	= tracing_buffers_splice_read,
7888 	.llseek		= no_llseek,
7889 };
7890 
7891 #endif /* CONFIG_TRACER_SNAPSHOT */
7892 
7893 /*
7894  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7895  * @filp: The active open file structure
 * @ubuf: The userspace provided buffer to read the value from
7897  * @cnt: The maximum number of bytes to read
7898  * @ppos: The current "file" position
7899  *
7900  * This function implements the write interface for a struct trace_min_max_param.
7901  * The filp->private_data must point to a trace_min_max_param structure that
7902  * defines where to write the value, the min and the max acceptable values,
7903  * and a lock to protect the write.
7904  */
7905 static ssize_t
7906 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7907 {
7908 	struct trace_min_max_param *param = filp->private_data;
7909 	u64 val;
7910 	int err;
7911 
7912 	if (!param)
7913 		return -EFAULT;
7914 
7915 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7916 	if (err)
7917 		return err;
7918 
7919 	if (param->lock)
7920 		mutex_lock(param->lock);
7921 
7922 	if (param->min && val < *param->min)
7923 		err = -EINVAL;
7924 
7925 	if (param->max && val > *param->max)
7926 		err = -EINVAL;
7927 
7928 	if (!err)
7929 		*param->val = val;
7930 
7931 	if (param->lock)
7932 		mutex_unlock(param->lock);
7933 
7934 	if (err)
7935 		return err;
7936 
7937 	return cnt;
7938 }
7939 
7940 /*
7941  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7942  * @filp: The active open file structure
7943  * @ubuf: The userspace provided buffer to read value into
7944  * @cnt: The maximum number of bytes to read
7945  * @ppos: The current "file" position
7946  *
7947  * This function implements the read interface for a struct trace_min_max_param.
7948  * The filp->private_data must point to a trace_min_max_param struct with valid
7949  * data.
7950  */
7951 static ssize_t
7952 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7953 {
7954 	struct trace_min_max_param *param = filp->private_data;
7955 	char buf[U64_STR_SIZE];
7956 	int len;
7957 	u64 val;
7958 
7959 	if (!param)
7960 		return -EFAULT;
7961 
7962 	val = *param->val;
7963 
7964 	if (cnt > sizeof(buf))
7965 		cnt = sizeof(buf);
7966 
7967 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7968 
7969 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7970 }
7971 
7972 const struct file_operations trace_min_max_fops = {
7973 	.open		= tracing_open_generic,
7974 	.read		= trace_min_max_read,
7975 	.write		= trace_min_max_write,
7976 };
7977 
7978 #define TRACING_LOG_ERRS_MAX	8
7979 #define TRACING_LOG_LOC_MAX	128
7980 
7981 #define CMD_PREFIX "  Command: "
7982 
/* Details of one logged error, filled in by tracing_log_err() */
struct err_info {
	const char	**errs;	/* ptr to loc-specific array of err strings */
	u8		type;	/* index into errs -> specific err string */
	u16		pos;	/* caret position */
	u64		ts;	/* local_clock() value taken when the error was logged */
};
7989 
/* One entry of a trace_array's err_log list */
struct tracing_log_err {
	struct list_head	list;	/* links the entry into tr->err_log */
	struct err_info		info;
	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
	char			*cmd;                     /* what caused err */
};
7996 
7997 static DEFINE_MUTEX(tracing_err_log_lock);
7998 
7999 static struct tracing_log_err *alloc_tracing_log_err(int len)
8000 {
8001 	struct tracing_log_err *err;
8002 
8003 	err = kzalloc(sizeof(*err), GFP_KERNEL);
8004 	if (!err)
8005 		return ERR_PTR(-ENOMEM);
8006 
8007 	err->cmd = kzalloc(len, GFP_KERNEL);
8008 	if (!err->cmd) {
8009 		kfree(err);
8010 		return ERR_PTR(-ENOMEM);
8011 	}
8012 
8013 	return err;
8014 }
8015 
8016 static void free_tracing_log_err(struct tracing_log_err *err)
8017 {
8018 	kfree(err->cmd);
8019 	kfree(err);
8020 }
8021 
8022 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8023 						   int len)
8024 {
8025 	struct tracing_log_err *err;
8026 	char *cmd;
8027 
8028 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8029 		err = alloc_tracing_log_err(len);
8030 		if (PTR_ERR(err) != -ENOMEM)
8031 			tr->n_err_log_entries++;
8032 
8033 		return err;
8034 	}
8035 	cmd = kzalloc(len, GFP_KERNEL);
8036 	if (!cmd)
8037 		return ERR_PTR(-ENOMEM);
8038 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8039 	kfree(err->cmd);
8040 	err->cmd = cmd;
8041 	list_del(&err->list);
8042 
8043 	return err;
8044 }
8045 
8046 /**
8047  * err_pos - find the position of a string within a command for error careting
8048  * @cmd: The tracing command that caused the error
8049  * @str: The string to position the caret at within @cmd
8050  *
8051  * Finds the position of the first occurrence of @str within @cmd.  The
8052  * return value can be passed to tracing_log_err() for caret placement
8053  * within @cmd.
8054  *
8055  * Returns the index within @cmd of the first occurrence of @str or 0
8056  * if @str was not found.
8057  */
unsigned int err_pos(char *cmd, const char *str)
{
	const char *hit;

	/* An empty command is unexpected here: warn and point at position 0 */
	if (WARN_ON(cmd[0] == '\0'))
		return 0;

	hit = strstr(cmd, str);

	return hit ? hit - cmd : 0;
}
8071 
8072 /**
8073  * tracing_log_err - write an error to the tracing error log
8074  * @tr: The associated trace array for the error (NULL for top level array)
8075  * @loc: A string describing where the error occurred
8076  * @cmd: The tracing command that caused the error
8077  * @errs: The array of loc-specific static error strings
8078  * @type: The index into errs[], which produces the specific static err string
8079  * @pos: The position the caret should be placed in the cmd
8080  *
8081  * Writes an error into tracing/error_log of the form:
8082  *
8083  * <loc>: error: <text>
8084  *   Command: <cmd>
8085  *              ^
8086  *
8087  * tracing/error_log is a small log file containing the last
8088  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8089  * unless there has been a tracing error, and the error log can be
8090  * cleared and have its memory freed by writing the empty string in
8091  * truncation mode to it i.e. echo > tracing/error_log.
8092  *
8093  * NOTE: the @errs array along with the @type param are used to
8094  * produce a static error string - this string is not copied and saved
8095  * when the error is logged - only a pointer to it is saved.  See
8096  * existing callers for examples of how static strings are typically
8097  * defined for use with tracing_log_err().
8098  */
8099 void tracing_log_err(struct trace_array *tr,
8100 		     const char *loc, const char *cmd,
8101 		     const char **errs, u8 type, u16 pos)
8102 {
8103 	struct tracing_log_err *err;
8104 	int len = 0;
8105 
8106 	if (!tr)
8107 		tr = &global_trace;
8108 
8109 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8110 
8111 	mutex_lock(&tracing_err_log_lock);
8112 	err = get_tracing_log_err(tr, len);
8113 	if (PTR_ERR(err) == -ENOMEM) {
8114 		mutex_unlock(&tracing_err_log_lock);
8115 		return;
8116 	}
8117 
8118 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8119 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8120 
8121 	err->info.errs = errs;
8122 	err->info.type = type;
8123 	err->info.pos = pos;
8124 	err->info.ts = local_clock();
8125 
8126 	list_add_tail(&err->list, &tr->err_log);
8127 	mutex_unlock(&tracing_err_log_lock);
8128 }
8129 
8130 static void clear_tracing_err_log(struct trace_array *tr)
8131 {
8132 	struct tracing_log_err *err, *next;
8133 
8134 	mutex_lock(&tracing_err_log_lock);
8135 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8136 		list_del(&err->list);
8137 		free_tracing_log_err(err);
8138 	}
8139 
8140 	tr->n_err_log_entries = 0;
8141 	mutex_unlock(&tracing_err_log_lock);
8142 }
8143 
8144 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8145 {
8146 	struct trace_array *tr = m->private;
8147 
8148 	mutex_lock(&tracing_err_log_lock);
8149 
8150 	return seq_list_start(&tr->err_log, *pos);
8151 }
8152 
8153 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8154 {
8155 	struct trace_array *tr = m->private;
8156 
8157 	return seq_list_next(v, &tr->err_log, pos);
8158 }
8159 
8160 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8161 {
8162 	mutex_unlock(&tracing_err_log_lock);
8163 }
8164 
8165 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8166 {
8167 	u16 i;
8168 
8169 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8170 		seq_putc(m, ' ');
8171 	for (i = 0; i < pos; i++)
8172 		seq_putc(m, ' ');
8173 	seq_puts(m, "^\n");
8174 }
8175 
8176 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8177 {
8178 	struct tracing_log_err *err = v;
8179 
8180 	if (err) {
8181 		const char *err_text = err->info.errs[err->info.type];
8182 		u64 sec = err->info.ts;
8183 		u32 nsec;
8184 
8185 		nsec = do_div(sec, NSEC_PER_SEC);
8186 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8187 			   err->loc, err_text);
8188 		seq_printf(m, "%s", err->cmd);
8189 		tracing_err_log_show_pos(m, err->info.pos);
8190 	}
8191 
8192 	return 0;
8193 }
8194 
8195 static const struct seq_operations tracing_err_log_seq_ops = {
8196 	.start  = tracing_err_log_seq_start,
8197 	.next   = tracing_err_log_seq_next,
8198 	.stop   = tracing_err_log_seq_stop,
8199 	.show   = tracing_err_log_seq_show
8200 };
8201 
8202 static int tracing_err_log_open(struct inode *inode, struct file *file)
8203 {
8204 	struct trace_array *tr = inode->i_private;
8205 	int ret = 0;
8206 
8207 	ret = tracing_check_open_get_tr(tr);
8208 	if (ret)
8209 		return ret;
8210 
8211 	/* If this file was opened for write, then erase contents */
8212 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8213 		clear_tracing_err_log(tr);
8214 
8215 	if (file->f_mode & FMODE_READ) {
8216 		ret = seq_open(file, &tracing_err_log_seq_ops);
8217 		if (!ret) {
8218 			struct seq_file *m = file->private_data;
8219 			m->private = tr;
8220 		} else {
8221 			trace_array_put(tr);
8222 		}
8223 	}
8224 	return ret;
8225 }
8226 
8227 static ssize_t tracing_err_log_write(struct file *file,
8228 				     const char __user *buffer,
8229 				     size_t count, loff_t *ppos)
8230 {
8231 	return count;
8232 }
8233 
8234 static int tracing_err_log_release(struct inode *inode, struct file *file)
8235 {
8236 	struct trace_array *tr = inode->i_private;
8237 
8238 	trace_array_put(tr);
8239 
8240 	if (file->f_mode & FMODE_READ)
8241 		seq_release(inode, file);
8242 
8243 	return 0;
8244 }
8245 
8246 static const struct file_operations tracing_err_log_fops = {
8247 	.open           = tracing_err_log_open,
8248 	.write		= tracing_err_log_write,
8249 	.read           = seq_read,
8250 	.llseek         = tracing_lseek,
8251 	.release        = tracing_err_log_release,
8252 };
8253 
8254 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8255 {
8256 	struct trace_array *tr = inode->i_private;
8257 	struct ftrace_buffer_info *info;
8258 	int ret;
8259 
8260 	ret = tracing_check_open_get_tr(tr);
8261 	if (ret)
8262 		return ret;
8263 
8264 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8265 	if (!info) {
8266 		trace_array_put(tr);
8267 		return -ENOMEM;
8268 	}
8269 
8270 	mutex_lock(&trace_types_lock);
8271 
8272 	info->iter.tr		= tr;
8273 	info->iter.cpu_file	= tracing_get_cpu(inode);
8274 	info->iter.trace	= tr->current_trace;
8275 	info->iter.array_buffer = &tr->array_buffer;
8276 	info->spare		= NULL;
8277 	/* Force reading ring buffer for first read */
8278 	info->read		= (unsigned int)-1;
8279 
8280 	filp->private_data = info;
8281 
8282 	tr->trace_ref++;
8283 
8284 	mutex_unlock(&trace_types_lock);
8285 
8286 	ret = nonseekable_open(inode, filp);
8287 	if (ret < 0)
8288 		trace_array_put(tr);
8289 
8290 	return ret;
8291 }
8292 
8293 static __poll_t
8294 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8295 {
8296 	struct ftrace_buffer_info *info = filp->private_data;
8297 	struct trace_iterator *iter = &info->iter;
8298 
8299 	return trace_poll(iter, filp, poll_table);
8300 }
8301 
8302 static ssize_t
8303 tracing_buffers_read(struct file *filp, char __user *ubuf,
8304 		     size_t count, loff_t *ppos)
8305 {
8306 	struct ftrace_buffer_info *info = filp->private_data;
8307 	struct trace_iterator *iter = &info->iter;
8308 	ssize_t ret = 0;
8309 	ssize_t size;
8310 
8311 	if (!count)
8312 		return 0;
8313 
8314 #ifdef CONFIG_TRACER_MAX_TRACE
8315 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8316 		return -EBUSY;
8317 #endif
8318 
8319 	if (!info->spare) {
8320 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8321 							  iter->cpu_file);
8322 		if (IS_ERR(info->spare)) {
8323 			ret = PTR_ERR(info->spare);
8324 			info->spare = NULL;
8325 		} else {
8326 			info->spare_cpu = iter->cpu_file;
8327 		}
8328 	}
8329 	if (!info->spare)
8330 		return ret;
8331 
8332 	/* Do we have previous read data to read? */
8333 	if (info->read < PAGE_SIZE)
8334 		goto read;
8335 
8336  again:
8337 	trace_access_lock(iter->cpu_file);
8338 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8339 				    &info->spare,
8340 				    count,
8341 				    iter->cpu_file, 0);
8342 	trace_access_unlock(iter->cpu_file);
8343 
8344 	if (ret < 0) {
8345 		if (trace_empty(iter)) {
8346 			if ((filp->f_flags & O_NONBLOCK))
8347 				return -EAGAIN;
8348 
8349 			ret = wait_on_pipe(iter, 0);
8350 			if (ret)
8351 				return ret;
8352 
8353 			goto again;
8354 		}
8355 		return 0;
8356 	}
8357 
8358 	info->read = 0;
8359  read:
8360 	size = PAGE_SIZE - info->read;
8361 	if (size > count)
8362 		size = count;
8363 
8364 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8365 	if (ret == size)
8366 		return -EFAULT;
8367 
8368 	size -= ret;
8369 
8370 	*ppos += size;
8371 	info->read += size;
8372 
8373 	return size;
8374 }
8375 
8376 static int tracing_buffers_release(struct inode *inode, struct file *file)
8377 {
8378 	struct ftrace_buffer_info *info = file->private_data;
8379 	struct trace_iterator *iter = &info->iter;
8380 
8381 	mutex_lock(&trace_types_lock);
8382 
8383 	iter->tr->trace_ref--;
8384 
8385 	__trace_array_put(iter->tr);
8386 
8387 	iter->wait_index++;
8388 	/* Make sure the waiters see the new wait_index */
8389 	smp_wmb();
8390 
8391 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8392 
8393 	if (info->spare)
8394 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8395 					   info->spare_cpu, info->spare);
8396 	kvfree(info);
8397 
8398 	mutex_unlock(&trace_types_lock);
8399 
8400 	return 0;
8401 }
8402 
/*
 * Reference counted handle on a ring buffer read page; the page and the
 * handle are freed by buffer_ref_release() when the count drops to zero.
 */
struct buffer_ref {
	struct trace_buffer	*buffer;	/* ring buffer @page belongs to */
	void			*page;		/* page from ring_buffer_alloc_read_page() */
	int			cpu;		/* cpu passed when freeing @page */
	refcount_t		refcount;
};
8409 
8410 static void buffer_ref_release(struct buffer_ref *ref)
8411 {
8412 	if (!refcount_dec_and_test(&ref->refcount))
8413 		return;
8414 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8415 	kfree(ref);
8416 }
8417 
8418 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8419 				    struct pipe_buffer *buf)
8420 {
8421 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8422 
8423 	buffer_ref_release(ref);
8424 	buf->private = 0;
8425 }
8426 
8427 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8428 				struct pipe_buffer *buf)
8429 {
8430 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8431 
8432 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8433 		return false;
8434 
8435 	refcount_inc(&ref->refcount);
8436 	return true;
8437 }
8438 
8439 /* Pipe buffer operations for a buffer. */
8440 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8441 	.release		= buffer_pipe_buf_release,
8442 	.get			= buffer_pipe_buf_get,
8443 };
8444 
8445 /*
8446  * Callback from splice_to_pipe(), if we need to release some pages
8447  * at the end of the spd in case we error'ed out in filling the pipe.
8448  */
8449 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8450 {
8451 	struct buffer_ref *ref =
8452 		(struct buffer_ref *)spd->partial[i].private;
8453 
8454 	buffer_ref_release(ref);
8455 	spd->partial[i].private = 0;
8456 }
8457 
8458 static ssize_t
8459 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8460 			    struct pipe_inode_info *pipe, size_t len,
8461 			    unsigned int flags)
8462 {
8463 	struct ftrace_buffer_info *info = file->private_data;
8464 	struct trace_iterator *iter = &info->iter;
8465 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8466 	struct page *pages_def[PIPE_DEF_BUFFERS];
8467 	struct splice_pipe_desc spd = {
8468 		.pages		= pages_def,
8469 		.partial	= partial_def,
8470 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8471 		.ops		= &buffer_pipe_buf_ops,
8472 		.spd_release	= buffer_spd_release,
8473 	};
8474 	struct buffer_ref *ref;
8475 	int entries, i;
8476 	ssize_t ret = 0;
8477 
8478 #ifdef CONFIG_TRACER_MAX_TRACE
8479 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8480 		return -EBUSY;
8481 #endif
8482 
8483 	if (*ppos & (PAGE_SIZE - 1))
8484 		return -EINVAL;
8485 
8486 	if (len & (PAGE_SIZE - 1)) {
8487 		if (len < PAGE_SIZE)
8488 			return -EINVAL;
8489 		len &= PAGE_MASK;
8490 	}
8491 
8492 	if (splice_grow_spd(pipe, &spd))
8493 		return -ENOMEM;
8494 
8495  again:
8496 	trace_access_lock(iter->cpu_file);
8497 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8498 
8499 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8500 		struct page *page;
8501 		int r;
8502 
8503 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8504 		if (!ref) {
8505 			ret = -ENOMEM;
8506 			break;
8507 		}
8508 
8509 		refcount_set(&ref->refcount, 1);
8510 		ref->buffer = iter->array_buffer->buffer;
8511 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8512 		if (IS_ERR(ref->page)) {
8513 			ret = PTR_ERR(ref->page);
8514 			ref->page = NULL;
8515 			kfree(ref);
8516 			break;
8517 		}
8518 		ref->cpu = iter->cpu_file;
8519 
8520 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8521 					  len, iter->cpu_file, 1);
8522 		if (r < 0) {
8523 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8524 						   ref->page);
8525 			kfree(ref);
8526 			break;
8527 		}
8528 
8529 		page = virt_to_page(ref->page);
8530 
8531 		spd.pages[i] = page;
8532 		spd.partial[i].len = PAGE_SIZE;
8533 		spd.partial[i].offset = 0;
8534 		spd.partial[i].private = (unsigned long)ref;
8535 		spd.nr_pages++;
8536 		*ppos += PAGE_SIZE;
8537 
8538 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8539 	}
8540 
8541 	trace_access_unlock(iter->cpu_file);
8542 	spd.nr_pages = i;
8543 
8544 	/* did we read anything? */
8545 	if (!spd.nr_pages) {
8546 		long wait_index;
8547 
8548 		if (ret)
8549 			goto out;
8550 
8551 		ret = -EAGAIN;
8552 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8553 			goto out;
8554 
8555 		wait_index = READ_ONCE(iter->wait_index);
8556 
8557 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8558 		if (ret)
8559 			goto out;
8560 
8561 		/* No need to wait after waking up when tracing is off */
8562 		if (!tracer_tracing_is_on(iter->tr))
8563 			goto out;
8564 
8565 		/* Make sure we see the new wait_index */
8566 		smp_rmb();
8567 		if (wait_index != iter->wait_index)
8568 			goto out;
8569 
8570 		goto again;
8571 	}
8572 
8573 	ret = splice_to_pipe(pipe, &spd);
8574 out:
8575 	splice_shrink_spd(&spd);
8576 
8577 	return ret;
8578 }
8579 
8580 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8581 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8582 {
8583 	struct ftrace_buffer_info *info = file->private_data;
8584 	struct trace_iterator *iter = &info->iter;
8585 
8586 	if (cmd)
8587 		return -ENOIOCTLCMD;
8588 
8589 	mutex_lock(&trace_types_lock);
8590 
8591 	iter->wait_index++;
8592 	/* Make sure the waiters see the new wait_index */
8593 	smp_wmb();
8594 
8595 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8596 
8597 	mutex_unlock(&trace_types_lock);
8598 	return 0;
8599 }
8600 
8601 static const struct file_operations tracing_buffers_fops = {
8602 	.open		= tracing_buffers_open,
8603 	.read		= tracing_buffers_read,
8604 	.poll		= tracing_buffers_poll,
8605 	.release	= tracing_buffers_release,
8606 	.splice_read	= tracing_buffers_splice_read,
8607 	.unlocked_ioctl = tracing_buffers_ioctl,
8608 	.llseek		= no_llseek,
8609 };
8610 
8611 static ssize_t
8612 tracing_stats_read(struct file *filp, char __user *ubuf,
8613 		   size_t count, loff_t *ppos)
8614 {
8615 	struct inode *inode = file_inode(filp);
8616 	struct trace_array *tr = inode->i_private;
8617 	struct array_buffer *trace_buf = &tr->array_buffer;
8618 	int cpu = tracing_get_cpu(inode);
8619 	struct trace_seq *s;
8620 	unsigned long cnt;
8621 	unsigned long long t;
8622 	unsigned long usec_rem;
8623 
8624 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8625 	if (!s)
8626 		return -ENOMEM;
8627 
8628 	trace_seq_init(s);
8629 
8630 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8631 	trace_seq_printf(s, "entries: %ld\n", cnt);
8632 
8633 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8634 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8635 
8636 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8637 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8638 
8639 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8640 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8641 
8642 	if (trace_clocks[tr->clock_id].in_ns) {
8643 		/* local or global for trace_clock */
8644 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8645 		usec_rem = do_div(t, USEC_PER_SEC);
8646 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8647 								t, usec_rem);
8648 
8649 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8650 		usec_rem = do_div(t, USEC_PER_SEC);
8651 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8652 	} else {
8653 		/* counter or tsc mode for trace_clock */
8654 		trace_seq_printf(s, "oldest event ts: %llu\n",
8655 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8656 
8657 		trace_seq_printf(s, "now ts: %llu\n",
8658 				ring_buffer_time_stamp(trace_buf->buffer));
8659 	}
8660 
8661 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8662 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8663 
8664 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8665 	trace_seq_printf(s, "read events: %ld\n", cnt);
8666 
8667 	count = simple_read_from_buffer(ubuf, count, ppos,
8668 					s->buffer, trace_seq_used(s));
8669 
8670 	kfree(s);
8671 
8672 	return count;
8673 }
8674 
8675 static const struct file_operations tracing_stats_fops = {
8676 	.open		= tracing_open_generic_tr,
8677 	.read		= tracing_stats_read,
8678 	.llseek		= generic_file_llseek,
8679 	.release	= tracing_release_generic_tr,
8680 };
8681 
8682 #ifdef CONFIG_DYNAMIC_FTRACE
8683 
8684 static ssize_t
8685 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8686 		  size_t cnt, loff_t *ppos)
8687 {
8688 	ssize_t ret;
8689 	char *buf;
8690 	int r;
8691 
8692 	/* 256 should be plenty to hold the amount needed */
8693 	buf = kmalloc(256, GFP_KERNEL);
8694 	if (!buf)
8695 		return -ENOMEM;
8696 
8697 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8698 		      ftrace_update_tot_cnt,
8699 		      ftrace_number_of_pages,
8700 		      ftrace_number_of_groups);
8701 
8702 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8703 	kfree(buf);
8704 	return ret;
8705 }
8706 
8707 static const struct file_operations tracing_dyn_info_fops = {
8708 	.open		= tracing_open_generic,
8709 	.read		= tracing_read_dyn_info,
8710 	.llseek		= generic_file_llseek,
8711 };
8712 #endif /* CONFIG_DYNAMIC_FTRACE */
8713 
8714 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/*
 * Probe function of snapshot_probe_ops: take a snapshot of @tr every time
 * it is called.  @ip, @parent_ip, @ops and @data are not used.
 */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
		struct trace_array *tr, struct ftrace_probe_ops *ops,
		void *data)
{
	tracing_snapshot_instance(tr);
}
8722 
8723 static void
8724 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8725 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8726 		      void *data)
8727 {
8728 	struct ftrace_func_mapper *mapper = data;
8729 	long *count = NULL;
8730 
8731 	if (mapper)
8732 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8733 
8734 	if (count) {
8735 
8736 		if (*count <= 0)
8737 			return;
8738 
8739 		(*count)--;
8740 	}
8741 
8742 	tracing_snapshot_instance(tr);
8743 }
8744 
8745 static int
8746 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8747 		      struct ftrace_probe_ops *ops, void *data)
8748 {
8749 	struct ftrace_func_mapper *mapper = data;
8750 	long *count = NULL;
8751 
8752 	seq_printf(m, "%ps:", (void *)ip);
8753 
8754 	seq_puts(m, "snapshot");
8755 
8756 	if (mapper)
8757 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8758 
8759 	if (count)
8760 		seq_printf(m, ":count=%ld\n", *count);
8761 	else
8762 		seq_puts(m, ":unlimited\n");
8763 
8764 	return 0;
8765 }
8766 
8767 static int
8768 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8769 		     unsigned long ip, void *init_data, void **data)
8770 {
8771 	struct ftrace_func_mapper *mapper = *data;
8772 
8773 	if (!mapper) {
8774 		mapper = allocate_ftrace_func_mapper();
8775 		if (!mapper)
8776 			return -ENOMEM;
8777 		*data = mapper;
8778 	}
8779 
8780 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8781 }
8782 
8783 static void
8784 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8785 		     unsigned long ip, void *data)
8786 {
8787 	struct ftrace_func_mapper *mapper = data;
8788 
8789 	if (!ip) {
8790 		if (!mapper)
8791 			return;
8792 		free_ftrace_func_mapper(mapper, NULL);
8793 		return;
8794 	}
8795 
8796 	ftrace_func_mapper_remove_ip(mapper, ip);
8797 }
8798 
8799 static struct ftrace_probe_ops snapshot_probe_ops = {
8800 	.func			= ftrace_snapshot,
8801 	.print			= ftrace_snapshot_print,
8802 };
8803 
8804 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8805 	.func			= ftrace_count_snapshot,
8806 	.print			= ftrace_snapshot_print,
8807 	.init			= ftrace_snapshot_init,
8808 	.free			= ftrace_snapshot_free,
8809 };
8810 
8811 static int
8812 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8813 			       char *glob, char *cmd, char *param, int enable)
8814 {
8815 	struct ftrace_probe_ops *ops;
8816 	void *count = (void *)-1;
8817 	char *number;
8818 	int ret;
8819 
8820 	if (!tr)
8821 		return -ENODEV;
8822 
8823 	/* hash funcs only work with set_ftrace_filter */
8824 	if (!enable)
8825 		return -EINVAL;
8826 
8827 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8828 
8829 	if (glob[0] == '!')
8830 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8831 
8832 	if (!param)
8833 		goto out_reg;
8834 
8835 	number = strsep(&param, ":");
8836 
8837 	if (!strlen(number))
8838 		goto out_reg;
8839 
8840 	/*
8841 	 * We use the callback data field (which is a pointer)
8842 	 * as our counter.
8843 	 */
8844 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8845 	if (ret)
8846 		return ret;
8847 
8848  out_reg:
8849 	ret = tracing_alloc_snapshot_instance(tr);
8850 	if (ret < 0)
8851 		goto out;
8852 
8853 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8854 
8855  out:
8856 	return ret < 0 ? ret : 0;
8857 }
8858 
8859 static struct ftrace_func_command ftrace_snapshot_cmd = {
8860 	.name			= "snapshot",
8861 	.func			= ftrace_trace_snapshot_callback,
8862 };
8863 
8864 static __init int register_snapshot_cmd(void)
8865 {
8866 	return register_ftrace_command(&ftrace_snapshot_cmd);
8867 }
8868 #else
8869 static inline __init int register_snapshot_cmd(void) { return 0; }
8870 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8871 
8872 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8873 {
8874 	if (WARN_ON(!tr->dir))
8875 		return ERR_PTR(-ENODEV);
8876 
8877 	/* Top directory uses NULL as the parent */
8878 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8879 		return NULL;
8880 
8881 	/* All sub buffers have a descriptor */
8882 	return tr->dir;
8883 }
8884 
8885 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8886 {
8887 	struct dentry *d_tracer;
8888 
8889 	if (tr->percpu_dir)
8890 		return tr->percpu_dir;
8891 
8892 	d_tracer = tracing_get_dentry(tr);
8893 	if (IS_ERR(d_tracer))
8894 		return NULL;
8895 
8896 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8897 
8898 	MEM_FAIL(!tr->percpu_dir,
8899 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8900 
8901 	return tr->percpu_dir;
8902 }
8903 
8904 static struct dentry *
8905 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8906 		      void *data, long cpu, const struct file_operations *fops)
8907 {
8908 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8909 
8910 	if (ret) /* See tracing_get_cpu() */
8911 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8912 	return ret;
8913 }
8914 
8915 static void
8916 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8917 {
8918 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8919 	struct dentry *d_cpu;
8920 	char cpu_dir[30]; /* 30 characters should be more than enough */
8921 
8922 	if (!d_percpu)
8923 		return;
8924 
8925 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8926 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8927 	if (!d_cpu) {
8928 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8929 		return;
8930 	}
8931 
8932 	/* per cpu trace_pipe */
8933 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8934 				tr, cpu, &tracing_pipe_fops);
8935 
8936 	/* per cpu trace */
8937 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8938 				tr, cpu, &tracing_fops);
8939 
8940 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8941 				tr, cpu, &tracing_buffers_fops);
8942 
8943 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8944 				tr, cpu, &tracing_stats_fops);
8945 
8946 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8947 				tr, cpu, &tracing_entries_fops);
8948 
8949 #ifdef CONFIG_TRACER_SNAPSHOT
8950 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8951 				tr, cpu, &snapshot_fops);
8952 
8953 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8954 				tr, cpu, &snapshot_raw_fops);
8955 #endif
8956 }
8957 
8958 #ifdef CONFIG_FTRACE_SELFTEST
8959 /* Let selftest have access to static functions in this file */
8960 #include "trace_selftest.c"
8961 #endif
8962 
8963 static ssize_t
8964 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8965 			loff_t *ppos)
8966 {
8967 	struct trace_option_dentry *topt = filp->private_data;
8968 	char *buf;
8969 
8970 	if (topt->flags->val & topt->opt->bit)
8971 		buf = "1\n";
8972 	else
8973 		buf = "0\n";
8974 
8975 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8976 }
8977 
8978 static ssize_t
8979 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8980 			 loff_t *ppos)
8981 {
8982 	struct trace_option_dentry *topt = filp->private_data;
8983 	unsigned long val;
8984 	int ret;
8985 
8986 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8987 	if (ret)
8988 		return ret;
8989 
8990 	if (val != 0 && val != 1)
8991 		return -EINVAL;
8992 
8993 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8994 		mutex_lock(&trace_types_lock);
8995 		ret = __set_tracer_option(topt->tr, topt->flags,
8996 					  topt->opt, !val);
8997 		mutex_unlock(&trace_types_lock);
8998 		if (ret)
8999 			return ret;
9000 	}
9001 
9002 	*ppos += cnt;
9003 
9004 	return cnt;
9005 }
9006 
9007 static int tracing_open_options(struct inode *inode, struct file *filp)
9008 {
9009 	struct trace_option_dentry *topt = inode->i_private;
9010 	int ret;
9011 
9012 	ret = tracing_check_open_get_tr(topt->tr);
9013 	if (ret)
9014 		return ret;
9015 
9016 	filp->private_data = inode->i_private;
9017 	return 0;
9018 }
9019 
9020 static int tracing_release_options(struct inode *inode, struct file *file)
9021 {
9022 	struct trace_option_dentry *topt = file->private_data;
9023 
9024 	trace_array_put(topt->tr);
9025 	return 0;
9026 }
9027 
9028 static const struct file_operations trace_options_fops = {
9029 	.open = tracing_open_options,
9030 	.read = trace_options_read,
9031 	.write = trace_options_write,
9032 	.llseek	= generic_file_llseek,
9033 	.release = tracing_release_options,
9034 };
9035 
9036 /*
9037  * In order to pass in both the trace_array descriptor as well as the index
9038  * to the flag that the trace option file represents, the trace_array
9039  * has a character array of trace_flags_index[], which holds the index
9040  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9041  * The address of this character array is passed to the flag option file
9042  * read/write callbacks.
9043  *
9044  * In order to extract both the index and the trace_array descriptor,
9045  * get_tr_index() uses the following algorithm.
9046  *
9047  *   idx = *ptr;
9048  *
9049  * As the pointer itself contains the address of the index (remember
9050  * index[1] == 1).
9051  *
9052  * Then to get the trace_array descriptor, by subtracting that index
9053  * from the ptr, we get to the start of the index itself.
9054  *
9055  *   ptr - idx == &index[0]
9056  *
9057  * Then a simple container_of() from that pointer gets us to the
9058  * trace_array descriptor.
9059  */
9060 static void get_tr_index(void *data, struct trace_array **ptr,
9061 			 unsigned int *pindex)
9062 {
9063 	*pindex = *(unsigned char *)data;
9064 
9065 	*ptr = container_of(data - *pindex, struct trace_array,
9066 			    trace_flags_index);
9067 }
9068 
9069 static ssize_t
9070 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9071 			loff_t *ppos)
9072 {
9073 	void *tr_index = filp->private_data;
9074 	struct trace_array *tr;
9075 	unsigned int index;
9076 	char *buf;
9077 
9078 	get_tr_index(tr_index, &tr, &index);
9079 
9080 	if (tr->trace_flags & (1 << index))
9081 		buf = "1\n";
9082 	else
9083 		buf = "0\n";
9084 
9085 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9086 }
9087 
9088 static ssize_t
9089 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9090 			 loff_t *ppos)
9091 {
9092 	void *tr_index = filp->private_data;
9093 	struct trace_array *tr;
9094 	unsigned int index;
9095 	unsigned long val;
9096 	int ret;
9097 
9098 	get_tr_index(tr_index, &tr, &index);
9099 
9100 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9101 	if (ret)
9102 		return ret;
9103 
9104 	if (val != 0 && val != 1)
9105 		return -EINVAL;
9106 
9107 	mutex_lock(&event_mutex);
9108 	mutex_lock(&trace_types_lock);
9109 	ret = set_tracer_flag(tr, 1 << index, val);
9110 	mutex_unlock(&trace_types_lock);
9111 	mutex_unlock(&event_mutex);
9112 
9113 	if (ret < 0)
9114 		return ret;
9115 
9116 	*ppos += cnt;
9117 
9118 	return cnt;
9119 }
9120 
9121 static const struct file_operations trace_options_core_fops = {
9122 	.open = tracing_open_generic,
9123 	.read = trace_options_core_read,
9124 	.write = trace_options_core_write,
9125 	.llseek = generic_file_llseek,
9126 };
9127 
9128 struct dentry *trace_create_file(const char *name,
9129 				 umode_t mode,
9130 				 struct dentry *parent,
9131 				 void *data,
9132 				 const struct file_operations *fops)
9133 {
9134 	struct dentry *ret;
9135 
9136 	ret = tracefs_create_file(name, mode, parent, data, fops);
9137 	if (!ret)
9138 		pr_warn("Could not create tracefs '%s' entry\n", name);
9139 
9140 	return ret;
9141 }
9142 
9143 
9144 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9145 {
9146 	struct dentry *d_tracer;
9147 
9148 	if (tr->options)
9149 		return tr->options;
9150 
9151 	d_tracer = tracing_get_dentry(tr);
9152 	if (IS_ERR(d_tracer))
9153 		return NULL;
9154 
9155 	tr->options = tracefs_create_dir("options", d_tracer);
9156 	if (!tr->options) {
9157 		pr_warn("Could not create tracefs directory 'options'\n");
9158 		return NULL;
9159 	}
9160 
9161 	return tr->options;
9162 }
9163 
9164 static void
9165 create_trace_option_file(struct trace_array *tr,
9166 			 struct trace_option_dentry *topt,
9167 			 struct tracer_flags *flags,
9168 			 struct tracer_opt *opt)
9169 {
9170 	struct dentry *t_options;
9171 
9172 	t_options = trace_options_init_dentry(tr);
9173 	if (!t_options)
9174 		return;
9175 
9176 	topt->flags = flags;
9177 	topt->opt = opt;
9178 	topt->tr = tr;
9179 
9180 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9181 					t_options, topt, &trace_options_fops);
9182 
9183 }
9184 
9185 static void
9186 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9187 {
9188 	struct trace_option_dentry *topts;
9189 	struct trace_options *tr_topts;
9190 	struct tracer_flags *flags;
9191 	struct tracer_opt *opts;
9192 	int cnt;
9193 	int i;
9194 
9195 	if (!tracer)
9196 		return;
9197 
9198 	flags = tracer->flags;
9199 
9200 	if (!flags || !flags->opts)
9201 		return;
9202 
9203 	/*
9204 	 * If this is an instance, only create flags for tracers
9205 	 * the instance may have.
9206 	 */
9207 	if (!trace_ok_for_array(tracer, tr))
9208 		return;
9209 
9210 	for (i = 0; i < tr->nr_topts; i++) {
9211 		/* Make sure there's no duplicate flags. */
9212 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9213 			return;
9214 	}
9215 
9216 	opts = flags->opts;
9217 
9218 	for (cnt = 0; opts[cnt].name; cnt++)
9219 		;
9220 
9221 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9222 	if (!topts)
9223 		return;
9224 
9225 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9226 			    GFP_KERNEL);
9227 	if (!tr_topts) {
9228 		kfree(topts);
9229 		return;
9230 	}
9231 
9232 	tr->topts = tr_topts;
9233 	tr->topts[tr->nr_topts].tracer = tracer;
9234 	tr->topts[tr->nr_topts].topts = topts;
9235 	tr->nr_topts++;
9236 
9237 	for (cnt = 0; opts[cnt].name; cnt++) {
9238 		create_trace_option_file(tr, &topts[cnt], flags,
9239 					 &opts[cnt]);
9240 		MEM_FAIL(topts[cnt].entry == NULL,
9241 			  "Failed to create trace option: %s",
9242 			  opts[cnt].name);
9243 	}
9244 }
9245 
9246 static struct dentry *
9247 create_trace_option_core_file(struct trace_array *tr,
9248 			      const char *option, long index)
9249 {
9250 	struct dentry *t_options;
9251 
9252 	t_options = trace_options_init_dentry(tr);
9253 	if (!t_options)
9254 		return NULL;
9255 
9256 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9257 				 (void *)&tr->trace_flags_index[index],
9258 				 &trace_options_core_fops);
9259 }
9260 
9261 static void create_trace_options_dir(struct trace_array *tr)
9262 {
9263 	struct dentry *t_options;
9264 	bool top_level = tr == &global_trace;
9265 	int i;
9266 
9267 	t_options = trace_options_init_dentry(tr);
9268 	if (!t_options)
9269 		return;
9270 
9271 	for (i = 0; trace_options[i]; i++) {
9272 		if (top_level ||
9273 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9274 			create_trace_option_core_file(tr, trace_options[i], i);
9275 	}
9276 }
9277 
9278 static ssize_t
9279 rb_simple_read(struct file *filp, char __user *ubuf,
9280 	       size_t cnt, loff_t *ppos)
9281 {
9282 	struct trace_array *tr = filp->private_data;
9283 	char buf[64];
9284 	int r;
9285 
9286 	r = tracer_tracing_is_on(tr);
9287 	r = sprintf(buf, "%d\n", r);
9288 
9289 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9290 }
9291 
9292 static ssize_t
9293 rb_simple_write(struct file *filp, const char __user *ubuf,
9294 		size_t cnt, loff_t *ppos)
9295 {
9296 	struct trace_array *tr = filp->private_data;
9297 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9298 	unsigned long val;
9299 	int ret;
9300 
9301 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9302 	if (ret)
9303 		return ret;
9304 
9305 	if (buffer) {
9306 		mutex_lock(&trace_types_lock);
9307 		if (!!val == tracer_tracing_is_on(tr)) {
9308 			val = 0; /* do nothing */
9309 		} else if (val) {
9310 			tracer_tracing_on(tr);
9311 			if (tr->current_trace->start)
9312 				tr->current_trace->start(tr);
9313 		} else {
9314 			tracer_tracing_off(tr);
9315 			if (tr->current_trace->stop)
9316 				tr->current_trace->stop(tr);
9317 			/* Wake up any waiters */
9318 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9319 		}
9320 		mutex_unlock(&trace_types_lock);
9321 	}
9322 
9323 	(*ppos)++;
9324 
9325 	return cnt;
9326 }
9327 
9328 static const struct file_operations rb_simple_fops = {
9329 	.open		= tracing_open_generic_tr,
9330 	.read		= rb_simple_read,
9331 	.write		= rb_simple_write,
9332 	.release	= tracing_release_generic_tr,
9333 	.llseek		= default_llseek,
9334 };
9335 
9336 static ssize_t
9337 buffer_percent_read(struct file *filp, char __user *ubuf,
9338 		    size_t cnt, loff_t *ppos)
9339 {
9340 	struct trace_array *tr = filp->private_data;
9341 	char buf[64];
9342 	int r;
9343 
9344 	r = tr->buffer_percent;
9345 	r = sprintf(buf, "%d\n", r);
9346 
9347 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9348 }
9349 
9350 static ssize_t
9351 buffer_percent_write(struct file *filp, const char __user *ubuf,
9352 		     size_t cnt, loff_t *ppos)
9353 {
9354 	struct trace_array *tr = filp->private_data;
9355 	unsigned long val;
9356 	int ret;
9357 
9358 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9359 	if (ret)
9360 		return ret;
9361 
9362 	if (val > 100)
9363 		return -EINVAL;
9364 
9365 	tr->buffer_percent = val;
9366 
9367 	(*ppos)++;
9368 
9369 	return cnt;
9370 }
9371 
9372 static const struct file_operations buffer_percent_fops = {
9373 	.open		= tracing_open_generic_tr,
9374 	.read		= buffer_percent_read,
9375 	.write		= buffer_percent_write,
9376 	.release	= tracing_release_generic_tr,
9377 	.llseek		= default_llseek,
9378 };
9379 
/* Parent dentry under which trace_array_create_dir() creates instance directories */
static struct dentry *trace_instance_dir;

/* Defined further down; needed by trace_array_create_dir() */
static void init_tracer_tracefs(struct trace_array *tr,
				struct dentry *d_tracer);

9385 static int
9386 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9387 {
9388 	enum ring_buffer_flags rb_flags;
9389 
9390 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9391 
9392 	buf->tr = tr;
9393 
9394 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9395 	if (!buf->buffer)
9396 		return -ENOMEM;
9397 
9398 	buf->data = alloc_percpu(struct trace_array_cpu);
9399 	if (!buf->data) {
9400 		ring_buffer_free(buf->buffer);
9401 		buf->buffer = NULL;
9402 		return -ENOMEM;
9403 	}
9404 
9405 	/* Allocate the first page for all buffers */
9406 	set_buffer_entries(&tr->array_buffer,
9407 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9408 
9409 	return 0;
9410 }
9411 
9412 static void free_trace_buffer(struct array_buffer *buf)
9413 {
9414 	if (buf->buffer) {
9415 		ring_buffer_free(buf->buffer);
9416 		buf->buffer = NULL;
9417 		free_percpu(buf->data);
9418 		buf->data = NULL;
9419 	}
9420 }
9421 
9422 static int allocate_trace_buffers(struct trace_array *tr, int size)
9423 {
9424 	int ret;
9425 
9426 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9427 	if (ret)
9428 		return ret;
9429 
9430 #ifdef CONFIG_TRACER_MAX_TRACE
9431 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9432 				    allocate_snapshot ? size : 1);
9433 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9434 		free_trace_buffer(&tr->array_buffer);
9435 		return -ENOMEM;
9436 	}
9437 	tr->allocated_snapshot = allocate_snapshot;
9438 
9439 	allocate_snapshot = false;
9440 #endif
9441 
9442 	return 0;
9443 }
9444 
9445 static void free_trace_buffers(struct trace_array *tr)
9446 {
9447 	if (!tr)
9448 		return;
9449 
9450 	free_trace_buffer(&tr->array_buffer);
9451 
9452 #ifdef CONFIG_TRACER_MAX_TRACE
9453 	free_trace_buffer(&tr->max_buffer);
9454 #endif
9455 }
9456 
9457 static void init_trace_flags_index(struct trace_array *tr)
9458 {
9459 	int i;
9460 
9461 	/* Used by the trace options files */
9462 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9463 		tr->trace_flags_index[i] = i;
9464 }
9465 
9466 static void __update_tracer_options(struct trace_array *tr)
9467 {
9468 	struct tracer *t;
9469 
9470 	for (t = trace_types; t; t = t->next)
9471 		add_tracer_options(tr, t);
9472 }
9473 
9474 static void update_tracer_options(struct trace_array *tr)
9475 {
9476 	mutex_lock(&trace_types_lock);
9477 	tracer_options_updated = true;
9478 	__update_tracer_options(tr);
9479 	mutex_unlock(&trace_types_lock);
9480 }
9481 
9482 /* Must have trace_types_lock held */
9483 struct trace_array *trace_array_find(const char *instance)
9484 {
9485 	struct trace_array *tr, *found = NULL;
9486 
9487 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9488 		if (tr->name && strcmp(tr->name, instance) == 0) {
9489 			found = tr;
9490 			break;
9491 		}
9492 	}
9493 
9494 	return found;
9495 }
9496 
9497 struct trace_array *trace_array_find_get(const char *instance)
9498 {
9499 	struct trace_array *tr;
9500 
9501 	mutex_lock(&trace_types_lock);
9502 	tr = trace_array_find(instance);
9503 	if (tr)
9504 		tr->ref++;
9505 	mutex_unlock(&trace_types_lock);
9506 
9507 	return tr;
9508 }
9509 
9510 static int trace_array_create_dir(struct trace_array *tr)
9511 {
9512 	int ret;
9513 
9514 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9515 	if (!tr->dir)
9516 		return -EINVAL;
9517 
9518 	ret = event_trace_add_tracer(tr->dir, tr);
9519 	if (ret) {
9520 		tracefs_remove(tr->dir);
9521 		return ret;
9522 	}
9523 
9524 	init_tracer_tracefs(tr, tr->dir);
9525 	__update_tracer_options(tr);
9526 
9527 	return ret;
9528 }
9529 
9530 static struct trace_array *trace_array_create(const char *name)
9531 {
9532 	struct trace_array *tr;
9533 	int ret;
9534 
9535 	ret = -ENOMEM;
9536 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9537 	if (!tr)
9538 		return ERR_PTR(ret);
9539 
9540 	tr->name = kstrdup(name, GFP_KERNEL);
9541 	if (!tr->name)
9542 		goto out_free_tr;
9543 
9544 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9545 		goto out_free_tr;
9546 
9547 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9548 		goto out_free_tr;
9549 
9550 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9551 
9552 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9553 
9554 	raw_spin_lock_init(&tr->start_lock);
9555 
9556 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9557 
9558 	tr->current_trace = &nop_trace;
9559 
9560 	INIT_LIST_HEAD(&tr->systems);
9561 	INIT_LIST_HEAD(&tr->events);
9562 	INIT_LIST_HEAD(&tr->hist_vars);
9563 	INIT_LIST_HEAD(&tr->err_log);
9564 
9565 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9566 		goto out_free_tr;
9567 
9568 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9569 		goto out_free_tr;
9570 
9571 	ftrace_init_trace_array(tr);
9572 
9573 	init_trace_flags_index(tr);
9574 
9575 	if (trace_instance_dir) {
9576 		ret = trace_array_create_dir(tr);
9577 		if (ret)
9578 			goto out_free_tr;
9579 	} else
9580 		__trace_early_add_events(tr);
9581 
9582 	list_add(&tr->list, &ftrace_trace_arrays);
9583 
9584 	tr->ref++;
9585 
9586 	return tr;
9587 
9588  out_free_tr:
9589 	ftrace_free_ftrace_ops(tr);
9590 	free_trace_buffers(tr);
9591 	free_cpumask_var(tr->pipe_cpumask);
9592 	free_cpumask_var(tr->tracing_cpumask);
9593 	kfree(tr->name);
9594 	kfree(tr);
9595 
9596 	return ERR_PTR(ret);
9597 }
9598 
9599 static int instance_mkdir(const char *name)
9600 {
9601 	struct trace_array *tr;
9602 	int ret;
9603 
9604 	mutex_lock(&event_mutex);
9605 	mutex_lock(&trace_types_lock);
9606 
9607 	ret = -EEXIST;
9608 	if (trace_array_find(name))
9609 		goto out_unlock;
9610 
9611 	tr = trace_array_create(name);
9612 
9613 	ret = PTR_ERR_OR_ZERO(tr);
9614 
9615 out_unlock:
9616 	mutex_unlock(&trace_types_lock);
9617 	mutex_unlock(&event_mutex);
9618 	return ret;
9619 }
9620 
9621 /**
9622  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9623  * @name: The name of the trace array to be looked up/created.
9624  *
9625  * Returns pointer to trace array with given name.
9626  * NULL, if it cannot be created.
9627  *
9628  * NOTE: This function increments the reference counter associated with the
9629  * trace array returned. This makes sure it cannot be freed while in use.
9630  * Use trace_array_put() once the trace array is no longer needed.
9631  * If the trace_array is to be freed, trace_array_destroy() needs to
9632  * be called after the trace_array_put(), or simply let user space delete
9633  * it from the tracefs instances directory. But until the
9634  * trace_array_put() is called, user space can not delete it.
9635  *
9636  */
9637 struct trace_array *trace_array_get_by_name(const char *name)
9638 {
9639 	struct trace_array *tr;
9640 
9641 	mutex_lock(&event_mutex);
9642 	mutex_lock(&trace_types_lock);
9643 
9644 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9645 		if (tr->name && strcmp(tr->name, name) == 0)
9646 			goto out_unlock;
9647 	}
9648 
9649 	tr = trace_array_create(name);
9650 
9651 	if (IS_ERR(tr))
9652 		tr = NULL;
9653 out_unlock:
9654 	if (tr)
9655 		tr->ref++;
9656 
9657 	mutex_unlock(&trace_types_lock);
9658 	mutex_unlock(&event_mutex);
9659 	return tr;
9660 }
9661 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9662 
/*
 * Tear down the trace instance @tr and free the memory it owns.
 *
 * Both callers in this file (trace_array_destroy() and instance_rmdir())
 * invoke this with event_mutex and trace_types_lock held.
 *
 * Returns 0 on success, or -EBUSY (leaving @tr untouched) when tr->ref is
 * above 1, or when a tracer is set and tr->trace_ref is non-zero.
 */
static int __remove_instance(struct trace_array *tr)
{
	int i;

	/* Reference counter for a newly created trace array = 1. */
	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
		return -EBUSY;

	/* Take the instance off ftrace_trace_arrays before dismantling it. */
	list_del(&tr->list);

	/* Disable all the flags that were enabled coming in */
	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
		if ((1 << i) & ZEROED_TRACE_FLAGS)
			set_tracer_flag(tr, 1 << i, 0);
	}

	/* Stop tracing and drop probes, events and files before the buffers. */
	tracing_set_nop(tr);
	clear_ftrace_function_probes(tr);
	event_trace_del_tracer(tr);
	ftrace_clear_pids(tr);
	ftrace_destroy_function_files(tr);
	tracefs_remove(tr->dir);
	free_percpu(tr->last_func_repeats);
	free_trace_buffers(tr);
	clear_tracing_err_log(tr);

	/* Free each per-tracer option array, then the table holding them. */
	for (i = 0; i < tr->nr_topts; i++) {
		kfree(tr->topts[i].topts);
	}
	kfree(tr->topts);

	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree(tr->name);
	kfree(tr);

	return 0;
}
9701 
9702 int trace_array_destroy(struct trace_array *this_tr)
9703 {
9704 	struct trace_array *tr;
9705 	int ret;
9706 
9707 	if (!this_tr)
9708 		return -EINVAL;
9709 
9710 	mutex_lock(&event_mutex);
9711 	mutex_lock(&trace_types_lock);
9712 
9713 	ret = -ENODEV;
9714 
9715 	/* Making sure trace array exists before destroying it. */
9716 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9717 		if (tr == this_tr) {
9718 			ret = __remove_instance(tr);
9719 			break;
9720 		}
9721 	}
9722 
9723 	mutex_unlock(&trace_types_lock);
9724 	mutex_unlock(&event_mutex);
9725 
9726 	return ret;
9727 }
9728 EXPORT_SYMBOL_GPL(trace_array_destroy);
9729 
9730 static int instance_rmdir(const char *name)
9731 {
9732 	struct trace_array *tr;
9733 	int ret;
9734 
9735 	mutex_lock(&event_mutex);
9736 	mutex_lock(&trace_types_lock);
9737 
9738 	ret = -ENODEV;
9739 	tr = trace_array_find(name);
9740 	if (tr)
9741 		ret = __remove_instance(tr);
9742 
9743 	mutex_unlock(&trace_types_lock);
9744 	mutex_unlock(&event_mutex);
9745 
9746 	return ret;
9747 }
9748 
9749 static __init void create_trace_instances(struct dentry *d_tracer)
9750 {
9751 	struct trace_array *tr;
9752 
9753 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9754 							 instance_mkdir,
9755 							 instance_rmdir);
9756 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9757 		return;
9758 
9759 	mutex_lock(&event_mutex);
9760 	mutex_lock(&trace_types_lock);
9761 
9762 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9763 		if (!tr->name)
9764 			continue;
9765 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9766 			     "Failed to create instance directory\n"))
9767 			break;
9768 	}
9769 
9770 	mutex_unlock(&trace_types_lock);
9771 	mutex_unlock(&event_mutex);
9772 }
9773 
/*
 * Populate the tracefs directory @d_tracer with the control files of the
 * trace array @tr. Every file is created with @tr as its private data.
 */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	struct trace_event_file *file;
	int cpu;

	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
			tr, &show_traces_fops);

	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
			tr, &set_tracer_fops);

	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_cpumask_fops);

	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_iter_fops);

	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_fops);

	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_pipe_fops);

	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_entries_fops);

	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_total_entries_fops);

	trace_create_file("free_buffer", 0200, d_tracer,
			  tr, &tracing_free_buffer_fops);

	trace_create_file("trace_marker", 0220, d_tracer,
			  tr, &tracing_mark_fops);

	/*
	 * Look up the ftrace "print" event file. If it exists and has an
	 * eventfs entry, give it a "trigger" file. The lookup result (which
	 * may be NULL) is remembered as the trace_marker file of @tr.
	 */
	file = __find_event_file(tr, "ftrace", "print");
	if (file && file->ef)
		eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
				  file, &event_trigger_fops);
	tr->trace_marker_file = file;

	trace_create_file("trace_marker_raw", 0220, d_tracer,
			  tr, &tracing_mark_raw_fops);

	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
			  &trace_clock_fops);

	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
			  tr, &rb_simple_fops);

	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
			  &trace_time_stamp_mode_fops);

	/* Initial value exposed through the "buffer_percent" file below. */
	tr->buffer_percent = 50;

	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
			tr, &buffer_percent_fops);

	create_trace_options_dir(tr);

#ifdef CONFIG_TRACER_MAX_TRACE
	trace_create_maxlat_file(tr, d_tracer);
#endif

	if (ftrace_create_function_files(tr, d_tracer))
		MEM_FAIL(1, "Could not allocate function filter files");

#ifdef CONFIG_TRACER_SNAPSHOT
	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
			  tr, &snapshot_fops);
#endif

	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_err_log_fops);

	/* Per-CPU tracefs entries, one set for every tracing CPU. */
	for_each_tracing_cpu(cpu)
		tracing_init_tracefs_percpu(tr, cpu);

	ftrace_init_tracefs(tr, d_tracer);
}
9855 
9856 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9857 {
9858 	struct vfsmount *mnt;
9859 	struct file_system_type *type;
9860 
9861 	/*
9862 	 * To maintain backward compatibility for tools that mount
9863 	 * debugfs to get to the tracing facility, tracefs is automatically
9864 	 * mounted to the debugfs/tracing directory.
9865 	 */
9866 	type = get_fs_type("tracefs");
9867 	if (!type)
9868 		return NULL;
9869 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9870 	put_filesystem(type);
9871 	if (IS_ERR(mnt))
9872 		return NULL;
9873 	mntget(mnt);
9874 
9875 	return mnt;
9876 }
9877 
9878 /**
9879  * tracing_init_dentry - initialize top level trace array
9880  *
9881  * This is called when creating files or directories in the tracing
9882  * directory. It is called via fs_initcall() by any of the boot up code
9883  * and expects to return the dentry of the top level tracing directory.
9884  */
9885 int tracing_init_dentry(void)
9886 {
9887 	struct trace_array *tr = &global_trace;
9888 
9889 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9890 		pr_warn("Tracing disabled due to lockdown\n");
9891 		return -EPERM;
9892 	}
9893 
9894 	/* The top level trace array uses  NULL as parent */
9895 	if (tr->dir)
9896 		return 0;
9897 
9898 	if (WARN_ON(!tracefs_initialized()))
9899 		return -ENODEV;
9900 
9901 	/*
9902 	 * As there may still be users that expect the tracing
9903 	 * files to exist in debugfs/tracing, we must automount
9904 	 * the tracefs file system there, so older tools still
9905 	 * work with the newer kernel.
9906 	 */
9907 	tr->dir = debugfs_create_automount("tracing", NULL,
9908 					   trace_automount, NULL);
9909 
9910 	return 0;
9911 }
9912 
/*
 * Start/stop markers of the built-in eval map pointer array. Both are
 * defined outside this file (presumably by the linker script - confirm).
 */
extern struct trace_eval_map *__start_ftrace_eval_maps[];
extern struct trace_eval_map *__stop_ftrace_eval_maps[];

/* Init-only workqueue and the work items queued on it during boot. */
static struct workqueue_struct *eval_map_wq __initdata;
static struct work_struct eval_map_work __initdata;
static struct work_struct tracerfs_init_work __initdata;
9919 
9920 static void __init eval_map_work_func(struct work_struct *work)
9921 {
9922 	int len;
9923 
9924 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9925 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9926 }
9927 
9928 static int __init trace_eval_init(void)
9929 {
9930 	INIT_WORK(&eval_map_work, eval_map_work_func);
9931 
9932 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9933 	if (!eval_map_wq) {
9934 		pr_err("Unable to allocate eval_map_wq\n");
9935 		/* Do work here */
9936 		eval_map_work_func(&eval_map_work);
9937 		return -ENOMEM;
9938 	}
9939 
9940 	queue_work(eval_map_wq, &eval_map_work);
9941 	return 0;
9942 }
9943 
9944 subsys_initcall(trace_eval_init);
9945 
9946 static int __init trace_eval_sync(void)
9947 {
9948 	/* Make sure the eval map updates are finished */
9949 	if (eval_map_wq)
9950 		destroy_workqueue(eval_map_wq);
9951 	return 0;
9952 }
9953 
9954 late_initcall_sync(trace_eval_sync);
9955 
9956 
9957 #ifdef CONFIG_MODULES
9958 static void trace_module_add_evals(struct module *mod)
9959 {
9960 	if (!mod->num_trace_evals)
9961 		return;
9962 
9963 	/*
9964 	 * Modules with bad taint do not have events created, do
9965 	 * not bother with enums either.
9966 	 */
9967 	if (trace_module_has_bad_taint(mod))
9968 		return;
9969 
9970 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9971 }
9972 
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Unlink and free the eval map section that belongs to the module @mod.
 *
 * trace_eval_maps is walked section by section under trace_eval_mutex;
 * each section is reached from the previous one through its tail item.
 * Nothing happens if @mod has no eval maps or no section matches it.
 */
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item *map;
	/* Address of the link that points at the section being examined. */
	union trace_eval_map_item **last = &trace_eval_maps;

	if (!mod->num_trace_evals)
		return;

	mutex_lock(&trace_eval_mutex);

	map = trace_eval_maps;

	while (map) {
		if (map->head.mod == mod)
			break;
		/* Skip to this section's tail and follow it to the next one. */
		map = trace_eval_jmp_to_tail(map);
		last = &map->tail.next;
		map = map->tail.next;
	}
	if (!map)
		goto out;

	/* Point the previous link past the matching section, then free it. */
	*last = trace_eval_jmp_to_tail(map)->tail.next;
	kfree(map);
 out:
	mutex_unlock(&trace_eval_mutex);
}
#else
/* Without CONFIG_TRACE_EVAL_MAP_FILE there is nothing to remove. */
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10004 
10005 static int trace_module_notify(struct notifier_block *self,
10006 			       unsigned long val, void *data)
10007 {
10008 	struct module *mod = data;
10009 
10010 	switch (val) {
10011 	case MODULE_STATE_COMING:
10012 		trace_module_add_evals(mod);
10013 		break;
10014 	case MODULE_STATE_GOING:
10015 		trace_module_remove_evals(mod);
10016 		break;
10017 	}
10018 
10019 	return NOTIFY_OK;
10020 }
10021 
/* Notifier block that routes module state changes to trace_module_notify(). */
static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
10026 #endif /* CONFIG_MODULES */
10027 
/*
 * Create the top level tracefs files for global_trace.
 *
 * Called from tracer_init_tracefs(), either queued on eval_map_wq or
 * directly with a NULL @work when no workqueue exists; @work is not
 * used here.
 */
static __init void tracer_init_tracefs_work_func(struct work_struct *work)
{

	event_trace_init();

	/* Files shared with instances, then the ones only the top level has. */
	init_tracer_tracefs(&global_trace, NULL);
	ftrace_init_tracefs_toplevel(&global_trace, NULL);

	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", TRACE_MODE_READ, NULL,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_tgids_fops);

	trace_create_eval_file(NULL);

#ifdef CONFIG_MODULES
	/* From here on, module load/unload updates the eval maps. */
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
			NULL, &tracing_dyn_info_fops);
#endif

	create_trace_instances(NULL);

	update_tracer_options(&global_trace);
}
10066 
10067 static __init int tracer_init_tracefs(void)
10068 {
10069 	int ret;
10070 
10071 	trace_access_lock_init();
10072 
10073 	ret = tracing_init_dentry();
10074 	if (ret)
10075 		return 0;
10076 
10077 	if (eval_map_wq) {
10078 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10079 		queue_work(eval_map_wq, &tracerfs_init_work);
10080 	} else {
10081 		tracer_init_tracefs_work_func(NULL);
10082 	}
10083 
10084 	rv_init_interface();
10085 
10086 	return 0;
10087 }
10088 
10089 fs_initcall(tracer_init_tracefs);
10090 
/* Declared ahead of its definition because the notifier blocks refer to it. */
static int trace_die_panic_handler(struct notifier_block *self,
				unsigned long ev, void *unused);

/* Registered on panic_notifier_list by tracer_alloc_buffers(). */
static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

/* Registered as a die notifier; shares its handler with the panic notifier. */
static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};
10103 
10104 /*
10105  * The idea is to execute the following die/panic callback early, in order
10106  * to avoid showing irrelevant information in the trace (like other panic
10107  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10108  * warnings get disabled (to prevent potential log flooding).
10109  */
10110 static int trace_die_panic_handler(struct notifier_block *self,
10111 				unsigned long ev, void *unused)
10112 {
10113 	if (!ftrace_dump_on_oops)
10114 		return NOTIFY_DONE;
10115 
10116 	/* The die notifier requires DIE_OOPS to trigger */
10117 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10118 		return NOTIFY_DONE;
10119 
10120 	ftrace_dump(ftrace_dump_on_oops);
10121 
10122 	return NOTIFY_DONE;
10123 }
10124 
/*
 * printk is set to a max of 1024; we really don't need it that big.
 * Nothing should be printing 1000 characters anyway.
 * trace_printk_seq() caps the length of what it prints at this value.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define KERN_TRACE here so that there is one place to modify
 * if we decide to change the log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE		KERN_EMERG
10137 
10138 void
10139 trace_printk_seq(struct trace_seq *s)
10140 {
10141 	/* Probably should print a warning here. */
10142 	if (s->seq.len >= TRACE_MAX_PRINT)
10143 		s->seq.len = TRACE_MAX_PRINT;
10144 
10145 	/*
10146 	 * More paranoid code. Although the buffer size is set to
10147 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10148 	 * an extra layer of protection.
10149 	 */
10150 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10151 		s->seq.len = s->seq.size - 1;
10152 
10153 	/* should be zero ended, but we are paranoid. */
10154 	s->buffer[s->seq.len] = 0;
10155 
10156 	printk(KERN_TRACE "%s", s->buffer);
10157 
10158 	trace_seq_init(s);
10159 }
10160 
/*
 * Prepare @iter for reading the global trace buffer across all CPUs.
 *
 * The temp and fmt buffers are pointed at static storage instead of
 * being allocated.
 */
void trace_init_global_iter(struct trace_iterator *iter)
{
	iter->tr = &global_trace;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &global_trace.array_buffer;

	/* Give the current tracer a chance to set up the iterator. */
	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* Can not use kmalloc for iter.temp and iter.fmt */
	iter->temp = static_temp_buf;
	iter->temp_size = STATIC_TEMP_BUF_SIZE;
	iter->fmt = static_fmt_buf;
	iter->fmt_size = STATIC_FMT_BUF_SIZE;
}
10185 
/*
 * Dump the contents of the global trace buffer through printk().
 *
 * @oops_dump_mode selects what is dumped: DUMP_ALL reads every CPU,
 * DUMP_ORIG only the CPU this runs on, DUMP_NONE prints nothing. Any
 * other value is reported and treated like DUMP_ALL.
 *
 * Tracing is turned off and stays off afterwards. Only one dump may run
 * at a time; a concurrent caller returns immediately. Entries are
 * consumed from the buffer as they are printed.
 */
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_global_iter(&iter);

	/* Bump the per-CPU "disabled" counters while the buffer is read. */
	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}

	/* Remember the flag so it can be restored at out_enable. */
	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		/* Separator line before the first entry only. */
		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		/* Dumping can take a long time; keep the NMI watchdog quiet. */
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);
10292 
/* Size of the kernel bounce buffer used by trace_parse_run_command(). */
#define WRITE_BUFSIZE  4096

/*
 * Split a user space write into lines and run @createfn on each of them.
 *
 * @buffer/@count describe the user data; @file and @ppos are not used
 * here. The data is copied in chunks of at most WRITE_BUFSIZE - 1 bytes,
 * split at '\n', and everything from a '#' to the end of a line is
 * stripped as a comment before the line is passed to @createfn.
 *
 * Returns the number of bytes consumed on success, or a negative error:
 * -ENOMEM if the bounce buffer cannot be allocated, -EFAULT if the copy
 * from user space fails, -EINVAL if a single line does not fit in the
 * buffer, or the first non-zero value returned by @createfn.
 */
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf, *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		/* Leave room for the terminating '\0'. */
		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				/* Account for the newline as well. */
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					/*
					 * Partial line at the end of this
					 * chunk: go back and copy again
					 * starting at this line.
					 */
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					ret = -EINVAL;
					goto out;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				goto out;
			buf += size;

		} while (done < count);
	}
	ret = done;

out:
	kfree(kbuf);

	return ret;
}
10359 
10360 #ifdef CONFIG_TRACER_MAX_TRACE
10361 __init static bool tr_needs_alloc_snapshot(const char *name)
10362 {
10363 	char *test;
10364 	int len = strlen(name);
10365 	bool ret;
10366 
10367 	if (!boot_snapshot_index)
10368 		return false;
10369 
10370 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10371 	    boot_snapshot_info[len] == '\t')
10372 		return true;
10373 
10374 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10375 	if (!test)
10376 		return false;
10377 
10378 	sprintf(test, "\t%s\t", name);
10379 	ret = strstr(boot_snapshot_info, test) == NULL;
10380 	kfree(test);
10381 	return ret;
10382 }
10383 
/*
 * Request a snapshot buffer for the boot instance @name if
 * tr_needs_alloc_snapshot() says it needs one.
 */
__init static void do_allocate_snapshot(const char *name)
{
	if (!tr_needs_alloc_snapshot(name))
		return;

	/*
	 * When allocate_snapshot is set, the next call to
	 * allocate_trace_buffers() (called by trace_array_get_by_name())
	 * will allocate the snapshot buffer. That will also clear
	 * this flag.
	 */
	allocate_snapshot = true;
}
10397 #else
/* Without CONFIG_TRACER_MAX_TRACE this is a no-op. */
static inline void do_allocate_snapshot(const char *name) { }
10399 #endif
10400 
10401 __init static void enable_instances(void)
10402 {
10403 	struct trace_array *tr;
10404 	char *curr_str;
10405 	char *str;
10406 	char *tok;
10407 
10408 	/* A tab is always appended */
10409 	boot_instance_info[boot_instance_index - 1] = '\0';
10410 	str = boot_instance_info;
10411 
10412 	while ((curr_str = strsep(&str, "\t"))) {
10413 
10414 		tok = strsep(&curr_str, ",");
10415 
10416 		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10417 			do_allocate_snapshot(tok);
10418 
10419 		tr = trace_array_get_by_name(tok);
10420 		if (!tr) {
10421 			pr_warn("Failed to create instance buffer %s\n", curr_str);
10422 			continue;
10423 		}
10424 		/* Allow user space to delete it */
10425 		trace_array_put(tr);
10426 
10427 		while ((tok = strsep(&curr_str, ","))) {
10428 			early_enable_events(tr, tok, true);
10429 		}
10430 	}
10431 }
10432 
/*
 * Allocate and initialize everything global_trace needs at boot: the
 * cpumasks, the ring buffers, the temp buffer for event triggers and the
 * saved cmdlines. It then registers the nop tracer and the panic/die
 * notifiers, and adds global_trace to the list of trace arrays.
 *
 * Returns 0 on success, -EPERM if tracefs is locked down, or a negative
 * error after undoing the allocations made so far.
 */
__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;


	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callbacks allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	/* ret was overwritten above; the failures below report -ENOMEM. */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	if (global_trace.buffer_disabled)
		tracing_off();

	/* A bad boot clock name is only warned about; it is not fatal. */
	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	test_can_verify();

	return 0;

	/* Error unwinding: release resources in reverse order of setup. */
out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}
10567 
10568 void __init ftrace_boot_snapshot(void)
10569 {
10570 #ifdef CONFIG_TRACER_MAX_TRACE
10571 	struct trace_array *tr;
10572 
10573 	if (!snapshot_at_boot)
10574 		return;
10575 
10576 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10577 		if (!tr->allocated_snapshot)
10578 			continue;
10579 
10580 		tracing_snapshot_instance(tr);
10581 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10582 	}
10583 #endif
10584 }
10585 
/*
 * Early boot setup of tracing: allocate the iterator used when
 * tracepoint_printk is set, allocate the global trace buffers and
 * initialize the events.
 */
void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		/* Without an iterator tracepoint_printk cannot work; turn it off. */
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	/* NOTE(review): the return value is not checked here. */
	tracer_alloc_buffers();

	init_events();
}
10601 
/*
 * Initialize the trace events and, if any instances were recorded in
 * boot_instance_info (boot_instance_index is non-zero), create them.
 */
void __init trace_init(void)
{
	trace_event_init();

	if (boot_instance_index)
		enable_instances();
}
10609 
10610 __init static void clear_boot_tracer(void)
10611 {
10612 	/*
10613 	 * The default tracer at boot buffer is an init section.
10614 	 * This function is called in lateinit. If we did not
10615 	 * find the boot tracer, then clear it out, to prevent
10616 	 * later registration from accessing the buffer that is
10617 	 * about to be freed.
10618 	 */
10619 	if (!default_bootup_tracer)
10620 		return;
10621 
10622 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10623 	       default_bootup_tracer);
10624 	default_bootup_tracer = NULL;
10625 }
10626 
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
 * Switch the global trace array to the "global" clock when the sched
 * clock turned out to be unstable and no trace clock was chosen on the
 * command line. Nothing is changed when tracefs is locked down.
 */
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (trace_boot_clock || sched_clock_stable())
		return;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not set tracing clock due to lockdown\n");
		return;
	}

	printk(KERN_WARNING
	       "Unstable clock detected, switching default tracing clock to \"global\"\n"
	       "If you want to keep using the local clock, then add:\n"
	       "  \"trace_clock=local\"\n"
	       "on the kernel command line\n");
	tracing_set_clock(&global_trace, "global");
}
#else
/* No unstable sched clock on this configuration: keep the default. */
static inline void tracing_set_default_clock(void) { }
#endif
10648 
/*
 * Late boot fixups: stop tracepoint_printk if it was asked to stop on
 * boot, pick the default trace clock and clear a boot tracer that was
 * never registered. Always returns 0.
 */
__init static int late_trace_init(void)
{
	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
		static_key_disable(&tracepoint_printk_key.key);
		tracepoint_printk = 0;
	}

	tracing_set_default_clock();
	clear_boot_tracer();
	return 0;
}

late_initcall_sync(late_trace_init);
10662