xref: /openbmc/linux/kernel/trace/trace.c (revision 0e73f1ba602d953ee8ceda5cea3a381bf212b80b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53 
54 #include "trace.h"
55 #include "trace_output.h"
56 
57 /*
58  * On boot up, the ring buffer is set to the minimum size, so that
59  * we do not waste memory on systems that are not using tracing.
60  */
61 bool ring_buffer_expanded;
62 
63 #ifdef CONFIG_FTRACE_STARTUP_TEST
64 /*
65  * We need to change this state when a selftest is running.
66  * A selftest will look into the ring buffer to count the
67  * entries inserted during the selftest, although concurrent
68  * insertions into the ring buffer, such as trace_printk(), could occur
69  * at the same time, giving false positive or negative results.
70  */
71 static bool __read_mostly tracing_selftest_running;
72 
73 /*
74  * If boot-time tracing including tracers/events via kernel cmdline
75  * is running, we do not want to run SELFTEST.
76  */
77 bool __read_mostly tracing_selftest_disabled;
78 
79 void __init disable_tracing_selftest(const char *reason)
80 {
81 	if (!tracing_selftest_disabled) {
82 		tracing_selftest_disabled = true;
83 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
84 	}
85 }
86 #else
87 #define tracing_selftest_running	0
88 #define tracing_selftest_disabled	0
89 #endif
90 
91 /* Pipe tracepoints to printk */
92 static struct trace_iterator *tracepoint_print_iter;
93 int tracepoint_printk;
94 static bool tracepoint_printk_stop_on_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96 
97 /* For tracers that don't implement custom flags */
98 static struct tracer_opt dummy_tracer_opt[] = {
99 	{ }
100 };
101 
102 static int
103 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
104 {
105 	return 0;
106 }
107 
108 /*
109  * To prevent the comm cache from being overwritten when no
110  * tracing is active, only save the comm when a trace event
111  * occurred.
112  */
113 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
114 
115 /*
116  * Kill all tracing for good (never come back).
117  * It is initialized to 1 but will turn to zero if the initialization
118  * of the tracer is successful. But that is the only place that sets
119  * this back to zero.
120  */
121 static int tracing_disabled = 1;
122 
123 cpumask_var_t __read_mostly	tracing_buffer_mask;
124 
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129  * is set, then ftrace_dump is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
131  * capturing traces that lead to crashes and outputing it to a
132  * serial console.
133  *
134  * It is default off, but you can enable it with either specifying
135  * "ftrace_dump_on_oops" in the kernel command line, or setting
136  * /proc/sys/kernel/ftrace_dump_on_oops
137  * Set 1 if you want to dump buffers of all CPUs
138  * Set 2 if you want to dump the buffer of the CPU that triggered oops
139  */
140 
141 enum ftrace_dump_mode ftrace_dump_on_oops;
142 
143 /* When set, tracing will stop when a WARN*() is hit */
144 int __disable_trace_on_warning;
145 
146 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
147 /* Map of enums to their values, for "eval_map" file */
148 struct trace_eval_map_head {
149 	struct module			*mod;
150 	unsigned long			length;
151 };
152 
153 union trace_eval_map_item;
154 
155 struct trace_eval_map_tail {
156 	/*
157 	 * "end" is first and points to NULL as it must be different
158 	 * from "mod" or "eval_string"
159 	 */
160 	union trace_eval_map_item	*next;
161 	const char			*end;	/* points to NULL */
162 };
163 
164 static DEFINE_MUTEX(trace_eval_mutex);
165 
166 /*
167  * The trace_eval_maps are saved in an array with two extra elements,
168  * one at the beginning, and one at the end. The beginning item contains
169  * the count of the saved maps (head.length), and the module they
170  * belong to if not built in (head.mod). The ending item contains a
171  * pointer to the next array of saved eval_map items.
172  */
173 union trace_eval_map_item {
174 	struct trace_eval_map		map;
175 	struct trace_eval_map_head	head;
176 	struct trace_eval_map_tail	tail;
177 };
178 
179 static union trace_eval_map_item *trace_eval_maps;
180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
181 
182 int tracing_set_tracer(struct trace_array *tr, const char *buf);
183 static void ftrace_trace_userstack(struct trace_array *tr,
184 				   struct trace_buffer *buffer,
185 				   unsigned int trace_ctx);
186 
187 #define MAX_TRACER_SIZE		100
188 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
189 static char *default_bootup_tracer;
190 
191 static bool allocate_snapshot;
192 static bool snapshot_at_boot;
193 
194 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
195 static int boot_instance_index;
196 
197 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
198 static int boot_snapshot_index;
199 
200 static int __init set_cmdline_ftrace(char *str)
201 {
202 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
203 	default_bootup_tracer = bootup_tracer_buf;
204 	/* We are using ftrace early, expand it */
205 	ring_buffer_expanded = true;
206 	return 1;
207 }
208 __setup("ftrace=", set_cmdline_ftrace);
209 
210 static int __init set_ftrace_dump_on_oops(char *str)
211 {
212 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
213 		ftrace_dump_on_oops = DUMP_ALL;
214 		return 1;
215 	}
216 
217 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
218 		ftrace_dump_on_oops = DUMP_ORIG;
219 		return 1;
220 	}
221 
222 	return 0;
223 }
224 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
225 
226 static int __init stop_trace_on_warning(char *str)
227 {
228 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
229 		__disable_trace_on_warning = 1;
230 	return 1;
231 }
232 __setup("traceoff_on_warning", stop_trace_on_warning);
233 
234 static int __init boot_alloc_snapshot(char *str)
235 {
236 	char *slot = boot_snapshot_info + boot_snapshot_index;
237 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
238 	int ret;
239 
240 	if (str[0] == '=') {
241 		str++;
242 		if (strlen(str) >= left)
243 			return -1;
244 
245 		ret = snprintf(slot, left, "%s\t", str);
246 		boot_snapshot_index += ret;
247 	} else {
248 		allocate_snapshot = true;
249 		/* We also need the main ring buffer expanded */
250 		ring_buffer_expanded = true;
251 	}
252 	return 1;
253 }
254 __setup("alloc_snapshot", boot_alloc_snapshot);
255 
256 
257 static int __init boot_snapshot(char *str)
258 {
259 	snapshot_at_boot = true;
260 	boot_alloc_snapshot(str);
261 	return 1;
262 }
263 __setup("ftrace_boot_snapshot", boot_snapshot);
264 
265 
266 static int __init boot_instance(char *str)
267 {
268 	char *slot = boot_instance_info + boot_instance_index;
269 	int left = sizeof(boot_instance_info) - boot_instance_index;
270 	int ret;
271 
272 	if (strlen(str) >= left)
273 		return -1;
274 
275 	ret = snprintf(slot, left, "%s\t", str);
276 	boot_instance_index += ret;
277 
278 	return 1;
279 }
280 __setup("trace_instance=", boot_instance);
281 
282 
283 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
284 
285 static int __init set_trace_boot_options(char *str)
286 {
287 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
288 	return 1;
289 }
290 __setup("trace_options=", set_trace_boot_options);
291 
292 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
293 static char *trace_boot_clock __initdata;
294 
295 static int __init set_trace_boot_clock(char *str)
296 {
297 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
298 	trace_boot_clock = trace_boot_clock_buf;
299 	return 1;
300 }
301 __setup("trace_clock=", set_trace_boot_clock);
302 
303 static int __init set_tracepoint_printk(char *str)
304 {
305 	/* Ignore the "tp_printk_stop_on_boot" param */
306 	if (*str == '_')
307 		return 0;
308 
309 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
310 		tracepoint_printk = 1;
311 	return 1;
312 }
313 __setup("tp_printk", set_tracepoint_printk);
314 
315 static int __init set_tracepoint_printk_stop(char *str)
316 {
317 	tracepoint_printk_stop_on_boot = true;
318 	return 1;
319 }
320 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
321 
322 unsigned long long ns2usecs(u64 nsec)
323 {
324 	nsec += 500;
325 	do_div(nsec, 1000);
326 	return nsec;
327 }
328 
329 static void
330 trace_process_export(struct trace_export *export,
331 	       struct ring_buffer_event *event, int flag)
332 {
333 	struct trace_entry *entry;
334 	unsigned int size = 0;
335 
336 	if (export->flags & flag) {
337 		entry = ring_buffer_event_data(event);
338 		size = ring_buffer_event_length(event);
339 		export->write(export, entry, size);
340 	}
341 }
342 
343 static DEFINE_MUTEX(ftrace_export_lock);
344 
345 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
346 
347 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
348 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
350 
351 static inline void ftrace_exports_enable(struct trace_export *export)
352 {
353 	if (export->flags & TRACE_EXPORT_FUNCTION)
354 		static_branch_inc(&trace_function_exports_enabled);
355 
356 	if (export->flags & TRACE_EXPORT_EVENT)
357 		static_branch_inc(&trace_event_exports_enabled);
358 
359 	if (export->flags & TRACE_EXPORT_MARKER)
360 		static_branch_inc(&trace_marker_exports_enabled);
361 }
362 
363 static inline void ftrace_exports_disable(struct trace_export *export)
364 {
365 	if (export->flags & TRACE_EXPORT_FUNCTION)
366 		static_branch_dec(&trace_function_exports_enabled);
367 
368 	if (export->flags & TRACE_EXPORT_EVENT)
369 		static_branch_dec(&trace_event_exports_enabled);
370 
371 	if (export->flags & TRACE_EXPORT_MARKER)
372 		static_branch_dec(&trace_marker_exports_enabled);
373 }
374 
375 static void ftrace_exports(struct ring_buffer_event *event, int flag)
376 {
377 	struct trace_export *export;
378 
379 	preempt_disable_notrace();
380 
381 	export = rcu_dereference_raw_check(ftrace_exports_list);
382 	while (export) {
383 		trace_process_export(export, event, flag);
384 		export = rcu_dereference_raw_check(export->next);
385 	}
386 
387 	preempt_enable_notrace();
388 }
389 
390 static inline void
391 add_trace_export(struct trace_export **list, struct trace_export *export)
392 {
393 	rcu_assign_pointer(export->next, *list);
394 	/*
395 	 * We are entering export into the list but another
396 	 * CPU might be walking that list. We need to make sure
397 	 * the export->next pointer is valid before another CPU sees
398 	 * the export pointer included into the list.
399 	 */
400 	rcu_assign_pointer(*list, export);
401 }
402 
403 static inline int
404 rm_trace_export(struct trace_export **list, struct trace_export *export)
405 {
406 	struct trace_export **p;
407 
408 	for (p = list; *p != NULL; p = &(*p)->next)
409 		if (*p == export)
410 			break;
411 
412 	if (*p != export)
413 		return -1;
414 
415 	rcu_assign_pointer(*p, (*p)->next);
416 
417 	return 0;
418 }
419 
420 static inline void
421 add_ftrace_export(struct trace_export **list, struct trace_export *export)
422 {
423 	ftrace_exports_enable(export);
424 
425 	add_trace_export(list, export);
426 }
427 
428 static inline int
429 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
430 {
431 	int ret;
432 
433 	ret = rm_trace_export(list, export);
434 	ftrace_exports_disable(export);
435 
436 	return ret;
437 }
438 
439 int register_ftrace_export(struct trace_export *export)
440 {
441 	if (WARN_ON_ONCE(!export->write))
442 		return -1;
443 
444 	mutex_lock(&ftrace_export_lock);
445 
446 	add_ftrace_export(&ftrace_exports_list, export);
447 
448 	mutex_unlock(&ftrace_export_lock);
449 
450 	return 0;
451 }
452 EXPORT_SYMBOL_GPL(register_ftrace_export);
453 
454 int unregister_ftrace_export(struct trace_export *export)
455 {
456 	int ret;
457 
458 	mutex_lock(&ftrace_export_lock);
459 
460 	ret = rm_ftrace_export(&ftrace_exports_list, export);
461 
462 	mutex_unlock(&ftrace_export_lock);
463 
464 	return ret;
465 }
466 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
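
/*
 * A minimal sketch (not part of this file; the names are hypothetical and
 * the exact ->write() prototype is the one declared in <linux/trace.h>) of
 * how a module could use the export hooks above to stream raw trace
 * entries to another transport:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		... hand the raw entry off to some transport ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */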
467 
468 /* trace_flags holds trace_options default values */
469 #define TRACE_DEFAULT_FLAGS						\
470 	(FUNCTION_DEFAULT_FLAGS |					\
471 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
472 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
473 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
474 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
475 	 TRACE_ITER_HASH_PTR)
476 
477 /* trace_options that are only supported by global_trace */
478 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
479 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
480 
481 /* trace_flags that are default zero for instances */
482 #define ZEROED_TRACE_FLAGS \
483 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
484 
485 /*
486  * The global_trace is the descriptor that holds the top-level tracing
487  * buffers for the live tracing.
488  */
489 static struct trace_array global_trace = {
490 	.trace_flags = TRACE_DEFAULT_FLAGS,
491 };
492 
493 LIST_HEAD(ftrace_trace_arrays);
494 
495 int trace_array_get(struct trace_array *this_tr)
496 {
497 	struct trace_array *tr;
498 	int ret = -ENODEV;
499 
500 	mutex_lock(&trace_types_lock);
501 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
502 		if (tr == this_tr) {
503 			tr->ref++;
504 			ret = 0;
505 			break;
506 		}
507 	}
508 	mutex_unlock(&trace_types_lock);
509 
510 	return ret;
511 }
512 
513 static void __trace_array_put(struct trace_array *this_tr)
514 {
515 	WARN_ON(!this_tr->ref);
516 	this_tr->ref--;
517 }
518 
519 /**
520  * trace_array_put - Decrement the reference counter for this trace array.
521  * @this_tr : pointer to the trace array
522  *
523  * NOTE: Use this when we no longer need the trace array returned by
524  * trace_array_get_by_name(). This ensures the trace array can be later
525  * destroyed.
526  *
527  */
528 void trace_array_put(struct trace_array *this_tr)
529 {
530 	if (!this_tr)
531 		return;
532 
533 	mutex_lock(&trace_types_lock);
534 	__trace_array_put(this_tr);
535 	mutex_unlock(&trace_types_lock);
536 }
537 EXPORT_SYMBOL_GPL(trace_array_put);
538 
539 int tracing_check_open_get_tr(struct trace_array *tr)
540 {
541 	int ret;
542 
543 	ret = security_locked_down(LOCKDOWN_TRACEFS);
544 	if (ret)
545 		return ret;
546 
547 	if (tracing_disabled)
548 		return -ENODEV;
549 
550 	if (tr && trace_array_get(tr) < 0)
551 		return -ENODEV;
552 
553 	return 0;
554 }
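
/*
 * A hedged sketch (hypothetical open handler, not from this file) of the
 * pattern the helper above is meant for: do the lockdown, tracing_disabled
 * and reference checks in one call, and drop the reference with
 * trace_array_put() from the matching release handler.
 *
 *	static int my_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *
 *		filp->private_data = tr;
 *		return 0;
 *	}
 */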
555 
556 int call_filter_check_discard(struct trace_event_call *call, void *rec,
557 			      struct trace_buffer *buffer,
558 			      struct ring_buffer_event *event)
559 {
560 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
561 	    !filter_match_preds(call->filter, rec)) {
562 		__trace_event_discard_commit(buffer, event);
563 		return 1;
564 	}
565 
566 	return 0;
567 }
568 
569 /**
570  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
571  * @filtered_pids: The list of pids to check
572  * @search_pid: The PID to find in @filtered_pids
573  *
574  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
575  */
576 bool
577 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
578 {
579 	return trace_pid_list_is_set(filtered_pids, search_pid);
580 }
581 
582 /**
583  * trace_ignore_this_task - should a task be ignored for tracing
584  * @filtered_pids: The list of pids to check
585  * @filtered_no_pids: The list of pids not to be traced
586  * @task: The task that should be ignored if not filtered
587  *
588  * Checks if @task should be traced or not from @filtered_pids.
589  * Returns true if @task should *NOT* be traced.
590  * Returns false if @task should be traced.
591  */
592 bool
593 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
594 		       struct trace_pid_list *filtered_no_pids,
595 		       struct task_struct *task)
596 {
597 	/*
598 	 * If filtered_no_pids is not empty, and the task's pid is listed
599 	 * in filtered_no_pids, then return true.
600 	 * Otherwise, if filtered_pids is empty, that means we can
601 	 * trace all tasks. If it has content, then only trace pids
602 	 * within filtered_pids.
603 	 */
604 
605 	return (filtered_pids &&
606 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
607 		(filtered_no_pids &&
608 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
609 }
610 
611 /**
612  * trace_filter_add_remove_task - Add or remove a task from a pid_list
613  * @pid_list: The list to modify
614  * @self: The current task for fork or NULL for exit
615  * @task: The task to add or remove
616  *
617  * If adding a task, if @self is defined, the task is only added if @self
618  * is also included in @pid_list. This happens on fork and tasks should
619  * only be added when the parent is listed. If @self is NULL, then the
620  * @task pid will be removed from the list, which would happen on exit
621  * of a task.
622  */
623 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
624 				  struct task_struct *self,
625 				  struct task_struct *task)
626 {
627 	if (!pid_list)
628 		return;
629 
630 	/* For forks, we only add if the forking task is listed */
631 	if (self) {
632 		if (!trace_find_filtered_pid(pid_list, self->pid))
633 			return;
634 	}
635 
636 	/* "self" is set for forks, and NULL for exits */
637 	if (self)
638 		trace_pid_list_set(pid_list, task->pid);
639 	else
640 		trace_pid_list_clear(pid_list, task->pid);
641 }
642 
643 /**
644  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
645  * @pid_list: The pid list to show
646  * @v: The last pid that was shown (the actual pid plus 1, so that zero can be displayed)
647  * @pos: The position of the file
648  *
649  * This is used by the seq_file "next" operation to iterate the pids
650  * listed in a trace_pid_list structure.
651  *
652  * Returns the pid+1 as we want to display pid of zero, but NULL would
653  * stop the iteration.
654  */
655 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
656 {
657 	long pid = (unsigned long)v;
658 	unsigned int next;
659 
660 	(*pos)++;
661 
662 	/* pid already is +1 of the actual previous bit */
663 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
664 		return NULL;
665 
666 	pid = next;
667 
668 	/* Return pid + 1 to allow zero to be represented */
669 	return (void *)(pid + 1);
670 }
671 
672 /**
673  * trace_pid_start - Used for seq_file to start reading pid lists
674  * @pid_list: The pid list to show
675  * @pos: The position of the file
676  *
677  * This is used by seq_file "start" operation to start the iteration
678  * of listing pids.
679  *
680  * Returns the pid+1 as we want to display pid of zero, but NULL would
681  * stop the iteration.
682  */
683 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
684 {
685 	unsigned long pid;
686 	unsigned int first;
687 	loff_t l = 0;
688 
689 	if (trace_pid_list_first(pid_list, &first) < 0)
690 		return NULL;
691 
692 	pid = first;
693 
694 	/* Return pid + 1 so that zero can be the exit value */
695 	for (pid++; pid && l < *pos;
696 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
697 		;
698 	return (void *)pid;
699 }
700 
701 /**
702  * trace_pid_show - show the current pid in seq_file processing
703  * @m: The seq_file structure to write into
704  * @v: A void pointer of the pid (+1) value to display
705  *
706  * Can be directly used by seq_file operations to display the current
707  * pid value.
708  */
709 int trace_pid_show(struct seq_file *m, void *v)
710 {
711 	unsigned long pid = (unsigned long)v - 1;
712 
713 	seq_printf(m, "%lu\n", pid);
714 	return 0;
715 }
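
/*
 * A hedged sketch of how the three helpers above back a seq_file
 * interface for a pid list. The wrapper names, my_pid_list and p_stop
 * are hypothetical; real users also take the locks protecting the
 * list in their start/stop callbacks:
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */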
716 
717 /* 128 should be much more than enough */
718 #define PID_BUF_SIZE		127
719 
720 int trace_pid_write(struct trace_pid_list *filtered_pids,
721 		    struct trace_pid_list **new_pid_list,
722 		    const char __user *ubuf, size_t cnt)
723 {
724 	struct trace_pid_list *pid_list;
725 	struct trace_parser parser;
726 	unsigned long val;
727 	int nr_pids = 0;
728 	ssize_t read = 0;
729 	ssize_t ret;
730 	loff_t pos;
731 	pid_t pid;
732 
733 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
734 		return -ENOMEM;
735 
736 	/*
737 	 * Always create a new array; the write is an all-or-nothing
738 	 * operation. A new array is built whenever the user adds new
739 	 * pids, so if the operation fails, the current list is
740 	 * not modified.
741 	 */
742 	pid_list = trace_pid_list_alloc();
743 	if (!pid_list) {
744 		trace_parser_put(&parser);
745 		return -ENOMEM;
746 	}
747 
748 	if (filtered_pids) {
749 		/* copy the current bits to the new max */
750 		ret = trace_pid_list_first(filtered_pids, &pid);
751 		while (!ret) {
752 			trace_pid_list_set(pid_list, pid);
753 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
754 			nr_pids++;
755 		}
756 	}
757 
758 	ret = 0;
759 	while (cnt > 0) {
760 
761 		pos = 0;
762 
763 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
764 		if (ret < 0)
765 			break;
766 
767 		read += ret;
768 		ubuf += ret;
769 		cnt -= ret;
770 
771 		if (!trace_parser_loaded(&parser))
772 			break;
773 
774 		ret = -EINVAL;
775 		if (kstrtoul(parser.buffer, 0, &val))
776 			break;
777 
778 		pid = (pid_t)val;
779 
780 		if (trace_pid_list_set(pid_list, pid) < 0) {
781 			ret = -1;
782 			break;
783 		}
784 		nr_pids++;
785 
786 		trace_parser_clear(&parser);
787 		ret = 0;
788 	}
789 	trace_parser_put(&parser);
790 
791 	if (ret < 0) {
792 		trace_pid_list_free(pid_list);
793 		return ret;
794 	}
795 
796 	if (!nr_pids) {
797 		/* Cleared the list of pids */
798 		trace_pid_list_free(pid_list);
799 		pid_list = NULL;
800 	}
801 
802 	*new_pid_list = pid_list;
803 
804 	return read;
805 }
806 
807 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
808 {
809 	u64 ts;
810 
811 	/* Early boot up does not have a buffer yet */
812 	if (!buf->buffer)
813 		return trace_clock_local();
814 
815 	ts = ring_buffer_time_stamp(buf->buffer);
816 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
817 
818 	return ts;
819 }
820 
821 u64 ftrace_now(int cpu)
822 {
823 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
824 }
825 
826 /**
827  * tracing_is_enabled - Show if global_trace has been enabled
828  *
829  * Shows if the global trace has been enabled or not. It uses the
830  * mirror flag "buffer_disabled", which can be checked in fast paths
831  * such as the irqsoff tracer, but may be inaccurate due to races. If you
832  * need to know the accurate state, use tracing_is_on(), which is a little
833  * slower but accurate.
834  */
835 int tracing_is_enabled(void)
836 {
837 	/*
838 	 * For quick access (irqsoff uses this in fast path), just
839 	 * return the mirror variable of the state of the ring buffer.
840 	 * It's a little racy, but we don't really care.
841 	 */
842 	smp_rmb();
843 	return !global_trace.buffer_disabled;
844 }
845 
846 /*
847  * trace_buf_size is the size in bytes that is allocated
848  * for a buffer. Note, the number of bytes is always rounded
849  * to page size.
850  *
851  * This number is purposely set to a low number of 16384.
852  * If a dump on oops happens, it is much appreciated not to
853  * have to wait for all that output. Anyway, this is configurable
854  * at both boot time and run time.
855  */
856 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
857 
858 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
859 
860 /* trace_types holds a link list of available tracers. */
861 static struct tracer		*trace_types __read_mostly;
862 
863 /*
864  * trace_types_lock is used to protect the trace_types list.
865  */
866 DEFINE_MUTEX(trace_types_lock);
867 
868 /*
869  * Serialize access to the ring buffer.
870  *
871  * The ring buffer serializes readers, but that is only low-level protection.
872  * The validity of the events (which are returned by ring_buffer_peek() etc.)
873  * is not protected by the ring buffer.
874  *
875  * The content of events may become garbage if we allow other processes to
876  * consume these events concurrently:
877  *   A) the page of the consumed events may become a normal page
878  *      (not a reader page) in the ring buffer, and this page will be
879  *      rewritten by the events producer.
880  *   B) the page of the consumed events may become a page for splice_read,
881  *      and this page will be returned to the system.
882  *
883  * These primitives allow multi-process access to different CPU ring buffers
884  * concurrently.
885  *
886  * These primitives don't distinguish read-only and read-consume access.
887  * Multiple read-only accesses are also serialized.
888  */
889 
890 #ifdef CONFIG_SMP
891 static DECLARE_RWSEM(all_cpu_access_lock);
892 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
893 
894 static inline void trace_access_lock(int cpu)
895 {
896 	if (cpu == RING_BUFFER_ALL_CPUS) {
897 		/* gain it for accessing the whole ring buffer. */
898 		down_write(&all_cpu_access_lock);
899 	} else {
900 		/* gain it for accessing a cpu ring buffer. */
901 
902 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
903 		down_read(&all_cpu_access_lock);
904 
905 		/* Secondly block other access to this @cpu ring buffer. */
906 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
907 	}
908 }
909 
910 static inline void trace_access_unlock(int cpu)
911 {
912 	if (cpu == RING_BUFFER_ALL_CPUS) {
913 		up_write(&all_cpu_access_lock);
914 	} else {
915 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
916 		up_read(&all_cpu_access_lock);
917 	}
918 }
919 
920 static inline void trace_access_lock_init(void)
921 {
922 	int cpu;
923 
924 	for_each_possible_cpu(cpu)
925 		mutex_init(&per_cpu(cpu_access_lock, cpu));
926 }
927 
928 #else
929 
930 static DEFINE_MUTEX(access_lock);
931 
932 static inline void trace_access_lock(int cpu)
933 {
934 	(void)cpu;
935 	mutex_lock(&access_lock);
936 }
937 
938 static inline void trace_access_unlock(int cpu)
939 {
940 	(void)cpu;
941 	mutex_unlock(&access_lock);
942 }
943 
944 static inline void trace_access_lock_init(void)
945 {
946 }
947 
948 #endif
949 
950 #ifdef CONFIG_STACKTRACE
951 static void __ftrace_trace_stack(struct trace_buffer *buffer,
952 				 unsigned int trace_ctx,
953 				 int skip, struct pt_regs *regs);
954 static inline void ftrace_trace_stack(struct trace_array *tr,
955 				      struct trace_buffer *buffer,
956 				      unsigned int trace_ctx,
957 				      int skip, struct pt_regs *regs);
958 
959 #else
960 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
961 					unsigned int trace_ctx,
962 					int skip, struct pt_regs *regs)
963 {
964 }
965 static inline void ftrace_trace_stack(struct trace_array *tr,
966 				      struct trace_buffer *buffer,
967 				      unsigned long trace_ctx,
968 				      int skip, struct pt_regs *regs)
969 {
970 }
971 
972 #endif
973 
974 static __always_inline void
975 trace_event_setup(struct ring_buffer_event *event,
976 		  int type, unsigned int trace_ctx)
977 {
978 	struct trace_entry *ent = ring_buffer_event_data(event);
979 
980 	tracing_generic_entry_update(ent, type, trace_ctx);
981 }
982 
983 static __always_inline struct ring_buffer_event *
984 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
985 			  int type,
986 			  unsigned long len,
987 			  unsigned int trace_ctx)
988 {
989 	struct ring_buffer_event *event;
990 
991 	event = ring_buffer_lock_reserve(buffer, len);
992 	if (event != NULL)
993 		trace_event_setup(event, type, trace_ctx);
994 
995 	return event;
996 }
997 
998 void tracer_tracing_on(struct trace_array *tr)
999 {
1000 	if (tr->array_buffer.buffer)
1001 		ring_buffer_record_on(tr->array_buffer.buffer);
1002 	/*
1003 	 * This flag is looked at when buffers haven't been allocated
1004 	 * yet, or by some tracers (like irqsoff) that just want to
1005 	 * know if the ring buffer has been disabled, but they can handle
1006 	 * races where it gets disabled while we still do a record.
1007 	 * As the check is in the fast path of the tracers, it is more
1008 	 * important to be fast than accurate.
1009 	 */
1010 	tr->buffer_disabled = 0;
1011 	/* Make the flag seen by readers */
1012 	smp_wmb();
1013 }
1014 
1015 /**
1016  * tracing_on - enable tracing buffers
1017  *
1018  * This function enables tracing buffers that may have been
1019  * disabled with tracing_off.
1020  */
1021 void tracing_on(void)
1022 {
1023 	tracer_tracing_on(&global_trace);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_on);
1026 
1027 
1028 static __always_inline void
1029 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1030 {
1031 	__this_cpu_write(trace_taskinfo_save, true);
1032 
1033 	/* If this is the temp buffer, we need to commit fully */
1034 	if (this_cpu_read(trace_buffered_event) == event) {
1035 		/* Length is in event->array[0] */
1036 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1037 		/* Release the temp buffer */
1038 		this_cpu_dec(trace_buffered_event_cnt);
1039 		/* ring_buffer_unlock_commit() enables preemption */
1040 		preempt_enable_notrace();
1041 	} else
1042 		ring_buffer_unlock_commit(buffer);
1043 }
1044 
1045 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1046 		       const char *str, int size)
1047 {
1048 	struct ring_buffer_event *event;
1049 	struct trace_buffer *buffer;
1050 	struct print_entry *entry;
1051 	unsigned int trace_ctx;
1052 	int alloc;
1053 
1054 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1055 		return 0;
1056 
1057 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1058 		return 0;
1059 
1060 	if (unlikely(tracing_disabled))
1061 		return 0;
1062 
1063 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1064 
1065 	trace_ctx = tracing_gen_ctx();
1066 	buffer = tr->array_buffer.buffer;
1067 	ring_buffer_nest_start(buffer);
1068 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1069 					    trace_ctx);
1070 	if (!event) {
1071 		size = 0;
1072 		goto out;
1073 	}
1074 
1075 	entry = ring_buffer_event_data(event);
1076 	entry->ip = ip;
1077 
1078 	memcpy(&entry->buf, str, size);
1079 
1080 	/* Add a newline if necessary */
1081 	if (entry->buf[size - 1] != '\n') {
1082 		entry->buf[size] = '\n';
1083 		entry->buf[size + 1] = '\0';
1084 	} else
1085 		entry->buf[size] = '\0';
1086 
1087 	__buffer_unlock_commit(buffer, event);
1088 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1089  out:
1090 	ring_buffer_nest_end(buffer);
1091 	return size;
1092 }
1093 EXPORT_SYMBOL_GPL(__trace_array_puts);
1094 
1095 /**
1096  * __trace_puts - write a constant string into the trace buffer.
1097  * @ip:	   The address of the caller
1098  * @str:   The constant string to write
1099  * @size:  The size of the string.
1100  */
1101 int __trace_puts(unsigned long ip, const char *str, int size)
1102 {
1103 	return __trace_array_puts(&global_trace, ip, str, size);
1104 }
1105 EXPORT_SYMBOL_GPL(__trace_puts);
1106 
1107 /**
1108  * __trace_bputs - write the pointer to a constant string into trace buffer
1109  * @ip:	   The address of the caller
1110  * @str:   The constant string to write to the buffer to
1111  */
1112 int __trace_bputs(unsigned long ip, const char *str)
1113 {
1114 	struct ring_buffer_event *event;
1115 	struct trace_buffer *buffer;
1116 	struct bputs_entry *entry;
1117 	unsigned int trace_ctx;
1118 	int size = sizeof(struct bputs_entry);
1119 	int ret = 0;
1120 
1121 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1122 		return 0;
1123 
1124 	if (unlikely(tracing_selftest_running || tracing_disabled))
1125 		return 0;
1126 
1127 	trace_ctx = tracing_gen_ctx();
1128 	buffer = global_trace.array_buffer.buffer;
1129 
1130 	ring_buffer_nest_start(buffer);
1131 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1132 					    trace_ctx);
1133 	if (!event)
1134 		goto out;
1135 
1136 	entry = ring_buffer_event_data(event);
1137 	entry->ip			= ip;
1138 	entry->str			= str;
1139 
1140 	__buffer_unlock_commit(buffer, event);
1141 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1142 
1143 	ret = 1;
1144  out:
1145 	ring_buffer_nest_end(buffer);
1146 	return ret;
1147 }
1148 EXPORT_SYMBOL_GPL(__trace_bputs);
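
/*
 * Both helpers above back the trace_puts() macro: for a string the
 * compiler can prove constant, the macro resolves to __trace_bputs()
 * (only the pointer is recorded); otherwise it falls back to
 * __trace_puts(), which copies the string. A typical (illustrative)
 * call site is simply:
 *
 *	trace_puts("hit the slow path\n");
 */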
1149 
1150 #ifdef CONFIG_TRACER_SNAPSHOT
1151 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1152 					   void *cond_data)
1153 {
1154 	struct tracer *tracer = tr->current_trace;
1155 	unsigned long flags;
1156 
1157 	if (in_nmi()) {
1158 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1159 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1160 		return;
1161 	}
1162 
1163 	if (!tr->allocated_snapshot) {
1164 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1165 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1166 		tracer_tracing_off(tr);
1167 		return;
1168 	}
1169 
1170 	/* Note, snapshot can not be used when the tracer uses it */
1171 	if (tracer->use_max_tr) {
1172 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1173 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1174 		return;
1175 	}
1176 
1177 	local_irq_save(flags);
1178 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1179 	local_irq_restore(flags);
1180 }
1181 
1182 void tracing_snapshot_instance(struct trace_array *tr)
1183 {
1184 	tracing_snapshot_instance_cond(tr, NULL);
1185 }
1186 
1187 /**
1188  * tracing_snapshot - take a snapshot of the current buffer.
1189  *
1190  * This causes a swap between the snapshot buffer and the current live
1191  * tracing buffer. You can use this to take snapshots of the live
1192  * trace when some condition is triggered, but continue to trace.
1193  *
1194  * Note, make sure to allocate the snapshot with either
1195  * a tracing_snapshot_alloc(), or by doing it manually
1196  * with: echo 1 > /sys/kernel/tracing/snapshot
1197  *
1198  * If the snapshot buffer is not allocated, it will stop tracing.
1199  * Basically making a permanent snapshot.
1200  */
1201 void tracing_snapshot(void)
1202 {
1203 	struct trace_array *tr = &global_trace;
1204 
1205 	tracing_snapshot_instance(tr);
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot);
1208 
1209 /**
1210  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1211  * @tr:		The tracing instance to snapshot
1212  * @cond_data:	The data to be tested conditionally, and possibly saved
1213  *
1214  * This is the same as tracing_snapshot() except that the snapshot is
1215  * conditional - the snapshot will only happen if the
1216  * cond_snapshot.update() implementation receiving the cond_data
1217  * returns true, which means that the trace array's cond_snapshot
1218  * update() operation used the cond_data to determine whether the
1219  * snapshot should be taken, and if it was, presumably saved it along
1220  * with the snapshot.
1221  */
1222 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1223 {
1224 	tracing_snapshot_instance_cond(tr, cond_data);
1225 }
1226 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1227 
1228 /**
1229  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1230  * @tr:		The tracing instance
1231  *
1232  * When the user enables a conditional snapshot using
1233  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1234  * with the snapshot.  This accessor is used to retrieve it.
1235  *
1236  * Should not be called from cond_snapshot.update(), since it takes
1237  * the tr->max_lock lock, which the code calling
1238  * cond_snapshot.update() has already done.
1239  *
1240  * Returns the cond_data associated with the trace array's snapshot.
1241  */
1242 void *tracing_cond_snapshot_data(struct trace_array *tr)
1243 {
1244 	void *cond_data = NULL;
1245 
1246 	local_irq_disable();
1247 	arch_spin_lock(&tr->max_lock);
1248 
1249 	if (tr->cond_snapshot)
1250 		cond_data = tr->cond_snapshot->cond_data;
1251 
1252 	arch_spin_unlock(&tr->max_lock);
1253 	local_irq_enable();
1254 
1255 	return cond_data;
1256 }
1257 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1258 
1259 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1260 					struct array_buffer *size_buf, int cpu_id);
1261 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1262 
1263 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1264 {
1265 	int ret;
1266 
1267 	if (!tr->allocated_snapshot) {
1268 
1269 		/* allocate spare buffer */
1270 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1271 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1272 		if (ret < 0)
1273 			return ret;
1274 
1275 		tr->allocated_snapshot = true;
1276 	}
1277 
1278 	return 0;
1279 }
1280 
1281 static void free_snapshot(struct trace_array *tr)
1282 {
1283 	/*
1284 	 * We don't free the ring buffer; instead, we resize it because
1285 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1286 	 * we want to preserve it.
1287 	 */
1288 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1289 	set_buffer_entries(&tr->max_buffer, 1);
1290 	tracing_reset_online_cpus(&tr->max_buffer);
1291 	tr->allocated_snapshot = false;
1292 }
1293 
1294 /**
1295  * tracing_alloc_snapshot - allocate snapshot buffer.
1296  *
1297  * This only allocates the snapshot buffer if it isn't already
1298  * allocated - it doesn't also take a snapshot.
1299  *
1300  * This is meant to be used in cases where the snapshot buffer needs
1301  * to be set up for events that can't sleep but need to be able to
1302  * trigger a snapshot.
1303  */
1304 int tracing_alloc_snapshot(void)
1305 {
1306 	struct trace_array *tr = &global_trace;
1307 	int ret;
1308 
1309 	ret = tracing_alloc_snapshot_instance(tr);
1310 	WARN_ON(ret < 0);
1311 
1312 	return ret;
1313 }
1314 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1315 
1316 /**
1317  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1318  *
1319  * This is similar to tracing_snapshot(), but it will allocate the
1320  * snapshot buffer if it isn't already allocated. Use this only
1321  * where it is safe to sleep, as the allocation may sleep.
1322  *
1323  * This causes a swap between the snapshot buffer and the current live
1324  * tracing buffer. You can use this to take snapshots of the live
1325  * trace when some condition is triggered, but continue to trace.
1326  */
1327 void tracing_snapshot_alloc(void)
1328 {
1329 	int ret;
1330 
1331 	ret = tracing_alloc_snapshot();
1332 	if (ret < 0)
1333 		return;
1334 
1335 	tracing_snapshot();
1336 }
1337 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
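
/*
 * A hedged example (not from this file) of using the snapshot API above
 * from kernel code: allocate the spare buffer once from a context that
 * may sleep, then trigger snapshots when the condition of interest fires.
 *
 * During initialization (may sleep):
 *
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *
 * Later, in the path being debugged ("my_threshold" is hypothetical):
 *
 *	if (latency > my_threshold)
 *		tracing_snapshot();
 */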
1338 
1339 /**
1340  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1341  * @tr:		The tracing instance
1342  * @cond_data:	User data to associate with the snapshot
1343  * @update:	Implementation of the cond_snapshot update function
1344  *
1345  * Check whether the conditional snapshot for the given instance has
1346  * already been enabled, or if the current tracer is already using a
1347  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1348  * save the cond_data and update function inside.
1349  *
1350  * Returns 0 if successful, error otherwise.
1351  */
1352 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1353 				 cond_update_fn_t update)
1354 {
1355 	struct cond_snapshot *cond_snapshot;
1356 	int ret = 0;
1357 
1358 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1359 	if (!cond_snapshot)
1360 		return -ENOMEM;
1361 
1362 	cond_snapshot->cond_data = cond_data;
1363 	cond_snapshot->update = update;
1364 
1365 	mutex_lock(&trace_types_lock);
1366 
1367 	ret = tracing_alloc_snapshot_instance(tr);
1368 	if (ret)
1369 		goto fail_unlock;
1370 
1371 	if (tr->current_trace->use_max_tr) {
1372 		ret = -EBUSY;
1373 		goto fail_unlock;
1374 	}
1375 
1376 	/*
1377 	 * The cond_snapshot can only change to NULL without the
1378 	 * trace_types_lock. We don't care if we race with it going
1379 	 * to NULL, but we want to make sure that it's not set to
1380 	 * something other than NULL when we get here, which we can
1381 	 * do safely with only holding the trace_types_lock and not
1382 	 * having to take the max_lock.
1383 	 */
1384 	if (tr->cond_snapshot) {
1385 		ret = -EBUSY;
1386 		goto fail_unlock;
1387 	}
1388 
1389 	local_irq_disable();
1390 	arch_spin_lock(&tr->max_lock);
1391 	tr->cond_snapshot = cond_snapshot;
1392 	arch_spin_unlock(&tr->max_lock);
1393 	local_irq_enable();
1394 
1395 	mutex_unlock(&trace_types_lock);
1396 
1397 	return ret;
1398 
1399  fail_unlock:
1400 	mutex_unlock(&trace_types_lock);
1401 	kfree(cond_snapshot);
1402 	return ret;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
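
/*
 * A hedged sketch of wiring up a conditional snapshot. The names below
 * are hypothetical and the callback prototype follows cond_update_fn_t
 * from trace.h; this particular update callback snapshots only the
 * first hit:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *state = cond_data;
 *
 *		return state->hits++ == 0;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_state);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */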
1405 
1406 /**
1407  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1408  * @tr:		The tracing instance
1409  *
1410  * Check whether the conditional snapshot for the given instance is
1411  * enabled; if so, free the cond_snapshot associated with it,
1412  * otherwise return -EINVAL.
1413  *
1414  * Returns 0 if successful, error otherwise.
1415  */
1416 int tracing_snapshot_cond_disable(struct trace_array *tr)
1417 {
1418 	int ret = 0;
1419 
1420 	local_irq_disable();
1421 	arch_spin_lock(&tr->max_lock);
1422 
1423 	if (!tr->cond_snapshot)
1424 		ret = -EINVAL;
1425 	else {
1426 		kfree(tr->cond_snapshot);
1427 		tr->cond_snapshot = NULL;
1428 	}
1429 
1430 	arch_spin_unlock(&tr->max_lock);
1431 	local_irq_enable();
1432 
1433 	return ret;
1434 }
1435 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1436 #else
1437 void tracing_snapshot(void)
1438 {
1439 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1440 }
1441 EXPORT_SYMBOL_GPL(tracing_snapshot);
1442 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1443 {
1444 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1447 int tracing_alloc_snapshot(void)
1448 {
1449 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1450 	return -ENODEV;
1451 }
1452 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1453 void tracing_snapshot_alloc(void)
1454 {
1455 	/* Give warning */
1456 	tracing_snapshot();
1457 }
1458 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1459 void *tracing_cond_snapshot_data(struct trace_array *tr)
1460 {
1461 	return NULL;
1462 }
1463 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1464 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1465 {
1466 	return -ENODEV;
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1469 int tracing_snapshot_cond_disable(struct trace_array *tr)
1470 {
1471 	return false;
1472 }
1473 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1474 #define free_snapshot(tr)	do { } while (0)
1475 #endif /* CONFIG_TRACER_SNAPSHOT */
1476 
1477 void tracer_tracing_off(struct trace_array *tr)
1478 {
1479 	if (tr->array_buffer.buffer)
1480 		ring_buffer_record_off(tr->array_buffer.buffer);
1481 	/*
1482 	 * This flag is looked at when buffers haven't been allocated
1483 	 * yet, or by some tracers (like irqsoff) that just want to
1484 	 * know if the ring buffer has been disabled, but they can handle
1485 	 * races where it gets disabled while we still do a record.
1486 	 * As the check is in the fast path of the tracers, it is more
1487 	 * important to be fast than accurate.
1488 	 */
1489 	tr->buffer_disabled = 1;
1490 	/* Make the flag seen by readers */
1491 	smp_wmb();
1492 }
1493 
1494 /**
1495  * tracing_off - turn off tracing buffers
1496  *
1497  * This function stops the tracing buffers from recording data.
1498  * It does not disable any overhead the tracers themselves may
1499  * be causing. This function simply causes all recording to
1500  * the ring buffers to fail.
1501  */
1502 void tracing_off(void)
1503 {
1504 	tracer_tracing_off(&global_trace);
1505 }
1506 EXPORT_SYMBOL_GPL(tracing_off);
1507 
1508 void disable_trace_on_warning(void)
1509 {
1510 	if (__disable_trace_on_warning) {
1511 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1512 			"Disabling tracing due to warning\n");
1513 		tracing_off();
1514 	}
1515 }
1516 
1517 /**
1518  * tracer_tracing_is_on - show real state of ring buffer enabled
1519  * @tr : the trace array to know if ring buffer is enabled
1520  *
1521  * Shows real state of the ring buffer if it is enabled or not.
1522  */
1523 bool tracer_tracing_is_on(struct trace_array *tr)
1524 {
1525 	if (tr->array_buffer.buffer)
1526 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1527 	return !tr->buffer_disabled;
1528 }
1529 
1530 /**
1531  * tracing_is_on - show state of ring buffers enabled
1532  */
1533 int tracing_is_on(void)
1534 {
1535 	return tracer_tracing_is_on(&global_trace);
1536 }
1537 EXPORT_SYMBOL_GPL(tracing_is_on);
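
/*
 * A common pattern (illustrative only) built on the helpers above: stop
 * the ring buffers right at the point of interest so the trace leading
 * up to it is preserved for inspection ("suspicious_condition" is a
 * hypothetical check):
 *
 *	if (suspicious_condition)
 *		tracing_off();
 *
 * The captured trace can then be read from /sys/kernel/tracing/trace and
 * recording resumed with tracing_on() or "echo 1 > tracing_on".
 */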
1538 
1539 static int __init set_buf_size(char *str)
1540 {
1541 	unsigned long buf_size;
1542 
1543 	if (!str)
1544 		return 0;
1545 	buf_size = memparse(str, &str);
1546 	/*
1547 	 * nr_entries can not be zero and the startup
1548 	 * tests require some buffer space. Therefore
1549 	 * ensure we have at least 4096 bytes of buffer.
1550 	 */
1551 	trace_buf_size = max(4096UL, buf_size);
1552 	return 1;
1553 }
1554 __setup("trace_buf_size=", set_buf_size);
1555 
1556 static int __init set_tracing_thresh(char *str)
1557 {
1558 	unsigned long threshold;
1559 	int ret;
1560 
1561 	if (!str)
1562 		return 0;
1563 	ret = kstrtoul(str, 0, &threshold);
1564 	if (ret < 0)
1565 		return 0;
1566 	tracing_thresh = threshold * 1000;
1567 	return 1;
1568 }
1569 __setup("tracing_thresh=", set_tracing_thresh);
1570 
1571 unsigned long nsecs_to_usecs(unsigned long nsecs)
1572 {
1573 	return nsecs / 1000;
1574 }
1575 
1576 /*
1577  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1578  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1579  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1580  * of strings in the order that the evals (enum) were defined.
1581  */
1582 #undef C
1583 #define C(a, b) b
1584 
1585 /* These must match the bit positions in trace_iterator_flags */
1586 static const char *trace_options[] = {
1587 	TRACE_FLAGS
1588 	NULL
1589 };
1590 
1591 static struct {
1592 	u64 (*func)(void);
1593 	const char *name;
1594 	int in_ns;		/* is this clock in nanoseconds? */
1595 } trace_clocks[] = {
1596 	{ trace_clock_local,		"local",	1 },
1597 	{ trace_clock_global,		"global",	1 },
1598 	{ trace_clock_counter,		"counter",	0 },
1599 	{ trace_clock_jiffies,		"uptime",	0 },
1600 	{ trace_clock,			"perf",		1 },
1601 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1602 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1603 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1604 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1605 	ARCH_TRACE_CLOCKS
1606 };
1607 
1608 bool trace_clock_in_ns(struct trace_array *tr)
1609 {
1610 	if (trace_clocks[tr->clock_id].in_ns)
1611 		return true;
1612 
1613 	return false;
1614 }
1615 
1616 /*
1617  * trace_parser_get_init - gets the buffer for trace parser
1618  */
1619 int trace_parser_get_init(struct trace_parser *parser, int size)
1620 {
1621 	memset(parser, 0, sizeof(*parser));
1622 
1623 	parser->buffer = kmalloc(size, GFP_KERNEL);
1624 	if (!parser->buffer)
1625 		return 1;
1626 
1627 	parser->size = size;
1628 	return 0;
1629 }
1630 
1631 /*
1632  * trace_parser_put - frees the buffer for trace parser
1633  */
1634 void trace_parser_put(struct trace_parser *parser)
1635 {
1636 	kfree(parser->buffer);
1637 	parser->buffer = NULL;
1638 }
1639 
1640 /*
1641  * trace_get_user - reads the user input string separated by space
1642  * (matched by isspace(ch))
1643  *
1644  * For each string found the 'struct trace_parser' is updated,
1645  * and the function returns.
1646  *
1647  * Returns number of bytes read.
1648  *
1649  * See kernel/trace/trace.h for 'struct trace_parser' details.
1650  */
1651 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1652 	size_t cnt, loff_t *ppos)
1653 {
1654 	char ch;
1655 	size_t read = 0;
1656 	ssize_t ret;
1657 
1658 	if (!*ppos)
1659 		trace_parser_clear(parser);
1660 
1661 	ret = get_user(ch, ubuf++);
1662 	if (ret)
1663 		goto out;
1664 
1665 	read++;
1666 	cnt--;
1667 
1668 	/*
1669 	 * If the parser is not finished with the last write,
1670 	 * continue reading the user input without skipping spaces.
1671 	 */
1672 	if (!parser->cont) {
1673 		/* skip white space */
1674 		while (cnt && isspace(ch)) {
1675 			ret = get_user(ch, ubuf++);
1676 			if (ret)
1677 				goto out;
1678 			read++;
1679 			cnt--;
1680 		}
1681 
1682 		parser->idx = 0;
1683 
1684 		/* only spaces were written */
1685 		if (isspace(ch) || !ch) {
1686 			*ppos += read;
1687 			ret = read;
1688 			goto out;
1689 		}
1690 	}
1691 
1692 	/* read the non-space input */
1693 	while (cnt && !isspace(ch) && ch) {
1694 		if (parser->idx < parser->size - 1)
1695 			parser->buffer[parser->idx++] = ch;
1696 		else {
1697 			ret = -EINVAL;
1698 			goto out;
1699 		}
1700 		ret = get_user(ch, ubuf++);
1701 		if (ret)
1702 			goto out;
1703 		read++;
1704 		cnt--;
1705 	}
1706 
1707 	/* We either got finished input or we have to wait for another call. */
1708 	if (isspace(ch) || !ch) {
1709 		parser->buffer[parser->idx] = 0;
1710 		parser->cont = false;
1711 	} else if (parser->idx < parser->size - 1) {
1712 		parser->cont = true;
1713 		parser->buffer[parser->idx++] = ch;
1714 		/* Make sure the parsed string always terminates with '\0'. */
1715 		parser->buffer[parser->idx] = 0;
1716 	} else {
1717 		ret = -EINVAL;
1718 		goto out;
1719 	}
1720 
1721 	*ppos += read;
1722 	ret = read;
1723 
1724 out:
1725 	return ret;
1726 }
1727 
1728 /* TODO add a seq_buf_to_buffer() */
1729 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1730 {
1731 	int len;
1732 
1733 	if (trace_seq_used(s) <= s->seq.readpos)
1734 		return -EBUSY;
1735 
1736 	len = trace_seq_used(s) - s->seq.readpos;
1737 	if (cnt > len)
1738 		cnt = len;
1739 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1740 
1741 	s->seq.readpos += cnt;
1742 	return cnt;
1743 }
1744 
1745 unsigned long __read_mostly	tracing_thresh;
1746 
1747 #ifdef CONFIG_TRACER_MAX_TRACE
1748 static const struct file_operations tracing_max_lat_fops;
1749 
1750 #ifdef LATENCY_FS_NOTIFY
1751 
1752 static struct workqueue_struct *fsnotify_wq;
1753 
1754 static void latency_fsnotify_workfn(struct work_struct *work)
1755 {
1756 	struct trace_array *tr = container_of(work, struct trace_array,
1757 					      fsnotify_work);
1758 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1759 }
1760 
1761 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1762 {
1763 	struct trace_array *tr = container_of(iwork, struct trace_array,
1764 					      fsnotify_irqwork);
1765 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1766 }
1767 
1768 static void trace_create_maxlat_file(struct trace_array *tr,
1769 				     struct dentry *d_tracer)
1770 {
1771 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1772 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1773 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1774 					      TRACE_MODE_WRITE,
1775 					      d_tracer, tr,
1776 					      &tracing_max_lat_fops);
1777 }
1778 
1779 __init static int latency_fsnotify_init(void)
1780 {
1781 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1782 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1783 	if (!fsnotify_wq) {
1784 		pr_err("Unable to allocate tr_max_lat_wq\n");
1785 		return -ENOMEM;
1786 	}
1787 	return 0;
1788 }
1789 
1790 late_initcall_sync(latency_fsnotify_init);
1791 
1792 void latency_fsnotify(struct trace_array *tr)
1793 {
1794 	if (!fsnotify_wq)
1795 		return;
1796 	/*
1797 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1798 	 * possible that we are called from __schedule() or do_idle(), which
1799 	 * could cause a deadlock.
1800 	 */
1801 	irq_work_queue(&tr->fsnotify_irqwork);
1802 }
1803 
1804 #else /* !LATENCY_FS_NOTIFY */
1805 
1806 #define trace_create_maxlat_file(tr, d_tracer)				\
1807 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1808 			  d_tracer, tr, &tracing_max_lat_fops)
1809 
1810 #endif
1811 
1812 /*
1813  * Copy the new maximum trace into the separate maximum-trace
1814  * structure. (this way the maximum trace is permanently saved,
1815  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1816  */
1817 static void
1818 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1819 {
1820 	struct array_buffer *trace_buf = &tr->array_buffer;
1821 	struct array_buffer *max_buf = &tr->max_buffer;
1822 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1823 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1824 
1825 	max_buf->cpu = cpu;
1826 	max_buf->time_start = data->preempt_timestamp;
1827 
1828 	max_data->saved_latency = tr->max_latency;
1829 	max_data->critical_start = data->critical_start;
1830 	max_data->critical_end = data->critical_end;
1831 
1832 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1833 	max_data->pid = tsk->pid;
1834 	/*
1835 	 * If tsk == current, then use current_uid(), as that does not use
1836 	 * RCU. The irq tracer can be called out of RCU scope.
1837 	 */
1838 	if (tsk == current)
1839 		max_data->uid = current_uid();
1840 	else
1841 		max_data->uid = task_uid(tsk);
1842 
1843 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1844 	max_data->policy = tsk->policy;
1845 	max_data->rt_priority = tsk->rt_priority;
1846 
1847 	/* record this task's comm */
1848 	tracing_record_cmdline(tsk);
1849 	latency_fsnotify(tr);
1850 }
1851 
1852 /**
1853  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1854  * @tr: tracer
1855  * @tsk: the task with the latency
1856  * @cpu: The cpu that initiated the trace.
1857  * @cond_data: User data associated with a conditional snapshot
1858  *
1859  * Flip the buffers between the @tr and the max_tr and record information
1860  * about which task was the cause of this latency.
1861  */
1862 void
1863 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1864 	      void *cond_data)
1865 {
1866 	if (tr->stop_count)
1867 		return;
1868 
1869 	WARN_ON_ONCE(!irqs_disabled());
1870 
1871 	if (!tr->allocated_snapshot) {
1872 		/* Only the nop tracer should hit this when disabling */
1873 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1874 		return;
1875 	}
1876 
1877 	arch_spin_lock(&tr->max_lock);
1878 
1879 	/* Inherit the recordable setting from array_buffer */
1880 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1881 		ring_buffer_record_on(tr->max_buffer.buffer);
1882 	else
1883 		ring_buffer_record_off(tr->max_buffer.buffer);
1884 
1885 #ifdef CONFIG_TRACER_SNAPSHOT
1886 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1887 		arch_spin_unlock(&tr->max_lock);
1888 		return;
1889 	}
1890 #endif
1891 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1892 
1893 	__update_max_tr(tr, tsk, cpu);
1894 
1895 	arch_spin_unlock(&tr->max_lock);
1896 
1897 	/* Any waiters on the old snapshot buffer need to wake up */
1898 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1899 }
1900 
1901 /**
1902  * update_max_tr_single - only copy one trace over, and reset the rest
1903  * @tr: tracer
1904  * @tsk: task with the latency
1905  * @cpu: the cpu of the buffer to copy.
1906  *
1907  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1908  */
1909 void
1910 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1911 {
1912 	int ret;
1913 
1914 	if (tr->stop_count)
1915 		return;
1916 
1917 	WARN_ON_ONCE(!irqs_disabled());
1918 	if (!tr->allocated_snapshot) {
1919 		/* Only the nop tracer should hit this when disabling */
1920 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1921 		return;
1922 	}
1923 
1924 	arch_spin_lock(&tr->max_lock);
1925 
1926 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1927 
1928 	if (ret == -EBUSY) {
1929 		/*
1930 		 * We failed to swap the buffers because a commit is taking
1931 		 * place on this CPU, or because a resize is in progress. We do
1932 		 * not record the latency, but we reset the max trace buffer
1933 		 * (no one writes directly to it) and flag that the swap
1934 		 * failed.
1935 		 */
1936 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1937 			"Failed to swap buffers due to commit or resize in progress\n");
1938 	}
1939 
1940 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1941 
1942 	__update_max_tr(tr, tsk, cpu);
1943 	arch_spin_unlock(&tr->max_lock);
1944 }
1945 
1946 #endif /* CONFIG_TRACER_MAX_TRACE */
1947 
1948 static int wait_on_pipe(struct trace_iterator *iter, int full)
1949 {
1950 	int ret;
1951 
1952 	/* Iterators are static; they should be either filled or empty */
1953 	if (trace_buffer_iter(iter, iter->cpu_file))
1954 		return 0;
1955 
1956 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1957 
1958 #ifdef CONFIG_TRACER_MAX_TRACE
1959 	/*
1960 	 * Make sure this is still the snapshot buffer, as if a snapshot were
1961 	 * to happen, this would now be the main buffer.
1962 	 */
1963 	if (iter->snapshot)
1964 		iter->array_buffer = &iter->tr->max_buffer;
1965 #endif
1966 	return ret;
1967 }
1968 
1969 #ifdef CONFIG_FTRACE_STARTUP_TEST
1970 static bool selftests_can_run;
1971 
1972 struct trace_selftests {
1973 	struct list_head		list;
1974 	struct tracer			*type;
1975 };
1976 
1977 static LIST_HEAD(postponed_selftests);
1978 
1979 static int save_selftest(struct tracer *type)
1980 {
1981 	struct trace_selftests *selftest;
1982 
1983 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1984 	if (!selftest)
1985 		return -ENOMEM;
1986 
1987 	selftest->type = type;
1988 	list_add(&selftest->list, &postponed_selftests);
1989 	return 0;
1990 }
1991 
1992 static int run_tracer_selftest(struct tracer *type)
1993 {
1994 	struct trace_array *tr = &global_trace;
1995 	struct tracer *saved_tracer = tr->current_trace;
1996 	int ret;
1997 
1998 	if (!type->selftest || tracing_selftest_disabled)
1999 		return 0;
2000 
2001 	/*
2002 	 * If a tracer registers early in boot up (before scheduling is
2003 	 * initialized and such), then do not run its selftests yet.
2004 	 * Instead, run it a little later in the boot process.
2005 	 */
2006 	if (!selftests_can_run)
2007 		return save_selftest(type);
2008 
2009 	if (!tracing_is_on()) {
2010 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2011 			type->name);
2012 		return 0;
2013 	}
2014 
2015 	/*
2016 	 * Run a selftest on this tracer.
2017 	 * Here we reset the trace buffer, and set the current
2018 	 * tracer to be this tracer. The tracer can then run some
2019 	 * internal tracing to verify that everything is in order.
2020 	 * If we fail, we do not register this tracer.
2021 	 */
2022 	tracing_reset_online_cpus(&tr->array_buffer);
2023 
2024 	tr->current_trace = type;
2025 
2026 #ifdef CONFIG_TRACER_MAX_TRACE
2027 	if (type->use_max_tr) {
2028 		/* If we expanded the buffers, make sure the max is expanded too */
2029 		if (ring_buffer_expanded)
2030 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2031 					   RING_BUFFER_ALL_CPUS);
2032 		tr->allocated_snapshot = true;
2033 	}
2034 #endif
2035 
2036 	/* the test is responsible for initializing and enabling */
2037 	pr_info("Testing tracer %s: ", type->name);
2038 	ret = type->selftest(type, tr);
2039 	/* the test is responsible for resetting too */
2040 	tr->current_trace = saved_tracer;
2041 	if (ret) {
2042 		printk(KERN_CONT "FAILED!\n");
2043 		/* Add the warning after printing 'FAILED' */
2044 		WARN_ON(1);
2045 		return -1;
2046 	}
2047 	/* Only reset on passing, to avoid touching corrupted buffers */
2048 	tracing_reset_online_cpus(&tr->array_buffer);
2049 
2050 #ifdef CONFIG_TRACER_MAX_TRACE
2051 	if (type->use_max_tr) {
2052 		tr->allocated_snapshot = false;
2053 
2054 		/* Shrink the max buffer again */
2055 		if (ring_buffer_expanded)
2056 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2057 					   RING_BUFFER_ALL_CPUS);
2058 	}
2059 #endif
2060 
2061 	printk(KERN_CONT "PASSED\n");
2062 	return 0;
2063 }
2064 
2065 static int do_run_tracer_selftest(struct tracer *type)
2066 {
2067 	int ret;
2068 
2069 	/*
2070 	 * Tests can take a long time, especially if they are run one after the
2071 	 * other, as does happen during bootup when all the tracers are
2072 	 * registered. This could cause the soft lockup watchdog to trigger.
2073 	 */
2074 	cond_resched();
2075 
2076 	tracing_selftest_running = true;
2077 	ret = run_tracer_selftest(type);
2078 	tracing_selftest_running = false;
2079 
2080 	return ret;
2081 }
2082 
2083 static __init int init_trace_selftests(void)
2084 {
2085 	struct trace_selftests *p, *n;
2086 	struct tracer *t, **last;
2087 	int ret;
2088 
2089 	selftests_can_run = true;
2090 
2091 	mutex_lock(&trace_types_lock);
2092 
2093 	if (list_empty(&postponed_selftests))
2094 		goto out;
2095 
2096 	pr_info("Running postponed tracer tests:\n");
2097 
2098 	tracing_selftest_running = true;
2099 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2100 		/* This loop can take minutes when sanitizers are enabled, so
2101 		 * let's make sure we allow RCU processing.
2102 		 */
2103 		cond_resched();
2104 		ret = run_tracer_selftest(p->type);
2105 		/* If the test fails, then warn and remove from available_tracers */
2106 		if (ret < 0) {
2107 			WARN(1, "tracer: %s failed selftest, disabling\n",
2108 			     p->type->name);
2109 			last = &trace_types;
2110 			for (t = trace_types; t; t = t->next) {
2111 				if (t == p->type) {
2112 					*last = t->next;
2113 					break;
2114 				}
2115 				last = &t->next;
2116 			}
2117 		}
2118 		list_del(&p->list);
2119 		kfree(p);
2120 	}
2121 	tracing_selftest_running = false;
2122 
2123  out:
2124 	mutex_unlock(&trace_types_lock);
2125 
2126 	return 0;
2127 }
2128 core_initcall(init_trace_selftests);
2129 #else
2130 static inline int run_tracer_selftest(struct tracer *type)
2131 {
2132 	return 0;
2133 }
2134 static inline int do_run_tracer_selftest(struct tracer *type)
2135 {
2136 	return 0;
2137 }
2138 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2139 
2140 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2141 
2142 static void __init apply_trace_boot_options(void);
2143 
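/*
 * Typical usage (sketch only; the callbacks shown are illustrative): a
 * tracer fills in a struct tracer and registers it from __init code:
 *
 *	static struct tracer foo_tracer __read_mostly = {
 *		.name	= "foo",
 *		.init	= foo_tracer_init,
 *		.reset	= foo_tracer_reset,
 *	};
 *
 *	static __init int init_foo_tracer(void)
 *	{
 *		return register_tracer(&foo_tracer);
 *	}
 */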
2144 /**
2145  * register_tracer - register a tracer with the ftrace system.
2146  * @type: the plugin for the tracer
2147  *
2148  * Register a new plugin tracer.
2149  */
2150 int __init register_tracer(struct tracer *type)
2151 {
2152 	struct tracer *t;
2153 	int ret = 0;
2154 
2155 	if (!type->name) {
2156 		pr_info("Tracer must have a name\n");
2157 		return -1;
2158 	}
2159 
2160 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2161 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2162 		return -1;
2163 	}
2164 
2165 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2166 		pr_warn("Can not register tracer %s due to lockdown\n",
2167 			   type->name);
2168 		return -EPERM;
2169 	}
2170 
2171 	mutex_lock(&trace_types_lock);
2172 
2173 	for (t = trace_types; t; t = t->next) {
2174 		if (strcmp(type->name, t->name) == 0) {
2175 			/* already found */
2176 			pr_info("Tracer %s already registered\n",
2177 				type->name);
2178 			ret = -1;
2179 			goto out;
2180 		}
2181 	}
2182 
2183 	if (!type->set_flag)
2184 		type->set_flag = &dummy_set_flag;
2185 	if (!type->flags) {
2186 		/* allocate a dummy tracer_flags */
2187 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2188 		if (!type->flags) {
2189 			ret = -ENOMEM;
2190 			goto out;
2191 		}
2192 		type->flags->val = 0;
2193 		type->flags->opts = dummy_tracer_opt;
2194 	} else
2195 		if (!type->flags->opts)
2196 			type->flags->opts = dummy_tracer_opt;
2197 
2198 	/* store the tracer for __set_tracer_option */
2199 	type->flags->trace = type;
2200 
2201 	ret = do_run_tracer_selftest(type);
2202 	if (ret < 0)
2203 		goto out;
2204 
2205 	type->next = trace_types;
2206 	trace_types = type;
2207 	add_tracer_options(&global_trace, type);
2208 
2209  out:
2210 	mutex_unlock(&trace_types_lock);
2211 
2212 	if (ret || !default_bootup_tracer)
2213 		goto out_unlock;
2214 
2215 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2216 		goto out_unlock;
2217 
2218 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2219 	/* Do we want this tracer to start on bootup? */
2220 	tracing_set_tracer(&global_trace, type->name);
2221 	default_bootup_tracer = NULL;
2222 
2223 	apply_trace_boot_options();
2224 
2225 	/* Disable other selftests, since running this tracer will break them. */
2226 	disable_tracing_selftest("running a tracer");
2227 
2228  out_unlock:
2229 	return ret;
2230 }
2231 
2232 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2233 {
2234 	struct trace_buffer *buffer = buf->buffer;
2235 
2236 	if (!buffer)
2237 		return;
2238 
2239 	ring_buffer_record_disable(buffer);
2240 
2241 	/* Make sure all commits have finished */
2242 	synchronize_rcu();
2243 	ring_buffer_reset_cpu(buffer, cpu);
2244 
2245 	ring_buffer_record_enable(buffer);
2246 }
2247 
2248 void tracing_reset_online_cpus(struct array_buffer *buf)
2249 {
2250 	struct trace_buffer *buffer = buf->buffer;
2251 
2252 	if (!buffer)
2253 		return;
2254 
2255 	ring_buffer_record_disable(buffer);
2256 
2257 	/* Make sure all commits have finished */
2258 	synchronize_rcu();
2259 
2260 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2261 
2262 	ring_buffer_reset_online_cpus(buffer);
2263 
2264 	ring_buffer_record_enable(buffer);
2265 }
2266 
2267 /* Must have trace_types_lock held */
2268 void tracing_reset_all_online_cpus_unlocked(void)
2269 {
2270 	struct trace_array *tr;
2271 
2272 	lockdep_assert_held(&trace_types_lock);
2273 
2274 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2275 		if (!tr->clear_trace)
2276 			continue;
2277 		tr->clear_trace = false;
2278 		tracing_reset_online_cpus(&tr->array_buffer);
2279 #ifdef CONFIG_TRACER_MAX_TRACE
2280 		tracing_reset_online_cpus(&tr->max_buffer);
2281 #endif
2282 	}
2283 }
2284 
2285 void tracing_reset_all_online_cpus(void)
2286 {
2287 	mutex_lock(&trace_types_lock);
2288 	tracing_reset_all_online_cpus_unlocked();
2289 	mutex_unlock(&trace_types_lock);
2290 }
2291 
2292 /*
2293  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2294  * is the tgid last observed corresponding to pid=i.
2295  */
2296 static int *tgid_map;
2297 
2298 /* The maximum valid index into tgid_map. */
2299 static size_t tgid_map_max;
2300 
2301 #define SAVED_CMDLINES_DEFAULT 128
2302 #define NO_CMDLINE_MAP UINT_MAX
2303 /*
2304  * Preemption must be disabled before acquiring trace_cmdline_lock.
2305  * The various trace_arrays' max_lock must be acquired in a context
2306  * where interrupts are disabled.
2307  */
2308 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
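/*
 * Cache of recently seen task comms: map_pid_to_cmdline hashes a pid to a
 * slot, map_cmdline_to_pid records which pid currently owns that slot, and
 * saved_cmdlines holds cmdline_num strings of TASK_COMM_LEN bytes each.
 */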
2309 struct saved_cmdlines_buffer {
2310 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2311 	unsigned *map_cmdline_to_pid;
2312 	unsigned cmdline_num;
2313 	int cmdline_idx;
2314 	char *saved_cmdlines;
2315 };
2316 static struct saved_cmdlines_buffer *savedcmd;
2317 
2318 static inline char *get_saved_cmdlines(int idx)
2319 {
2320 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2321 }
2322 
2323 static inline void set_cmdline(int idx, const char *cmdline)
2324 {
2325 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2326 }
2327 
2328 static int allocate_cmdlines_buffer(unsigned int val,
2329 				    struct saved_cmdlines_buffer *s)
2330 {
2331 	s->map_cmdline_to_pid = kmalloc_array(val,
2332 					      sizeof(*s->map_cmdline_to_pid),
2333 					      GFP_KERNEL);
2334 	if (!s->map_cmdline_to_pid)
2335 		return -ENOMEM;
2336 
2337 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2338 	if (!s->saved_cmdlines) {
2339 		kfree(s->map_cmdline_to_pid);
2340 		return -ENOMEM;
2341 	}
2342 
2343 	s->cmdline_idx = 0;
2344 	s->cmdline_num = val;
2345 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2346 	       sizeof(s->map_pid_to_cmdline));
2347 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2348 	       val * sizeof(*s->map_cmdline_to_pid));
2349 
2350 	return 0;
2351 }
2352 
2353 static int trace_create_savedcmd(void)
2354 {
2355 	int ret;
2356 
2357 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2358 	if (!savedcmd)
2359 		return -ENOMEM;
2360 
2361 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2362 	if (ret < 0) {
2363 		kfree(savedcmd);
2364 		savedcmd = NULL;
2365 		return -ENOMEM;
2366 	}
2367 
2368 	return 0;
2369 }
2370 
2371 int is_tracing_stopped(void)
2372 {
2373 	return global_trace.stop_count;
2374 }
2375 
2376 static void tracing_start_tr(struct trace_array *tr)
2377 {
2378 	struct trace_buffer *buffer;
2379 	unsigned long flags;
2380 
2381 	if (tracing_disabled)
2382 		return;
2383 
2384 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2385 	if (--tr->stop_count) {
2386 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2387 			/* Someone screwed up their debugging */
2388 			tr->stop_count = 0;
2389 		}
2390 		goto out;
2391 	}
2392 
2393 	/* Prevent the buffers from switching */
2394 	arch_spin_lock(&tr->max_lock);
2395 
2396 	buffer = tr->array_buffer.buffer;
2397 	if (buffer)
2398 		ring_buffer_record_enable(buffer);
2399 
2400 #ifdef CONFIG_TRACER_MAX_TRACE
2401 	buffer = tr->max_buffer.buffer;
2402 	if (buffer)
2403 		ring_buffer_record_enable(buffer);
2404 #endif
2405 
2406 	arch_spin_unlock(&tr->max_lock);
2407 
2408  out:
2409 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2410 }
2411 
2412 /**
2413  * tracing_start - quick start of the tracer
2414  *
2415  * If tracing is enabled but was stopped by tracing_stop,
2416  * this will start the tracer back up.
2417  */
2418 void tracing_start(void)
2419 
2420 {
2421 	return tracing_start_tr(&global_trace);
2422 }
2423 
2424 static void tracing_stop_tr(struct trace_array *tr)
2425 {
2426 	struct trace_buffer *buffer;
2427 	unsigned long flags;
2428 
2429 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2430 	if (tr->stop_count++)
2431 		goto out;
2432 
2433 	/* Prevent the buffers from switching */
2434 	arch_spin_lock(&tr->max_lock);
2435 
2436 	buffer = tr->array_buffer.buffer;
2437 	if (buffer)
2438 		ring_buffer_record_disable(buffer);
2439 
2440 #ifdef CONFIG_TRACER_MAX_TRACE
2441 	buffer = tr->max_buffer.buffer;
2442 	if (buffer)
2443 		ring_buffer_record_disable(buffer);
2444 #endif
2445 
2446 	arch_spin_unlock(&tr->max_lock);
2447 
2448  out:
2449 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2450 }
2451 
2452 /**
2453  * tracing_stop - quick stop of the tracer
2454  *
2455  * Lightweight way to stop tracing. Use in conjunction with
2456  * tracing_start.
2457  */
2458 void tracing_stop(void)
2459 {
2460 	return tracing_stop_tr(&global_trace);
2461 }
2462 
2463 static int trace_save_cmdline(struct task_struct *tsk)
2464 {
2465 	unsigned tpid, idx;
2466 
2467 	/* treat recording of idle task as a success */
2468 	if (!tsk->pid)
2469 		return 1;
2470 
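	/*
	 * PID_MAX_DEFAULT is a power of two, so this masks the pid down to
	 * an index into map_pid_to_cmdline (i.e. pid % PID_MAX_DEFAULT).
	 */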
2471 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2472 
2473 	/*
2474 	 * It's not the end of the world if we don't get
2475 	 * the lock, but we also don't want to spin
2476 	 * nor do we want to disable interrupts,
2477 	 * so if we miss here, then better luck next time.
2478 	 *
2479 	 * This is called from within the scheduler and wakeup paths, so
2480 	 * interrupts had better be disabled and the run queue lock held.
2481 	 */
2482 	lockdep_assert_preemption_disabled();
2483 	if (!arch_spin_trylock(&trace_cmdline_lock))
2484 		return 0;
2485 
2486 	idx = savedcmd->map_pid_to_cmdline[tpid];
2487 	if (idx == NO_CMDLINE_MAP) {
2488 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2489 
2490 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2491 		savedcmd->cmdline_idx = idx;
2492 	}
2493 
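	/*
	 * Note that a pid which previously hashed to this slot still maps to
	 * idx in map_pid_to_cmdline; __trace_find_cmdline() detects such stale
	 * entries by comparing map_cmdline_to_pid[] against the requested pid.
	 */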
2494 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2495 	set_cmdline(idx, tsk->comm);
2496 
2497 	arch_spin_unlock(&trace_cmdline_lock);
2498 
2499 	return 1;
2500 }
2501 
2502 static void __trace_find_cmdline(int pid, char comm[])
2503 {
2504 	unsigned map;
2505 	int tpid;
2506 
2507 	if (!pid) {
2508 		strcpy(comm, "<idle>");
2509 		return;
2510 	}
2511 
2512 	if (WARN_ON_ONCE(pid < 0)) {
2513 		strcpy(comm, "<XXX>");
2514 		return;
2515 	}
2516 
2517 	tpid = pid & (PID_MAX_DEFAULT - 1);
2518 	map = savedcmd->map_pid_to_cmdline[tpid];
2519 	if (map != NO_CMDLINE_MAP) {
2520 		tpid = savedcmd->map_cmdline_to_pid[map];
2521 		if (tpid == pid) {
2522 			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2523 			return;
2524 		}
2525 	}
2526 	strcpy(comm, "<...>");
2527 }
2528 
2529 void trace_find_cmdline(int pid, char comm[])
2530 {
2531 	preempt_disable();
2532 	arch_spin_lock(&trace_cmdline_lock);
2533 
2534 	__trace_find_cmdline(pid, comm);
2535 
2536 	arch_spin_unlock(&trace_cmdline_lock);
2537 	preempt_enable();
2538 }
2539 
2540 static int *trace_find_tgid_ptr(int pid)
2541 {
2542 	/*
2543 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2544 	 * if we observe a non-NULL tgid_map then we also observe the correct
2545 	 * tgid_map_max.
2546 	 */
2547 	int *map = smp_load_acquire(&tgid_map);
2548 
2549 	if (unlikely(!map || pid > tgid_map_max))
2550 		return NULL;
2551 
2552 	return &map[pid];
2553 }
2554 
2555 int trace_find_tgid(int pid)
2556 {
2557 	int *ptr = trace_find_tgid_ptr(pid);
2558 
2559 	return ptr ? *ptr : 0;
2560 }
2561 
2562 static int trace_save_tgid(struct task_struct *tsk)
2563 {
2564 	int *ptr;
2565 
2566 	/* treat recording of idle task as a success */
2567 	if (!tsk->pid)
2568 		return 1;
2569 
2570 	ptr = trace_find_tgid_ptr(tsk->pid);
2571 	if (!ptr)
2572 		return 0;
2573 
2574 	*ptr = tsk->tgid;
2575 	return 1;
2576 }
2577 
2578 static bool tracing_record_taskinfo_skip(int flags)
2579 {
2580 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2581 		return true;
2582 	if (!__this_cpu_read(trace_taskinfo_save))
2583 		return true;
2584 	return false;
2585 }
2586 
2587 /**
2588  * tracing_record_taskinfo - record the task info of a task
2589  *
2590  * @task:  task to record
2591  * @flags: TRACE_RECORD_CMDLINE for recording comm
2592  *         TRACE_RECORD_TGID for recording tgid
2593  */
2594 void tracing_record_taskinfo(struct task_struct *task, int flags)
2595 {
2596 	bool done;
2597 
2598 	if (tracing_record_taskinfo_skip(flags))
2599 		return;
2600 
2601 	/*
2602 	 * Record as much task information as possible. If some fail, continue
2603 	 * to try to record the others.
2604 	 */
2605 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2606 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2607 
2608 	/* If recording any information failed, try again soon. */
2609 	if (!done)
2610 		return;
2611 
2612 	__this_cpu_write(trace_taskinfo_save, false);
2613 }
2614 
2615 /**
2616  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2617  *
2618  * @prev: previous task during sched_switch
2619  * @next: next task during sched_switch
2620  * @flags: TRACE_RECORD_CMDLINE for recording comm
2621  *         TRACE_RECORD_TGID for recording tgid
2622  */
2623 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2624 					  struct task_struct *next, int flags)
2625 {
2626 	bool done;
2627 
2628 	if (tracing_record_taskinfo_skip(flags))
2629 		return;
2630 
2631 	/*
2632 	 * Record as much task information as possible. If some fail, continue
2633 	 * to try to record the others.
2634 	 */
2635 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2636 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2637 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2638 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2639 
2640 	/* If recording any information failed, try again soon. */
2641 	if (!done)
2642 		return;
2643 
2644 	__this_cpu_write(trace_taskinfo_save, false);
2645 }
2646 
2647 /* Helpers to record a specific task information */
2648 void tracing_record_cmdline(struct task_struct *task)
2649 {
2650 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2651 }
2652 
2653 void tracing_record_tgid(struct task_struct *task)
2654 {
2655 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2656 }
2657 
2658 /*
2659  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2660  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2661  * simplifies those functions and keeps them in sync.
2662  */
2663 enum print_line_t trace_handle_return(struct trace_seq *s)
2664 {
2665 	return trace_seq_has_overflowed(s) ?
2666 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2667 }
2668 EXPORT_SYMBOL_GPL(trace_handle_return);
2669 
2670 static unsigned short migration_disable_value(void)
2671 {
2672 #if defined(CONFIG_SMP)
2673 	return current->migration_disabled;
2674 #else
2675 	return 0;
2676 #endif
2677 }
2678 
2679 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2680 {
2681 	unsigned int trace_flags = irqs_status;
2682 	unsigned int pc;
2683 
2684 	pc = preempt_count();
2685 
2686 	if (pc & NMI_MASK)
2687 		trace_flags |= TRACE_FLAG_NMI;
2688 	if (pc & HARDIRQ_MASK)
2689 		trace_flags |= TRACE_FLAG_HARDIRQ;
2690 	if (in_serving_softirq())
2691 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2692 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2693 		trace_flags |= TRACE_FLAG_BH_OFF;
2694 
2695 	if (tif_need_resched())
2696 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2697 	if (test_preempt_need_resched())
2698 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
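	/*
	 * Pack the context: the flags collected above go in the upper 16 bits,
	 * the preempt count (clamped to 15) in bits 0-3, and the
	 * migration-disable depth (clamped to 15) in bits 4-7.
	 */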
2699 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2700 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2701 }
2702 
2703 struct ring_buffer_event *
2704 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2705 			  int type,
2706 			  unsigned long len,
2707 			  unsigned int trace_ctx)
2708 {
2709 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2710 }
2711 
2712 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2713 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2714 static int trace_buffered_event_ref;
2715 
2716 /**
2717  * trace_buffered_event_enable - enable buffering events
2718  *
2719  * When events are being filtered, it is quicker to write the event
2720  * data into a temporary buffer if there is a good chance that the
2721  * event will not be committed. Discarding an event from the ring
2722  * buffer is slower than committing one, and much slower than simply
2723  * copying the data in on a filter match.
2724  *
2725  * When an event is to be filtered, per-CPU buffers are allocated to
2726  * write the event data into; if the event is filtered and discarded
2727  * it is simply dropped, otherwise the entire data is committed in
2728  * one shot.
2729  */
2730 void trace_buffered_event_enable(void)
2731 {
2732 	struct ring_buffer_event *event;
2733 	struct page *page;
2734 	int cpu;
2735 
2736 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2737 
2738 	if (trace_buffered_event_ref++)
2739 		return;
2740 
2741 	for_each_tracing_cpu(cpu) {
2742 		page = alloc_pages_node(cpu_to_node(cpu),
2743 					GFP_KERNEL | __GFP_NORETRY, 0);
2744 		/* This is just an optimization and can handle failures */
2745 		if (!page) {
2746 			pr_err("Failed to allocate event buffer\n");
2747 			break;
2748 		}
2749 
2750 		event = page_address(page);
2751 		memset(event, 0, sizeof(*event));
2752 
2753 		per_cpu(trace_buffered_event, cpu) = event;
2754 
2755 		preempt_disable();
2756 		if (cpu == smp_processor_id() &&
2757 		    __this_cpu_read(trace_buffered_event) !=
2758 		    per_cpu(trace_buffered_event, cpu))
2759 			WARN_ON_ONCE(1);
2760 		preempt_enable();
2761 	}
2762 }
2763 
2764 static void enable_trace_buffered_event(void *data)
2765 {
2766 	/* Probably not needed, but do it anyway */
2767 	smp_rmb();
2768 	this_cpu_dec(trace_buffered_event_cnt);
2769 }
2770 
2771 static void disable_trace_buffered_event(void *data)
2772 {
2773 	this_cpu_inc(trace_buffered_event_cnt);
2774 }
2775 
2776 /**
2777  * trace_buffered_event_disable - disable buffering events
2778  *
2779  * When a filter is removed, it is faster to not use the buffered
2780  * events, and to commit directly into the ring buffer. Free up
2781  * the temp buffers when there are no more users. This requires
2782  * special synchronization with current events.
2783  */
2784 void trace_buffered_event_disable(void)
2785 {
2786 	int cpu;
2787 
2788 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2789 
2790 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2791 		return;
2792 
2793 	if (--trace_buffered_event_ref)
2794 		return;
2795 
2796 	/* For each CPU, set the buffer as used. */
2797 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2798 			 NULL, true);
2799 
2800 	/* Wait for all current users to finish */
2801 	synchronize_rcu();
2802 
2803 	for_each_tracing_cpu(cpu) {
2804 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2805 		per_cpu(trace_buffered_event, cpu) = NULL;
2806 	}
2807 
2808 	/*
2809 	 * Wait for all CPUs that potentially started checking whether they
2810 	 * can use their event buffer only after the previous synchronize_rcu()
2811 	 * call and that still read a valid pointer from trace_buffered_event.
2812 	 * They must not see the cleared trace_buffered_event_cnt, else they
2813 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2814 	 */
2815 	synchronize_rcu();
2816 
2817 	/* For each CPU, relinquish the buffer */
2818 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2819 			 true);
2820 }
2821 
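/*
 * Fallback buffer used when the main ring buffer refuses a reserve but an
 * enabled trigger still needs to inspect the event data (see
 * trace_event_buffer_lock_reserve() below).
 */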
2822 static struct trace_buffer *temp_buffer;
2823 
2824 struct ring_buffer_event *
2825 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2826 			  struct trace_event_file *trace_file,
2827 			  int type, unsigned long len,
2828 			  unsigned int trace_ctx)
2829 {
2830 	struct ring_buffer_event *entry;
2831 	struct trace_array *tr = trace_file->tr;
2832 	int val;
2833 
2834 	*current_rb = tr->array_buffer.buffer;
2835 
2836 	if (!tr->no_filter_buffering_ref &&
2837 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2838 		preempt_disable_notrace();
2839 		/*
2840 		 * Filtering is on, so try to use the per cpu buffer first.
2841 		 * This buffer will simulate a ring_buffer_event,
2842 		 * where the type_len is zero and the array[0] will
2843 		 * hold the full length.
2844 		 * (see include/linux/ring_buffer.h for details on
2845 		 *  how the ring_buffer_event is structured).
2846 		 *
2847 		 * Using a temp buffer during filtering and copying it
2848 		 * on a matched filter is quicker than writing directly
2849 		 * into the ring buffer and then discarding it when
2850 		 * it doesn't match. That is because the discard
2851 		 * requires several atomic operations to get right.
2852 		 * Copying on match and doing nothing on a failed match
2853 		 * is still quicker than no copy on match, but having
2854 		 * to discard out of the ring buffer on a failed match.
2855 		 */
2856 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2857 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2858 
2859 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2860 
2861 			/*
2862 			 * Preemption is disabled, but interrupts and NMIs
2863 			 * can still come in now. If that happens after
2864 			 * the above increment, then it will have to go
2865 			 * back to the old method of allocating the event
2866 			 * on the ring buffer, and if the filter fails, it
2867 			 * will have to call ring_buffer_discard_commit()
2868 			 * to remove it.
2869 			 *
2870 			 * Need to also check the unlikely case that the
2871 			 * length is bigger than the temp buffer size.
2872 			 * If that happens, then the reserve is pretty much
2873 			 * guaranteed to fail, as the ring buffer currently
2874 			 * only allows events less than a page. But that may
2875 			 * change in the future, so let the ring buffer reserve
2876 			 * handle the failure in that case.
2877 			 */
2878 			if (val == 1 && likely(len <= max_len)) {
2879 				trace_event_setup(entry, type, trace_ctx);
2880 				entry->array[0] = len;
2881 				/* Return with preemption disabled */
2882 				return entry;
2883 			}
2884 			this_cpu_dec(trace_buffered_event_cnt);
2885 		}
2886 		/* __trace_buffer_lock_reserve() disables preemption */
2887 		preempt_enable_notrace();
2888 	}
2889 
2890 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2891 					    trace_ctx);
2892 	/*
2893 	 * If tracing is off, but we have triggers enabled,
2894 	 * we still need to look at the event data. Use the temp_buffer
2895 	 * to store the trace event for the trigger to use. It's recursion
2896 	 * safe and will not be recorded anywhere.
2897 	 */
2898 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2899 		*current_rb = temp_buffer;
2900 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2901 						    trace_ctx);
2902 	}
2903 	return entry;
2904 }
2905 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2906 
2907 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2908 static DEFINE_MUTEX(tracepoint_printk_mutex);
2909 
2910 static void output_printk(struct trace_event_buffer *fbuffer)
2911 {
2912 	struct trace_event_call *event_call;
2913 	struct trace_event_file *file;
2914 	struct trace_event *event;
2915 	unsigned long flags;
2916 	struct trace_iterator *iter = tracepoint_print_iter;
2917 
2918 	/* We should never get here if iter is NULL */
2919 	if (WARN_ON_ONCE(!iter))
2920 		return;
2921 
2922 	event_call = fbuffer->trace_file->event_call;
2923 	if (!event_call || !event_call->event.funcs ||
2924 	    !event_call->event.funcs->trace)
2925 		return;
2926 
2927 	file = fbuffer->trace_file;
2928 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2929 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2930 	     !filter_match_preds(file->filter, fbuffer->entry)))
2931 		return;
2932 
2933 	event = &fbuffer->trace_file->event_call->event;
2934 
2935 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2936 	trace_seq_init(&iter->seq);
2937 	iter->ent = fbuffer->entry;
2938 	event_call->event.funcs->trace(iter, 0, event);
2939 	trace_seq_putc(&iter->seq, 0);
2940 	printk("%s", iter->seq.buffer);
2941 
2942 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2943 }
2944 
2945 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2946 			     void *buffer, size_t *lenp,
2947 			     loff_t *ppos)
2948 {
2949 	int save_tracepoint_printk;
2950 	int ret;
2951 
2952 	mutex_lock(&tracepoint_printk_mutex);
2953 	save_tracepoint_printk = tracepoint_printk;
2954 
2955 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2956 
2957 	/*
2958 	 * This will force exiting early, as tracepoint_printk
2959 	 * is always zero when tracepoint_print_iter is not allocated.
2960 	 */
2961 	if (!tracepoint_print_iter)
2962 		tracepoint_printk = 0;
2963 
2964 	if (save_tracepoint_printk == tracepoint_printk)
2965 		goto out;
2966 
2967 	if (tracepoint_printk)
2968 		static_key_enable(&tracepoint_printk_key.key);
2969 	else
2970 		static_key_disable(&tracepoint_printk_key.key);
2971 
2972  out:
2973 	mutex_unlock(&tracepoint_printk_mutex);
2974 
2975 	return ret;
2976 }
2977 
2978 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2979 {
2980 	enum event_trigger_type tt = ETT_NONE;
2981 	struct trace_event_file *file = fbuffer->trace_file;
2982 
2983 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2984 			fbuffer->entry, &tt))
2985 		goto discard;
2986 
2987 	if (static_key_false(&tracepoint_printk_key.key))
2988 		output_printk(fbuffer);
2989 
2990 	if (static_branch_unlikely(&trace_event_exports_enabled))
2991 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2992 
2993 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2994 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2995 
2996 discard:
2997 	if (tt)
2998 		event_triggers_post_call(file, tt);
2999 
3000 }
3001 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3002 
3003 /*
3004  * Skip 3:
3005  *
3006  *   trace_buffer_unlock_commit_regs()
3007  *   trace_event_buffer_commit()
3008  *   trace_event_raw_event_xxx()
3009  */
3010 # define STACK_SKIP 3
3011 
3012 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3013 				     struct trace_buffer *buffer,
3014 				     struct ring_buffer_event *event,
3015 				     unsigned int trace_ctx,
3016 				     struct pt_regs *regs)
3017 {
3018 	__buffer_unlock_commit(buffer, event);
3019 
3020 	/*
3021 	 * If regs is not set, then skip the necessary functions.
3022 	 * Note, we can still get here via blktrace, wakeup tracer
3023 	 * and mmiotrace, but that's ok if they lose a function or
3024 	 * two. They are not that meaningful.
3025 	 */
3026 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3027 	ftrace_trace_userstack(tr, buffer, trace_ctx);
3028 }
3029 
3030 /*
3031  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3032  */
3033 void
3034 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3035 				   struct ring_buffer_event *event)
3036 {
3037 	__buffer_unlock_commit(buffer, event);
3038 }
3039 
3040 void
3041 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3042 	       parent_ip, unsigned int trace_ctx)
3043 {
3044 	struct trace_event_call *call = &event_function;
3045 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3046 	struct ring_buffer_event *event;
3047 	struct ftrace_entry *entry;
3048 
3049 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3050 					    trace_ctx);
3051 	if (!event)
3052 		return;
3053 	entry	= ring_buffer_event_data(event);
3054 	entry->ip			= ip;
3055 	entry->parent_ip		= parent_ip;
3056 
3057 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3058 		if (static_branch_unlikely(&trace_function_exports_enabled))
3059 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3060 		__buffer_unlock_commit(buffer, event);
3061 	}
3062 }
3063 
3064 #ifdef CONFIG_STACKTRACE
3065 
3066 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3067 #define FTRACE_KSTACK_NESTING	4
3068 
3069 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
3070 
3071 struct ftrace_stack {
3072 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3073 };
3074 
3075 
3076 struct ftrace_stacks {
3077 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3078 };
3079 
3080 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3081 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3082 
3083 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3084 				 unsigned int trace_ctx,
3085 				 int skip, struct pt_regs *regs)
3086 {
3087 	struct trace_event_call *call = &event_kernel_stack;
3088 	struct ring_buffer_event *event;
3089 	unsigned int size, nr_entries;
3090 	struct ftrace_stack *fstack;
3091 	struct stack_entry *entry;
3092 	int stackidx;
3093 
3094 	/*
3095 	 * Add one, for this function and the call to stack_trace_save().
3096 	 * If regs is set, then these functions will not be in the way.
3097 	 */
3098 #ifndef CONFIG_UNWINDER_ORC
3099 	if (!regs)
3100 		skip++;
3101 #endif
3102 
3103 	preempt_disable_notrace();
3104 
3105 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3106 
3107 	/* This should never happen. If it does, yell once and skip */
3108 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3109 		goto out;
3110 
3111 	/*
3112 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3113 	 * interrupt will either see the value pre increment or post
3114 	 * increment. If the interrupt happens pre increment it will have
3115 	 * restored the counter when it returns.  We just need a barrier to
3116 	 * keep gcc from moving things around.
3117 	 */
3118 	barrier();
3119 
3120 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3121 	size = ARRAY_SIZE(fstack->calls);
3122 
3123 	if (regs) {
3124 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3125 						   size, skip);
3126 	} else {
3127 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3128 	}
3129 
3130 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3131 				    struct_size(entry, caller, nr_entries),
3132 				    trace_ctx);
3133 	if (!event)
3134 		goto out;
3135 	entry = ring_buffer_event_data(event);
3136 
3137 	entry->size = nr_entries;
3138 	memcpy(&entry->caller, fstack->calls,
3139 	       flex_array_size(entry, caller, nr_entries));
3140 
3141 	if (!call_filter_check_discard(call, entry, buffer, event))
3142 		__buffer_unlock_commit(buffer, event);
3143 
3144  out:
3145 	/* Again, don't let gcc optimize things here */
3146 	barrier();
3147 	__this_cpu_dec(ftrace_stack_reserve);
3148 	preempt_enable_notrace();
3149 
3150 }
3151 
3152 static inline void ftrace_trace_stack(struct trace_array *tr,
3153 				      struct trace_buffer *buffer,
3154 				      unsigned int trace_ctx,
3155 				      int skip, struct pt_regs *regs)
3156 {
3157 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3158 		return;
3159 
3160 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3161 }
3162 
3163 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3164 		   int skip)
3165 {
3166 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3167 
3168 	if (rcu_is_watching()) {
3169 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3170 		return;
3171 	}
3172 
3173 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3174 		return;
3175 
3176 	/*
3177 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3178 	 * but if the above rcu_is_watching() failed, then the NMI
3179 	 * triggered someplace critical, and ct_irq_enter() should
3180 	 * not be called from NMI.
3181 	 */
3182 	if (unlikely(in_nmi()))
3183 		return;
3184 
3185 	ct_irq_enter_irqson();
3186 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3187 	ct_irq_exit_irqson();
3188 }
3189 
3190 /**
3191  * trace_dump_stack - record a stack back trace in the trace buffer
3192  * @skip: Number of functions to skip (helper handlers)
3193  */
3194 void trace_dump_stack(int skip)
3195 {
3196 	if (tracing_disabled || tracing_selftest_running)
3197 		return;
3198 
3199 #ifndef CONFIG_UNWINDER_ORC
3200 	/* Skip 1 to skip this function. */
3201 	skip++;
3202 #endif
3203 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3204 			     tracing_gen_ctx(), skip, NULL);
3205 }
3206 EXPORT_SYMBOL_GPL(trace_dump_stack);
3207 
3208 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3209 static DEFINE_PER_CPU(int, user_stack_count);
3210 
3211 static void
3212 ftrace_trace_userstack(struct trace_array *tr,
3213 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3214 {
3215 	struct trace_event_call *call = &event_user_stack;
3216 	struct ring_buffer_event *event;
3217 	struct userstack_entry *entry;
3218 
3219 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3220 		return;
3221 
3222 	/*
3223 	 * NMIs cannot handle page faults, even with fixups.
3224 	 * Saving the user stack can (and often does) fault.
3225 	 */
3226 	if (unlikely(in_nmi()))
3227 		return;
3228 
3229 	/*
3230 	 * prevent recursion, since the user stack tracing may
3231 	 * trigger other kernel events.
3232 	 */
3233 	preempt_disable();
3234 	if (__this_cpu_read(user_stack_count))
3235 		goto out;
3236 
3237 	__this_cpu_inc(user_stack_count);
3238 
3239 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3240 					    sizeof(*entry), trace_ctx);
3241 	if (!event)
3242 		goto out_drop_count;
3243 	entry	= ring_buffer_event_data(event);
3244 
3245 	entry->tgid		= current->tgid;
3246 	memset(&entry->caller, 0, sizeof(entry->caller));
3247 
3248 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3249 	if (!call_filter_check_discard(call, entry, buffer, event))
3250 		__buffer_unlock_commit(buffer, event);
3251 
3252  out_drop_count:
3253 	__this_cpu_dec(user_stack_count);
3254  out:
3255 	preempt_enable();
3256 }
3257 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3258 static void ftrace_trace_userstack(struct trace_array *tr,
3259 				   struct trace_buffer *buffer,
3260 				   unsigned int trace_ctx)
3261 {
3262 }
3263 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3264 
3265 #endif /* CONFIG_STACKTRACE */
3266 
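/*
 * A func_repeats_entry stores its timestamp delta as two 32-bit halves;
 * split the 64-bit delta accordingly.
 */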
3267 static inline void
3268 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3269 			  unsigned long long delta)
3270 {
3271 	entry->bottom_delta_ts = delta & U32_MAX;
3272 	entry->top_delta_ts = (delta >> 32);
3273 }
3274 
3275 void trace_last_func_repeats(struct trace_array *tr,
3276 			     struct trace_func_repeats *last_info,
3277 			     unsigned int trace_ctx)
3278 {
3279 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3280 	struct func_repeats_entry *entry;
3281 	struct ring_buffer_event *event;
3282 	u64 delta;
3283 
3284 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3285 					    sizeof(*entry), trace_ctx);
3286 	if (!event)
3287 		return;
3288 
3289 	delta = ring_buffer_event_time_stamp(buffer, event) -
3290 		last_info->ts_last_call;
3291 
3292 	entry = ring_buffer_event_data(event);
3293 	entry->ip = last_info->ip;
3294 	entry->parent_ip = last_info->parent_ip;
3295 	entry->count = last_info->count;
3296 	func_repeats_set_delta_ts(entry, delta);
3297 
3298 	__buffer_unlock_commit(buffer, event);
3299 }
3300 
3301 /* created for use with alloc_percpu */
3302 struct trace_buffer_struct {
3303 	int nesting;
3304 	char buffer[4][TRACE_BUF_SIZE];
3305 };
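/*
 * Four buffers per CPU, enough for the usual nesting of task, softirq, irq
 * and NMI context; get_trace_buf() refuses deeper nesting.
 */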
3306 
3307 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3308 
3309 /*
3310  * This allows for lockless recording.  If we're nested too deeply, then
3311  * this returns NULL.
3312  */
3313 static char *get_trace_buf(void)
3314 {
3315 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3316 
3317 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3318 		return NULL;
3319 
3320 	buffer->nesting++;
3321 
3322 	/* Interrupts must see nesting incremented before we use the buffer */
3323 	barrier();
3324 	return &buffer->buffer[buffer->nesting - 1][0];
3325 }
3326 
3327 static void put_trace_buf(void)
3328 {
3329 	/* Don't let the decrement of nesting leak before this */
3330 	barrier();
3331 	this_cpu_dec(trace_percpu_buffer->nesting);
3332 }
3333 
3334 static int alloc_percpu_trace_buffer(void)
3335 {
3336 	struct trace_buffer_struct __percpu *buffers;
3337 
3338 	if (trace_percpu_buffer)
3339 		return 0;
3340 
3341 	buffers = alloc_percpu(struct trace_buffer_struct);
3342 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3343 		return -ENOMEM;
3344 
3345 	trace_percpu_buffer = buffers;
3346 	return 0;
3347 }
3348 
3349 static int buffers_allocated;
3350 
3351 void trace_printk_init_buffers(void)
3352 {
3353 	if (buffers_allocated)
3354 		return;
3355 
3356 	if (alloc_percpu_trace_buffer())
3357 		return;
3358 
3359 	/* trace_printk() is for debug use only. Don't use it in production. */
3360 
3361 	pr_warn("\n");
3362 	pr_warn("**********************************************************\n");
3363 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3364 	pr_warn("**                                                      **\n");
3365 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3366 	pr_warn("**                                                      **\n");
3367 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3368 	pr_warn("** unsafe for production use.                           **\n");
3369 	pr_warn("**                                                      **\n");
3370 	pr_warn("** If you see this message and you are not debugging    **\n");
3371 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3372 	pr_warn("**                                                      **\n");
3373 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3374 	pr_warn("**********************************************************\n");
3375 
3376 	/* Expand the buffers to set size */
3377 	tracing_update_buffers();
3378 
3379 	buffers_allocated = 1;
3380 
3381 	/*
3382 	 * trace_printk_init_buffers() can be called by modules.
3383 	 * If that happens, then we need to start cmdline recording
3384 	 * directly here. If the global_trace.buffer is already
3385 	 * allocated here, then this was called by module code.
3386 	 */
3387 	if (global_trace.array_buffer.buffer)
3388 		tracing_start_cmdline_record();
3389 }
3390 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3391 
3392 void trace_printk_start_comm(void)
3393 {
3394 	/* Start tracing comms if trace printk is set */
3395 	if (!buffers_allocated)
3396 		return;
3397 	tracing_start_cmdline_record();
3398 }
3399 
3400 static void trace_printk_start_stop_comm(int enabled)
3401 {
3402 	if (!buffers_allocated)
3403 		return;
3404 
3405 	if (enabled)
3406 		tracing_start_cmdline_record();
3407 	else
3408 		tracing_stop_cmdline_record();
3409 }
3410 
3411 /**
3412  * trace_vbprintk - write binary msg to tracing buffer
3413  * @ip:    The address of the caller
3414  * @fmt:   The string format to write to the buffer
3415  * @args:  Arguments for @fmt
3416  */
3417 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3418 {
3419 	struct trace_event_call *call = &event_bprint;
3420 	struct ring_buffer_event *event;
3421 	struct trace_buffer *buffer;
3422 	struct trace_array *tr = &global_trace;
3423 	struct bprint_entry *entry;
3424 	unsigned int trace_ctx;
3425 	char *tbuffer;
3426 	int len = 0, size;
3427 
3428 	if (unlikely(tracing_selftest_running || tracing_disabled))
3429 		return 0;
3430 
3431 	/* Don't pollute graph traces with trace_vprintk internals */
3432 	pause_graph_tracing();
3433 
3434 	trace_ctx = tracing_gen_ctx();
3435 	preempt_disable_notrace();
3436 
3437 	tbuffer = get_trace_buf();
3438 	if (!tbuffer) {
3439 		len = 0;
3440 		goto out_nobuffer;
3441 	}
3442 
3443 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3444 
3445 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3446 		goto out_put;
3447 
3448 	size = sizeof(*entry) + sizeof(u32) * len;
3449 	buffer = tr->array_buffer.buffer;
3450 	ring_buffer_nest_start(buffer);
3451 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3452 					    trace_ctx);
3453 	if (!event)
3454 		goto out;
3455 	entry = ring_buffer_event_data(event);
3456 	entry->ip			= ip;
3457 	entry->fmt			= fmt;
3458 
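	/*
	 * Only the format pointer is recorded; the arguments stay in the
	 * binary form produced by vbin_printf() and are decoded when the
	 * buffer is read.
	 */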
3459 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3460 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3461 		__buffer_unlock_commit(buffer, event);
3462 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3463 	}
3464 
3465 out:
3466 	ring_buffer_nest_end(buffer);
3467 out_put:
3468 	put_trace_buf();
3469 
3470 out_nobuffer:
3471 	preempt_enable_notrace();
3472 	unpause_graph_tracing();
3473 
3474 	return len;
3475 }
3476 EXPORT_SYMBOL_GPL(trace_vbprintk);
3477 
3478 __printf(3, 0)
3479 static int
3480 __trace_array_vprintk(struct trace_buffer *buffer,
3481 		      unsigned long ip, const char *fmt, va_list args)
3482 {
3483 	struct trace_event_call *call = &event_print;
3484 	struct ring_buffer_event *event;
3485 	int len = 0, size;
3486 	struct print_entry *entry;
3487 	unsigned int trace_ctx;
3488 	char *tbuffer;
3489 
3490 	if (tracing_disabled)
3491 		return 0;
3492 
3493 	/* Don't pollute graph traces with trace_vprintk internals */
3494 	pause_graph_tracing();
3495 
3496 	trace_ctx = tracing_gen_ctx();
3497 	preempt_disable_notrace();
3498 
3499 
3500 	tbuffer = get_trace_buf();
3501 	if (!tbuffer) {
3502 		len = 0;
3503 		goto out_nobuffer;
3504 	}
3505 
3506 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3507 
3508 	size = sizeof(*entry) + len + 1;
3509 	ring_buffer_nest_start(buffer);
3510 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3511 					    trace_ctx);
3512 	if (!event)
3513 		goto out;
3514 	entry = ring_buffer_event_data(event);
3515 	entry->ip = ip;
3516 
3517 	memcpy(&entry->buf, tbuffer, len + 1);
3518 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3519 		__buffer_unlock_commit(buffer, event);
3520 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3521 	}
3522 
3523 out:
3524 	ring_buffer_nest_end(buffer);
3525 	put_trace_buf();
3526 
3527 out_nobuffer:
3528 	preempt_enable_notrace();
3529 	unpause_graph_tracing();
3530 
3531 	return len;
3532 }
3533 
3534 __printf(3, 0)
3535 int trace_array_vprintk(struct trace_array *tr,
3536 			unsigned long ip, const char *fmt, va_list args)
3537 {
3538 	if (tracing_selftest_running && tr == &global_trace)
3539 		return 0;
3540 
3541 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3542 }
3543 
3544 /**
3545  * trace_array_printk - Print a message to a specific instance
3546  * @tr: The instance trace_array descriptor
3547  * @ip: The instruction pointer that this is called from.
3548  * @fmt: The format to print (printf format)
3549  *
3550  * If a subsystem sets up its own instance, it has the right to
3551  * printk strings into its tracing instance buffer using this
3552  * function. Note, this function will not write into the top level
3553  * buffer (use trace_printk() for that), as writing into the top level
3554  * buffer should only have events that can be individually disabled.
3555  * trace_printk() is only used for debugging a kernel, and should
3556  * never be incorporated into normal use.
3557  *
3558  * trace_array_printk() can be used, as it will not add noise to the
3559  * top level tracing buffer.
3560  *
3561  * Note, trace_array_init_printk() must be called on @tr before this
3562  * can be used.
3563  */
3564 __printf(3, 0)
3565 int trace_array_printk(struct trace_array *tr,
3566 		       unsigned long ip, const char *fmt, ...)
3567 {
3568 	int ret;
3569 	va_list ap;
3570 
3571 	if (!tr)
3572 		return -ENOENT;
3573 
3574 	/* This is only allowed for created instances */
3575 	if (tr == &global_trace)
3576 		return 0;
3577 
3578 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3579 		return 0;
3580 
3581 	va_start(ap, fmt);
3582 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3583 	va_end(ap);
3584 	return ret;
3585 }
3586 EXPORT_SYMBOL_GPL(trace_array_printk);
3587 
3588 /**
3589  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3590  * @tr: The trace array to initialize the buffers for
3591  *
3592  * As trace_array_printk() only writes into instances, they are OK to
3593  * have in the kernel (unlike trace_printk()). This needs to be called
3594  * before trace_array_printk() can be used on a trace_array.
3595  */
3596 int trace_array_init_printk(struct trace_array *tr)
3597 {
3598 	if (!tr)
3599 		return -ENOENT;
3600 
3601 	/* This is only allowed for created instances */
3602 	if (tr == &global_trace)
3603 		return -EINVAL;
3604 
3605 	return alloc_percpu_trace_buffer();
3606 }
3607 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3608 
3609 __printf(3, 4)
3610 int trace_array_printk_buf(struct trace_buffer *buffer,
3611 			   unsigned long ip, const char *fmt, ...)
3612 {
3613 	int ret;
3614 	va_list ap;
3615 
3616 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3617 		return 0;
3618 
3619 	va_start(ap, fmt);
3620 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3621 	va_end(ap);
3622 	return ret;
3623 }
3624 
3625 __printf(2, 0)
3626 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3627 {
3628 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3629 }
3630 EXPORT_SYMBOL_GPL(trace_vprintk);
3631 
3632 static void trace_iterator_increment(struct trace_iterator *iter)
3633 {
3634 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3635 
3636 	iter->idx++;
3637 	if (buf_iter)
3638 		ring_buffer_iter_advance(buf_iter);
3639 }
3640 
3641 static struct trace_entry *
3642 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3643 		unsigned long *lost_events)
3644 {
3645 	struct ring_buffer_event *event;
3646 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3647 
3648 	if (buf_iter) {
3649 		event = ring_buffer_iter_peek(buf_iter, ts);
3650 		if (lost_events)
3651 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3652 				(unsigned long)-1 : 0;
3653 	} else {
3654 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3655 					 lost_events);
3656 	}
3657 
3658 	if (event) {
3659 		iter->ent_size = ring_buffer_event_length(event);
3660 		return ring_buffer_event_data(event);
3661 	}
3662 	iter->ent_size = 0;
3663 	return NULL;
3664 }
3665 
3666 static struct trace_entry *
3667 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3668 		  unsigned long *missing_events, u64 *ent_ts)
3669 {
3670 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3671 	struct trace_entry *ent, *next = NULL;
3672 	unsigned long lost_events = 0, next_lost = 0;
3673 	int cpu_file = iter->cpu_file;
3674 	u64 next_ts = 0, ts;
3675 	int next_cpu = -1;
3676 	int next_size = 0;
3677 	int cpu;
3678 
3679 	/*
3680 	 * If we are in a per_cpu trace file, don't bother iterating over
3681 	 * all CPUs; just peek at that one directly.
3682 	 */
3683 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3684 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3685 			return NULL;
3686 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3687 		if (ent_cpu)
3688 			*ent_cpu = cpu_file;
3689 
3690 		return ent;
3691 	}
3692 
3693 	for_each_tracing_cpu(cpu) {
3694 
3695 		if (ring_buffer_empty_cpu(buffer, cpu))
3696 			continue;
3697 
3698 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3699 
3700 		/*
3701 		 * Pick the entry with the smallest timestamp:
3702 		 */
3703 		if (ent && (!next || ts < next_ts)) {
3704 			next = ent;
3705 			next_cpu = cpu;
3706 			next_ts = ts;
3707 			next_lost = lost_events;
3708 			next_size = iter->ent_size;
3709 		}
3710 	}
3711 
3712 	iter->ent_size = next_size;
3713 
3714 	if (ent_cpu)
3715 		*ent_cpu = next_cpu;
3716 
3717 	if (ent_ts)
3718 		*ent_ts = next_ts;
3719 
3720 	if (missing_events)
3721 		*missing_events = next_lost;
3722 
3723 	return next;
3724 }
3725 
3726 #define STATIC_FMT_BUF_SIZE	128
3727 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
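/*
 * Static fallback for iter->fmt in contexts where allocation is not safe
 * (e.g. ftrace_dump()); trace_iter_expand_format() will not grow it.
 */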
3728 
3729 char *trace_iter_expand_format(struct trace_iterator *iter)
3730 {
3731 	char *tmp;
3732 
3733 	/*
3734 	 * iter->tr is NULL when used with tp_printk, which means this
3735 	 * can be called where it is not safe to call krealloc().
3736 	 */
3737 	if (!iter->tr || iter->fmt == static_fmt_buf)
3738 		return NULL;
3739 
3740 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3741 		       GFP_KERNEL);
3742 	if (tmp) {
3743 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3744 		iter->fmt = tmp;
3745 	}
3746 
3747 	return tmp;
3748 }
3749 
3750 /* Returns true if the string is safe to dereference from an event */
3751 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3752 			   bool star, int len)
3753 {
3754 	unsigned long addr = (unsigned long)str;
3755 	struct trace_event *trace_event;
3756 	struct trace_event_call *event;
3757 
3758 	/* Ignore strings with no length */
3759 	if (star && !len)
3760 		return true;
3761 
3762 	/* OK if part of the event data */
3763 	if ((addr >= (unsigned long)iter->ent) &&
3764 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3765 		return true;
3766 
3767 	/* OK if part of the temp seq buffer */
3768 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3769 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3770 		return true;
3771 
3772 	/* Core rodata can not be freed */
3773 	if (is_kernel_rodata(addr))
3774 		return true;
3775 
3776 	if (trace_is_tracepoint_string(str))
3777 		return true;
3778 
3779 	/*
3780 	 * Now this could be a module event, referencing core module
3781 	 * data, which is OK.
3782 	 */
3783 	if (!iter->ent)
3784 		return false;
3785 
3786 	trace_event = ftrace_find_event(iter->ent->type);
3787 	if (!trace_event)
3788 		return false;
3789 
3790 	event = container_of(trace_event, struct trace_event_call, event);
3791 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3792 		return false;
3793 
3794 	/* Would rather have rodata, but this will suffice */
3795 	if (within_module_core(addr, event->module))
3796 		return true;
3797 
3798 	return false;
3799 }
3800 
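/*
 * Added description (not in the original source): terminate the seq buffer
 * and return its contents (used in the unsafe-string warning below).
 */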
3801 static const char *show_buffer(struct trace_seq *s)
3802 {
3803 	struct seq_buf *seq = &s->seq;
3804 
3805 	seq_buf_terminate(seq);
3806 
3807 	return seq->buffer;
3808 }
3809 
3810 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3811 
3812 static int test_can_verify_check(const char *fmt, ...)
3813 {
3814 	char buf[16];
3815 	va_list ap;
3816 	int ret;
3817 
3818 	/*
3819 	 * The verifier depends on vsnprintf() modifying the va_list
3820 	 * passed to it, i.e. on the va_list being passed by reference.
3821 	 * Some architectures (like x86_32) pass it by value, which means
3822 	 * that vsnprintf() does not modify the caller's va_list, and the
3823 	 * verifier would then need to understand every value that
3824 	 * vsnprintf() can consume. If the va_list is passed by value,
3825 	 * the verifier is disabled.
3826 	 */
3827 	va_start(ap, fmt);
3828 	vsnprintf(buf, 16, "%d", ap);
3829 	ret = va_arg(ap, int);
3830 	va_end(ap);
3831 
3832 	return ret;
3833 }
3834 
3835 static void test_can_verify(void)
3836 {
3837 	if (!test_can_verify_check("%d %d", 0, 1)) {
3838 		pr_info("trace event string verifier disabled\n");
3839 		static_branch_inc(&trace_no_verify);
3840 	}
3841 }
3842 
3843 /**
3844  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3845  * @iter: The iterator that holds the seq buffer and the event being printed
3846  * @fmt: The format used to print the event
3847  * @ap: The va_list holding the data to print from @fmt.
3848  *
3849  * This writes the data into the @iter->seq buffer using the data from
3850  * @fmt and @ap. If the format has a %s, then the source of the string
3851  * is examined to make sure it is safe to print, otherwise it will
3852  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3853  * pointer.
3854  */
3855 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3856 			 va_list ap)
3857 {
3858 	const char *p = fmt;
3859 	const char *str;
3860 	int i, j;
3861 
3862 	if (WARN_ON_ONCE(!fmt))
3863 		return;
3864 
3865 	if (static_branch_unlikely(&trace_no_verify))
3866 		goto print;
3867 
3868 	/* Don't bother checking when doing an ftrace_dump() */
3869 	if (iter->fmt == static_fmt_buf)
3870 		goto print;
3871 
3872 	while (*p) {
3873 		bool star = false;
3874 		int len = 0;
3875 
3876 		j = 0;
3877 
3878 		/* We only care about %s and variants */
3879 		for (i = 0; p[i]; i++) {
3880 			if (i + 1 >= iter->fmt_size) {
3881 				/*
3882 				 * If we can't expand the copy buffer,
3883 				 * just print it.
3884 				 */
3885 				if (!trace_iter_expand_format(iter))
3886 					goto print;
3887 			}
3888 
3889 			if (p[i] == '\\' && p[i+1]) {
3890 				i++;
3891 				continue;
3892 			}
3893 			if (p[i] == '%') {
3894 				/* Need to test cases like %08.*s */
3895 				for (j = 1; p[i+j]; j++) {
3896 					if (isdigit(p[i+j]) ||
3897 					    p[i+j] == '.')
3898 						continue;
3899 					if (p[i+j] == '*') {
3900 						star = true;
3901 						continue;
3902 					}
3903 					break;
3904 				}
3905 				if (p[i+j] == 's')
3906 					break;
3907 				star = false;
3908 			}
3909 			j = 0;
3910 		}
3911 		/* If no %s found then just print normally */
3912 		if (!p[i])
3913 			break;
3914 
3915 		/* Copy up to the %s, and print that */
3916 		strncpy(iter->fmt, p, i);
3917 		iter->fmt[i] = '\0';
3918 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3919 
3920 		/*
3921 		 * If iter->seq is full, the above call no longer guarantees
3922 		 * that ap is in sync with fmt processing, and further calls
3923 		 * to va_arg() can return wrong positional arguments.
3924 		 *
3925 		 * Ensure that ap is no longer used in this case.
3926 		 */
3927 		if (iter->seq.full) {
3928 			p = "";
3929 			break;
3930 		}
3931 
3932 		if (star)
3933 			len = va_arg(ap, int);
3934 
3935 		/* The ap now points to the string data of the %s */
3936 		str = va_arg(ap, const char *);
3937 
3938 		/*
3939 		 * If you hit this warning, it is likely that the
3940 		 * trace event in question used %s on a string that
3941 		 * was saved at the time of the event, but may not be
3942 		 * around when the trace is read. Use __string(),
3943 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3944 		 * instead. See samples/trace_events/trace-events-sample.h
3945 		 * for reference.
3946 		 */
3947 		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3948 			      "fmt: '%s' current_buffer: '%s'",
3949 			      fmt, show_buffer(&iter->seq))) {
3950 			int ret;
3951 
3952 			/* Try to safely read the string */
3953 			if (star) {
3954 				if (len + 1 > iter->fmt_size)
3955 					len = iter->fmt_size - 1;
3956 				if (len < 0)
3957 					len = 0;
3958 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3959 				iter->fmt[len] = 0;
3960 				star = false;
3961 			} else {
3962 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3963 								  iter->fmt_size);
3964 			}
3965 			if (ret < 0)
3966 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3967 			else
3968 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3969 						 str, iter->fmt);
3970 			str = "[UNSAFE-MEMORY]";
3971 			strcpy(iter->fmt, "%s");
3972 		} else {
3973 			strncpy(iter->fmt, p + i, j + 1);
3974 			iter->fmt[j+1] = '\0';
3975 		}
3976 		if (star)
3977 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3978 		else
3979 			trace_seq_printf(&iter->seq, iter->fmt, str);
3980 
3981 		p += i + j + 1;
3982 	}
3983  print:
3984 	if (*p)
3985 		trace_seq_vprintf(&iter->seq, p, ap);
3986 }
3987 
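/*
 * Added description (not in the original source): when pointer hashing is
 * not in effect, copy @fmt into iter->fmt while rewriting bare %p
 * conversions to %px so that real addresses are printed. Returns the
 * rewritten format, or @fmt itself if no rewrite is needed or possible.
 */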
3988 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3989 {
3990 	const char *p, *new_fmt;
3991 	char *q;
3992 
3993 	if (WARN_ON_ONCE(!fmt))
3994 		return fmt;
3995 
3996 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3997 		return fmt;
3998 
3999 	p = fmt;
4000 	new_fmt = q = iter->fmt;
4001 	while (*p) {
4002 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4003 			if (!trace_iter_expand_format(iter))
4004 				return fmt;
4005 
4006 			q += iter->fmt - new_fmt;
4007 			new_fmt = iter->fmt;
4008 		}
4009 
4010 		*q++ = *p++;
4011 
4012 		/* Replace %p with %px */
4013 		if (p[-1] == '%') {
4014 			if (p[0] == '%') {
4015 				*q++ = *p++;
4016 			} else if (p[0] == 'p' && !isalnum(p[1])) {
4017 				*q++ = *p++;
4018 				*q++ = 'x';
4019 			}
4020 		}
4021 	}
4022 	*q = '\0';
4023 
4024 	return new_fmt;
4025 }
4026 
4027 #define STATIC_TEMP_BUF_SIZE	128
4028 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4029 
4030 /* Find the next real entry, without updating the iterator itself */
4031 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4032 					  int *ent_cpu, u64 *ent_ts)
4033 {
4034 	/* __find_next_entry will reset ent_size */
4035 	int ent_size = iter->ent_size;
4036 	struct trace_entry *entry;
4037 
4038 	/*
4039 	 * If called from ftrace_dump(), then the iter->temp buffer
4040 	 * will be the static_temp_buf and not created from kmalloc.
4041 	 * If the entry size is greater than the buffer, we cannot
4042 	 * save it. Just return NULL in that case. This is only
4043 	 * used to add markers when two consecutive events' time
4044 	 * stamps have a large delta. See trace_print_lat_context().
4045 	 */
4046 	if (iter->temp == static_temp_buf &&
4047 	    STATIC_TEMP_BUF_SIZE < ent_size)
4048 		return NULL;
4049 
4050 	/*
4051 	 * The __find_next_entry() may call peek_next_entry(), which may
4052 	 * call ring_buffer_peek() that may make the contents of iter->ent
4053 	 * undefined. Need to copy iter->ent now.
4054 	 */
4055 	if (iter->ent && iter->ent != iter->temp) {
4056 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4057 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4058 			void *temp;
4059 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
4060 			if (!temp)
4061 				return NULL;
4062 			kfree(iter->temp);
4063 			iter->temp = temp;
4064 			iter->temp_size = iter->ent_size;
4065 		}
4066 		memcpy(iter->temp, iter->ent, iter->ent_size);
4067 		iter->ent = iter->temp;
4068 	}
4069 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4070 	/* Put back the original ent_size */
4071 	iter->ent_size = ent_size;
4072 
4073 	return entry;
4074 }
4075 
4076 /* Find the next real entry, and increment the iterator to the next entry */
4077 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4078 {
4079 	iter->ent = __find_next_entry(iter, &iter->cpu,
4080 				      &iter->lost_events, &iter->ts);
4081 
4082 	if (iter->ent)
4083 		trace_iterator_increment(iter);
4084 
4085 	return iter->ent ? iter : NULL;
4086 }
4087 
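/*
 * Added description (not in the original source): consume the next entry
 * on iter->cpu, updating iter->ts and iter->lost_events.
 */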
4088 static void trace_consume(struct trace_iterator *iter)
4089 {
4090 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4091 			    &iter->lost_events);
4092 }
4093 
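/*
 * Added description (not in the original source): seq_file ->next()
 * callback. Advances the iterator until it reaches the requested
 * position; going backwards is not supported and returns NULL.
 */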
4094 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4095 {
4096 	struct trace_iterator *iter = m->private;
4097 	int i = (int)*pos;
4098 	void *ent;
4099 
4100 	WARN_ON_ONCE(iter->leftover);
4101 
4102 	(*pos)++;
4103 
4104 	/* can't go backwards */
4105 	if (iter->idx > i)
4106 		return NULL;
4107 
4108 	if (iter->idx < 0)
4109 		ent = trace_find_next_entry_inc(iter);
4110 	else
4111 		ent = iter;
4112 
4113 	while (ent && iter->idx < i)
4114 		ent = trace_find_next_entry_inc(iter);
4115 
4116 	iter->pos = *pos;
4117 
4118 	return ent;
4119 }
4120 
4121 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4122 {
4123 	struct ring_buffer_iter *buf_iter;
4124 	unsigned long entries = 0;
4125 	u64 ts;
4126 
4127 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4128 
4129 	buf_iter = trace_buffer_iter(iter, cpu);
4130 	if (!buf_iter)
4131 		return;
4132 
4133 	ring_buffer_iter_reset(buf_iter);
4134 
4135 	/*
4136 	 * With the max latency tracers, it is possible that a reset
4137 	 * never took place on a CPU. This is evident when the
4138 	 * timestamp is before the start of the buffer.
4139 	 */
4140 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4141 		if (ts >= iter->array_buffer->time_start)
4142 			break;
4143 		entries++;
4144 		ring_buffer_iter_advance(buf_iter);
4145 	}
4146 
4147 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4148 }
4149 
4150 /*
4151  * The current tracer is copied to avoid global locking
4152  * all around.
4153  */
4154 static void *s_start(struct seq_file *m, loff_t *pos)
4155 {
4156 	struct trace_iterator *iter = m->private;
4157 	struct trace_array *tr = iter->tr;
4158 	int cpu_file = iter->cpu_file;
4159 	void *p = NULL;
4160 	loff_t l = 0;
4161 	int cpu;
4162 
4163 	mutex_lock(&trace_types_lock);
4164 	if (unlikely(tr->current_trace != iter->trace)) {
4165 		/* Close iter->trace before switching to the new current tracer */
4166 		if (iter->trace->close)
4167 			iter->trace->close(iter);
4168 		iter->trace = tr->current_trace;
4169 		/* Reopen the new current tracer */
4170 		if (iter->trace->open)
4171 			iter->trace->open(iter);
4172 	}
4173 	mutex_unlock(&trace_types_lock);
4174 
4175 #ifdef CONFIG_TRACER_MAX_TRACE
4176 	if (iter->snapshot && iter->trace->use_max_tr)
4177 		return ERR_PTR(-EBUSY);
4178 #endif
4179 
4180 	if (*pos != iter->pos) {
4181 		iter->ent = NULL;
4182 		iter->cpu = 0;
4183 		iter->idx = -1;
4184 
4185 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4186 			for_each_tracing_cpu(cpu)
4187 				tracing_iter_reset(iter, cpu);
4188 		} else
4189 			tracing_iter_reset(iter, cpu_file);
4190 
4191 		iter->leftover = 0;
4192 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4193 			;
4194 
4195 	} else {
4196 		/*
4197 		 * If we overflowed the seq_file before, then we want
4198 		 * to just reuse the trace_seq buffer again.
4199 		 */
4200 		if (iter->leftover)
4201 			p = iter;
4202 		else {
4203 			l = *pos - 1;
4204 			p = s_next(m, p, &l);
4205 		}
4206 	}
4207 
4208 	trace_event_read_lock();
4209 	trace_access_lock(cpu_file);
4210 	return p;
4211 }
4212 
4213 static void s_stop(struct seq_file *m, void *p)
4214 {
4215 	struct trace_iterator *iter = m->private;
4216 
4217 #ifdef CONFIG_TRACER_MAX_TRACE
4218 	if (iter->snapshot && iter->trace->use_max_tr)
4219 		return;
4220 #endif
4221 
4222 	trace_access_unlock(iter->cpu_file);
4223 	trace_event_read_unlock();
4224 }
4225 
4226 static void
4227 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4228 		      unsigned long *entries, int cpu)
4229 {
4230 	unsigned long count;
4231 
4232 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4233 	/*
4234 	 * If this buffer has skipped entries, then we hold all
4235 	 * entries for the trace and we need to ignore the
4236 	 * ones before the time stamp.
4237 	 */
4238 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4239 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4240 		/* total is the same as the entries */
4241 		*total = count;
4242 	} else
4243 		*total = count +
4244 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4245 	*entries = count;
4246 }
4247 
4248 static void
4249 get_total_entries(struct array_buffer *buf,
4250 		  unsigned long *total, unsigned long *entries)
4251 {
4252 	unsigned long t, e;
4253 	int cpu;
4254 
4255 	*total = 0;
4256 	*entries = 0;
4257 
4258 	for_each_tracing_cpu(cpu) {
4259 		get_total_entries_cpu(buf, &t, &e, cpu);
4260 		*total += t;
4261 		*entries += e;
4262 	}
4263 }
4264 
4265 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4266 {
4267 	unsigned long total, entries;
4268 
4269 	if (!tr)
4270 		tr = &global_trace;
4271 
4272 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4273 
4274 	return entries;
4275 }
4276 
4277 unsigned long trace_total_entries(struct trace_array *tr)
4278 {
4279 	unsigned long total, entries;
4280 
4281 	if (!tr)
4282 		tr = &global_trace;
4283 
4284 	get_total_entries(&tr->array_buffer, &total, &entries);
4285 
4286 	return entries;
4287 }
4288 
4289 static void print_lat_help_header(struct seq_file *m)
4290 {
4291 	seq_puts(m, "#                    _------=> CPU#            \n"
4292 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4293 		    "#                  | / _----=> need-resched    \n"
4294 		    "#                  || / _---=> hardirq/softirq \n"
4295 		    "#                  ||| / _--=> preempt-depth   \n"
4296 		    "#                  |||| / _-=> migrate-disable \n"
4297 		    "#                  ||||| /     delay           \n"
4298 		    "#  cmd     pid     |||||| time  |   caller     \n"
4299 		    "#     \\   /        ||||||  \\    |    /       \n");
4300 }
4301 
4302 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4303 {
4304 	unsigned long total;
4305 	unsigned long entries;
4306 
4307 	get_total_entries(buf, &total, &entries);
4308 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4309 		   entries, total, num_online_cpus());
4310 	seq_puts(m, "#\n");
4311 }
4312 
4313 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4314 				   unsigned int flags)
4315 {
4316 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4317 
4318 	print_event_info(buf, m);
4319 
4320 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4321 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4322 }
4323 
4324 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4325 				       unsigned int flags)
4326 {
4327 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4328 	static const char space[] = "            ";
4329 	int prec = tgid ? 12 : 2;
4330 
4331 	print_event_info(buf, m);
4332 
4333 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4334 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4335 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4336 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4337 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4338 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4339 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4340 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4341 }
4342 
4343 void
4344 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4345 {
4346 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4347 	struct array_buffer *buf = iter->array_buffer;
4348 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4349 	struct tracer *type = iter->trace;
4350 	unsigned long entries;
4351 	unsigned long total;
4352 	const char *name = type->name;
4353 
4354 	get_total_entries(buf, &total, &entries);
4355 
4356 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4357 		   name, UTS_RELEASE);
4358 	seq_puts(m, "# -----------------------------------"
4359 		 "---------------------------------\n");
4360 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4361 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4362 		   nsecs_to_usecs(data->saved_latency),
4363 		   entries,
4364 		   total,
4365 		   buf->cpu,
4366 		   preempt_model_none()      ? "server" :
4367 		   preempt_model_voluntary() ? "desktop" :
4368 		   preempt_model_full()      ? "preempt" :
4369 		   preempt_model_rt()        ? "preempt_rt" :
4370 		   "unknown",
4371 		   /* These are reserved for later use */
4372 		   0, 0, 0, 0);
4373 #ifdef CONFIG_SMP
4374 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4375 #else
4376 	seq_puts(m, ")\n");
4377 #endif
4378 	seq_puts(m, "#    -----------------\n");
4379 	seq_printf(m, "#    | task: %.16s-%d "
4380 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4381 		   data->comm, data->pid,
4382 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4383 		   data->policy, data->rt_priority);
4384 	seq_puts(m, "#    -----------------\n");
4385 
4386 	if (data->critical_start) {
4387 		seq_puts(m, "#  => started at: ");
4388 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4389 		trace_print_seq(m, &iter->seq);
4390 		seq_puts(m, "\n#  => ended at:   ");
4391 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4392 		trace_print_seq(m, &iter->seq);
4393 		seq_puts(m, "\n#\n");
4394 	}
4395 
4396 	seq_puts(m, "#\n");
4397 }
4398 
4399 static void test_cpu_buff_start(struct trace_iterator *iter)
4400 {
4401 	struct trace_seq *s = &iter->seq;
4402 	struct trace_array *tr = iter->tr;
4403 
4404 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4405 		return;
4406 
4407 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4408 		return;
4409 
4410 	if (cpumask_available(iter->started) &&
4411 	    cpumask_test_cpu(iter->cpu, iter->started))
4412 		return;
4413 
4414 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4415 		return;
4416 
4417 	if (cpumask_available(iter->started))
4418 		cpumask_set_cpu(iter->cpu, iter->started);
4419 
4420 	/* Don't print started cpu buffer for the first entry of the trace */
4421 	if (iter->idx > 1)
4422 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4423 				iter->cpu);
4424 }
4425 
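/*
 * Added description (not in the original source): default human-readable
 * output. Prints the context info (latency or normal format) and then
 * hands the entry to its event's trace() handler.
 */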
4426 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4427 {
4428 	struct trace_array *tr = iter->tr;
4429 	struct trace_seq *s = &iter->seq;
4430 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4431 	struct trace_entry *entry;
4432 	struct trace_event *event;
4433 
4434 	entry = iter->ent;
4435 
4436 	test_cpu_buff_start(iter);
4437 
4438 	event = ftrace_find_event(entry->type);
4439 
4440 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4441 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4442 			trace_print_lat_context(iter);
4443 		else
4444 			trace_print_context(iter);
4445 	}
4446 
4447 	if (trace_seq_has_overflowed(s))
4448 		return TRACE_TYPE_PARTIAL_LINE;
4449 
4450 	if (event) {
4451 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4452 			return print_event_fields(iter, event);
4453 		return event->funcs->trace(iter, sym_flags, event);
4454 	}
4455 
4456 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4457 
4458 	return trace_handle_return(s);
4459 }
4460 
4461 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4462 {
4463 	struct trace_array *tr = iter->tr;
4464 	struct trace_seq *s = &iter->seq;
4465 	struct trace_entry *entry;
4466 	struct trace_event *event;
4467 
4468 	entry = iter->ent;
4469 
4470 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4471 		trace_seq_printf(s, "%d %d %llu ",
4472 				 entry->pid, iter->cpu, iter->ts);
4473 
4474 	if (trace_seq_has_overflowed(s))
4475 		return TRACE_TYPE_PARTIAL_LINE;
4476 
4477 	event = ftrace_find_event(entry->type);
4478 	if (event)
4479 		return event->funcs->raw(iter, 0, event);
4480 
4481 	trace_seq_printf(s, "%d ?\n", entry->type);
4482 
4483 	return trace_handle_return(s);
4484 }
4485 
4486 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4487 {
4488 	struct trace_array *tr = iter->tr;
4489 	struct trace_seq *s = &iter->seq;
4490 	unsigned char newline = '\n';
4491 	struct trace_entry *entry;
4492 	struct trace_event *event;
4493 
4494 	entry = iter->ent;
4495 
4496 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4497 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4498 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4499 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4500 		if (trace_seq_has_overflowed(s))
4501 			return TRACE_TYPE_PARTIAL_LINE;
4502 	}
4503 
4504 	event = ftrace_find_event(entry->type);
4505 	if (event) {
4506 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4507 		if (ret != TRACE_TYPE_HANDLED)
4508 			return ret;
4509 	}
4510 
4511 	SEQ_PUT_FIELD(s, newline);
4512 
4513 	return trace_handle_return(s);
4514 }
4515 
4516 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4517 {
4518 	struct trace_array *tr = iter->tr;
4519 	struct trace_seq *s = &iter->seq;
4520 	struct trace_entry *entry;
4521 	struct trace_event *event;
4522 
4523 	entry = iter->ent;
4524 
4525 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4526 		SEQ_PUT_FIELD(s, entry->pid);
4527 		SEQ_PUT_FIELD(s, iter->cpu);
4528 		SEQ_PUT_FIELD(s, iter->ts);
4529 		if (trace_seq_has_overflowed(s))
4530 			return TRACE_TYPE_PARTIAL_LINE;
4531 	}
4532 
4533 	event = ftrace_find_event(entry->type);
4534 	return event ? event->funcs->binary(iter, 0, event) :
4535 		TRACE_TYPE_HANDLED;
4536 }
4537 
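/*
 * Added description (not in the original source): return 1 if every
 * buffer covered by the iterator is empty, 0 otherwise.
 */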
4538 int trace_empty(struct trace_iterator *iter)
4539 {
4540 	struct ring_buffer_iter *buf_iter;
4541 	int cpu;
4542 
4543 	/* If we are looking at one CPU buffer, only check that one */
4544 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4545 		cpu = iter->cpu_file;
4546 		buf_iter = trace_buffer_iter(iter, cpu);
4547 		if (buf_iter) {
4548 			if (!ring_buffer_iter_empty(buf_iter))
4549 				return 0;
4550 		} else {
4551 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4552 				return 0;
4553 		}
4554 		return 1;
4555 	}
4556 
4557 	for_each_tracing_cpu(cpu) {
4558 		buf_iter = trace_buffer_iter(iter, cpu);
4559 		if (buf_iter) {
4560 			if (!ring_buffer_iter_empty(buf_iter))
4561 				return 0;
4562 		} else {
4563 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4564 				return 0;
4565 		}
4566 	}
4567 
4568 	return 1;
4569 }
4570 
4571 /*  Called with trace_event_read_lock() held. */
4572 enum print_line_t print_trace_line(struct trace_iterator *iter)
4573 {
4574 	struct trace_array *tr = iter->tr;
4575 	unsigned long trace_flags = tr->trace_flags;
4576 	enum print_line_t ret;
4577 
4578 	if (iter->lost_events) {
4579 		if (iter->lost_events == (unsigned long)-1)
4580 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4581 					 iter->cpu);
4582 		else
4583 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4584 					 iter->cpu, iter->lost_events);
4585 		if (trace_seq_has_overflowed(&iter->seq))
4586 			return TRACE_TYPE_PARTIAL_LINE;
4587 	}
4588 
4589 	if (iter->trace && iter->trace->print_line) {
4590 		ret = iter->trace->print_line(iter);
4591 		if (ret != TRACE_TYPE_UNHANDLED)
4592 			return ret;
4593 	}
4594 
4595 	if (iter->ent->type == TRACE_BPUTS &&
4596 			trace_flags & TRACE_ITER_PRINTK &&
4597 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4598 		return trace_print_bputs_msg_only(iter);
4599 
4600 	if (iter->ent->type == TRACE_BPRINT &&
4601 			trace_flags & TRACE_ITER_PRINTK &&
4602 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4603 		return trace_print_bprintk_msg_only(iter);
4604 
4605 	if (iter->ent->type == TRACE_PRINT &&
4606 			trace_flags & TRACE_ITER_PRINTK &&
4607 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4608 		return trace_print_printk_msg_only(iter);
4609 
4610 	if (trace_flags & TRACE_ITER_BIN)
4611 		return print_bin_fmt(iter);
4612 
4613 	if (trace_flags & TRACE_ITER_HEX)
4614 		return print_hex_fmt(iter);
4615 
4616 	if (trace_flags & TRACE_ITER_RAW)
4617 		return print_raw_fmt(iter);
4618 
4619 	return print_trace_fmt(iter);
4620 }
4621 
4622 void trace_latency_header(struct seq_file *m)
4623 {
4624 	struct trace_iterator *iter = m->private;
4625 	struct trace_array *tr = iter->tr;
4626 
4627 	/* print nothing if the buffers are empty */
4628 	if (trace_empty(iter))
4629 		return;
4630 
4631 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4632 		print_trace_header(m, iter);
4633 
4634 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4635 		print_lat_help_header(m);
4636 }
4637 
4638 void trace_default_header(struct seq_file *m)
4639 {
4640 	struct trace_iterator *iter = m->private;
4641 	struct trace_array *tr = iter->tr;
4642 	unsigned long trace_flags = tr->trace_flags;
4643 
4644 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4645 		return;
4646 
4647 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4648 		/* print nothing if the buffers are empty */
4649 		if (trace_empty(iter))
4650 			return;
4651 		print_trace_header(m, iter);
4652 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4653 			print_lat_help_header(m);
4654 	} else {
4655 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4656 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4657 				print_func_help_header_irq(iter->array_buffer,
4658 							   m, trace_flags);
4659 			else
4660 				print_func_help_header(iter->array_buffer, m,
4661 						       trace_flags);
4662 		}
4663 	}
4664 }
4665 
4666 static void test_ftrace_alive(struct seq_file *m)
4667 {
4668 	if (!ftrace_is_dead())
4669 		return;
4670 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4671 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4672 }
4673 
4674 #ifdef CONFIG_TRACER_MAX_TRACE
4675 static void show_snapshot_main_help(struct seq_file *m)
4676 {
4677 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4678 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4679 		    "#                      Takes a snapshot of the main buffer.\n"
4680 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4681 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4682 		    "#                       is not a '0' or '1')\n");
4683 }
4684 
4685 static void show_snapshot_percpu_help(struct seq_file *m)
4686 {
4687 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4688 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4689 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4690 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4691 #else
4692 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4693 		    "#                     Must use main snapshot file to allocate.\n");
4694 #endif
4695 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4696 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4697 		    "#                       is not a '0' or '1')\n");
4698 }
4699 
4700 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4701 {
4702 	if (iter->tr->allocated_snapshot)
4703 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4704 	else
4705 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4706 
4707 	seq_puts(m, "# Snapshot commands:\n");
4708 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4709 		show_snapshot_main_help(m);
4710 	else
4711 		show_snapshot_percpu_help(m);
4712 }
4713 #else
4714 /* Should never be called */
4715 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4716 #endif
4717 
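/*
 * Added description (not in the original source): seq_file ->show()
 * callback. Prints the header while there is no current entry, flushes
 * any leftover line from a previous seq_file overflow, or formats the
 * current trace line.
 */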
4718 static int s_show(struct seq_file *m, void *v)
4719 {
4720 	struct trace_iterator *iter = v;
4721 	int ret;
4722 
4723 	if (iter->ent == NULL) {
4724 		if (iter->tr) {
4725 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4726 			seq_puts(m, "#\n");
4727 			test_ftrace_alive(m);
4728 		}
4729 		if (iter->snapshot && trace_empty(iter))
4730 			print_snapshot_help(m, iter);
4731 		else if (iter->trace && iter->trace->print_header)
4732 			iter->trace->print_header(m);
4733 		else
4734 			trace_default_header(m);
4735 
4736 	} else if (iter->leftover) {
4737 		/*
4738 		 * If we filled the seq_file buffer earlier, we
4739 		 * want to just show it now.
4740 		 */
4741 		ret = trace_print_seq(m, &iter->seq);
4742 
4743 		/* ret should this time be zero, but you never know */
4744 		iter->leftover = ret;
4745 
4746 	} else {
4747 		ret = print_trace_line(iter);
4748 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4749 			iter->seq.full = 0;
4750 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4751 		}
4752 		ret = trace_print_seq(m, &iter->seq);
4753 		/*
4754 		 * If we overflow the seq_file buffer, then it will
4755 		 * ask us for this data again at start up.
4756 		 * Use that instead.
4757 		 *  ret is 0 if seq_file write succeeded.
4758 		 *        -1 otherwise.
4759 		 */
4760 		iter->leftover = ret;
4761 	}
4762 
4763 	return 0;
4764 }
4765 
4766 /*
4767  * Should be used after trace_array_get(); trace_types_lock
4768  * ensures that i_cdev was already initialized.
4769  */
4770 static inline int tracing_get_cpu(struct inode *inode)
4771 {
4772 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4773 		return (long)inode->i_cdev - 1;
4774 	return RING_BUFFER_ALL_CPUS;
4775 }
4776 
4777 static const struct seq_operations tracer_seq_ops = {
4778 	.start		= s_start,
4779 	.next		= s_next,
4780 	.stop		= s_stop,
4781 	.show		= s_show,
4782 };
4783 
4784 /*
4785  * Note, as iter itself can be allocated and freed in different
4786  * ways, this function is only used to free its content, and not
4787  * the iterator itself. The only requirement on all the allocations
4788  * is that they must zero all fields (kzalloc), as freeing works with
4789  * either allocated content or NULL.
4790  */
4791 static void free_trace_iter_content(struct trace_iterator *iter)
4792 {
4793 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4794 	if (iter->fmt != static_fmt_buf)
4795 		kfree(iter->fmt);
4796 
4797 	kfree(iter->temp);
4798 	kfree(iter->buffer_iter);
4799 	mutex_destroy(&iter->mutex);
4800 	free_cpumask_var(iter->started);
4801 }
4802 
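/*
 * Added description (not in the original source): allocate and initialize
 * a trace_iterator for reading the trace file. Sets up per-CPU ring
 * buffer iterators and, unless this is the snapshot file, may pause
 * tracing while the buffer is being read (depending on pause-on-trace).
 */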
4803 static struct trace_iterator *
4804 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4805 {
4806 	struct trace_array *tr = inode->i_private;
4807 	struct trace_iterator *iter;
4808 	int cpu;
4809 
4810 	if (tracing_disabled)
4811 		return ERR_PTR(-ENODEV);
4812 
4813 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4814 	if (!iter)
4815 		return ERR_PTR(-ENOMEM);
4816 
4817 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4818 				    GFP_KERNEL);
4819 	if (!iter->buffer_iter)
4820 		goto release;
4821 
4822 	/*
4823 	 * trace_find_next_entry() may need to save off iter->ent.
4824 	 * It will place it into the iter->temp buffer. As most
4825 	 * events are less than 128, allocate a buffer of that size.
4826 	 * If one is greater, then trace_find_next_entry() will
4827 	 * allocate a new buffer to adjust for the bigger iter->ent.
4828 	 * It's not critical if it fails to get allocated here.
4829 	 */
4830 	iter->temp = kmalloc(128, GFP_KERNEL);
4831 	if (iter->temp)
4832 		iter->temp_size = 128;
4833 
4834 	/*
4835 	 * trace_event_printf() may need to modify the given format
4836 	 * string to replace %p with %px so that it shows the real address
4837 	 * instead of a hash value. However, that is only needed for event
4838 	 * tracing; other tracers may not need it. Defer the allocation
4839 	 * until it is needed.
4840 	 */
4841 	iter->fmt = NULL;
4842 	iter->fmt_size = 0;
4843 
4844 	mutex_lock(&trace_types_lock);
4845 	iter->trace = tr->current_trace;
4846 
4847 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4848 		goto fail;
4849 
4850 	iter->tr = tr;
4851 
4852 #ifdef CONFIG_TRACER_MAX_TRACE
4853 	/* Currently only the top directory has a snapshot */
4854 	if (tr->current_trace->print_max || snapshot)
4855 		iter->array_buffer = &tr->max_buffer;
4856 	else
4857 #endif
4858 		iter->array_buffer = &tr->array_buffer;
4859 	iter->snapshot = snapshot;
4860 	iter->pos = -1;
4861 	iter->cpu_file = tracing_get_cpu(inode);
4862 	mutex_init(&iter->mutex);
4863 
4864 	/* Notify the tracer early; before we stop tracing. */
4865 	if (iter->trace->open)
4866 		iter->trace->open(iter);
4867 
4868 	/* Annotate start of buffers if we had overruns */
4869 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4870 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4871 
4872 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4873 	if (trace_clocks[tr->clock_id].in_ns)
4874 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4875 
4876 	/*
4877 	 * If pause-on-trace is enabled, then stop the trace while
4878 	 * dumping, unless this is the "snapshot" file
4879 	 */
4880 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4881 		tracing_stop_tr(tr);
4882 
4883 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4884 		for_each_tracing_cpu(cpu) {
4885 			iter->buffer_iter[cpu] =
4886 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4887 							 cpu, GFP_KERNEL);
4888 		}
4889 		ring_buffer_read_prepare_sync();
4890 		for_each_tracing_cpu(cpu) {
4891 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4892 			tracing_iter_reset(iter, cpu);
4893 		}
4894 	} else {
4895 		cpu = iter->cpu_file;
4896 		iter->buffer_iter[cpu] =
4897 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4898 						 cpu, GFP_KERNEL);
4899 		ring_buffer_read_prepare_sync();
4900 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4901 		tracing_iter_reset(iter, cpu);
4902 	}
4903 
4904 	mutex_unlock(&trace_types_lock);
4905 
4906 	return iter;
4907 
4908  fail:
4909 	mutex_unlock(&trace_types_lock);
4910 	free_trace_iter_content(iter);
4911 release:
4912 	seq_release_private(inode, file);
4913 	return ERR_PTR(-ENOMEM);
4914 }
4915 
4916 int tracing_open_generic(struct inode *inode, struct file *filp)
4917 {
4918 	int ret;
4919 
4920 	ret = tracing_check_open_get_tr(NULL);
4921 	if (ret)
4922 		return ret;
4923 
4924 	filp->private_data = inode->i_private;
4925 	return 0;
4926 }
4927 
4928 bool tracing_is_disabled(void)
4929 {
4930 	return (tracing_disabled) ? true: false;
4931 }
4932 
4933 /*
4934  * Open and update trace_array ref count.
4935  * Must have the current trace_array passed to it.
4936  */
4937 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4938 {
4939 	struct trace_array *tr = inode->i_private;
4940 	int ret;
4941 
4942 	ret = tracing_check_open_get_tr(tr);
4943 	if (ret)
4944 		return ret;
4945 
4946 	filp->private_data = inode->i_private;
4947 
4948 	return 0;
4949 }
4950 
4951 /*
4952  * The private pointer of the inode is the trace_event_file.
4953  * Update the tr ref count associated with it.
4954  */
4955 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4956 {
4957 	struct trace_event_file *file = inode->i_private;
4958 	int ret;
4959 
4960 	ret = tracing_check_open_get_tr(file->tr);
4961 	if (ret)
4962 		return ret;
4963 
4964 	mutex_lock(&event_mutex);
4965 
4966 	/* Fail if the file is marked for removal */
4967 	if (file->flags & EVENT_FILE_FL_FREED) {
4968 		trace_array_put(file->tr);
4969 		ret = -ENODEV;
4970 	} else {
4971 		event_file_get(file);
4972 	}
4973 
4974 	mutex_unlock(&event_mutex);
4975 	if (ret)
4976 		return ret;
4977 
4978 	filp->private_data = inode->i_private;
4979 
4980 	return 0;
4981 }
4982 
4983 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4984 {
4985 	struct trace_event_file *file = inode->i_private;
4986 
4987 	trace_array_put(file->tr);
4988 	event_file_put(file);
4989 
4990 	return 0;
4991 }
4992 
4993 static int tracing_mark_open(struct inode *inode, struct file *filp)
4994 {
4995 	stream_open(inode, filp);
4996 	return tracing_open_generic_tr(inode, filp);
4997 }
4998 
4999 static int tracing_release(struct inode *inode, struct file *file)
5000 {
5001 	struct trace_array *tr = inode->i_private;
5002 	struct seq_file *m = file->private_data;
5003 	struct trace_iterator *iter;
5004 	int cpu;
5005 
5006 	if (!(file->f_mode & FMODE_READ)) {
5007 		trace_array_put(tr);
5008 		return 0;
5009 	}
5010 
5011 	/* Writes do not use seq_file */
5012 	iter = m->private;
5013 	mutex_lock(&trace_types_lock);
5014 
5015 	for_each_tracing_cpu(cpu) {
5016 		if (iter->buffer_iter[cpu])
5017 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
5018 	}
5019 
5020 	if (iter->trace && iter->trace->close)
5021 		iter->trace->close(iter);
5022 
5023 	if (!iter->snapshot && tr->stop_count)
5024 		/* reenable tracing if it was previously enabled */
5025 		tracing_start_tr(tr);
5026 
5027 	__trace_array_put(tr);
5028 
5029 	mutex_unlock(&trace_types_lock);
5030 
5031 	free_trace_iter_content(iter);
5032 	seq_release_private(inode, file);
5033 
5034 	return 0;
5035 }
5036 
5037 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5038 {
5039 	struct trace_array *tr = inode->i_private;
5040 
5041 	trace_array_put(tr);
5042 	return 0;
5043 }
5044 
5045 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5046 {
5047 	struct trace_array *tr = inode->i_private;
5048 
5049 	trace_array_put(tr);
5050 
5051 	return single_release(inode, file);
5052 }
5053 
5054 static int tracing_open(struct inode *inode, struct file *file)
5055 {
5056 	struct trace_array *tr = inode->i_private;
5057 	struct trace_iterator *iter;
5058 	int ret;
5059 
5060 	ret = tracing_check_open_get_tr(tr);
5061 	if (ret)
5062 		return ret;
5063 
5064 	/* If this file was open for write, then erase contents */
5065 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5066 		int cpu = tracing_get_cpu(inode);
5067 		struct array_buffer *trace_buf = &tr->array_buffer;
5068 
5069 #ifdef CONFIG_TRACER_MAX_TRACE
5070 		if (tr->current_trace->print_max)
5071 			trace_buf = &tr->max_buffer;
5072 #endif
5073 
5074 		if (cpu == RING_BUFFER_ALL_CPUS)
5075 			tracing_reset_online_cpus(trace_buf);
5076 		else
5077 			tracing_reset_cpu(trace_buf, cpu);
5078 	}
5079 
5080 	if (file->f_mode & FMODE_READ) {
5081 		iter = __tracing_open(inode, file, false);
5082 		if (IS_ERR(iter))
5083 			ret = PTR_ERR(iter);
5084 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5085 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
5086 	}
5087 
5088 	if (ret < 0)
5089 		trace_array_put(tr);
5090 
5091 	return ret;
5092 }
5093 
5094 /*
5095  * Some tracers are not suitable for instance buffers.
5096  * A tracer is always available for the global array (toplevel)
5097  * or if it explicitly states that it is.
5098  */
5099 static bool
5100 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5101 {
5102 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5103 }
5104 
5105 /* Find the next tracer that this trace array may use */
5106 static struct tracer *
5107 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5108 {
5109 	while (t && !trace_ok_for_array(t, tr))
5110 		t = t->next;
5111 
5112 	return t;
5113 }
5114 
5115 static void *
5116 t_next(struct seq_file *m, void *v, loff_t *pos)
5117 {
5118 	struct trace_array *tr = m->private;
5119 	struct tracer *t = v;
5120 
5121 	(*pos)++;
5122 
5123 	if (t)
5124 		t = get_tracer_for_array(tr, t->next);
5125 
5126 	return t;
5127 }
5128 
5129 static void *t_start(struct seq_file *m, loff_t *pos)
5130 {
5131 	struct trace_array *tr = m->private;
5132 	struct tracer *t;
5133 	loff_t l = 0;
5134 
5135 	mutex_lock(&trace_types_lock);
5136 
5137 	t = get_tracer_for_array(tr, trace_types);
5138 	for (; t && l < *pos; t = t_next(m, t, &l))
5139 			;
5140 
5141 	return t;
5142 }
5143 
5144 static void t_stop(struct seq_file *m, void *p)
5145 {
5146 	mutex_unlock(&trace_types_lock);
5147 }
5148 
5149 static int t_show(struct seq_file *m, void *v)
5150 {
5151 	struct tracer *t = v;
5152 
5153 	if (!t)
5154 		return 0;
5155 
5156 	seq_puts(m, t->name);
5157 	if (t->next)
5158 		seq_putc(m, ' ');
5159 	else
5160 		seq_putc(m, '\n');
5161 
5162 	return 0;
5163 }
5164 
5165 static const struct seq_operations show_traces_seq_ops = {
5166 	.start		= t_start,
5167 	.next		= t_next,
5168 	.stop		= t_stop,
5169 	.show		= t_show,
5170 };
5171 
5172 static int show_traces_open(struct inode *inode, struct file *file)
5173 {
5174 	struct trace_array *tr = inode->i_private;
5175 	struct seq_file *m;
5176 	int ret;
5177 
5178 	ret = tracing_check_open_get_tr(tr);
5179 	if (ret)
5180 		return ret;
5181 
5182 	ret = seq_open(file, &show_traces_seq_ops);
5183 	if (ret) {
5184 		trace_array_put(tr);
5185 		return ret;
5186 	}
5187 
5188 	m = file->private_data;
5189 	m->private = tr;
5190 
5191 	return 0;
5192 }
5193 
5194 static int show_traces_release(struct inode *inode, struct file *file)
5195 {
5196 	struct trace_array *tr = inode->i_private;
5197 
5198 	trace_array_put(tr);
5199 	return seq_release(inode, file);
5200 }
5201 
5202 static ssize_t
5203 tracing_write_stub(struct file *filp, const char __user *ubuf,
5204 		   size_t count, loff_t *ppos)
5205 {
5206 	return count;
5207 }
5208 
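/*
 * Added description (not in the original source): seeking is only
 * supported when the file was opened for read (seq_file); writers stay
 * at position 0.
 */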
5209 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5210 {
5211 	int ret;
5212 
5213 	if (file->f_mode & FMODE_READ)
5214 		ret = seq_lseek(file, offset, whence);
5215 	else
5216 		file->f_pos = ret = 0;
5217 
5218 	return ret;
5219 }
5220 
5221 static const struct file_operations tracing_fops = {
5222 	.open		= tracing_open,
5223 	.read		= seq_read,
5224 	.read_iter	= seq_read_iter,
5225 	.splice_read	= copy_splice_read,
5226 	.write		= tracing_write_stub,
5227 	.llseek		= tracing_lseek,
5228 	.release	= tracing_release,
5229 };
5230 
5231 static const struct file_operations show_traces_fops = {
5232 	.open		= show_traces_open,
5233 	.read		= seq_read,
5234 	.llseek		= seq_lseek,
5235 	.release	= show_traces_release,
5236 };
5237 
5238 static ssize_t
5239 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5240 		     size_t count, loff_t *ppos)
5241 {
5242 	struct trace_array *tr = file_inode(filp)->i_private;
5243 	char *mask_str;
5244 	int len;
5245 
5246 	len = snprintf(NULL, 0, "%*pb\n",
5247 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5248 	mask_str = kmalloc(len, GFP_KERNEL);
5249 	if (!mask_str)
5250 		return -ENOMEM;
5251 
5252 	len = snprintf(mask_str, len, "%*pb\n",
5253 		       cpumask_pr_args(tr->tracing_cpumask));
5254 	if (len >= count) {
5255 		count = -EINVAL;
5256 		goto out_err;
5257 	}
5258 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5259 
5260 out_err:
5261 	kfree(mask_str);
5262 
5263 	return count;
5264 }
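/*
 * Added description (not in the original source): apply a new tracing
 * cpumask. Recording is disabled on CPUs removed from the mask and
 * re-enabled on CPUs added to it, then the new mask is stored.
 */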
5265 
5266 int tracing_set_cpumask(struct trace_array *tr,
5267 			cpumask_var_t tracing_cpumask_new)
5268 {
5269 	int cpu;
5270 
5271 	if (!tr)
5272 		return -EINVAL;
5273 
5274 	local_irq_disable();
5275 	arch_spin_lock(&tr->max_lock);
5276 	for_each_tracing_cpu(cpu) {
5277 		/*
5278 		 * Increase/decrease the disabled counter if we are
5279 		 * about to flip a bit in the cpumask:
5280 		 */
5281 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5282 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5283 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5284 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5285 #ifdef CONFIG_TRACER_MAX_TRACE
5286 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5287 #endif
5288 		}
5289 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5290 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5291 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5292 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5293 #ifdef CONFIG_TRACER_MAX_TRACE
5294 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5295 #endif
5296 		}
5297 	}
5298 	arch_spin_unlock(&tr->max_lock);
5299 	local_irq_enable();
5300 
5301 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5302 
5303 	return 0;
5304 }
5305 
5306 static ssize_t
5307 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5308 		      size_t count, loff_t *ppos)
5309 {
5310 	struct trace_array *tr = file_inode(filp)->i_private;
5311 	cpumask_var_t tracing_cpumask_new;
5312 	int err;
5313 
5314 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5315 		return -ENOMEM;
5316 
5317 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5318 	if (err)
5319 		goto err_free;
5320 
5321 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5322 	if (err)
5323 		goto err_free;
5324 
5325 	free_cpumask_var(tracing_cpumask_new);
5326 
5327 	return count;
5328 
5329 err_free:
5330 	free_cpumask_var(tracing_cpumask_new);
5331 
5332 	return err;
5333 }
5334 
5335 static const struct file_operations tracing_cpumask_fops = {
5336 	.open		= tracing_open_generic_tr,
5337 	.read		= tracing_cpumask_read,
5338 	.write		= tracing_cpumask_write,
5339 	.release	= tracing_release_generic_tr,
5340 	.llseek		= generic_file_llseek,
5341 };
5342 
5343 static int tracing_trace_options_show(struct seq_file *m, void *v)
5344 {
5345 	struct tracer_opt *trace_opts;
5346 	struct trace_array *tr = m->private;
5347 	u32 tracer_flags;
5348 	int i;
5349 
5350 	mutex_lock(&trace_types_lock);
5351 	tracer_flags = tr->current_trace->flags->val;
5352 	trace_opts = tr->current_trace->flags->opts;
5353 
5354 	for (i = 0; trace_options[i]; i++) {
5355 		if (tr->trace_flags & (1 << i))
5356 			seq_printf(m, "%s\n", trace_options[i]);
5357 		else
5358 			seq_printf(m, "no%s\n", trace_options[i]);
5359 	}
5360 
5361 	for (i = 0; trace_opts[i].name; i++) {
5362 		if (tracer_flags & trace_opts[i].bit)
5363 			seq_printf(m, "%s\n", trace_opts[i].name);
5364 		else
5365 			seq_printf(m, "no%s\n", trace_opts[i].name);
5366 	}
5367 	mutex_unlock(&trace_types_lock);
5368 
5369 	return 0;
5370 }
5371 
5372 static int __set_tracer_option(struct trace_array *tr,
5373 			       struct tracer_flags *tracer_flags,
5374 			       struct tracer_opt *opts, int neg)
5375 {
5376 	struct tracer *trace = tracer_flags->trace;
5377 	int ret;
5378 
5379 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5380 	if (ret)
5381 		return ret;
5382 
5383 	if (neg)
5384 		tracer_flags->val &= ~opts->bit;
5385 	else
5386 		tracer_flags->val |= opts->bit;
5387 	return 0;
5388 }
5389 
5390 /* Try to assign a tracer specific option */
5391 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5392 {
5393 	struct tracer *trace = tr->current_trace;
5394 	struct tracer_flags *tracer_flags = trace->flags;
5395 	struct tracer_opt *opts = NULL;
5396 	int i;
5397 
5398 	for (i = 0; tracer_flags->opts[i].name; i++) {
5399 		opts = &tracer_flags->opts[i];
5400 
5401 		if (strcmp(cmp, opts->name) == 0)
5402 			return __set_tracer_option(tr, trace->flags, opts, neg);
5403 	}
5404 
5405 	return -EINVAL;
5406 }
5407 
5408 /* Some tracers require overwrite to stay enabled */
5409 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5410 {
5411 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5412 		return -1;
5413 
5414 	return 0;
5415 }
5416 
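/*
 * Added description (not in the original source): set or clear a
 * TRACE_ITER_* flag on @tr. The current tracer may veto the change, and
 * side effects (cmdline/tgid recording, fork following, overwrite mode,
 * trace_printk) are propagated here.
 */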
5417 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5418 {
5419 	int *map;
5420 
5421 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5422 	    (mask == TRACE_ITER_RECORD_CMD))
5423 		lockdep_assert_held(&event_mutex);
5424 
5425 	/* do nothing if flag is already set */
5426 	if (!!(tr->trace_flags & mask) == !!enabled)
5427 		return 0;
5428 
5429 	/* Give the tracer a chance to approve the change */
5430 	if (tr->current_trace->flag_changed)
5431 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5432 			return -EINVAL;
5433 
5434 	if (enabled)
5435 		tr->trace_flags |= mask;
5436 	else
5437 		tr->trace_flags &= ~mask;
5438 
5439 	if (mask == TRACE_ITER_RECORD_CMD)
5440 		trace_event_enable_cmd_record(enabled);
5441 
5442 	if (mask == TRACE_ITER_RECORD_TGID) {
5443 		if (!tgid_map) {
5444 			tgid_map_max = pid_max;
5445 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5446 				       GFP_KERNEL);
5447 
5448 			/*
5449 			 * Pairs with smp_load_acquire() in
5450 			 * trace_find_tgid_ptr() to ensure that if it observes
5451 			 * the tgid_map we just allocated then it also observes
5452 			 * the corresponding tgid_map_max value.
5453 			 */
5454 			smp_store_release(&tgid_map, map);
5455 		}
5456 		if (!tgid_map) {
5457 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5458 			return -ENOMEM;
5459 		}
5460 
5461 		trace_event_enable_tgid_record(enabled);
5462 	}
5463 
5464 	if (mask == TRACE_ITER_EVENT_FORK)
5465 		trace_event_follow_fork(tr, enabled);
5466 
5467 	if (mask == TRACE_ITER_FUNC_FORK)
5468 		ftrace_pid_follow_fork(tr, enabled);
5469 
5470 	if (mask == TRACE_ITER_OVERWRITE) {
5471 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5472 #ifdef CONFIG_TRACER_MAX_TRACE
5473 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5474 #endif
5475 	}
5476 
5477 	if (mask == TRACE_ITER_PRINTK) {
5478 		trace_printk_start_stop_comm(enabled);
5479 		trace_printk_control(enabled);
5480 	}
5481 
5482 	return 0;
5483 }
5484 
5485 int trace_set_options(struct trace_array *tr, char *option)
5486 {
5487 	char *cmp;
5488 	int neg = 0;
5489 	int ret;
5490 	size_t orig_len = strlen(option);
5491 	int len;
5492 
5493 	cmp = strstrip(option);
5494 
5495 	len = str_has_prefix(cmp, "no");
5496 	if (len)
5497 		neg = 1;
5498 
5499 	cmp += len;
5500 
5501 	mutex_lock(&event_mutex);
5502 	mutex_lock(&trace_types_lock);
5503 
5504 	ret = match_string(trace_options, -1, cmp);
5505 	/* If it is not a general trace option, test the tracer-specific options */
5506 	if (ret < 0)
5507 		ret = set_tracer_option(tr, cmp, neg);
5508 	else
5509 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5510 
5511 	mutex_unlock(&trace_types_lock);
5512 	mutex_unlock(&event_mutex);
5513 
5514 	/*
5515 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5516 	 * turn it back into a space.
5517 	 */
5518 	if (orig_len > strlen(option))
5519 		option[strlen(option)] = ' ';
5520 
5521 	return ret;
5522 }
5523 
5524 static void __init apply_trace_boot_options(void)
5525 {
5526 	char *buf = trace_boot_options_buf;
5527 	char *option;
5528 
5529 	while (true) {
5530 		option = strsep(&buf, ",");
5531 
5532 		if (!option)
5533 			break;
5534 
5535 		if (*option)
5536 			trace_set_options(&global_trace, option);
5537 
5538 		/* Put back the comma to allow this to be called again */
5539 		if (buf)
5540 			*(buf - 1) = ',';
5541 	}
5542 }
5543 
5544 static ssize_t
5545 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5546 			size_t cnt, loff_t *ppos)
5547 {
5548 	struct seq_file *m = filp->private_data;
5549 	struct trace_array *tr = m->private;
5550 	char buf[64];
5551 	int ret;
5552 
5553 	if (cnt >= sizeof(buf))
5554 		return -EINVAL;
5555 
5556 	if (copy_from_user(buf, ubuf, cnt))
5557 		return -EFAULT;
5558 
5559 	buf[cnt] = 0;
5560 
5561 	ret = trace_set_options(tr, buf);
5562 	if (ret < 0)
5563 		return ret;
5564 
5565 	*ppos += cnt;
5566 
5567 	return cnt;
5568 }
5569 
5570 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5571 {
5572 	struct trace_array *tr = inode->i_private;
5573 	int ret;
5574 
5575 	ret = tracing_check_open_get_tr(tr);
5576 	if (ret)
5577 		return ret;
5578 
5579 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5580 	if (ret < 0)
5581 		trace_array_put(tr);
5582 
5583 	return ret;
5584 }
5585 
5586 static const struct file_operations tracing_iter_fops = {
5587 	.open		= tracing_trace_options_open,
5588 	.read		= seq_read,
5589 	.llseek		= seq_lseek,
5590 	.release	= tracing_single_release_tr,
5591 	.write		= tracing_trace_options_write,
5592 };
5593 
5594 static const char readme_msg[] =
5595 	"tracing mini-HOWTO:\n\n"
5596 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5597 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5598 	" Important files:\n"
5599 	"  trace\t\t\t- The static contents of the buffer\n"
5600 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5601 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5602 	"  current_tracer\t- function and latency tracers\n"
5603 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5604 	"  error_log\t- error log for failed commands (that support it)\n"
5605 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5606 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5607 	"  trace_clock\t\t- change the clock used to order events\n"
5608 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5609 	"      global:   Synced across CPUs but slows tracing down.\n"
5610 	"     counter:   Not a clock, but just an increment\n"
5611 	"      uptime:   Jiffy counter from time of boot\n"
5612 	"        perf:   Same clock that perf events use\n"
5613 #ifdef CONFIG_X86_64
5614 	"     x86-tsc:   TSC cycle counter\n"
5615 #endif
5616 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5617 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5618 	"    absolute:   Absolute (standalone) timestamp\n"
5619 	"\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
5620 	"\n  trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
5621 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5622 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5623 	"\t\t\t  Remove sub-buffer with rmdir\n"
5624 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5625 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5626 	"\t\t\t  option name\n"
5627 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5628 #ifdef CONFIG_DYNAMIC_FTRACE
5629 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5630 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5631 	"\t\t\t  functions\n"
5632 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5633 	"\t     modules: Can select a group via module\n"
5634 	"\t      Format: :mod:<module-name>\n"
5635 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5636 	"\t    triggers: a command to perform when function is hit\n"
5637 	"\t      Format: <function>:<trigger>[:count]\n"
5638 	"\t     trigger: traceon, traceoff\n"
5639 	"\t\t      enable_event:<system>:<event>\n"
5640 	"\t\t      disable_event:<system>:<event>\n"
5641 #ifdef CONFIG_STACKTRACE
5642 	"\t\t      stacktrace\n"
5643 #endif
5644 #ifdef CONFIG_TRACER_SNAPSHOT
5645 	"\t\t      snapshot\n"
5646 #endif
5647 	"\t\t      dump\n"
5648 	"\t\t      cpudump\n"
5649 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5650 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5651 	"\t     The first one will disable tracing every time do_fault is hit\n"
5652 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5653 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5654 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5655 	"\t       the counter will not decrement. It only decrements when the\n"
5656 	"\t       trigger did work\n"
5657 	"\t     To remove trigger without count:\n"
5658 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5659 	"\t     To remove trigger with a count:\n"
5660 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5661 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5662 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5663 	"\t    modules: Can select a group via module command :mod:\n"
5664 	"\t    Does not accept triggers\n"
5665 #endif /* CONFIG_DYNAMIC_FTRACE */
5666 #ifdef CONFIG_FUNCTION_TRACER
5667 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5668 	"\t\t    (function)\n"
5669 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5670 	"\t\t    (function)\n"
5671 #endif
5672 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5673 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5674 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5675 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5676 #endif
5677 #ifdef CONFIG_TRACER_SNAPSHOT
5678 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5679 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5680 	"\t\t\t  information\n"
5681 #endif
5682 #ifdef CONFIG_STACK_TRACER
5683 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5684 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5685 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5686 	"\t\t\t  new trace)\n"
5687 #ifdef CONFIG_DYNAMIC_FTRACE
5688 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5689 	"\t\t\t  traces\n"
5690 #endif
5691 #endif /* CONFIG_STACK_TRACER */
5692 #ifdef CONFIG_DYNAMIC_EVENTS
5693 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5694 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5695 #endif
5696 #ifdef CONFIG_KPROBE_EVENTS
5697 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5698 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5699 #endif
5700 #ifdef CONFIG_UPROBE_EVENTS
5701 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5702 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5703 #endif
5704 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5705     defined(CONFIG_FPROBE_EVENTS)
5706 	"\t  accepts: event-definitions (one definition per line)\n"
5707 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5708 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5709 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5710 #endif
5711 #ifdef CONFIG_FPROBE_EVENTS
5712 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5713 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5714 #endif
5715 #ifdef CONFIG_HIST_TRIGGERS
5716 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5717 #endif
5718 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5719 	"\t           -:[<group>/][<event>]\n"
5720 #ifdef CONFIG_KPROBE_EVENTS
5721 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5722   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5723 #endif
5724 #ifdef CONFIG_UPROBE_EVENTS
5725   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5726 #endif
5727 	"\t     args: <name>=fetcharg[:type]\n"
5728 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5729 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5730 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5731 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5732 	"\t           <argname>[->field[->field|.field...]],\n"
5733 #else
5734 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5735 #endif
5736 #else
5737 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5738 #endif
5739 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5740 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5741 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5742 	"\t           symstr, <type>\\[<array-size>\\]\n"
5743 #ifdef CONFIG_HIST_TRIGGERS
5744 	"\t    field: <stype> <name>;\n"
5745 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5746 	"\t           [unsigned] char/int/long\n"
5747 #endif
5748 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5749 	"\t            of the <attached-group>/<attached-event>.\n"
5750 #endif
5751 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5752 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5753 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5754 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5755 	"\t\t\t  events\n"
5756 	"      filter\t\t- If set, only events passing filter are traced\n"
5757 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5758 	"\t\t\t  <event>:\n"
5759 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5760 	"      filter\t\t- If set, only events passing filter are traced\n"
5761 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5762 	"\t    Format: <trigger>[:count][if <filter>]\n"
5763 	"\t   trigger: traceon, traceoff\n"
5764 	"\t            enable_event:<system>:<event>\n"
5765 	"\t            disable_event:<system>:<event>\n"
5766 #ifdef CONFIG_HIST_TRIGGERS
5767 	"\t            enable_hist:<system>:<event>\n"
5768 	"\t            disable_hist:<system>:<event>\n"
5769 #endif
5770 #ifdef CONFIG_STACKTRACE
5771 	"\t\t    stacktrace\n"
5772 #endif
5773 #ifdef CONFIG_TRACER_SNAPSHOT
5774 	"\t\t    snapshot\n"
5775 #endif
5776 #ifdef CONFIG_HIST_TRIGGERS
5777 	"\t\t    hist (see below)\n"
5778 #endif
5779 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5780 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5781 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5782 	"\t                  events/block/block_unplug/trigger\n"
5783 	"\t   The first disables tracing every time block_unplug is hit.\n"
5784 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5785 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5786 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5787 	"\t   Like function triggers, the counter is only decremented if it\n"
5788 	"\t    enabled or disabled tracing.\n"
5789 	"\t   To remove a trigger without a count:\n"
5790 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5791 	"\t   To remove a trigger with a count:\n"
5792 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5793 	"\t   Filters can be ignored when removing a trigger.\n"
5794 #ifdef CONFIG_HIST_TRIGGERS
5795 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5796 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5797 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5798 	"\t            [:values=<field1[,field2,...]>]\n"
5799 	"\t            [:sort=<field1[,field2,...]>]\n"
5800 	"\t            [:size=#entries]\n"
5801 	"\t            [:pause][:continue][:clear]\n"
5802 	"\t            [:name=histname1]\n"
5803 	"\t            [:nohitcount]\n"
5804 	"\t            [:<handler>.<action>]\n"
5805 	"\t            [if <filter>]\n\n"
5806 	"\t    Note, special fields can be used as well:\n"
5807 	"\t            common_timestamp - to record current timestamp\n"
5808 	"\t            common_cpu - to record the CPU the event happened on\n"
5809 	"\n"
5810 	"\t    A hist trigger variable can be:\n"
5811 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5812 	"\t        - a reference to another variable e.g. y=$x,\n"
5813 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5814 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5815 	"\n"
5816 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5817 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5818 	"\t    variable reference, field or numeric literal.\n"
5819 	"\n"
5820 	"\t    When a matching event is hit, an entry is added to a hash\n"
5821 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5822 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5823 	"\t    correspond to fields in the event's format description.  Keys\n"
5824 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5825 	"\t    Compound keys consisting of up to two fields can be specified\n"
5826 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5827 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5828 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5829 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5830 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5831 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5832 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5833 	"\t    its histogram data will be shared with other triggers of the\n"
5834 	"\t    same name, and trigger hits will update this common data.\n\n"
5835 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5836 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5837 	"\t    triggers attached to an event, there will be a table for each\n"
5838 	"\t    trigger in the output.  The table displayed for a named\n"
5839 	"\t    trigger will be the same as any other instance having the\n"
5840 	"\t    same name.  The default format used to display a given field\n"
5841 	"\t    can be modified by appending any of the following modifiers\n"
5842 	"\t    to the field name, as applicable:\n\n"
5843 	"\t            .hex        display a number as a hex value\n"
5844 	"\t            .sym        display an address as a symbol\n"
5845 	"\t            .sym-offset display an address as a symbol and offset\n"
5846 	"\t            .execname   display a common_pid as a program name\n"
5847 	"\t            .syscall    display a syscall id as a syscall name\n"
5848 	"\t            .log2       display log2 value rather than raw number\n"
5849 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5850 	"\t            .usecs      display a common_timestamp in microseconds\n"
5851 	"\t            .percent    display a number as a percentage value\n"
5852 	"\t            .graph      display a bar-graph of a value\n\n"
5853 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5854 	"\t    trigger or to start a hist trigger but not log any events\n"
5855 	"\t    until told to do so.  'continue' can be used to start or\n"
5856 	"\t    restart a paused hist trigger.\n\n"
5857 	"\t    The 'clear' parameter will clear the contents of a running\n"
5858 	"\t    hist trigger and leave its current paused/active state\n"
5859 	"\t    unchanged.\n\n"
5860 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5861 	"\t    raw hitcount in the histogram.\n\n"
5862 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5863 	"\t    have one event conditionally start and stop another event's\n"
5864 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5865 	"\t    the enable_event and disable_event triggers.\n\n"
5866 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5867 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5868 	"\t        <handler>.<action>\n\n"
5869 	"\t    The available handlers are:\n\n"
5870 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5871 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5872 	"\t        onchange(var)            - invoke action if var changes\n\n"
5873 	"\t    The available actions are:\n\n"
5874 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5875 	"\t        save(field,...)                      - save current event fields\n"
5876 #ifdef CONFIG_TRACER_SNAPSHOT
5877 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5878 #endif
5879 #ifdef CONFIG_SYNTH_EVENTS
5880 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5881 	"\t  Write into this file to define/undefine new synthetic events.\n"
5882 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5883 #endif
5884 #endif
5885 ;
5886 
5887 static ssize_t
5888 tracing_readme_read(struct file *filp, char __user *ubuf,
5889 		       size_t cnt, loff_t *ppos)
5890 {
5891 	return simple_read_from_buffer(ubuf, cnt, ppos,
5892 					readme_msg, strlen(readme_msg));
5893 }
5894 
5895 static const struct file_operations tracing_readme_fops = {
5896 	.open		= tracing_open_generic,
5897 	.read		= tracing_readme_read,
5898 	.llseek		= generic_file_llseek,
5899 };
5900 
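/*
 * seq_file interface for the saved_tgids file: the seq position is used
 * directly as a PID index into tgid_map, entries with no recorded tgid
 * (zero) are skipped via SEQ_SKIP, and each remaining entry is printed
 * as a "<pid> <tgid>" pair.
 */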
5901 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5902 {
5903 	int pid = ++(*pos);
5904 
5905 	return trace_find_tgid_ptr(pid);
5906 }
5907 
5908 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5909 {
5910 	int pid = *pos;
5911 
5912 	return trace_find_tgid_ptr(pid);
5913 }
5914 
5915 static void saved_tgids_stop(struct seq_file *m, void *v)
5916 {
5917 }
5918 
5919 static int saved_tgids_show(struct seq_file *m, void *v)
5920 {
5921 	int *entry = (int *)v;
5922 	int pid = entry - tgid_map;
5923 	int tgid = *entry;
5924 
5925 	if (tgid == 0)
5926 		return SEQ_SKIP;
5927 
5928 	seq_printf(m, "%d %d\n", pid, tgid);
5929 	return 0;
5930 }
5931 
5932 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5933 	.start		= saved_tgids_start,
5934 	.stop		= saved_tgids_stop,
5935 	.next		= saved_tgids_next,
5936 	.show		= saved_tgids_show,
5937 };
5938 
5939 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5940 {
5941 	int ret;
5942 
5943 	ret = tracing_check_open_get_tr(NULL);
5944 	if (ret)
5945 		return ret;
5946 
5947 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5948 }
5949 
5950 
5951 static const struct file_operations tracing_saved_tgids_fops = {
5952 	.open		= tracing_saved_tgids_open,
5953 	.read		= seq_read,
5954 	.llseek		= seq_lseek,
5955 	.release	= seq_release,
5956 };
5957 
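/*
 * seq_file interface for the saved_cmdlines file. The start/stop
 * callbacks hold trace_cmdline_lock (an arch spinlock) with preemption
 * disabled while walking map_cmdline_to_pid, slots that still hold
 * NO_CMDLINE_MAP are skipped, and each entry is shown as "<pid> <comm>".
 */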
5958 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5959 {
5960 	unsigned int *ptr = v;
5961 
5962 	if (*pos || m->count)
5963 		ptr++;
5964 
5965 	(*pos)++;
5966 
5967 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5968 	     ptr++) {
5969 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5970 			continue;
5971 
5972 		return ptr;
5973 	}
5974 
5975 	return NULL;
5976 }
5977 
5978 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5979 {
5980 	void *v;
5981 	loff_t l = 0;
5982 
5983 	preempt_disable();
5984 	arch_spin_lock(&trace_cmdline_lock);
5985 
5986 	v = &savedcmd->map_cmdline_to_pid[0];
5987 	while (l <= *pos) {
5988 		v = saved_cmdlines_next(m, v, &l);
5989 		if (!v)
5990 			return NULL;
5991 	}
5992 
5993 	return v;
5994 }
5995 
5996 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5997 {
5998 	arch_spin_unlock(&trace_cmdline_lock);
5999 	preempt_enable();
6000 }
6001 
6002 static int saved_cmdlines_show(struct seq_file *m, void *v)
6003 {
6004 	char buf[TASK_COMM_LEN];
6005 	unsigned int *pid = v;
6006 
6007 	__trace_find_cmdline(*pid, buf);
6008 	seq_printf(m, "%d %s\n", *pid, buf);
6009 	return 0;
6010 }
6011 
6012 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6013 	.start		= saved_cmdlines_start,
6014 	.next		= saved_cmdlines_next,
6015 	.stop		= saved_cmdlines_stop,
6016 	.show		= saved_cmdlines_show,
6017 };
6018 
6019 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6020 {
6021 	int ret;
6022 
6023 	ret = tracing_check_open_get_tr(NULL);
6024 	if (ret)
6025 		return ret;
6026 
6027 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6028 }
6029 
6030 static const struct file_operations tracing_saved_cmdlines_fops = {
6031 	.open		= tracing_saved_cmdlines_open,
6032 	.read		= seq_read,
6033 	.llseek		= seq_lseek,
6034 	.release	= seq_release,
6035 };
6036 
6037 static ssize_t
6038 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6039 				 size_t cnt, loff_t *ppos)
6040 {
6041 	char buf[64];
6042 	int r;
6043 
6044 	preempt_disable();
6045 	arch_spin_lock(&trace_cmdline_lock);
6046 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6047 	arch_spin_unlock(&trace_cmdline_lock);
6048 	preempt_enable();
6049 
6050 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6051 }
6052 
6053 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6054 {
6055 	kfree(s->saved_cmdlines);
6056 	kfree(s->map_cmdline_to_pid);
6057 	kfree(s);
6058 }
6059 
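/*
 * Resize the saved cmdlines cache to @val entries: allocate a fresh
 * saved_cmdlines_buffer, swap it in under trace_cmdline_lock, and free
 * the old buffer outside the lock. Triggered from user space through
 * the saved_cmdlines_size file, e.g. "echo 1024 > saved_cmdlines_size".
 */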
6060 static int tracing_resize_saved_cmdlines(unsigned int val)
6061 {
6062 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
6063 
6064 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6065 	if (!s)
6066 		return -ENOMEM;
6067 
6068 	if (allocate_cmdlines_buffer(val, s) < 0) {
6069 		kfree(s);
6070 		return -ENOMEM;
6071 	}
6072 
6073 	preempt_disable();
6074 	arch_spin_lock(&trace_cmdline_lock);
6075 	savedcmd_temp = savedcmd;
6076 	savedcmd = s;
6077 	arch_spin_unlock(&trace_cmdline_lock);
6078 	preempt_enable();
6079 	free_saved_cmdlines_buffer(savedcmd_temp);
6080 
6081 	return 0;
6082 }
6083 
6084 static ssize_t
6085 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6086 				  size_t cnt, loff_t *ppos)
6087 {
6088 	unsigned long val;
6089 	int ret;
6090 
6091 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6092 	if (ret)
6093 		return ret;
6094 
6095 	/* must have at least 1 entry and at most PID_MAX_DEFAULT */
6096 	if (!val || val > PID_MAX_DEFAULT)
6097 		return -EINVAL;
6098 
6099 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
6100 	if (ret < 0)
6101 		return ret;
6102 
6103 	*ppos += cnt;
6104 
6105 	return cnt;
6106 }
6107 
6108 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6109 	.open		= tracing_open_generic,
6110 	.read		= tracing_saved_cmdlines_size_read,
6111 	.write		= tracing_saved_cmdlines_size_write,
6112 };
6113 
6114 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6115 static union trace_eval_map_item *
6116 update_eval_map(union trace_eval_map_item *ptr)
6117 {
6118 	if (!ptr->map.eval_string) {
6119 		if (ptr->tail.next) {
6120 			ptr = ptr->tail.next;
6121 			/* Set ptr to the next real item (skip head) */
6122 			ptr++;
6123 		} else
6124 			return NULL;
6125 	}
6126 	return ptr;
6127 }
6128 
6129 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6130 {
6131 	union trace_eval_map_item *ptr = v;
6132 
6133 	/*
6134 	 * Paranoid! If ptr points to end, we don't want to increment past it.
6135 	 * This really should never happen.
6136 	 */
6137 	(*pos)++;
6138 	ptr = update_eval_map(ptr);
6139 	if (WARN_ON_ONCE(!ptr))
6140 		return NULL;
6141 
6142 	ptr++;
6143 	ptr = update_eval_map(ptr);
6144 
6145 	return ptr;
6146 }
6147 
6148 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6149 {
6150 	union trace_eval_map_item *v;
6151 	loff_t l = 0;
6152 
6153 	mutex_lock(&trace_eval_mutex);
6154 
6155 	v = trace_eval_maps;
6156 	if (v)
6157 		v++;
6158 
6159 	while (v && l < *pos) {
6160 		v = eval_map_next(m, v, &l);
6161 	}
6162 
6163 	return v;
6164 }
6165 
6166 static void eval_map_stop(struct seq_file *m, void *v)
6167 {
6168 	mutex_unlock(&trace_eval_mutex);
6169 }
6170 
6171 static int eval_map_show(struct seq_file *m, void *v)
6172 {
6173 	union trace_eval_map_item *ptr = v;
6174 
6175 	seq_printf(m, "%s %ld (%s)\n",
6176 		   ptr->map.eval_string, ptr->map.eval_value,
6177 		   ptr->map.system);
6178 
6179 	return 0;
6180 }
6181 
6182 static const struct seq_operations tracing_eval_map_seq_ops = {
6183 	.start		= eval_map_start,
6184 	.next		= eval_map_next,
6185 	.stop		= eval_map_stop,
6186 	.show		= eval_map_show,
6187 };
6188 
6189 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6190 {
6191 	int ret;
6192 
6193 	ret = tracing_check_open_get_tr(NULL);
6194 	if (ret)
6195 		return ret;
6196 
6197 	return seq_open(filp, &tracing_eval_map_seq_ops);
6198 }
6199 
6200 static const struct file_operations tracing_eval_map_fops = {
6201 	.open		= tracing_eval_map_open,
6202 	.read		= seq_read,
6203 	.llseek		= seq_lseek,
6204 	.release	= seq_release,
6205 };
6206 
6207 static inline union trace_eval_map_item *
6208 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6209 {
6210 	/* Return tail of array given the head */
6211 	return ptr + ptr->head.length + 1;
6212 }
6213 
6214 static void
6215 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6216 			   int len)
6217 {
6218 	struct trace_eval_map **stop;
6219 	struct trace_eval_map **map;
6220 	union trace_eval_map_item *map_array;
6221 	union trace_eval_map_item *ptr;
6222 
6223 	stop = start + len;
6224 
6225 	/*
6226 	 * The trace_eval_maps contains the map plus a head and tail item,
6227 	 * where the head holds the module and length of array, and the
6228 	 * tail holds a pointer to the next list.
6229 	 */
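	/*
	 * Rough layout of one allocated block (len + 2 items):
	 *
	 *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
	 *
	 * The tail item is zeroed by the memset() at the end, so tail.next
	 * also serves as the NULL terminator until another block is
	 * chained onto it.
	 */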
6230 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6231 	if (!map_array) {
6232 		pr_warn("Unable to allocate trace eval mapping\n");
6233 		return;
6234 	}
6235 
6236 	mutex_lock(&trace_eval_mutex);
6237 
6238 	if (!trace_eval_maps)
6239 		trace_eval_maps = map_array;
6240 	else {
6241 		ptr = trace_eval_maps;
6242 		for (;;) {
6243 			ptr = trace_eval_jmp_to_tail(ptr);
6244 			if (!ptr->tail.next)
6245 				break;
6246 			ptr = ptr->tail.next;
6247 
6248 		}
6249 		ptr->tail.next = map_array;
6250 	}
6251 	map_array->head.mod = mod;
6252 	map_array->head.length = len;
6253 	map_array++;
6254 
6255 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6256 		map_array->map = **map;
6257 		map_array++;
6258 	}
6259 	memset(map_array, 0, sizeof(*map_array));
6260 
6261 	mutex_unlock(&trace_eval_mutex);
6262 }
6263 
6264 static void trace_create_eval_file(struct dentry *d_tracer)
6265 {
6266 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6267 			  NULL, &tracing_eval_map_fops);
6268 }
6269 
6270 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6271 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6272 static inline void trace_insert_eval_map_file(struct module *mod,
6273 			      struct trace_eval_map **start, int len) { }
6274 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6275 
6276 static void trace_insert_eval_map(struct module *mod,
6277 				  struct trace_eval_map **start, int len)
6278 {
6279 	struct trace_eval_map **map;
6280 
6281 	if (len <= 0)
6282 		return;
6283 
6284 	map = start;
6285 
6286 	trace_event_eval_update(map, len);
6287 
6288 	trace_insert_eval_map_file(mod, start, len);
6289 }
6290 
6291 static ssize_t
6292 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6293 		       size_t cnt, loff_t *ppos)
6294 {
6295 	struct trace_array *tr = filp->private_data;
6296 	char buf[MAX_TRACER_SIZE+2];
6297 	int r;
6298 
6299 	mutex_lock(&trace_types_lock);
6300 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6301 	mutex_unlock(&trace_types_lock);
6302 
6303 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6304 }
6305 
6306 int tracer_init(struct tracer *t, struct trace_array *tr)
6307 {
6308 	tracing_reset_online_cpus(&tr->array_buffer);
6309 	return t->init(tr);
6310 }
6311 
6312 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6313 {
6314 	int cpu;
6315 
6316 	for_each_tracing_cpu(cpu)
6317 		per_cpu_ptr(buf->data, cpu)->entries = val;
6318 }
6319 
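/*
 * Record the ring buffer's actual size back into the per-CPU
 * array_buffer bookkeeping, either for one CPU or, in the
 * RING_BUFFER_ALL_CPUS case, for every CPU using the size reported
 * for CPU 0.
 */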
6320 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6321 {
6322 	if (cpu == RING_BUFFER_ALL_CPUS) {
6323 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6324 	} else {
6325 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6326 	}
6327 }
6328 
6329 #ifdef CONFIG_TRACER_MAX_TRACE
6330 /* resize @trace_buf's per-CPU entries to match @size_buf's entries */
6331 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6332 					struct array_buffer *size_buf, int cpu_id)
6333 {
6334 	int cpu, ret = 0;
6335 
6336 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6337 		for_each_tracing_cpu(cpu) {
6338 			ret = ring_buffer_resize(trace_buf->buffer,
6339 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6340 			if (ret < 0)
6341 				break;
6342 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6343 				per_cpu_ptr(size_buf->data, cpu)->entries;
6344 		}
6345 	} else {
6346 		ret = ring_buffer_resize(trace_buf->buffer,
6347 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6348 		if (ret == 0)
6349 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6350 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6351 	}
6352 
6353 	return ret;
6354 }
6355 #endif /* CONFIG_TRACER_MAX_TRACE */
6356 
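/*
 * Resize the main ring buffer (and the max/snapshot buffer when one is
 * allocated) to @size for @cpu or for all CPUs. Tracing is stopped for
 * the duration of the resize. If resizing the max buffer fails, the
 * main buffer is put back to its previous size; if even that fails,
 * tracing is disabled entirely.
 */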
6357 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6358 					unsigned long size, int cpu)
6359 {
6360 	int ret;
6361 
6362 	/*
6363 	 * If kernel or user changes the size of the ring buffer
6364 	 * we use the size that was given, and we can forget about
6365 	 * expanding it later.
6366 	 */
6367 	ring_buffer_expanded = true;
6368 
6369 	/* May be called before buffers are initialized */
6370 	if (!tr->array_buffer.buffer)
6371 		return 0;
6372 
6373 	/* Do not allow tracing while resizing ring buffer */
6374 	tracing_stop_tr(tr);
6375 
6376 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6377 	if (ret < 0)
6378 		goto out_start;
6379 
6380 #ifdef CONFIG_TRACER_MAX_TRACE
6381 	if (!tr->allocated_snapshot)
6382 		goto out;
6383 
6384 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6385 	if (ret < 0) {
6386 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6387 						     &tr->array_buffer, cpu);
6388 		if (r < 0) {
6389 			/*
6390 			 * AARGH! We are left with a max buffer
6391 			 * of a different size!!!!
6392 			 * The max buffer is our "snapshot" buffer.
6393 			 * When a tracer needs a snapshot (one of the
6394 			 * latency tracers), it swaps the max buffer
6395 			 * with the saved snapshot. We succeeded in
6396 			 * updating the size of the main buffer, but failed to
6397 			 * update the size of the max buffer. But when we tried
6398 			 * to reset the main buffer to the original size, we
6399 			 * failed there too. This is very unlikely to
6400 			 * happen, but if it does, warn and kill all
6401 			 * tracing.
6402 			 */
6403 			WARN_ON(1);
6404 			tracing_disabled = 1;
6405 		}
6406 		goto out_start;
6407 	}
6408 
6409 	update_buffer_entries(&tr->max_buffer, cpu);
6410 
6411  out:
6412 #endif /* CONFIG_TRACER_MAX_TRACE */
6413 
6414 	update_buffer_entries(&tr->array_buffer, cpu);
6415  out_start:
6416 	tracing_start_tr(tr);
6417 	return ret;
6418 }
6419 
6420 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6421 				  unsigned long size, int cpu_id)
6422 {
6423 	int ret;
6424 
6425 	mutex_lock(&trace_types_lock);
6426 
6427 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6428 		/* make sure this cpu is enabled in the mask */
6429 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6430 			ret = -EINVAL;
6431 			goto out;
6432 		}
6433 	}
6434 
6435 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6436 	if (ret < 0)
6437 		ret = -ENOMEM;
6438 
6439 out:
6440 	mutex_unlock(&trace_types_lock);
6441 
6442 	return ret;
6443 }
6444 
6445 
6446 /**
6447  * tracing_update_buffers - used by tracing facility to expand ring buffers
6448  *
6449  * To save memory when tracing is never used on a system that has it
6450  * configured in, the ring buffers are set to a minimum size. Once
6451  * a user starts to use the tracing facility, they need to grow
6452  * to their default size.
6453  *
6454  * This function is to be called when a tracer is about to be used.
6455  */
6456 int tracing_update_buffers(void)
6457 {
6458 	int ret = 0;
6459 
6460 	mutex_lock(&trace_types_lock);
6461 	if (!ring_buffer_expanded)
6462 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6463 						RING_BUFFER_ALL_CPUS);
6464 	mutex_unlock(&trace_types_lock);
6465 
6466 	return ret;
6467 }
6468 
6469 struct trace_option_dentry;
6470 
6471 static void
6472 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6473 
6474 /*
6475  * Used to clear out the tracer before deletion of an instance.
6476  * Must have trace_types_lock held.
6477  */
6478 static void tracing_set_nop(struct trace_array *tr)
6479 {
6480 	if (tr->current_trace == &nop_trace)
6481 		return;
6482 
6483 	tr->current_trace->enabled--;
6484 
6485 	if (tr->current_trace->reset)
6486 		tr->current_trace->reset(tr);
6487 
6488 	tr->current_trace = &nop_trace;
6489 }
6490 
6491 static bool tracer_options_updated;
6492 
6493 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6494 {
6495 	/* Only enable if the directory has been created already. */
6496 	if (!tr->dir)
6497 		return;
6498 
6499 	/* Only create trace option files after update_tracer_options finishes */
6500 	if (!tracer_options_updated)
6501 		return;
6502 
6503 	create_trace_option_files(tr, t);
6504 }
6505 
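/*
 * Switch @tr to the tracer named @buf: expand the ring buffer if it is
 * still at its boot-time minimum, look the name up in trace_types,
 * refuse the change while trace_pipe readers hold a reference, tear
 * down the current tracer, allocate or free the snapshot (max) buffer
 * as the new tracer requires, then init and enable the new tracer.
 */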
6506 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6507 {
6508 	struct tracer *t;
6509 #ifdef CONFIG_TRACER_MAX_TRACE
6510 	bool had_max_tr;
6511 #endif
6512 	int ret = 0;
6513 
6514 	mutex_lock(&trace_types_lock);
6515 
6516 	if (!ring_buffer_expanded) {
6517 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6518 						RING_BUFFER_ALL_CPUS);
6519 		if (ret < 0)
6520 			goto out;
6521 		ret = 0;
6522 	}
6523 
6524 	for (t = trace_types; t; t = t->next) {
6525 		if (strcmp(t->name, buf) == 0)
6526 			break;
6527 	}
6528 	if (!t) {
6529 		ret = -EINVAL;
6530 		goto out;
6531 	}
6532 	if (t == tr->current_trace)
6533 		goto out;
6534 
6535 #ifdef CONFIG_TRACER_SNAPSHOT
6536 	if (t->use_max_tr) {
6537 		local_irq_disable();
6538 		arch_spin_lock(&tr->max_lock);
6539 		if (tr->cond_snapshot)
6540 			ret = -EBUSY;
6541 		arch_spin_unlock(&tr->max_lock);
6542 		local_irq_enable();
6543 		if (ret)
6544 			goto out;
6545 	}
6546 #endif
6547 	/* Some tracers are not allowed to run from the kernel command line */
6548 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6549 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6550 			t->name);
6551 		goto out;
6552 	}
6553 
6554 	/* Some tracers are only allowed for the top level buffer */
6555 	if (!trace_ok_for_array(t, tr)) {
6556 		ret = -EINVAL;
6557 		goto out;
6558 	}
6559 
6560 	/* If trace pipe files are being read, we can't change the tracer */
6561 	if (tr->trace_ref) {
6562 		ret = -EBUSY;
6563 		goto out;
6564 	}
6565 
6566 	trace_branch_disable();
6567 
6568 	tr->current_trace->enabled--;
6569 
6570 	if (tr->current_trace->reset)
6571 		tr->current_trace->reset(tr);
6572 
6573 #ifdef CONFIG_TRACER_MAX_TRACE
6574 	had_max_tr = tr->current_trace->use_max_tr;
6575 
6576 	/* Current trace needs to be nop_trace before synchronize_rcu */
6577 	tr->current_trace = &nop_trace;
6578 
6579 	if (had_max_tr && !t->use_max_tr) {
6580 		/*
6581 		 * We need to make sure that the update_max_tr sees that
6582 		 * current_trace changed to nop_trace to keep it from
6583 		 * swapping the buffers after we resize it.
6584 		 * update_max_tr() is called with interrupts disabled,
6585 		 * so a synchronize_rcu() is sufficient.
6586 		 */
6587 		synchronize_rcu();
6588 		free_snapshot(tr);
6589 	}
6590 
6591 	if (t->use_max_tr && !tr->allocated_snapshot) {
6592 		ret = tracing_alloc_snapshot_instance(tr);
6593 		if (ret < 0)
6594 			goto out;
6595 	}
6596 #else
6597 	tr->current_trace = &nop_trace;
6598 #endif
6599 
6600 	if (t->init) {
6601 		ret = tracer_init(t, tr);
6602 		if (ret)
6603 			goto out;
6604 	}
6605 
6606 	tr->current_trace = t;
6607 	tr->current_trace->enabled++;
6608 	trace_branch_enable(tr);
6609  out:
6610 	mutex_unlock(&trace_types_lock);
6611 
6612 	return ret;
6613 }
6614 
6615 static ssize_t
6616 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6617 			size_t cnt, loff_t *ppos)
6618 {
6619 	struct trace_array *tr = filp->private_data;
6620 	char buf[MAX_TRACER_SIZE+1];
6621 	char *name;
6622 	size_t ret;
6623 	int err;
6624 
6625 	ret = cnt;
6626 
6627 	if (cnt > MAX_TRACER_SIZE)
6628 		cnt = MAX_TRACER_SIZE;
6629 
6630 	if (copy_from_user(buf, ubuf, cnt))
6631 		return -EFAULT;
6632 
6633 	buf[cnt] = 0;
6634 
6635 	name = strim(buf);
6636 
6637 	err = tracing_set_tracer(tr, name);
6638 	if (err)
6639 		return err;
6640 
6641 	*ppos += ret;
6642 
6643 	return ret;
6644 }
6645 
6646 static ssize_t
6647 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6648 		   size_t cnt, loff_t *ppos)
6649 {
6650 	char buf[64];
6651 	int r;
6652 
6653 	r = snprintf(buf, sizeof(buf), "%ld\n",
6654 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6655 	if (r > sizeof(buf))
6656 		r = sizeof(buf);
6657 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6658 }
6659 
6660 static ssize_t
6661 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6662 		    size_t cnt, loff_t *ppos)
6663 {
6664 	unsigned long val;
6665 	int ret;
6666 
6667 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6668 	if (ret)
6669 		return ret;
6670 
6671 	*ptr = val * 1000;
6672 
6673 	return cnt;
6674 }
6675 
6676 static ssize_t
6677 tracing_thresh_read(struct file *filp, char __user *ubuf,
6678 		    size_t cnt, loff_t *ppos)
6679 {
6680 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6681 }
6682 
6683 static ssize_t
6684 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6685 		     size_t cnt, loff_t *ppos)
6686 {
6687 	struct trace_array *tr = filp->private_data;
6688 	int ret;
6689 
6690 	mutex_lock(&trace_types_lock);
6691 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6692 	if (ret < 0)
6693 		goto out;
6694 
6695 	if (tr->current_trace->update_thresh) {
6696 		ret = tr->current_trace->update_thresh(tr);
6697 		if (ret < 0)
6698 			goto out;
6699 	}
6700 
6701 	ret = cnt;
6702 out:
6703 	mutex_unlock(&trace_types_lock);
6704 
6705 	return ret;
6706 }
6707 
6708 #ifdef CONFIG_TRACER_MAX_TRACE
6709 
6710 static ssize_t
6711 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6712 		     size_t cnt, loff_t *ppos)
6713 {
6714 	struct trace_array *tr = filp->private_data;
6715 
6716 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6717 }
6718 
6719 static ssize_t
6720 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6721 		      size_t cnt, loff_t *ppos)
6722 {
6723 	struct trace_array *tr = filp->private_data;
6724 
6725 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6726 }
6727 
6728 #endif
6729 
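/*
 * Each trace_pipe may only have one reader per CPU, tracked by
 * tr->pipe_cpumask. Opening the global trace_pipe claims every CPU at
 * once, so it is only allowed while no per-CPU pipe is open, and a
 * per-CPU open fails while its bit is already claimed. Returns -EBUSY
 * on conflict.
 */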
6730 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6731 {
6732 	if (cpu == RING_BUFFER_ALL_CPUS) {
6733 		if (cpumask_empty(tr->pipe_cpumask)) {
6734 			cpumask_setall(tr->pipe_cpumask);
6735 			return 0;
6736 		}
6737 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6738 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6739 		return 0;
6740 	}
6741 	return -EBUSY;
6742 }
6743 
6744 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6745 {
6746 	if (cpu == RING_BUFFER_ALL_CPUS) {
6747 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6748 		cpumask_clear(tr->pipe_cpumask);
6749 	} else {
6750 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6751 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6752 	}
6753 }
6754 
6755 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6756 {
6757 	struct trace_array *tr = inode->i_private;
6758 	struct trace_iterator *iter;
6759 	int cpu;
6760 	int ret;
6761 
6762 	ret = tracing_check_open_get_tr(tr);
6763 	if (ret)
6764 		return ret;
6765 
6766 	mutex_lock(&trace_types_lock);
6767 	cpu = tracing_get_cpu(inode);
6768 	ret = open_pipe_on_cpu(tr, cpu);
6769 	if (ret)
6770 		goto fail_pipe_on_cpu;
6771 
6772 	/* create a buffer to store the information to pass to userspace */
6773 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6774 	if (!iter) {
6775 		ret = -ENOMEM;
6776 		goto fail_alloc_iter;
6777 	}
6778 
6779 	trace_seq_init(&iter->seq);
6780 	iter->trace = tr->current_trace;
6781 
6782 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6783 		ret = -ENOMEM;
6784 		goto fail;
6785 	}
6786 
6787 	/* trace pipe does not show start of buffer */
6788 	cpumask_setall(iter->started);
6789 
6790 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6791 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6792 
6793 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6794 	if (trace_clocks[tr->clock_id].in_ns)
6795 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6796 
6797 	iter->tr = tr;
6798 	iter->array_buffer = &tr->array_buffer;
6799 	iter->cpu_file = cpu;
6800 	mutex_init(&iter->mutex);
6801 	filp->private_data = iter;
6802 
6803 	if (iter->trace->pipe_open)
6804 		iter->trace->pipe_open(iter);
6805 
6806 	nonseekable_open(inode, filp);
6807 
6808 	tr->trace_ref++;
6809 
6810 	mutex_unlock(&trace_types_lock);
6811 	return ret;
6812 
6813 fail:
6814 	kfree(iter);
6815 fail_alloc_iter:
6816 	close_pipe_on_cpu(tr, cpu);
6817 fail_pipe_on_cpu:
6818 	__trace_array_put(tr);
6819 	mutex_unlock(&trace_types_lock);
6820 	return ret;
6821 }
6822 
6823 static int tracing_release_pipe(struct inode *inode, struct file *file)
6824 {
6825 	struct trace_iterator *iter = file->private_data;
6826 	struct trace_array *tr = inode->i_private;
6827 
6828 	mutex_lock(&trace_types_lock);
6829 
6830 	tr->trace_ref--;
6831 
6832 	if (iter->trace->pipe_close)
6833 		iter->trace->pipe_close(iter);
6834 	close_pipe_on_cpu(tr, iter->cpu_file);
6835 	mutex_unlock(&trace_types_lock);
6836 
6837 	free_trace_iter_content(iter);
6838 	kfree(iter);
6839 
6840 	trace_array_put(tr);
6841 
6842 	return 0;
6843 }
6844 
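/*
 * Poll support for trace_pipe: a static iterator (snapshot/"trace"
 * style) or TRACE_ITER_BLOCK reports readable immediately; otherwise
 * defer to the ring buffer's poll_wait, which honors buffer_percent.
 */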
6845 static __poll_t
6846 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6847 {
6848 	struct trace_array *tr = iter->tr;
6849 
6850 	/* Iterators are static; they should be either filled or empty */
6851 	if (trace_buffer_iter(iter, iter->cpu_file))
6852 		return EPOLLIN | EPOLLRDNORM;
6853 
6854 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6855 		/*
6856 		 * Always select as readable when in blocking mode
6857 		 */
6858 		return EPOLLIN | EPOLLRDNORM;
6859 	else
6860 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6861 					     filp, poll_table, iter->tr->buffer_percent);
6862 }
6863 
6864 static __poll_t
6865 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6866 {
6867 	struct trace_iterator *iter = filp->private_data;
6868 
6869 	return trace_poll(iter, filp, poll_table);
6870 }
6871 
6872 /* Must be called with iter->mutex held. */
6873 static int tracing_wait_pipe(struct file *filp)
6874 {
6875 	struct trace_iterator *iter = filp->private_data;
6876 	int ret;
6877 
6878 	while (trace_empty(iter)) {
6879 
6880 		if ((filp->f_flags & O_NONBLOCK)) {
6881 			return -EAGAIN;
6882 		}
6883 
6884 		/*
6885 		 * We block until we read something and tracing is disabled.
6886 		 * We still block if tracing is disabled, but we have never
6887 		 * read anything. This allows a user to cat this file, and
6888 		 * then enable tracing. But after we have read something,
6889 		 * we give an EOF when tracing is again disabled.
6890 		 *
6891 		 * iter->pos will be 0 if we haven't read anything.
6892 		 */
6893 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6894 			break;
6895 
6896 		mutex_unlock(&iter->mutex);
6897 
6898 		ret = wait_on_pipe(iter, 0);
6899 
6900 		mutex_lock(&iter->mutex);
6901 
6902 		if (ret)
6903 			return ret;
6904 	}
6905 
6906 	return 1;
6907 }
6908 
6909 /*
6910  * Consumer reader.
6911  */
6912 static ssize_t
6913 tracing_read_pipe(struct file *filp, char __user *ubuf,
6914 		  size_t cnt, loff_t *ppos)
6915 {
6916 	struct trace_iterator *iter = filp->private_data;
6917 	ssize_t sret;
6918 
6919 	/*
6920 	 * Avoid more than one consumer on a single file descriptor
6921 	 * This is just a matter of trace coherency; the ring buffer itself
6922 	 * is protected.
6923 	 */
6924 	mutex_lock(&iter->mutex);
6925 
6926 	/* return any leftover data */
6927 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6928 	if (sret != -EBUSY)
6929 		goto out;
6930 
6931 	trace_seq_init(&iter->seq);
6932 
6933 	if (iter->trace->read) {
6934 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6935 		if (sret)
6936 			goto out;
6937 	}
6938 
6939 waitagain:
6940 	sret = tracing_wait_pipe(filp);
6941 	if (sret <= 0)
6942 		goto out;
6943 
6944 	/* stop when tracing is finished */
6945 	if (trace_empty(iter)) {
6946 		sret = 0;
6947 		goto out;
6948 	}
6949 
6950 	if (cnt >= PAGE_SIZE)
6951 		cnt = PAGE_SIZE - 1;
6952 
6953 	/* reset all but tr, trace, and overruns */
6954 	trace_iterator_reset(iter);
6955 	cpumask_clear(iter->started);
6956 	trace_seq_init(&iter->seq);
6957 
6958 	trace_event_read_lock();
6959 	trace_access_lock(iter->cpu_file);
6960 	while (trace_find_next_entry_inc(iter) != NULL) {
6961 		enum print_line_t ret;
6962 		int save_len = iter->seq.seq.len;
6963 
6964 		ret = print_trace_line(iter);
6965 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6966 			/*
6967 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6968 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6969 			 * In this case, we need to consume it, otherwise the loop will peek
6970 			 * this event again next time, resulting in an infinite loop.
6971 			 */
6972 			if (save_len == 0) {
6973 				iter->seq.full = 0;
6974 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6975 				trace_consume(iter);
6976 				break;
6977 			}
6978 
6979 			/* In other cases, don't print partial lines */
6980 			iter->seq.seq.len = save_len;
6981 			break;
6982 		}
6983 		if (ret != TRACE_TYPE_NO_CONSUME)
6984 			trace_consume(iter);
6985 
6986 		if (trace_seq_used(&iter->seq) >= cnt)
6987 			break;
6988 
6989 		/*
6990 		 * The full flag being set means we reached the trace_seq buffer
6991 		 * size and should have left via the partial output condition above.
6992 		 * One of the trace_seq_* functions is not being used properly.
6993 		 */
6994 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6995 			  iter->ent->type);
6996 	}
6997 	trace_access_unlock(iter->cpu_file);
6998 	trace_event_read_unlock();
6999 
7000 	/* Now copy what we have to the user */
7001 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
7002 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
7003 		trace_seq_init(&iter->seq);
7004 
7005 	/*
7006 	 * If there was nothing to send to user, in spite of consuming trace
7007 	 * entries, go back to wait for more entries.
7008 	 */
7009 	if (sret == -EBUSY)
7010 		goto waitagain;
7011 
7012 out:
7013 	mutex_unlock(&iter->mutex);
7014 
7015 	return sret;
7016 }
7017 
7018 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7019 				     unsigned int idx)
7020 {
7021 	__free_page(spd->pages[idx]);
7022 }
7023 
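/*
 * Format trace entries into iter->seq for one splice page: keep
 * printing lines until the page-sized seq buffer would overflow, @rem
 * bytes have been produced, or the buffer runs out of entries.
 * Returns how much of @rem is left over.
 */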
7024 static size_t
7025 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7026 {
7027 	size_t count;
7028 	int save_len;
7029 	int ret;
7030 
7031 	/* Seq buffer is page-sized, exactly what we need. */
7032 	for (;;) {
7033 		save_len = iter->seq.seq.len;
7034 		ret = print_trace_line(iter);
7035 
7036 		if (trace_seq_has_overflowed(&iter->seq)) {
7037 			iter->seq.seq.len = save_len;
7038 			break;
7039 		}
7040 
7041 		/*
7042 		 * This should not be hit, because it should only
7043 		 * be set if the iter->seq overflowed. But check it
7044 		 * anyway to be safe.
7045 		 */
7046 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
7047 			iter->seq.seq.len = save_len;
7048 			break;
7049 		}
7050 
7051 		count = trace_seq_used(&iter->seq) - save_len;
7052 		if (rem < count) {
7053 			rem = 0;
7054 			iter->seq.seq.len = save_len;
7055 			break;
7056 		}
7057 
7058 		if (ret != TRACE_TYPE_NO_CONSUME)
7059 			trace_consume(iter);
7060 		rem -= count;
7061 		if (!trace_find_next_entry_inc(iter))	{
7062 			rem = 0;
7063 			iter->ent = NULL;
7064 			break;
7065 		}
7066 	}
7067 
7068 	return rem;
7069 }
7070 
7071 static ssize_t tracing_splice_read_pipe(struct file *filp,
7072 					loff_t *ppos,
7073 					struct pipe_inode_info *pipe,
7074 					size_t len,
7075 					unsigned int flags)
7076 {
7077 	struct page *pages_def[PIPE_DEF_BUFFERS];
7078 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7079 	struct trace_iterator *iter = filp->private_data;
7080 	struct splice_pipe_desc spd = {
7081 		.pages		= pages_def,
7082 		.partial	= partial_def,
7083 		.nr_pages	= 0, /* This gets updated below. */
7084 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7085 		.ops		= &default_pipe_buf_ops,
7086 		.spd_release	= tracing_spd_release_pipe,
7087 	};
7088 	ssize_t ret;
7089 	size_t rem;
7090 	unsigned int i;
7091 
7092 	if (splice_grow_spd(pipe, &spd))
7093 		return -ENOMEM;
7094 
7095 	mutex_lock(&iter->mutex);
7096 
7097 	if (iter->trace->splice_read) {
7098 		ret = iter->trace->splice_read(iter, filp,
7099 					       ppos, pipe, len, flags);
7100 		if (ret)
7101 			goto out_err;
7102 	}
7103 
7104 	ret = tracing_wait_pipe(filp);
7105 	if (ret <= 0)
7106 		goto out_err;
7107 
7108 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7109 		ret = -EFAULT;
7110 		goto out_err;
7111 	}
7112 
7113 	trace_event_read_lock();
7114 	trace_access_lock(iter->cpu_file);
7115 
7116 	/* Fill as many pages as possible. */
7117 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7118 		spd.pages[i] = alloc_page(GFP_KERNEL);
7119 		if (!spd.pages[i])
7120 			break;
7121 
7122 		rem = tracing_fill_pipe_page(rem, iter);
7123 
7124 		/* Copy the data into the page, so we can start over. */
7125 		ret = trace_seq_to_buffer(&iter->seq,
7126 					  page_address(spd.pages[i]),
7127 					  trace_seq_used(&iter->seq));
7128 		if (ret < 0) {
7129 			__free_page(spd.pages[i]);
7130 			break;
7131 		}
7132 		spd.partial[i].offset = 0;
7133 		spd.partial[i].len = trace_seq_used(&iter->seq);
7134 
7135 		trace_seq_init(&iter->seq);
7136 	}
7137 
7138 	trace_access_unlock(iter->cpu_file);
7139 	trace_event_read_unlock();
7140 	mutex_unlock(&iter->mutex);
7141 
7142 	spd.nr_pages = i;
7143 
7144 	if (i)
7145 		ret = splice_to_pipe(pipe, &spd);
7146 	else
7147 		ret = 0;
7148 out:
7149 	splice_shrink_spd(&spd);
7150 	return ret;
7151 
7152 out_err:
7153 	mutex_unlock(&iter->mutex);
7154 	goto out;
7155 }
7156 
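/*
 * Reading buffer_size_kb reports the per-CPU ring buffer size in KB.
 * For the top-level (all CPUs) file a single value is printed when
 * every CPU is the same size, "X" when they differ, and
 * "(expanded: N)" is appended while the buffer is still at its
 * boot-time minimum size.
 */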
7157 static ssize_t
7158 tracing_entries_read(struct file *filp, char __user *ubuf,
7159 		     size_t cnt, loff_t *ppos)
7160 {
7161 	struct inode *inode = file_inode(filp);
7162 	struct trace_array *tr = inode->i_private;
7163 	int cpu = tracing_get_cpu(inode);
7164 	char buf[64];
7165 	int r = 0;
7166 	ssize_t ret;
7167 
7168 	mutex_lock(&trace_types_lock);
7169 
7170 	if (cpu == RING_BUFFER_ALL_CPUS) {
7171 		int cpu, buf_size_same;
7172 		unsigned long size;
7173 
7174 		size = 0;
7175 		buf_size_same = 1;
7176 		/* check if all cpu sizes are the same */
7177 		for_each_tracing_cpu(cpu) {
7178 			/* fill in the size from first enabled cpu */
7179 			if (size == 0)
7180 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7181 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7182 				buf_size_same = 0;
7183 				break;
7184 			}
7185 		}
7186 
7187 		if (buf_size_same) {
7188 			if (!ring_buffer_expanded)
7189 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7190 					    size >> 10,
7191 					    trace_buf_size >> 10);
7192 			else
7193 				r = sprintf(buf, "%lu\n", size >> 10);
7194 		} else
7195 			r = sprintf(buf, "X\n");
7196 	} else
7197 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7198 
7199 	mutex_unlock(&trace_types_lock);
7200 
7201 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7202 	return ret;
7203 }
7204 
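/*
 * Writing buffer_size_kb resizes the ring buffer: the value is taken
 * in KB (shifted left by 10) and applied to a single CPU or to all
 * CPUs depending on which file was written, e.g.
 * "echo 4096 > buffer_size_kb".
 */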
7205 static ssize_t
7206 tracing_entries_write(struct file *filp, const char __user *ubuf,
7207 		      size_t cnt, loff_t *ppos)
7208 {
7209 	struct inode *inode = file_inode(filp);
7210 	struct trace_array *tr = inode->i_private;
7211 	unsigned long val;
7212 	int ret;
7213 
7214 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7215 	if (ret)
7216 		return ret;
7217 
7218 	/* must have at least 1 entry */
7219 	if (!val)
7220 		return -EINVAL;
7221 
7222 	/* value is in KB */
7223 	val <<= 10;
7224 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7225 	if (ret < 0)
7226 		return ret;
7227 
7228 	*ppos += cnt;
7229 
7230 	return cnt;
7231 }
7232 
7233 static ssize_t
7234 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7235 				size_t cnt, loff_t *ppos)
7236 {
7237 	struct trace_array *tr = filp->private_data;
7238 	char buf[64];
7239 	int r, cpu;
7240 	unsigned long size = 0, expanded_size = 0;
7241 
7242 	mutex_lock(&trace_types_lock);
7243 	for_each_tracing_cpu(cpu) {
7244 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7245 		if (!ring_buffer_expanded)
7246 			expanded_size += trace_buf_size >> 10;
7247 	}
7248 	if (ring_buffer_expanded)
7249 		r = sprintf(buf, "%lu\n", size);
7250 	else
7251 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7252 	mutex_unlock(&trace_types_lock);
7253 
7254 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7255 }
7256 
7257 static ssize_t
7258 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7259 			  size_t cnt, loff_t *ppos)
7260 {
7261 	/*
7262 	 * There is no need to read what the user has written; this function
7263 	 * exists just to make sure that "echo" into the file does not fail.
7264 	 */
7265 
7266 	*ppos += cnt;
7267 
7268 	return cnt;
7269 }
7270 
7271 static int
7272 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7273 {
7274 	struct trace_array *tr = inode->i_private;
7275 
7276 	/* disable tracing ? */
7277 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7278 		tracer_tracing_off(tr);
7279 	/* resize the ring buffer to 0 */
7280 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7281 
7282 	trace_array_put(tr);
7283 
7284 	return 0;
7285 }
7286 
7287 static ssize_t
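/*
 * Writes to trace_marker become TRACE_PRINT events in the ring buffer,
 * e.g. "echo hello > trace_marker". The user copy is done with the
 * event already reserved, so a faulting copy stores the literal
 * "<faulted>" string instead; a trailing newline is appended when the
 * caller did not supply one, and any triggers attached to the
 * trace_marker event file are run.
 */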
7288 tracing_mark_write(struct file *filp, const char __user *ubuf,
7289 					size_t cnt, loff_t *fpos)
7290 {
7291 	struct trace_array *tr = filp->private_data;
7292 	struct ring_buffer_event *event;
7293 	enum event_trigger_type tt = ETT_NONE;
7294 	struct trace_buffer *buffer;
7295 	struct print_entry *entry;
7296 	ssize_t written;
7297 	int size;
7298 	int len;
7299 
7300 /* Used in tracing_mark_raw_write() as well */
7301 #define FAULTED_STR "<faulted>"
7302 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7303 
7304 	if (tracing_disabled)
7305 		return -EINVAL;
7306 
7307 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7308 		return -EINVAL;
7309 
7310 	if (cnt > TRACE_BUF_SIZE)
7311 		cnt = TRACE_BUF_SIZE;
7312 
7313 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7314 
7315 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7316 
7317 	/* If less than "<faulted>", then make sure we can still add that */
7318 	if (cnt < FAULTED_SIZE)
7319 		size += FAULTED_SIZE - cnt;
7320 
7321 	buffer = tr->array_buffer.buffer;
7322 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7323 					    tracing_gen_ctx());
7324 	if (unlikely(!event))
7325 		/* Ring buffer disabled, return as if not open for write */
7326 		return -EBADF;
7327 
7328 	entry = ring_buffer_event_data(event);
7329 	entry->ip = _THIS_IP_;
7330 
7331 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7332 	if (len) {
7333 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7334 		cnt = FAULTED_SIZE;
7335 		written = -EFAULT;
7336 	} else
7337 		written = cnt;
7338 
7339 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7340 		/* do not add \n before testing triggers, but add \0 */
7341 		entry->buf[cnt] = '\0';
7342 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7343 	}
7344 
7345 	if (entry->buf[cnt - 1] != '\n') {
7346 		entry->buf[cnt] = '\n';
7347 		entry->buf[cnt + 1] = '\0';
7348 	} else
7349 		entry->buf[cnt] = '\0';
7350 
7351 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7352 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7353 	__buffer_unlock_commit(buffer, event);
7354 
7355 	if (tt)
7356 		event_triggers_post_call(tr->trace_marker_file, tt);
7357 
7358 	return written;
7359 }
7360 
7361 /* Limit it for now to 3K (including tag) */
7362 #define RAW_DATA_MAX_SIZE (1024*3)
7363 
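/*
 * trace_marker_raw takes binary data: the first sizeof(int) bytes are
 * a tag id and the rest is an opaque payload, stored as a
 * TRACE_RAW_DATA event (at most RAW_DATA_MAX_SIZE bytes). On a faulted
 * copy the id is set to -1 and the payload replaced with "<faulted>".
 */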
7364 static ssize_t
7365 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7366 					size_t cnt, loff_t *fpos)
7367 {
7368 	struct trace_array *tr = filp->private_data;
7369 	struct ring_buffer_event *event;
7370 	struct trace_buffer *buffer;
7371 	struct raw_data_entry *entry;
7372 	ssize_t written;
7373 	int size;
7374 	int len;
7375 
7376 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7377 
7378 	if (tracing_disabled)
7379 		return -EINVAL;
7380 
7381 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7382 		return -EINVAL;
7383 
7384 	/* The marker must at least have a tag id */
7385 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7386 		return -EINVAL;
7387 
7388 	if (cnt > TRACE_BUF_SIZE)
7389 		cnt = TRACE_BUF_SIZE;
7390 
7391 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7392 
7393 	size = sizeof(*entry) + cnt;
7394 	if (cnt < FAULT_SIZE_ID)
7395 		size += FAULT_SIZE_ID - cnt;
7396 
7397 	buffer = tr->array_buffer.buffer;
7398 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7399 					    tracing_gen_ctx());
7400 	if (!event)
7401 		/* Ring buffer disabled, return as if not open for write */
7402 		return -EBADF;
7403 
7404 	entry = ring_buffer_event_data(event);
7405 
7406 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7407 	if (len) {
7408 		entry->id = -1;
7409 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7410 		written = -EFAULT;
7411 	} else
7412 		written = cnt;
7413 
7414 	__buffer_unlock_commit(buffer, event);
7415 
7416 	return written;
7417 }
7418 
7419 static int tracing_clock_show(struct seq_file *m, void *v)
7420 {
7421 	struct trace_array *tr = m->private;
7422 	int i;
7423 
7424 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7425 		seq_printf(m,
7426 			"%s%s%s%s", i ? " " : "",
7427 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7428 			i == tr->clock_id ? "]" : "");
7429 	seq_putc(m, '\n');
7430 
7431 	return 0;
7432 }
7433 
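/*
 * Switch the trace clock to the one named @clockstr (one of the
 * entries listed in the trace_clock file), e.g.
 * "echo global > trace_clock". Both the main and the max/snapshot
 * buffers get the new clock and are reset, since timestamps taken
 * with different clocks cannot be compared.
 */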
7434 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7435 {
7436 	int i;
7437 
7438 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7439 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7440 			break;
7441 	}
7442 	if (i == ARRAY_SIZE(trace_clocks))
7443 		return -EINVAL;
7444 
7445 	mutex_lock(&trace_types_lock);
7446 
7447 	tr->clock_id = i;
7448 
7449 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7450 
7451 	/*
7452 	 * New clock may not be consistent with the previous clock.
7453 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7454 	 */
7455 	tracing_reset_online_cpus(&tr->array_buffer);
7456 
7457 #ifdef CONFIG_TRACER_MAX_TRACE
7458 	if (tr->max_buffer.buffer)
7459 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7460 	tracing_reset_online_cpus(&tr->max_buffer);
7461 #endif
7462 
7463 	mutex_unlock(&trace_types_lock);
7464 
7465 	return 0;
7466 }
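/*
 * The clock is normally selected from user space by writing one of the
 * names in trace_clocks[] to the trace_clock file, e.g.
 *
 *	echo global > trace_clock
 *
 * Reading the file (tracing_clock_show() above) lists the available clocks
 * with the current one in brackets.  Note the buffer reset here: switching
 * clocks discards previously recorded events.
 */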
7467 
7468 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7469 				   size_t cnt, loff_t *fpos)
7470 {
7471 	struct seq_file *m = filp->private_data;
7472 	struct trace_array *tr = m->private;
7473 	char buf[64];
7474 	const char *clockstr;
7475 	int ret;
7476 
7477 	if (cnt >= sizeof(buf))
7478 		return -EINVAL;
7479 
7480 	if (copy_from_user(buf, ubuf, cnt))
7481 		return -EFAULT;
7482 
7483 	buf[cnt] = 0;
7484 
7485 	clockstr = strstrip(buf);
7486 
7487 	ret = tracing_set_clock(tr, clockstr);
7488 	if (ret)
7489 		return ret;
7490 
7491 	*fpos += cnt;
7492 
7493 	return cnt;
7494 }
7495 
7496 static int tracing_clock_open(struct inode *inode, struct file *file)
7497 {
7498 	struct trace_array *tr = inode->i_private;
7499 	int ret;
7500 
7501 	ret = tracing_check_open_get_tr(tr);
7502 	if (ret)
7503 		return ret;
7504 
7505 	ret = single_open(file, tracing_clock_show, inode->i_private);
7506 	if (ret < 0)
7507 		trace_array_put(tr);
7508 
7509 	return ret;
7510 }
7511 
7512 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7513 {
7514 	struct trace_array *tr = m->private;
7515 
7516 	mutex_lock(&trace_types_lock);
7517 
7518 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7519 		seq_puts(m, "delta [absolute]\n");
7520 	else
7521 		seq_puts(m, "[delta] absolute\n");
7522 
7523 	mutex_unlock(&trace_types_lock);
7524 
7525 	return 0;
7526 }
7527 
7528 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7529 {
7530 	struct trace_array *tr = inode->i_private;
7531 	int ret;
7532 
7533 	ret = tracing_check_open_get_tr(tr);
7534 	if (ret)
7535 		return ret;
7536 
7537 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7538 	if (ret < 0)
7539 		trace_array_put(tr);
7540 
7541 	return ret;
7542 }
7543 
7544 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7545 {
7546 	if (rbe == this_cpu_read(trace_buffered_event))
7547 		return ring_buffer_time_stamp(buffer);
7548 
7549 	return ring_buffer_event_time_stamp(buffer, rbe);
7550 }
7551 
7552 /*
7553  * Set or disable using the per CPU trace_buffered_event when possible.

7554  */
7555 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7556 {
7557 	int ret = 0;
7558 
7559 	mutex_lock(&trace_types_lock);
7560 
7561 	if (set && tr->no_filter_buffering_ref++)
7562 		goto out;
7563 
7564 	if (!set) {
7565 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7566 			ret = -EINVAL;
7567 			goto out;
7568 		}
7569 
7570 		--tr->no_filter_buffering_ref;
7571 	}
7572  out:
7573 	mutex_unlock(&trace_types_lock);
7574 
7575 	return ret;
7576 }
7577 
7578 struct ftrace_buffer_info {
7579 	struct trace_iterator	iter;
7580 	void			*spare;
7581 	unsigned int		spare_cpu;
7582 	unsigned int		read;
7583 };
7584 
7585 #ifdef CONFIG_TRACER_SNAPSHOT
7586 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7587 {
7588 	struct trace_array *tr = inode->i_private;
7589 	struct trace_iterator *iter;
7590 	struct seq_file *m;
7591 	int ret;
7592 
7593 	ret = tracing_check_open_get_tr(tr);
7594 	if (ret)
7595 		return ret;
7596 
7597 	if (file->f_mode & FMODE_READ) {
7598 		iter = __tracing_open(inode, file, true);
7599 		if (IS_ERR(iter))
7600 			ret = PTR_ERR(iter);
7601 	} else {
7602 		/* Writes still need the seq_file to hold the private data */
7603 		ret = -ENOMEM;
7604 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7605 		if (!m)
7606 			goto out;
7607 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7608 		if (!iter) {
7609 			kfree(m);
7610 			goto out;
7611 		}
7612 		ret = 0;
7613 
7614 		iter->tr = tr;
7615 		iter->array_buffer = &tr->max_buffer;
7616 		iter->cpu_file = tracing_get_cpu(inode);
7617 		m->private = iter;
7618 		file->private_data = m;
7619 	}
7620 out:
7621 	if (ret < 0)
7622 		trace_array_put(tr);
7623 
7624 	return ret;
7625 }
7626 
7627 static void tracing_swap_cpu_buffer(void *tr)
7628 {
7629 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7630 }
7631 
7632 static ssize_t
7633 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7634 		       loff_t *ppos)
7635 {
7636 	struct seq_file *m = filp->private_data;
7637 	struct trace_iterator *iter = m->private;
7638 	struct trace_array *tr = iter->tr;
7639 	unsigned long val;
7640 	int ret;
7641 
7642 	ret = tracing_update_buffers();
7643 	if (ret < 0)
7644 		return ret;
7645 
7646 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7647 	if (ret)
7648 		return ret;
7649 
7650 	mutex_lock(&trace_types_lock);
7651 
7652 	if (tr->current_trace->use_max_tr) {
7653 		ret = -EBUSY;
7654 		goto out;
7655 	}
7656 
7657 	local_irq_disable();
7658 	arch_spin_lock(&tr->max_lock);
7659 	if (tr->cond_snapshot)
7660 		ret = -EBUSY;
7661 	arch_spin_unlock(&tr->max_lock);
7662 	local_irq_enable();
7663 	if (ret)
7664 		goto out;
7665 
7666 	switch (val) {
7667 	case 0:
7668 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7669 			ret = -EINVAL;
7670 			break;
7671 		}
7672 		if (tr->allocated_snapshot)
7673 			free_snapshot(tr);
7674 		break;
7675 	case 1:
7676 /* Only allow per-cpu swap if the ring buffer supports it */
7677 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7678 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7679 			ret = -EINVAL;
7680 			break;
7681 		}
7682 #endif
7683 		if (tr->allocated_snapshot)
7684 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7685 					&tr->array_buffer, iter->cpu_file);
7686 		else
7687 			ret = tracing_alloc_snapshot_instance(tr);
7688 		if (ret < 0)
7689 			break;
7690 		/* Now, we're going to swap */
7691 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7692 			local_irq_disable();
7693 			update_max_tr(tr, current, smp_processor_id(), NULL);
7694 			local_irq_enable();
7695 		} else {
7696 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7697 						 (void *)tr, 1);
7698 		}
7699 		break;
7700 	default:
7701 		if (tr->allocated_snapshot) {
7702 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7703 				tracing_reset_online_cpus(&tr->max_buffer);
7704 			else
7705 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7706 		}
7707 		break;
7708 	}
7709 
7710 	if (ret >= 0) {
7711 		*ppos += cnt;
7712 		ret = cnt;
7713 	}
7714 out:
7715 	mutex_unlock(&trace_types_lock);
7716 	return ret;
7717 }
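/*
 * From user space the snapshot file mirrors the switch above (assuming
 * tracefs is mounted at /sys/kernel/tracing):
 *
 *	echo 0 > snapshot	# free the snapshot buffer (rejected on per-CPU files)
 *	echo 1 > snapshot	# allocate if needed and swap in the live buffer
 *	echo 2 > snapshot	# clear the snapshot contents without freeing
 *
 * Any value other than 0 or 1 behaves like 2.  The per-CPU
 * per_cpu/cpuN/snapshot files swap only that CPU's buffer, and only when
 * CONFIG_RING_BUFFER_ALLOW_SWAP is enabled.
 */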
7718 
7719 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7720 {
7721 	struct seq_file *m = file->private_data;
7722 	int ret;
7723 
7724 	ret = tracing_release(inode, file);
7725 
7726 	if (file->f_mode & FMODE_READ)
7727 		return ret;
7728 
7729 	/* If write only, the seq_file is just a stub */
7730 	if (m)
7731 		kfree(m->private);
7732 	kfree(m);
7733 
7734 	return 0;
7735 }
7736 
7737 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7738 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7739 				    size_t count, loff_t *ppos);
7740 static int tracing_buffers_release(struct inode *inode, struct file *file);
7741 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7742 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7743 
7744 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7745 {
7746 	struct ftrace_buffer_info *info;
7747 	int ret;
7748 
7749 	/* The following checks for tracefs lockdown */
7750 	ret = tracing_buffers_open(inode, filp);
7751 	if (ret < 0)
7752 		return ret;
7753 
7754 	info = filp->private_data;
7755 
7756 	if (info->iter.trace->use_max_tr) {
7757 		tracing_buffers_release(inode, filp);
7758 		return -EBUSY;
7759 	}
7760 
7761 	info->iter.snapshot = true;
7762 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7763 
7764 	return ret;
7765 }
7766 
7767 #endif /* CONFIG_TRACER_SNAPSHOT */
7768 
7769 
7770 static const struct file_operations tracing_thresh_fops = {
7771 	.open		= tracing_open_generic,
7772 	.read		= tracing_thresh_read,
7773 	.write		= tracing_thresh_write,
7774 	.llseek		= generic_file_llseek,
7775 };
7776 
7777 #ifdef CONFIG_TRACER_MAX_TRACE
7778 static const struct file_operations tracing_max_lat_fops = {
7779 	.open		= tracing_open_generic_tr,
7780 	.read		= tracing_max_lat_read,
7781 	.write		= tracing_max_lat_write,
7782 	.llseek		= generic_file_llseek,
7783 	.release	= tracing_release_generic_tr,
7784 };
7785 #endif
7786 
7787 static const struct file_operations set_tracer_fops = {
7788 	.open		= tracing_open_generic_tr,
7789 	.read		= tracing_set_trace_read,
7790 	.write		= tracing_set_trace_write,
7791 	.llseek		= generic_file_llseek,
7792 	.release	= tracing_release_generic_tr,
7793 };
7794 
7795 static const struct file_operations tracing_pipe_fops = {
7796 	.open		= tracing_open_pipe,
7797 	.poll		= tracing_poll_pipe,
7798 	.read		= tracing_read_pipe,
7799 	.splice_read	= tracing_splice_read_pipe,
7800 	.release	= tracing_release_pipe,
7801 	.llseek		= no_llseek,
7802 };
7803 
7804 static const struct file_operations tracing_entries_fops = {
7805 	.open		= tracing_open_generic_tr,
7806 	.read		= tracing_entries_read,
7807 	.write		= tracing_entries_write,
7808 	.llseek		= generic_file_llseek,
7809 	.release	= tracing_release_generic_tr,
7810 };
7811 
7812 static const struct file_operations tracing_total_entries_fops = {
7813 	.open		= tracing_open_generic_tr,
7814 	.read		= tracing_total_entries_read,
7815 	.llseek		= generic_file_llseek,
7816 	.release	= tracing_release_generic_tr,
7817 };
7818 
7819 static const struct file_operations tracing_free_buffer_fops = {
7820 	.open		= tracing_open_generic_tr,
7821 	.write		= tracing_free_buffer_write,
7822 	.release	= tracing_free_buffer_release,
7823 };
7824 
7825 static const struct file_operations tracing_mark_fops = {
7826 	.open		= tracing_mark_open,
7827 	.write		= tracing_mark_write,
7828 	.release	= tracing_release_generic_tr,
7829 };
7830 
7831 static const struct file_operations tracing_mark_raw_fops = {
7832 	.open		= tracing_mark_open,
7833 	.write		= tracing_mark_raw_write,
7834 	.release	= tracing_release_generic_tr,
7835 };
7836 
7837 static const struct file_operations trace_clock_fops = {
7838 	.open		= tracing_clock_open,
7839 	.read		= seq_read,
7840 	.llseek		= seq_lseek,
7841 	.release	= tracing_single_release_tr,
7842 	.write		= tracing_clock_write,
7843 };
7844 
7845 static const struct file_operations trace_time_stamp_mode_fops = {
7846 	.open		= tracing_time_stamp_mode_open,
7847 	.read		= seq_read,
7848 	.llseek		= seq_lseek,
7849 	.release	= tracing_single_release_tr,
7850 };
7851 
7852 #ifdef CONFIG_TRACER_SNAPSHOT
7853 static const struct file_operations snapshot_fops = {
7854 	.open		= tracing_snapshot_open,
7855 	.read		= seq_read,
7856 	.write		= tracing_snapshot_write,
7857 	.llseek		= tracing_lseek,
7858 	.release	= tracing_snapshot_release,
7859 };
7860 
7861 static const struct file_operations snapshot_raw_fops = {
7862 	.open		= snapshot_raw_open,
7863 	.read		= tracing_buffers_read,
7864 	.release	= tracing_buffers_release,
7865 	.splice_read	= tracing_buffers_splice_read,
7866 	.llseek		= no_llseek,
7867 };
7868 
7869 #endif /* CONFIG_TRACER_SNAPSHOT */
7870 
7871 /*
7872  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7873  * @filp: The active open file structure
7874  * @ubuf: The userspace provided buffer to read the value from
7875  * @cnt: The maximum number of bytes to read
7876  * @ppos: The current "file" position
7877  *
7878  * This function implements the write interface for a struct trace_min_max_param.
7879  * The filp->private_data must point to a trace_min_max_param structure that
7880  * defines where to write the value, the min and the max acceptable values,
7881  * and a lock to protect the write.
7882  */
7883 static ssize_t
7884 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7885 {
7886 	struct trace_min_max_param *param = filp->private_data;
7887 	u64 val;
7888 	int err;
7889 
7890 	if (!param)
7891 		return -EFAULT;
7892 
7893 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7894 	if (err)
7895 		return err;
7896 
7897 	if (param->lock)
7898 		mutex_lock(param->lock);
7899 
7900 	if (param->min && val < *param->min)
7901 		err = -EINVAL;
7902 
7903 	if (param->max && val > *param->max)
7904 		err = -EINVAL;
7905 
7906 	if (!err)
7907 		*param->val = val;
7908 
7909 	if (param->lock)
7910 		mutex_unlock(param->lock);
7911 
7912 	if (err)
7913 		return err;
7914 
7915 	return cnt;
7916 }
7917 
7918 /*
7919  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7920  * @filp: The active open file structure
7921  * @ubuf: The userspace provided buffer to read value into
7922  * @cnt: The maximum number of bytes to read
7923  * @ppos: The current "file" position
7924  *
7925  * This function implements the read interface for a struct trace_min_max_param.
7926  * The filp->private_data must point to a trace_min_max_param struct with valid
7927  * data.
7928  */
7929 static ssize_t
7930 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7931 {
7932 	struct trace_min_max_param *param = filp->private_data;
7933 	char buf[U64_STR_SIZE];
7934 	int len;
7935 	u64 val;
7936 
7937 	if (!param)
7938 		return -EFAULT;
7939 
7940 	val = *param->val;
7941 
7942 	if (cnt > sizeof(buf))
7943 		cnt = sizeof(buf);
7944 
7945 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7946 
7947 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7948 }
7949 
7950 const struct file_operations trace_min_max_fops = {
7951 	.open		= tracing_open_generic,
7952 	.read		= trace_min_max_read,
7953 	.write		= trace_min_max_write,
7954 };
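/*
 * A typical setup for these fops, sketched with hypothetical names: a
 * tracer exposes a clamped u64 knob by pointing a trace_min_max_param at
 * its storage and bounds and handing it to trace_create_file():
 *
 *	static u64 my_value = 50;
 *	static u64 my_value_min = 1;
 *	static u64 my_value_max = 100;
 *	static DEFINE_MUTEX(my_value_lock);
 *
 *	static struct trace_min_max_param my_value_param = {
 *		.lock	= &my_value_lock,
 *		.val	= &my_value,
 *		.min	= &my_value_min,
 *		.max	= &my_value_max,
 *	};
 *
 *	trace_create_file("my_value", TRACE_MODE_WRITE, parent,
 *			  &my_value_param, &trace_min_max_fops);
 *
 * Writes outside [1, 100] are then rejected with -EINVAL by
 * trace_min_max_write() above.
 */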
7955 
7956 #define TRACING_LOG_ERRS_MAX	8
7957 #define TRACING_LOG_LOC_MAX	128
7958 
7959 #define CMD_PREFIX "  Command: "
7960 
7961 struct err_info {
7962 	const char	**errs;	/* ptr to loc-specific array of err strings */
7963 	u8		type;	/* index into errs -> specific err string */
7964 	u16		pos;	/* caret position */
7965 	u64		ts;
7966 };
7967 
7968 struct tracing_log_err {
7969 	struct list_head	list;
7970 	struct err_info		info;
7971 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7972 	char			*cmd;                     /* what caused err */
7973 };
7974 
7975 static DEFINE_MUTEX(tracing_err_log_lock);
7976 
7977 static struct tracing_log_err *alloc_tracing_log_err(int len)
7978 {
7979 	struct tracing_log_err *err;
7980 
7981 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7982 	if (!err)
7983 		return ERR_PTR(-ENOMEM);
7984 
7985 	err->cmd = kzalloc(len, GFP_KERNEL);
7986 	if (!err->cmd) {
7987 		kfree(err);
7988 		return ERR_PTR(-ENOMEM);
7989 	}
7990 
7991 	return err;
7992 }
7993 
7994 static void free_tracing_log_err(struct tracing_log_err *err)
7995 {
7996 	kfree(err->cmd);
7997 	kfree(err);
7998 }
7999 
8000 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8001 						   int len)
8002 {
8003 	struct tracing_log_err *err;
8004 	char *cmd;
8005 
8006 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8007 		err = alloc_tracing_log_err(len);
8008 		if (PTR_ERR(err) != -ENOMEM)
8009 			tr->n_err_log_entries++;
8010 
8011 		return err;
8012 	}
8013 	cmd = kzalloc(len, GFP_KERNEL);
8014 	if (!cmd)
8015 		return ERR_PTR(-ENOMEM);
8016 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8017 	kfree(err->cmd);
8018 	err->cmd = cmd;
8019 	list_del(&err->list);
8020 
8021 	return err;
8022 }
8023 
8024 /**
8025  * err_pos - find the position of a string within a command for error careting
8026  * @cmd: The tracing command that caused the error
8027  * @str: The string to position the caret at within @cmd
8028  *
8029  * Finds the position of the first occurrence of @str within @cmd.  The
8030  * return value can be passed to tracing_log_err() for caret placement
8031  * within @cmd.
8032  *
8033  * Returns the index within @cmd of the first occurrence of @str or 0
8034  * if @str was not found.
8035  */
8036 unsigned int err_pos(char *cmd, const char *str)
8037 {
8038 	char *found;
8039 
8040 	if (WARN_ON(!strlen(cmd)))
8041 		return 0;
8042 
8043 	found = strstr(cmd, str);
8044 	if (found)
8045 		return found - cmd;
8046 
8047 	return 0;
8048 }
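/*
 * For example, with a hypothetical command string,
 * err_pos("hist:keys=foo", "foo") returns 10, placing the caret under the
 * 'f' of "foo" when the error is printed.
 */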
8049 
8050 /**
8051  * tracing_log_err - write an error to the tracing error log
8052  * @tr: The associated trace array for the error (NULL for top level array)
8053  * @loc: A string describing where the error occurred
8054  * @cmd: The tracing command that caused the error
8055  * @errs: The array of loc-specific static error strings
8056  * @type: The index into errs[], which produces the specific static err string
8057  * @pos: The position the caret should be placed in the cmd
8058  *
8059  * Writes an error into tracing/error_log of the form:
8060  *
8061  * <loc>: error: <text>
8062  *   Command: <cmd>
8063  *              ^
8064  *
8065  * tracing/error_log is a small log file containing the last
8066  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8067  * unless there has been a tracing error, and the error log can be
8068  * cleared and have its memory freed by writing the empty string in
8069  * truncation mode to it, i.e. echo > tracing/error_log.
8070  *
8071  * NOTE: the @errs array along with the @type param are used to
8072  * produce a static error string - this string is not copied and saved
8073  * when the error is logged - only a pointer to it is saved.  See
8074  * existing callers for examples of how static strings are typically
8075  * defined for use with tracing_log_err().
8076  */
8077 void tracing_log_err(struct trace_array *tr,
8078 		     const char *loc, const char *cmd,
8079 		     const char **errs, u8 type, u16 pos)
8080 {
8081 	struct tracing_log_err *err;
8082 	int len = 0;
8083 
8084 	if (!tr)
8085 		tr = &global_trace;
8086 
8087 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8088 
8089 	mutex_lock(&tracing_err_log_lock);
8090 	err = get_tracing_log_err(tr, len);
8091 	if (PTR_ERR(err) == -ENOMEM) {
8092 		mutex_unlock(&tracing_err_log_lock);
8093 		return;
8094 	}
8095 
8096 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8097 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8098 
8099 	err->info.errs = errs;
8100 	err->info.type = type;
8101 	err->info.pos = pos;
8102 	err->info.ts = local_clock();
8103 
8104 	list_add_tail(&err->list, &tr->err_log);
8105 	mutex_unlock(&tracing_err_log_lock);
8106 }
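/*
 * A caller sketch with hypothetical names: the error strings are static,
 * indexed by @type, and the caret position typically comes from err_pos():
 *
 *	static const char *my_cmd_errs[] = {
 *		"Missing argument",
 *		"Unknown field",
 *	};
 *
 *	tracing_log_err(tr, "my_cmd", cmd_str, my_cmd_errs, 1,
 *			err_pos(cmd_str, field_name));
 *
 * logs "my_cmd: error: Unknown field" followed by the command line and a
 * caret under the offending field.
 */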
8107 
8108 static void clear_tracing_err_log(struct trace_array *tr)
8109 {
8110 	struct tracing_log_err *err, *next;
8111 
8112 	mutex_lock(&tracing_err_log_lock);
8113 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8114 		list_del(&err->list);
8115 		free_tracing_log_err(err);
8116 	}
8117 
8118 	tr->n_err_log_entries = 0;
8119 	mutex_unlock(&tracing_err_log_lock);
8120 }
8121 
8122 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8123 {
8124 	struct trace_array *tr = m->private;
8125 
8126 	mutex_lock(&tracing_err_log_lock);
8127 
8128 	return seq_list_start(&tr->err_log, *pos);
8129 }
8130 
8131 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8132 {
8133 	struct trace_array *tr = m->private;
8134 
8135 	return seq_list_next(v, &tr->err_log, pos);
8136 }
8137 
8138 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8139 {
8140 	mutex_unlock(&tracing_err_log_lock);
8141 }
8142 
8143 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8144 {
8145 	u16 i;
8146 
8147 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8148 		seq_putc(m, ' ');
8149 	for (i = 0; i < pos; i++)
8150 		seq_putc(m, ' ');
8151 	seq_puts(m, "^\n");
8152 }
8153 
8154 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8155 {
8156 	struct tracing_log_err *err = v;
8157 
8158 	if (err) {
8159 		const char *err_text = err->info.errs[err->info.type];
8160 		u64 sec = err->info.ts;
8161 		u32 nsec;
8162 
8163 		nsec = do_div(sec, NSEC_PER_SEC);
8164 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8165 			   err->loc, err_text);
8166 		seq_printf(m, "%s", err->cmd);
8167 		tracing_err_log_show_pos(m, err->info.pos);
8168 	}
8169 
8170 	return 0;
8171 }
8172 
8173 static const struct seq_operations tracing_err_log_seq_ops = {
8174 	.start  = tracing_err_log_seq_start,
8175 	.next   = tracing_err_log_seq_next,
8176 	.stop   = tracing_err_log_seq_stop,
8177 	.show   = tracing_err_log_seq_show
8178 };
8179 
8180 static int tracing_err_log_open(struct inode *inode, struct file *file)
8181 {
8182 	struct trace_array *tr = inode->i_private;
8183 	int ret = 0;
8184 
8185 	ret = tracing_check_open_get_tr(tr);
8186 	if (ret)
8187 		return ret;
8188 
8189 	/* If this file was opened for write, then erase contents */
8190 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8191 		clear_tracing_err_log(tr);
8192 
8193 	if (file->f_mode & FMODE_READ) {
8194 		ret = seq_open(file, &tracing_err_log_seq_ops);
8195 		if (!ret) {
8196 			struct seq_file *m = file->private_data;
8197 			m->private = tr;
8198 		} else {
8199 			trace_array_put(tr);
8200 		}
8201 	}
8202 	return ret;
8203 }
8204 
8205 static ssize_t tracing_err_log_write(struct file *file,
8206 				     const char __user *buffer,
8207 				     size_t count, loff_t *ppos)
8208 {
8209 	return count;
8210 }
8211 
8212 static int tracing_err_log_release(struct inode *inode, struct file *file)
8213 {
8214 	struct trace_array *tr = inode->i_private;
8215 
8216 	trace_array_put(tr);
8217 
8218 	if (file->f_mode & FMODE_READ)
8219 		seq_release(inode, file);
8220 
8221 	return 0;
8222 }
8223 
8224 static const struct file_operations tracing_err_log_fops = {
8225 	.open           = tracing_err_log_open,
8226 	.write		= tracing_err_log_write,
8227 	.read           = seq_read,
8228 	.llseek         = tracing_lseek,
8229 	.release        = tracing_err_log_release,
8230 };
8231 
8232 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8233 {
8234 	struct trace_array *tr = inode->i_private;
8235 	struct ftrace_buffer_info *info;
8236 	int ret;
8237 
8238 	ret = tracing_check_open_get_tr(tr);
8239 	if (ret)
8240 		return ret;
8241 
8242 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8243 	if (!info) {
8244 		trace_array_put(tr);
8245 		return -ENOMEM;
8246 	}
8247 
8248 	mutex_lock(&trace_types_lock);
8249 
8250 	info->iter.tr		= tr;
8251 	info->iter.cpu_file	= tracing_get_cpu(inode);
8252 	info->iter.trace	= tr->current_trace;
8253 	info->iter.array_buffer = &tr->array_buffer;
8254 	info->spare		= NULL;
8255 	/* Force reading ring buffer for first read */
8256 	info->read		= (unsigned int)-1;
8257 
8258 	filp->private_data = info;
8259 
8260 	tr->trace_ref++;
8261 
8262 	mutex_unlock(&trace_types_lock);
8263 
8264 	ret = nonseekable_open(inode, filp);
8265 	if (ret < 0)
8266 		trace_array_put(tr);
8267 
8268 	return ret;
8269 }
8270 
8271 static __poll_t
8272 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8273 {
8274 	struct ftrace_buffer_info *info = filp->private_data;
8275 	struct trace_iterator *iter = &info->iter;
8276 
8277 	return trace_poll(iter, filp, poll_table);
8278 }
8279 
8280 static ssize_t
8281 tracing_buffers_read(struct file *filp, char __user *ubuf,
8282 		     size_t count, loff_t *ppos)
8283 {
8284 	struct ftrace_buffer_info *info = filp->private_data;
8285 	struct trace_iterator *iter = &info->iter;
8286 	ssize_t ret = 0;
8287 	ssize_t size;
8288 
8289 	if (!count)
8290 		return 0;
8291 
8292 #ifdef CONFIG_TRACER_MAX_TRACE
8293 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8294 		return -EBUSY;
8295 #endif
8296 
8297 	if (!info->spare) {
8298 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8299 							  iter->cpu_file);
8300 		if (IS_ERR(info->spare)) {
8301 			ret = PTR_ERR(info->spare);
8302 			info->spare = NULL;
8303 		} else {
8304 			info->spare_cpu = iter->cpu_file;
8305 		}
8306 	}
8307 	if (!info->spare)
8308 		return ret;
8309 
8310 	/* Do we have previous read data to read? */
8311 	if (info->read < PAGE_SIZE)
8312 		goto read;
8313 
8314  again:
8315 	trace_access_lock(iter->cpu_file);
8316 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8317 				    &info->spare,
8318 				    count,
8319 				    iter->cpu_file, 0);
8320 	trace_access_unlock(iter->cpu_file);
8321 
8322 	if (ret < 0) {
8323 		if (trace_empty(iter)) {
8324 			if ((filp->f_flags & O_NONBLOCK))
8325 				return -EAGAIN;
8326 
8327 			ret = wait_on_pipe(iter, 0);
8328 			if (ret)
8329 				return ret;
8330 
8331 			goto again;
8332 		}
8333 		return 0;
8334 	}
8335 
8336 	info->read = 0;
8337  read:
8338 	size = PAGE_SIZE - info->read;
8339 	if (size > count)
8340 		size = count;
8341 
8342 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8343 	if (ret == size)
8344 		return -EFAULT;
8345 
8346 	size -= ret;
8347 
8348 	*ppos += size;
8349 	info->read += size;
8350 
8351 	return size;
8352 }
8353 
8354 static int tracing_buffers_release(struct inode *inode, struct file *file)
8355 {
8356 	struct ftrace_buffer_info *info = file->private_data;
8357 	struct trace_iterator *iter = &info->iter;
8358 
8359 	mutex_lock(&trace_types_lock);
8360 
8361 	iter->tr->trace_ref--;
8362 
8363 	__trace_array_put(iter->tr);
8364 
8365 	iter->wait_index++;
8366 	/* Make sure the waiters see the new wait_index */
8367 	smp_wmb();
8368 
8369 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8370 
8371 	if (info->spare)
8372 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8373 					   info->spare_cpu, info->spare);
8374 	kvfree(info);
8375 
8376 	mutex_unlock(&trace_types_lock);
8377 
8378 	return 0;
8379 }
8380 
8381 struct buffer_ref {
8382 	struct trace_buffer	*buffer;
8383 	void			*page;
8384 	int			cpu;
8385 	refcount_t		refcount;
8386 };
8387 
8388 static void buffer_ref_release(struct buffer_ref *ref)
8389 {
8390 	if (!refcount_dec_and_test(&ref->refcount))
8391 		return;
8392 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8393 	kfree(ref);
8394 }
8395 
8396 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8397 				    struct pipe_buffer *buf)
8398 {
8399 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8400 
8401 	buffer_ref_release(ref);
8402 	buf->private = 0;
8403 }
8404 
8405 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8406 				struct pipe_buffer *buf)
8407 {
8408 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8409 
8410 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8411 		return false;
8412 
8413 	refcount_inc(&ref->refcount);
8414 	return true;
8415 }
8416 
8417 /* Pipe buffer operations for a buffer. */
8418 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8419 	.release		= buffer_pipe_buf_release,
8420 	.get			= buffer_pipe_buf_get,
8421 };
8422 
8423 /*
8424  * Callback from splice_to_pipe(), if we need to release some pages
8425  * at the end of the spd in case we errored out while filling the pipe.
8426  */
8427 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8428 {
8429 	struct buffer_ref *ref =
8430 		(struct buffer_ref *)spd->partial[i].private;
8431 
8432 	buffer_ref_release(ref);
8433 	spd->partial[i].private = 0;
8434 }
8435 
8436 static ssize_t
8437 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8438 			    struct pipe_inode_info *pipe, size_t len,
8439 			    unsigned int flags)
8440 {
8441 	struct ftrace_buffer_info *info = file->private_data;
8442 	struct trace_iterator *iter = &info->iter;
8443 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8444 	struct page *pages_def[PIPE_DEF_BUFFERS];
8445 	struct splice_pipe_desc spd = {
8446 		.pages		= pages_def,
8447 		.partial	= partial_def,
8448 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8449 		.ops		= &buffer_pipe_buf_ops,
8450 		.spd_release	= buffer_spd_release,
8451 	};
8452 	struct buffer_ref *ref;
8453 	int entries, i;
8454 	ssize_t ret = 0;
8455 
8456 #ifdef CONFIG_TRACER_MAX_TRACE
8457 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8458 		return -EBUSY;
8459 #endif
8460 
8461 	if (*ppos & (PAGE_SIZE - 1))
8462 		return -EINVAL;
8463 
8464 	if (len & (PAGE_SIZE - 1)) {
8465 		if (len < PAGE_SIZE)
8466 			return -EINVAL;
8467 		len &= PAGE_MASK;
8468 	}
8469 
8470 	if (splice_grow_spd(pipe, &spd))
8471 		return -ENOMEM;
8472 
8473  again:
8474 	trace_access_lock(iter->cpu_file);
8475 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8476 
8477 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8478 		struct page *page;
8479 		int r;
8480 
8481 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8482 		if (!ref) {
8483 			ret = -ENOMEM;
8484 			break;
8485 		}
8486 
8487 		refcount_set(&ref->refcount, 1);
8488 		ref->buffer = iter->array_buffer->buffer;
8489 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8490 		if (IS_ERR(ref->page)) {
8491 			ret = PTR_ERR(ref->page);
8492 			ref->page = NULL;
8493 			kfree(ref);
8494 			break;
8495 		}
8496 		ref->cpu = iter->cpu_file;
8497 
8498 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8499 					  len, iter->cpu_file, 1);
8500 		if (r < 0) {
8501 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8502 						   ref->page);
8503 			kfree(ref);
8504 			break;
8505 		}
8506 
8507 		page = virt_to_page(ref->page);
8508 
8509 		spd.pages[i] = page;
8510 		spd.partial[i].len = PAGE_SIZE;
8511 		spd.partial[i].offset = 0;
8512 		spd.partial[i].private = (unsigned long)ref;
8513 		spd.nr_pages++;
8514 		*ppos += PAGE_SIZE;
8515 
8516 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8517 	}
8518 
8519 	trace_access_unlock(iter->cpu_file);
8520 	spd.nr_pages = i;
8521 
8522 	/* did we read anything? */
8523 	if (!spd.nr_pages) {
8524 		long wait_index;
8525 
8526 		if (ret)
8527 			goto out;
8528 
8529 		ret = -EAGAIN;
8530 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8531 			goto out;
8532 
8533 		wait_index = READ_ONCE(iter->wait_index);
8534 
8535 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8536 		if (ret)
8537 			goto out;
8538 
8539 		/* No need to wait after waking up when tracing is off */
8540 		if (!tracer_tracing_is_on(iter->tr))
8541 			goto out;
8542 
8543 		/* Make sure we see the new wait_index */
8544 		smp_rmb();
8545 		if (wait_index != iter->wait_index)
8546 			goto out;
8547 
8548 		goto again;
8549 	}
8550 
8551 	ret = splice_to_pipe(pipe, &spd);
8552 out:
8553 	splice_shrink_spd(&spd);
8554 
8555 	return ret;
8556 }
8557 
8558 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8559 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8560 {
8561 	struct ftrace_buffer_info *info = file->private_data;
8562 	struct trace_iterator *iter = &info->iter;
8563 
8564 	if (cmd)
8565 		return -ENOIOCTLCMD;
8566 
8567 	mutex_lock(&trace_types_lock);
8568 
8569 	iter->wait_index++;
8570 	/* Make sure the waiters see the new wait_index */
8571 	smp_wmb();
8572 
8573 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8574 
8575 	mutex_unlock(&trace_types_lock);
8576 	return 0;
8577 }
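/*
 * A reader blocked in tracing_buffers_read() or the splice path can thus be
 * kicked from user space with a plain ioctl(fd, 0) on an open
 * trace_pipe_raw file descriptor (a sketch; any non-zero cmd is rejected
 * with -ENOIOCTLCMD).
 */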
8578 
8579 static const struct file_operations tracing_buffers_fops = {
8580 	.open		= tracing_buffers_open,
8581 	.read		= tracing_buffers_read,
8582 	.poll		= tracing_buffers_poll,
8583 	.release	= tracing_buffers_release,
8584 	.splice_read	= tracing_buffers_splice_read,
8585 	.unlocked_ioctl = tracing_buffers_ioctl,
8586 	.llseek		= no_llseek,
8587 };
8588 
8589 static ssize_t
8590 tracing_stats_read(struct file *filp, char __user *ubuf,
8591 		   size_t count, loff_t *ppos)
8592 {
8593 	struct inode *inode = file_inode(filp);
8594 	struct trace_array *tr = inode->i_private;
8595 	struct array_buffer *trace_buf = &tr->array_buffer;
8596 	int cpu = tracing_get_cpu(inode);
8597 	struct trace_seq *s;
8598 	unsigned long cnt;
8599 	unsigned long long t;
8600 	unsigned long usec_rem;
8601 
8602 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8603 	if (!s)
8604 		return -ENOMEM;
8605 
8606 	trace_seq_init(s);
8607 
8608 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8609 	trace_seq_printf(s, "entries: %ld\n", cnt);
8610 
8611 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8612 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8613 
8614 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8615 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8616 
8617 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8618 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8619 
8620 	if (trace_clocks[tr->clock_id].in_ns) {
8621 		/* local or global for trace_clock */
8622 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8623 		usec_rem = do_div(t, USEC_PER_SEC);
8624 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8625 								t, usec_rem);
8626 
8627 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8628 		usec_rem = do_div(t, USEC_PER_SEC);
8629 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8630 	} else {
8631 		/* counter or tsc mode for trace_clock */
8632 		trace_seq_printf(s, "oldest event ts: %llu\n",
8633 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8634 
8635 		trace_seq_printf(s, "now ts: %llu\n",
8636 				ring_buffer_time_stamp(trace_buf->buffer));
8637 	}
8638 
8639 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8640 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8641 
8642 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8643 	trace_seq_printf(s, "read events: %ld\n", cnt);
8644 
8645 	count = simple_read_from_buffer(ubuf, count, ppos,
8646 					s->buffer, trace_seq_used(s));
8647 
8648 	kfree(s);
8649 
8650 	return count;
8651 }
8652 
8653 static const struct file_operations tracing_stats_fops = {
8654 	.open		= tracing_open_generic_tr,
8655 	.read		= tracing_stats_read,
8656 	.llseek		= generic_file_llseek,
8657 	.release	= tracing_release_generic_tr,
8658 };
8659 
8660 #ifdef CONFIG_DYNAMIC_FTRACE
8661 
8662 static ssize_t
8663 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8664 		  size_t cnt, loff_t *ppos)
8665 {
8666 	ssize_t ret;
8667 	char *buf;
8668 	int r;
8669 
8670 	/* 256 should be plenty to hold the amount needed */
8671 	buf = kmalloc(256, GFP_KERNEL);
8672 	if (!buf)
8673 		return -ENOMEM;
8674 
8675 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8676 		      ftrace_update_tot_cnt,
8677 		      ftrace_number_of_pages,
8678 		      ftrace_number_of_groups);
8679 
8680 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8681 	kfree(buf);
8682 	return ret;
8683 }
8684 
8685 static const struct file_operations tracing_dyn_info_fops = {
8686 	.open		= tracing_open_generic,
8687 	.read		= tracing_read_dyn_info,
8688 	.llseek		= generic_file_llseek,
8689 };
8690 #endif /* CONFIG_DYNAMIC_FTRACE */
8691 
8692 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8693 static void
8694 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8695 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8696 		void *data)
8697 {
8698 	tracing_snapshot_instance(tr);
8699 }
8700 
8701 static void
8702 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8703 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8704 		      void *data)
8705 {
8706 	struct ftrace_func_mapper *mapper = data;
8707 	long *count = NULL;
8708 
8709 	if (mapper)
8710 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8711 
8712 	if (count) {
8713 
8714 		if (*count <= 0)
8715 			return;
8716 
8717 		(*count)--;
8718 	}
8719 
8720 	tracing_snapshot_instance(tr);
8721 }
8722 
8723 static int
8724 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8725 		      struct ftrace_probe_ops *ops, void *data)
8726 {
8727 	struct ftrace_func_mapper *mapper = data;
8728 	long *count = NULL;
8729 
8730 	seq_printf(m, "%ps:", (void *)ip);
8731 
8732 	seq_puts(m, "snapshot");
8733 
8734 	if (mapper)
8735 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8736 
8737 	if (count)
8738 		seq_printf(m, ":count=%ld\n", *count);
8739 	else
8740 		seq_puts(m, ":unlimited\n");
8741 
8742 	return 0;
8743 }
8744 
8745 static int
8746 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8747 		     unsigned long ip, void *init_data, void **data)
8748 {
8749 	struct ftrace_func_mapper *mapper = *data;
8750 
8751 	if (!mapper) {
8752 		mapper = allocate_ftrace_func_mapper();
8753 		if (!mapper)
8754 			return -ENOMEM;
8755 		*data = mapper;
8756 	}
8757 
8758 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8759 }
8760 
8761 static void
8762 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8763 		     unsigned long ip, void *data)
8764 {
8765 	struct ftrace_func_mapper *mapper = data;
8766 
8767 	if (!ip) {
8768 		if (!mapper)
8769 			return;
8770 		free_ftrace_func_mapper(mapper, NULL);
8771 		return;
8772 	}
8773 
8774 	ftrace_func_mapper_remove_ip(mapper, ip);
8775 }
8776 
8777 static struct ftrace_probe_ops snapshot_probe_ops = {
8778 	.func			= ftrace_snapshot,
8779 	.print			= ftrace_snapshot_print,
8780 };
8781 
8782 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8783 	.func			= ftrace_count_snapshot,
8784 	.print			= ftrace_snapshot_print,
8785 	.init			= ftrace_snapshot_init,
8786 	.free			= ftrace_snapshot_free,
8787 };
8788 
8789 static int
8790 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8791 			       char *glob, char *cmd, char *param, int enable)
8792 {
8793 	struct ftrace_probe_ops *ops;
8794 	void *count = (void *)-1;
8795 	char *number;
8796 	int ret;
8797 
8798 	if (!tr)
8799 		return -ENODEV;
8800 
8801 	/* hash funcs only work with set_ftrace_filter */
8802 	if (!enable)
8803 		return -EINVAL;
8804 
8805 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8806 
8807 	if (glob[0] == '!')
8808 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8809 
8810 	if (!param)
8811 		goto out_reg;
8812 
8813 	number = strsep(&param, ":");
8814 
8815 	if (!strlen(number))
8816 		goto out_reg;
8817 
8818 	/*
8819 	 * We use the callback data field (which is a pointer)
8820 	 * as our counter.
8821 	 */
8822 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8823 	if (ret)
8824 		return ret;
8825 
8826  out_reg:
8827 	ret = tracing_alloc_snapshot_instance(tr);
8828 	if (ret < 0)
8829 		goto out;
8830 
8831 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8832 
8833  out:
8834 	return ret < 0 ? ret : 0;
8835 }
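/*
 * The command parsed above is written to set_ftrace_filter; e.g., with a
 * hypothetical function name:
 *
 *	echo 'do_page_fault:snapshot' > set_ftrace_filter
 *	echo 'do_page_fault:snapshot:3' > set_ftrace_filter
 *	echo '!do_page_fault:snapshot' > set_ftrace_filter
 *
 * The first form snapshots on every hit, the second only for the first
 * three hits (the count lives in the probe's data pointer), and the '!'
 * form removes the probe again.
 */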
8836 
8837 static struct ftrace_func_command ftrace_snapshot_cmd = {
8838 	.name			= "snapshot",
8839 	.func			= ftrace_trace_snapshot_callback,
8840 };
8841 
8842 static __init int register_snapshot_cmd(void)
8843 {
8844 	return register_ftrace_command(&ftrace_snapshot_cmd);
8845 }
8846 #else
8847 static inline __init int register_snapshot_cmd(void) { return 0; }
8848 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8849 
8850 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8851 {
8852 	if (WARN_ON(!tr->dir))
8853 		return ERR_PTR(-ENODEV);
8854 
8855 	/* Top directory uses NULL as the parent */
8856 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8857 		return NULL;
8858 
8859 	/* All sub buffers have a descriptor */
8860 	return tr->dir;
8861 }
8862 
8863 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8864 {
8865 	struct dentry *d_tracer;
8866 
8867 	if (tr->percpu_dir)
8868 		return tr->percpu_dir;
8869 
8870 	d_tracer = tracing_get_dentry(tr);
8871 	if (IS_ERR(d_tracer))
8872 		return NULL;
8873 
8874 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8875 
8876 	MEM_FAIL(!tr->percpu_dir,
8877 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8878 
8879 	return tr->percpu_dir;
8880 }
8881 
8882 static struct dentry *
8883 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8884 		      void *data, long cpu, const struct file_operations *fops)
8885 {
8886 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8887 
8888 	if (ret) /* See tracing_get_cpu() */
8889 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8890 	return ret;
8891 }
8892 
8893 static void
8894 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8895 {
8896 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8897 	struct dentry *d_cpu;
8898 	char cpu_dir[30]; /* 30 characters should be more than enough */
8899 
8900 	if (!d_percpu)
8901 		return;
8902 
8903 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8904 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8905 	if (!d_cpu) {
8906 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8907 		return;
8908 	}
8909 
8910 	/* per cpu trace_pipe */
8911 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8912 				tr, cpu, &tracing_pipe_fops);
8913 
8914 	/* per cpu trace */
8915 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8916 				tr, cpu, &tracing_fops);
8917 
8918 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8919 				tr, cpu, &tracing_buffers_fops);
8920 
8921 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8922 				tr, cpu, &tracing_stats_fops);
8923 
8924 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8925 				tr, cpu, &tracing_entries_fops);
8926 
8927 #ifdef CONFIG_TRACER_SNAPSHOT
8928 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8929 				tr, cpu, &snapshot_fops);
8930 
8931 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8932 				tr, cpu, &snapshot_raw_fops);
8933 #endif
8934 }
8935 
8936 #ifdef CONFIG_FTRACE_SELFTEST
8937 /* Let selftest have access to static functions in this file */
8938 #include "trace_selftest.c"
8939 #endif
8940 
8941 static ssize_t
8942 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8943 			loff_t *ppos)
8944 {
8945 	struct trace_option_dentry *topt = filp->private_data;
8946 	char *buf;
8947 
8948 	if (topt->flags->val & topt->opt->bit)
8949 		buf = "1\n";
8950 	else
8951 		buf = "0\n";
8952 
8953 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8954 }
8955 
8956 static ssize_t
8957 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8958 			 loff_t *ppos)
8959 {
8960 	struct trace_option_dentry *topt = filp->private_data;
8961 	unsigned long val;
8962 	int ret;
8963 
8964 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8965 	if (ret)
8966 		return ret;
8967 
8968 	if (val != 0 && val != 1)
8969 		return -EINVAL;
8970 
8971 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8972 		mutex_lock(&trace_types_lock);
8973 		ret = __set_tracer_option(topt->tr, topt->flags,
8974 					  topt->opt, !val);
8975 		mutex_unlock(&trace_types_lock);
8976 		if (ret)
8977 			return ret;
8978 	}
8979 
8980 	*ppos += cnt;
8981 
8982 	return cnt;
8983 }
8984 
8985 static int tracing_open_options(struct inode *inode, struct file *filp)
8986 {
8987 	struct trace_option_dentry *topt = inode->i_private;
8988 	int ret;
8989 
8990 	ret = tracing_check_open_get_tr(topt->tr);
8991 	if (ret)
8992 		return ret;
8993 
8994 	filp->private_data = inode->i_private;
8995 	return 0;
8996 }
8997 
8998 static int tracing_release_options(struct inode *inode, struct file *file)
8999 {
9000 	struct trace_option_dentry *topt = file->private_data;
9001 
9002 	trace_array_put(topt->tr);
9003 	return 0;
9004 }
9005 
9006 static const struct file_operations trace_options_fops = {
9007 	.open = tracing_open_options,
9008 	.read = trace_options_read,
9009 	.write = trace_options_write,
9010 	.llseek	= generic_file_llseek,
9011 	.release = tracing_release_options,
9012 };
9013 
9014 /*
9015  * Each trace option file needs both the trace_array descriptor and the
9016  * index of the flag bit that the file represents. To pass both through a
9017  * single pointer, the trace_array has a character array trace_flags_index[],
9018  * where each element holds its own index: index[0] == 0, index[1] == 1, etc.
9019  * The address of the element for the flag is what gets passed to the flag
9020  * option file read/write callbacks.
9021  *
9022  * In order to extract both the index and the trace_array descriptor,
9023  * get_tr_index() uses the following algorithm.
9024  *
9025  *   idx = *ptr;
9026  *
9027  * Since the element the pointer refers to holds its own index (remember
9028  * index[1] == 1), dereferencing it yields the index directly.
9029  *
9030  * Then, to get the trace_array descriptor, subtracting that index from
9031  * the pointer gives the start of the array:
9032  *
9033  *   ptr - idx == &index[0]
9034  *
9035  * and a simple container_of() on that address gets us to the
9036  * trace_array descriptor.
9037  */
9038 static void get_tr_index(void *data, struct trace_array **ptr,
9039 			 unsigned int *pindex)
9040 {
9041 	*pindex = *(unsigned char *)data;
9042 
9043 	*ptr = container_of(data - *pindex, struct trace_array,
9044 			    trace_flags_index);
9045 }
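/*
 * As a concrete example: if data points at &tr->trace_flags_index[3], then
 * *data == 3 (init_trace_flags_index() stored each element's own index), so
 * data - 3 is &tr->trace_flags_index[0] and container_of() hands back tr.
 */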
9046 
9047 static ssize_t
9048 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9049 			loff_t *ppos)
9050 {
9051 	void *tr_index = filp->private_data;
9052 	struct trace_array *tr;
9053 	unsigned int index;
9054 	char *buf;
9055 
9056 	get_tr_index(tr_index, &tr, &index);
9057 
9058 	if (tr->trace_flags & (1 << index))
9059 		buf = "1\n";
9060 	else
9061 		buf = "0\n";
9062 
9063 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9064 }
9065 
9066 static ssize_t
9067 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9068 			 loff_t *ppos)
9069 {
9070 	void *tr_index = filp->private_data;
9071 	struct trace_array *tr;
9072 	unsigned int index;
9073 	unsigned long val;
9074 	int ret;
9075 
9076 	get_tr_index(tr_index, &tr, &index);
9077 
9078 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9079 	if (ret)
9080 		return ret;
9081 
9082 	if (val != 0 && val != 1)
9083 		return -EINVAL;
9084 
9085 	mutex_lock(&event_mutex);
9086 	mutex_lock(&trace_types_lock);
9087 	ret = set_tracer_flag(tr, 1 << index, val);
9088 	mutex_unlock(&trace_types_lock);
9089 	mutex_unlock(&event_mutex);
9090 
9091 	if (ret < 0)
9092 		return ret;
9093 
9094 	*ppos += cnt;
9095 
9096 	return cnt;
9097 }
9098 
9099 static const struct file_operations trace_options_core_fops = {
9100 	.open = tracing_open_generic,
9101 	.read = trace_options_core_read,
9102 	.write = trace_options_core_write,
9103 	.llseek = generic_file_llseek,
9104 };
9105 
9106 struct dentry *trace_create_file(const char *name,
9107 				 umode_t mode,
9108 				 struct dentry *parent,
9109 				 void *data,
9110 				 const struct file_operations *fops)
9111 {
9112 	struct dentry *ret;
9113 
9114 	ret = tracefs_create_file(name, mode, parent, data, fops);
9115 	if (!ret)
9116 		pr_warn("Could not create tracefs '%s' entry\n", name);
9117 
9118 	return ret;
9119 }
9120 
9121 
9122 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9123 {
9124 	struct dentry *d_tracer;
9125 
9126 	if (tr->options)
9127 		return tr->options;
9128 
9129 	d_tracer = tracing_get_dentry(tr);
9130 	if (IS_ERR(d_tracer))
9131 		return NULL;
9132 
9133 	tr->options = tracefs_create_dir("options", d_tracer);
9134 	if (!tr->options) {
9135 		pr_warn("Could not create tracefs directory 'options'\n");
9136 		return NULL;
9137 	}
9138 
9139 	return tr->options;
9140 }
9141 
9142 static void
9143 create_trace_option_file(struct trace_array *tr,
9144 			 struct trace_option_dentry *topt,
9145 			 struct tracer_flags *flags,
9146 			 struct tracer_opt *opt)
9147 {
9148 	struct dentry *t_options;
9149 
9150 	t_options = trace_options_init_dentry(tr);
9151 	if (!t_options)
9152 		return;
9153 
9154 	topt->flags = flags;
9155 	topt->opt = opt;
9156 	topt->tr = tr;
9157 
9158 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9159 					t_options, topt, &trace_options_fops);
9160 
9161 }
9162 
9163 static void
9164 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9165 {
9166 	struct trace_option_dentry *topts;
9167 	struct trace_options *tr_topts;
9168 	struct tracer_flags *flags;
9169 	struct tracer_opt *opts;
9170 	int cnt;
9171 	int i;
9172 
9173 	if (!tracer)
9174 		return;
9175 
9176 	flags = tracer->flags;
9177 
9178 	if (!flags || !flags->opts)
9179 		return;
9180 
9181 	/*
9182 	 * If this is an instance, only create flags for tracers
9183 	 * the instance may have.
9184 	 */
9185 	if (!trace_ok_for_array(tracer, tr))
9186 		return;
9187 
9188 	for (i = 0; i < tr->nr_topts; i++) {
9189 		/* Make sure there are no duplicate flags. */
9190 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9191 			return;
9192 	}
9193 
9194 	opts = flags->opts;
9195 
9196 	for (cnt = 0; opts[cnt].name; cnt++)
9197 		;
9198 
9199 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9200 	if (!topts)
9201 		return;
9202 
9203 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9204 			    GFP_KERNEL);
9205 	if (!tr_topts) {
9206 		kfree(topts);
9207 		return;
9208 	}
9209 
9210 	tr->topts = tr_topts;
9211 	tr->topts[tr->nr_topts].tracer = tracer;
9212 	tr->topts[tr->nr_topts].topts = topts;
9213 	tr->nr_topts++;
9214 
9215 	for (cnt = 0; opts[cnt].name; cnt++) {
9216 		create_trace_option_file(tr, &topts[cnt], flags,
9217 					 &opts[cnt]);
9218 		MEM_FAIL(topts[cnt].entry == NULL,
9219 			  "Failed to create trace option: %s",
9220 			  opts[cnt].name);
9221 	}
9222 }
9223 
9224 static struct dentry *
9225 create_trace_option_core_file(struct trace_array *tr,
9226 			      const char *option, long index)
9227 {
9228 	struct dentry *t_options;
9229 
9230 	t_options = trace_options_init_dentry(tr);
9231 	if (!t_options)
9232 		return NULL;
9233 
9234 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9235 				 (void *)&tr->trace_flags_index[index],
9236 				 &trace_options_core_fops);
9237 }
9238 
9239 static void create_trace_options_dir(struct trace_array *tr)
9240 {
9241 	struct dentry *t_options;
9242 	bool top_level = tr == &global_trace;
9243 	int i;
9244 
9245 	t_options = trace_options_init_dentry(tr);
9246 	if (!t_options)
9247 		return;
9248 
9249 	for (i = 0; trace_options[i]; i++) {
9250 		if (top_level ||
9251 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9252 			create_trace_option_core_file(tr, trace_options[i], i);
9253 	}
9254 }
9255 
9256 static ssize_t
9257 rb_simple_read(struct file *filp, char __user *ubuf,
9258 	       size_t cnt, loff_t *ppos)
9259 {
9260 	struct trace_array *tr = filp->private_data;
9261 	char buf[64];
9262 	int r;
9263 
9264 	r = tracer_tracing_is_on(tr);
9265 	r = sprintf(buf, "%d\n", r);
9266 
9267 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9268 }
9269 
9270 static ssize_t
9271 rb_simple_write(struct file *filp, const char __user *ubuf,
9272 		size_t cnt, loff_t *ppos)
9273 {
9274 	struct trace_array *tr = filp->private_data;
9275 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9276 	unsigned long val;
9277 	int ret;
9278 
9279 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9280 	if (ret)
9281 		return ret;
9282 
9283 	if (buffer) {
9284 		mutex_lock(&trace_types_lock);
9285 		if (!!val == tracer_tracing_is_on(tr)) {
9286 			val = 0; /* do nothing */
9287 		} else if (val) {
9288 			tracer_tracing_on(tr);
9289 			if (tr->current_trace->start)
9290 				tr->current_trace->start(tr);
9291 		} else {
9292 			tracer_tracing_off(tr);
9293 			if (tr->current_trace->stop)
9294 				tr->current_trace->stop(tr);
9295 			/* Wake up any waiters */
9296 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9297 		}
9298 		mutex_unlock(&trace_types_lock);
9299 	}
9300 
9301 	(*ppos)++;
9302 
9303 	return cnt;
9304 }
9305 
9306 static const struct file_operations rb_simple_fops = {
9307 	.open		= tracing_open_generic_tr,
9308 	.read		= rb_simple_read,
9309 	.write		= rb_simple_write,
9310 	.release	= tracing_release_generic_tr,
9311 	.llseek		= default_llseek,
9312 };
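/*
 * These fops back the tracing_on file: "echo 0 > tracing_on" stops
 * recording into the ring buffer (and calls the tracer's ->stop()
 * callback), "echo 1 > tracing_on" resumes it via ->start(), and reading
 * the file returns the current 0/1 state.
 */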
9313 
9314 static ssize_t
9315 buffer_percent_read(struct file *filp, char __user *ubuf,
9316 		    size_t cnt, loff_t *ppos)
9317 {
9318 	struct trace_array *tr = filp->private_data;
9319 	char buf[64];
9320 	int r;
9321 
9322 	r = tr->buffer_percent;
9323 	r = sprintf(buf, "%d\n", r);
9324 
9325 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9326 }
9327 
9328 static ssize_t
9329 buffer_percent_write(struct file *filp, const char __user *ubuf,
9330 		     size_t cnt, loff_t *ppos)
9331 {
9332 	struct trace_array *tr = filp->private_data;
9333 	unsigned long val;
9334 	int ret;
9335 
9336 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9337 	if (ret)
9338 		return ret;
9339 
9340 	if (val > 100)
9341 		return -EINVAL;
9342 
9343 	tr->buffer_percent = val;
9344 
9345 	(*ppos)++;
9346 
9347 	return cnt;
9348 }
9349 
9350 static const struct file_operations buffer_percent_fops = {
9351 	.open		= tracing_open_generic_tr,
9352 	.read		= buffer_percent_read,
9353 	.write		= buffer_percent_write,
9354 	.release	= tracing_release_generic_tr,
9355 	.llseek		= default_llseek,
9356 };
9357 
9358 static struct dentry *trace_instance_dir;
9359 
9360 static void
9361 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9362 
9363 static int
9364 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9365 {
9366 	enum ring_buffer_flags rb_flags;
9367 
9368 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9369 
9370 	buf->tr = tr;
9371 
9372 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9373 	if (!buf->buffer)
9374 		return -ENOMEM;
9375 
9376 	buf->data = alloc_percpu(struct trace_array_cpu);
9377 	if (!buf->data) {
9378 		ring_buffer_free(buf->buffer);
9379 		buf->buffer = NULL;
9380 		return -ENOMEM;
9381 	}
9382 
9383 	/* Allocate the first page for all buffers */
9384 	set_buffer_entries(&tr->array_buffer,
9385 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9386 
9387 	return 0;
9388 }
9389 
9390 static void free_trace_buffer(struct array_buffer *buf)
9391 {
9392 	if (buf->buffer) {
9393 		ring_buffer_free(buf->buffer);
9394 		buf->buffer = NULL;
9395 		free_percpu(buf->data);
9396 		buf->data = NULL;
9397 	}
9398 }
9399 
9400 static int allocate_trace_buffers(struct trace_array *tr, int size)
9401 {
9402 	int ret;
9403 
9404 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9405 	if (ret)
9406 		return ret;
9407 
9408 #ifdef CONFIG_TRACER_MAX_TRACE
9409 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9410 				    allocate_snapshot ? size : 1);
9411 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9412 		free_trace_buffer(&tr->array_buffer);
9413 		return -ENOMEM;
9414 	}
9415 	tr->allocated_snapshot = allocate_snapshot;
9416 
9417 	allocate_snapshot = false;
9418 #endif
9419 
9420 	return 0;
9421 }
9422 
9423 static void free_trace_buffers(struct trace_array *tr)
9424 {
9425 	if (!tr)
9426 		return;
9427 
9428 	free_trace_buffer(&tr->array_buffer);
9429 
9430 #ifdef CONFIG_TRACER_MAX_TRACE
9431 	free_trace_buffer(&tr->max_buffer);
9432 #endif
9433 }
9434 
9435 static void init_trace_flags_index(struct trace_array *tr)
9436 {
9437 	int i;
9438 
9439 	/* Used by the trace options files */
9440 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9441 		tr->trace_flags_index[i] = i;
9442 }
9443 
9444 static void __update_tracer_options(struct trace_array *tr)
9445 {
9446 	struct tracer *t;
9447 
9448 	for (t = trace_types; t; t = t->next)
9449 		add_tracer_options(tr, t);
9450 }
9451 
9452 static void update_tracer_options(struct trace_array *tr)
9453 {
9454 	mutex_lock(&trace_types_lock);
9455 	tracer_options_updated = true;
9456 	__update_tracer_options(tr);
9457 	mutex_unlock(&trace_types_lock);
9458 }
9459 
9460 /* Must have trace_types_lock held */
9461 struct trace_array *trace_array_find(const char *instance)
9462 {
9463 	struct trace_array *tr, *found = NULL;
9464 
9465 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9466 		if (tr->name && strcmp(tr->name, instance) == 0) {
9467 			found = tr;
9468 			break;
9469 		}
9470 	}
9471 
9472 	return found;
9473 }
9474 
9475 struct trace_array *trace_array_find_get(const char *instance)
9476 {
9477 	struct trace_array *tr;
9478 
9479 	mutex_lock(&trace_types_lock);
9480 	tr = trace_array_find(instance);
9481 	if (tr)
9482 		tr->ref++;
9483 	mutex_unlock(&trace_types_lock);
9484 
9485 	return tr;
9486 }
9487 
9488 static int trace_array_create_dir(struct trace_array *tr)
9489 {
9490 	int ret;
9491 
9492 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9493 	if (!tr->dir)
9494 		return -EINVAL;
9495 
9496 	ret = event_trace_add_tracer(tr->dir, tr);
9497 	if (ret) {
9498 		tracefs_remove(tr->dir);
9499 		return ret;
9500 	}
9501 
9502 	init_tracer_tracefs(tr, tr->dir);
9503 	__update_tracer_options(tr);
9504 
9505 	return ret;
9506 }
9507 
9508 static struct trace_array *trace_array_create(const char *name)
9509 {
9510 	struct trace_array *tr;
9511 	int ret;
9512 
9513 	ret = -ENOMEM;
9514 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9515 	if (!tr)
9516 		return ERR_PTR(ret);
9517 
9518 	tr->name = kstrdup(name, GFP_KERNEL);
9519 	if (!tr->name)
9520 		goto out_free_tr;
9521 
9522 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9523 		goto out_free_tr;
9524 
9525 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9526 		goto out_free_tr;
9527 
9528 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9529 
9530 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9531 
9532 	raw_spin_lock_init(&tr->start_lock);
9533 
9534 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9535 
9536 	tr->current_trace = &nop_trace;
9537 
9538 	INIT_LIST_HEAD(&tr->systems);
9539 	INIT_LIST_HEAD(&tr->events);
9540 	INIT_LIST_HEAD(&tr->hist_vars);
9541 	INIT_LIST_HEAD(&tr->err_log);
9542 
9543 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9544 		goto out_free_tr;
9545 
9546 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9547 		goto out_free_tr;
9548 
9549 	ftrace_init_trace_array(tr);
9550 
9551 	init_trace_flags_index(tr);
9552 
9553 	if (trace_instance_dir) {
9554 		ret = trace_array_create_dir(tr);
9555 		if (ret)
9556 			goto out_free_tr;
9557 	} else
9558 		__trace_early_add_events(tr);
9559 
9560 	list_add(&tr->list, &ftrace_trace_arrays);
9561 
9562 	tr->ref++;
9563 
9564 	return tr;
9565 
9566  out_free_tr:
9567 	ftrace_free_ftrace_ops(tr);
9568 	free_trace_buffers(tr);
9569 	free_cpumask_var(tr->pipe_cpumask);
9570 	free_cpumask_var(tr->tracing_cpumask);
9571 	kfree(tr->name);
9572 	kfree(tr);
9573 
9574 	return ERR_PTR(ret);
9575 }
9576 
9577 static int instance_mkdir(const char *name)
9578 {
9579 	struct trace_array *tr;
9580 	int ret;
9581 
9582 	mutex_lock(&event_mutex);
9583 	mutex_lock(&trace_types_lock);
9584 
9585 	ret = -EEXIST;
9586 	if (trace_array_find(name))
9587 		goto out_unlock;
9588 
9589 	tr = trace_array_create(name);
9590 
9591 	ret = PTR_ERR_OR_ZERO(tr);
9592 
9593 out_unlock:
9594 	mutex_unlock(&trace_types_lock);
9595 	mutex_unlock(&event_mutex);
9596 	return ret;
9597 }
9598 
9599 /**
9600  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9601  * @name: The name of the trace array to be looked up/created.
9602  *
9603  * Returns a pointer to the trace array with the given name,
9604  * or NULL if it cannot be created.
9605  *
9606  * NOTE: This function increments the reference counter associated with the
9607  * trace array returned. This makes sure it cannot be freed while in use.
9608  * Use trace_array_put() once the trace array is no longer needed.
9609  * If the trace_array is to be freed, trace_array_destroy() needs to
9610  * be called after the trace_array_put(), or simply let user space delete
9611  * it from the tracefs instances directory. But until the
9612  * trace_array_put() is called, user space cannot delete it.
9613  *
9614  */
9615 struct trace_array *trace_array_get_by_name(const char *name)
9616 {
9617 	struct trace_array *tr;
9618 
9619 	mutex_lock(&event_mutex);
9620 	mutex_lock(&trace_types_lock);
9621 
9622 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9623 		if (tr->name && strcmp(tr->name, name) == 0)
9624 			goto out_unlock;
9625 	}
9626 
9627 	tr = trace_array_create(name);
9628 
9629 	if (IS_ERR(tr))
9630 		tr = NULL;
9631 out_unlock:
9632 	if (tr)
9633 		tr->ref++;
9634 
9635 	mutex_unlock(&trace_types_lock);
9636 	mutex_unlock(&event_mutex);
9637 	return tr;
9638 }
9639 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
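
/*
 * A minimal usage sketch for the API above (the instance name
 * "my_instance" is illustrative only):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);
 *
 * trace_array_destroy() is only needed when the caller also wants the
 * instance removed; otherwise user space can delete it via tracefs.
 */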
9640 
9641 static int __remove_instance(struct trace_array *tr)
9642 {
9643 	int i;
9644 
9645 	/* Reference counter for a newly created trace array = 1. */
9646 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9647 		return -EBUSY;
9648 
9649 	list_del(&tr->list);
9650 
9651 	/* Disable all the flags that were enabled coming in */
9652 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9653 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9654 			set_tracer_flag(tr, 1 << i, 0);
9655 	}
9656 
9657 	tracing_set_nop(tr);
9658 	clear_ftrace_function_probes(tr);
9659 	event_trace_del_tracer(tr);
9660 	ftrace_clear_pids(tr);
9661 	ftrace_destroy_function_files(tr);
9662 	tracefs_remove(tr->dir);
9663 	free_percpu(tr->last_func_repeats);
9664 	free_trace_buffers(tr);
9665 	clear_tracing_err_log(tr);
9666 
9667 	for (i = 0; i < tr->nr_topts; i++) {
9668 		kfree(tr->topts[i].topts);
9669 	}
9670 	kfree(tr->topts);
9671 
9672 	free_cpumask_var(tr->pipe_cpumask);
9673 	free_cpumask_var(tr->tracing_cpumask);
9674 	kfree(tr->name);
9675 	kfree(tr);
9676 
9677 	return 0;
9678 }
9679 
9680 int trace_array_destroy(struct trace_array *this_tr)
9681 {
9682 	struct trace_array *tr;
9683 	int ret;
9684 
9685 	if (!this_tr)
9686 		return -EINVAL;
9687 
9688 	mutex_lock(&event_mutex);
9689 	mutex_lock(&trace_types_lock);
9690 
9691 	ret = -ENODEV;
9692 
9693 	/* Make sure the trace array exists before destroying it. */
9694 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9695 		if (tr == this_tr) {
9696 			ret = __remove_instance(tr);
9697 			break;
9698 		}
9699 	}
9700 
9701 	mutex_unlock(&trace_types_lock);
9702 	mutex_unlock(&event_mutex);
9703 
9704 	return ret;
9705 }
9706 EXPORT_SYMBOL_GPL(trace_array_destroy);
9707 
9708 static int instance_rmdir(const char *name)
9709 {
9710 	struct trace_array *tr;
9711 	int ret;
9712 
9713 	mutex_lock(&event_mutex);
9714 	mutex_lock(&trace_types_lock);
9715 
9716 	ret = -ENODEV;
9717 	tr = trace_array_find(name);
9718 	if (tr)
9719 		ret = __remove_instance(tr);
9720 
9721 	mutex_unlock(&trace_types_lock);
9722 	mutex_unlock(&event_mutex);
9723 
9724 	return ret;
9725 }
9726 
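/*
 * The "instances" directory lets user space create and remove trace
 * arrays with plain mkdir/rmdir, for example (assuming tracefs is
 * mounted at /sys/kernel/tracing):
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *	rmdir /sys/kernel/tracing/instances/foo
 *
 * instance_mkdir() and instance_rmdir() above are the callbacks behind
 * those operations.
 */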
9727 static __init void create_trace_instances(struct dentry *d_tracer)
9728 {
9729 	struct trace_array *tr;
9730 
9731 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9732 							 instance_mkdir,
9733 							 instance_rmdir);
9734 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9735 		return;
9736 
9737 	mutex_lock(&event_mutex);
9738 	mutex_lock(&trace_types_lock);
9739 
9740 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9741 		if (!tr->name)
9742 			continue;
9743 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9744 			     "Failed to create instance directory\n"))
9745 			break;
9746 	}
9747 
9748 	mutex_unlock(&trace_types_lock);
9749 	mutex_unlock(&event_mutex);
9750 }
9751 
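/*
 * Create the per-instance control files (current_tracer, trace, trace_pipe,
 * buffer_size_kb, etc.) under @d_tracer.  A NULL parent places the files in
 * the top level tracing directory.
 */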
9752 static void
9753 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9754 {
9755 	struct trace_event_file *file;
9756 	int cpu;
9757 
9758 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9759 			tr, &show_traces_fops);
9760 
9761 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9762 			tr, &set_tracer_fops);
9763 
9764 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9765 			  tr, &tracing_cpumask_fops);
9766 
9767 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9768 			  tr, &tracing_iter_fops);
9769 
9770 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9771 			  tr, &tracing_fops);
9772 
9773 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9774 			  tr, &tracing_pipe_fops);
9775 
9776 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9777 			  tr, &tracing_entries_fops);
9778 
9779 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9780 			  tr, &tracing_total_entries_fops);
9781 
9782 	trace_create_file("free_buffer", 0200, d_tracer,
9783 			  tr, &tracing_free_buffer_fops);
9784 
9785 	trace_create_file("trace_marker", 0220, d_tracer,
9786 			  tr, &tracing_mark_fops);
9787 
9788 	file = __find_event_file(tr, "ftrace", "print");
9789 	if (file && file->ef)
9790 		eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
9791 				  file, &event_trigger_fops);
9792 	tr->trace_marker_file = file;
9793 
9794 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9795 			  tr, &tracing_mark_raw_fops);
9796 
9797 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9798 			  &trace_clock_fops);
9799 
9800 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9801 			  tr, &rb_simple_fops);
9802 
9803 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9804 			  &trace_time_stamp_mode_fops);
9805 
9806 	tr->buffer_percent = 50;
9807 
9808 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9809 			tr, &buffer_percent_fops);
9810 
9811 	create_trace_options_dir(tr);
9812 
9813 #ifdef CONFIG_TRACER_MAX_TRACE
9814 	trace_create_maxlat_file(tr, d_tracer);
9815 #endif
9816 
9817 	if (ftrace_create_function_files(tr, d_tracer))
9818 		MEM_FAIL(1, "Could not allocate function filter files");
9819 
9820 #ifdef CONFIG_TRACER_SNAPSHOT
9821 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9822 			  tr, &snapshot_fops);
9823 #endif
9824 
9825 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9826 			  tr, &tracing_err_log_fops);
9827 
9828 	for_each_tracing_cpu(cpu)
9829 		tracing_init_tracefs_percpu(tr, cpu);
9830 
9831 	ftrace_init_tracefs(tr, d_tracer);
9832 }
9833 
9834 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9835 {
9836 	struct vfsmount *mnt;
9837 	struct file_system_type *type;
9838 
9839 	/*
9840 	 * To maintain backward compatibility for tools that mount
9841 	 * debugfs to get to the tracing facility, tracefs is automatically
9842 	 * mounted to the debugfs/tracing directory.
9843 	 */
9844 	type = get_fs_type("tracefs");
9845 	if (!type)
9846 		return NULL;
9847 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9848 	put_filesystem(type);
9849 	if (IS_ERR(mnt))
9850 		return NULL;
9851 	mntget(mnt);
9852 
9853 	return mnt;
9854 }
9855 
9856 /**
9857  * tracing_init_dentry - initialize top level trace array
9858  *
9859  * This is called when creating files or directories in the tracing
9860  * directory. It is called via fs_initcall() by any of the boot up code
9861  * and returns 0 once the top level tracing directory is available.
9862  */
9863 int tracing_init_dentry(void)
9864 {
9865 	struct trace_array *tr = &global_trace;
9866 
9867 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9868 		pr_warn("Tracing disabled due to lockdown\n");
9869 		return -EPERM;
9870 	}
9871 
9872 	/* The top level trace array uses NULL as parent */
9873 	if (tr->dir)
9874 		return 0;
9875 
9876 	if (WARN_ON(!tracefs_initialized()))
9877 		return -ENODEV;
9878 
9879 	/*
9880 	 * As there may still be users that expect the tracing
9881 	 * files to exist in debugfs/tracing, we must automount
9882 	 * the tracefs file system there, so older tools still
9883 	 * work with the newer kernel.
9884 	 */
9885 	tr->dir = debugfs_create_automount("tracing", NULL,
9886 					   trace_automount, NULL);
9887 
9888 	return 0;
9889 }
9890 
9891 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9892 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9893 
9894 static struct workqueue_struct *eval_map_wq __initdata;
9895 static struct work_struct eval_map_work __initdata;
9896 static struct work_struct tracerfs_init_work __initdata;
9897 
9898 static void __init eval_map_work_func(struct work_struct *work)
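/*
 * Insert the eval (enum/sizeof) maps compiled into the kernel image into
 * the trace event print formats.  This is deferred to a workqueue so that
 * a long list of maps does not stall boot.
 */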
9899 {
9900 	int len;
9901 
9902 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9903 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9904 }
9905 
9906 static int __init trace_eval_init(void)
9907 {
9908 	INIT_WORK(&eval_map_work, eval_map_work_func);
9909 
9910 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9911 	if (!eval_map_wq) {
9912 		pr_err("Unable to allocate eval_map_wq\n");
9913 		/* Do work here */
9914 		eval_map_work_func(&eval_map_work);
9915 		return -ENOMEM;
9916 	}
9917 
9918 	queue_work(eval_map_wq, &eval_map_work);
9919 	return 0;
9920 }
9921 
9922 subsys_initcall(trace_eval_init);
9923 
9924 static int __init trace_eval_sync(void)
9925 {
9926 	/* Make sure the eval map updates are finished */
9927 	if (eval_map_wq)
9928 		destroy_workqueue(eval_map_wq);
9929 	return 0;
9930 }
9931 
9932 late_initcall_sync(trace_eval_sync);
9933 
9934 
9935 #ifdef CONFIG_MODULES
9936 static void trace_module_add_evals(struct module *mod)
9937 {
9938 	if (!mod->num_trace_evals)
9939 		return;
9940 
9941 	/*
9942 	 * Modules with bad taint do not have events created, so do
9943 	 * not bother with their enums either.
9944 	 */
9945 	if (trace_module_has_bad_taint(mod))
9946 		return;
9947 
9948 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9949 }
9950 
9951 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9952 static void trace_module_remove_evals(struct module *mod)
9953 {
9954 	union trace_eval_map_item *map;
9955 	union trace_eval_map_item **last = &trace_eval_maps;
9956 
9957 	if (!mod->num_trace_evals)
9958 		return;
9959 
9960 	mutex_lock(&trace_eval_mutex);
9961 
9962 	map = trace_eval_maps;
9963 
9964 	while (map) {
9965 		if (map->head.mod == mod)
9966 			break;
9967 		map = trace_eval_jmp_to_tail(map);
9968 		last = &map->tail.next;
9969 		map = map->tail.next;
9970 	}
9971 	if (!map)
9972 		goto out;
9973 
9974 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9975 	kfree(map);
9976  out:
9977 	mutex_unlock(&trace_eval_mutex);
9978 }
9979 #else
9980 static inline void trace_module_remove_evals(struct module *mod) { }
9981 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9982 
9983 static int trace_module_notify(struct notifier_block *self,
9984 			       unsigned long val, void *data)
9985 {
9986 	struct module *mod = data;
9987 
9988 	switch (val) {
9989 	case MODULE_STATE_COMING:
9990 		trace_module_add_evals(mod);
9991 		break;
9992 	case MODULE_STATE_GOING:
9993 		trace_module_remove_evals(mod);
9994 		break;
9995 	}
9996 
9997 	return NOTIFY_OK;
9998 }
9999 
10000 static struct notifier_block trace_module_nb = {
10001 	.notifier_call = trace_module_notify,
10002 	.priority = 0,
10003 };
10004 #endif /* CONFIG_MODULES */
10005 
10006 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10007 {
10008 
10009 	event_trace_init();
10010 
10011 	init_tracer_tracefs(&global_trace, NULL);
10012 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10013 
10014 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10015 			&global_trace, &tracing_thresh_fops);
10016 
10017 	trace_create_file("README", TRACE_MODE_READ, NULL,
10018 			NULL, &tracing_readme_fops);
10019 
10020 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10021 			NULL, &tracing_saved_cmdlines_fops);
10022 
10023 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10024 			  NULL, &tracing_saved_cmdlines_size_fops);
10025 
10026 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10027 			NULL, &tracing_saved_tgids_fops);
10028 
10029 	trace_create_eval_file(NULL);
10030 
10031 #ifdef CONFIG_MODULES
10032 	register_module_notifier(&trace_module_nb);
10033 #endif
10034 
10035 #ifdef CONFIG_DYNAMIC_FTRACE
10036 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10037 			NULL, &tracing_dyn_info_fops);
10038 #endif
10039 
10040 	create_trace_instances(NULL);
10041 
10042 	update_tracer_options(&global_trace);
10043 }
10044 
10045 static __init int tracer_init_tracefs(void)
10046 {
10047 	int ret;
10048 
10049 	trace_access_lock_init();
10050 
10051 	ret = tracing_init_dentry();
10052 	if (ret)
10053 		return 0;
10054 
10055 	if (eval_map_wq) {
10056 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10057 		queue_work(eval_map_wq, &tracerfs_init_work);
10058 	} else {
10059 		tracer_init_tracefs_work_func(NULL);
10060 	}
10061 
10062 	rv_init_interface();
10063 
10064 	return 0;
10065 }
10066 
10067 fs_initcall(tracer_init_tracefs);
10068 
10069 static int trace_die_panic_handler(struct notifier_block *self,
10070 				unsigned long ev, void *unused);
10071 
10072 static struct notifier_block trace_panic_notifier = {
10073 	.notifier_call = trace_die_panic_handler,
10074 	.priority = INT_MAX - 1,
10075 };
10076 
10077 static struct notifier_block trace_die_notifier = {
10078 	.notifier_call = trace_die_panic_handler,
10079 	.priority = INT_MAX - 1,
10080 };
10081 
10082 /*
10083  * The idea is to execute the following die/panic callback early, in order
10084  * to avoid showing irrelevant information in the trace (like other panic
10085  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10086  * warnings get disabled (to prevent potential log flooding).
10087  */
10088 static int trace_die_panic_handler(struct notifier_block *self,
10089 				unsigned long ev, void *unused)
10090 {
10091 	if (!ftrace_dump_on_oops)
10092 		return NOTIFY_DONE;
10093 
10094 	/* The die notifier requires DIE_OOPS to trigger */
10095 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10096 		return NOTIFY_DONE;
10097 
10098 	ftrace_dump(ftrace_dump_on_oops);
10099 
10100 	return NOTIFY_DONE;
10101 }
10102 
10103 /*
10104  * printk is set to a max of 1024, but we really don't need it that big.
10105  * Nothing should be printing 1000 characters anyway.
10106  */
10107 #define TRACE_MAX_PRINT		1000
10108 
10109 /*
10110  * Define here KERN_TRACE so that we have one place to modify
10111  * it if we decide to change what log level the ftrace dump
10112  * should be at.
10113  */
10114 #define KERN_TRACE		KERN_EMERG
10115 
10116 void
10117 trace_printk_seq(struct trace_seq *s)
10118 {
10119 	/* Probably should print a warning here. */
10120 	if (s->seq.len >= TRACE_MAX_PRINT)
10121 		s->seq.len = TRACE_MAX_PRINT;
10122 
10123 	/*
10124 	 * More paranoid code. Although the buffer size is set to
10125 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10126 	 * an extra layer of protection.
10127 	 */
10128 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10129 		s->seq.len = s->seq.size - 1;
10130 
10131 	/* Should be zero terminated, but we are paranoid. */
10132 	s->buffer[s->seq.len] = 0;
10133 
10134 	printk(KERN_TRACE "%s", s->buffer);
10135 
10136 	trace_seq_init(s);
10137 }
10138 
10139 void trace_init_global_iter(struct trace_iterator *iter)
10140 {
10141 	iter->tr = &global_trace;
10142 	iter->trace = iter->tr->current_trace;
10143 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10144 	iter->array_buffer = &global_trace.array_buffer;
10145 
10146 	if (iter->trace && iter->trace->open)
10147 		iter->trace->open(iter);
10148 
10149 	/* Annotate start of buffers if we had overruns */
10150 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10151 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10152 
10153 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10154 	if (trace_clocks[iter->tr->clock_id].in_ns)
10155 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10156 
10157 	/* Can not use kmalloc for iter.temp and iter.fmt */
10158 	iter->temp = static_temp_buf;
10159 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10160 	iter->fmt = static_fmt_buf;
10161 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10162 }
10163 
10164 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10165 {
10166 	/* use static because iter can be a bit big for the stack */
10167 	static struct trace_iterator iter;
10168 	static atomic_t dump_running;
10169 	struct trace_array *tr = &global_trace;
10170 	unsigned int old_userobj;
10171 	unsigned long flags;
10172 	int cnt = 0, cpu;
10173 
10174 	/* Only allow one dump user at a time. */
10175 	if (atomic_inc_return(&dump_running) != 1) {
10176 		atomic_dec(&dump_running);
10177 		return;
10178 	}
10179 
10180 	/*
10181 	 * Always turn off tracing when we dump.
10182 	 * We don't need to show trace output of what happens
10183 	 * between multiple crashes.
10184 	 *
10185 	 * If the user does a sysrq-z, then they can re-enable
10186 	 * tracing with echo 1 > tracing_on.
10187 	 */
10188 	tracing_off();
10189 
10190 	local_irq_save(flags);
10191 
10192 	/* Simulate the iterator */
10193 	trace_init_global_iter(&iter);
10194 
10195 	for_each_tracing_cpu(cpu) {
10196 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10197 	}
10198 
10199 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10200 
10201 	/* don't look at user memory in panic mode */
10202 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10203 
10204 	switch (oops_dump_mode) {
10205 	case DUMP_ALL:
10206 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10207 		break;
10208 	case DUMP_ORIG:
10209 		iter.cpu_file = raw_smp_processor_id();
10210 		break;
10211 	case DUMP_NONE:
10212 		goto out_enable;
10213 	default:
10214 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10215 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10216 	}
10217 
10218 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
10219 
10220 	/* Did function tracer already get disabled? */
10221 	if (ftrace_is_dead()) {
10222 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10223 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10224 	}
10225 
10226 	/*
10227 	 * We need to stop all tracing on all CPUs to read
10228 	 * the next buffer. This is a bit expensive, but is
10229 	 * not done often. We fill in all that we can read,
10230 	 * and then release the locks again.
10231 	 */
10232 
10233 	while (!trace_empty(&iter)) {
10234 
10235 		if (!cnt)
10236 			printk(KERN_TRACE "---------------------------------\n");
10237 
10238 		cnt++;
10239 
10240 		trace_iterator_reset(&iter);
10241 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10242 
10243 		if (trace_find_next_entry_inc(&iter) != NULL) {
10244 			int ret;
10245 
10246 			ret = print_trace_line(&iter);
10247 			if (ret != TRACE_TYPE_NO_CONSUME)
10248 				trace_consume(&iter);
10249 		}
10250 		touch_nmi_watchdog();
10251 
10252 		trace_printk_seq(&iter.seq);
10253 	}
10254 
10255 	if (!cnt)
10256 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10257 	else
10258 		printk(KERN_TRACE "---------------------------------\n");
10259 
10260  out_enable:
10261 	tr->trace_flags |= old_userobj;
10262 
10263 	for_each_tracing_cpu(cpu) {
10264 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10265 	}
10266 	atomic_dec(&dump_running);
10267 	local_irq_restore(flags);
10268 }
10269 EXPORT_SYMBOL_GPL(ftrace_dump);
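
/*
 * An illustrative sketch of calling ftrace_dump() from kernel code on a
 * fatal error path (the condition is made up; most users should rely on
 * the ftrace_dump_on_oops command line option or sysrq-z instead):
 *
 *	if (fatal_condition)
 *		ftrace_dump(DUMP_ALL);
 */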
10270 
10271 #define WRITE_BUFSIZE  4096
10272 
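/*
 * Split a user space write into newline separated commands, strip '#'
 * comments, and hand each command to @createfn.  A sketch of a caller
 * (my_create_cmd() and my_write() are illustrative names only):
 *
 *	static int my_create_cmd(const char *cmd)
 *	{
 *		return 0;
 *	}
 *
 *	static ssize_t my_write(struct file *file, const char __user *ubuf,
 *				size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, count, ppos,
 *					       my_create_cmd);
 *	}
 */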
10273 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10274 				size_t count, loff_t *ppos,
10275 				int (*createfn)(const char *))
10276 {
10277 	char *kbuf, *buf, *tmp;
10278 	int ret = 0;
10279 	size_t done = 0;
10280 	size_t size;
10281 
10282 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10283 	if (!kbuf)
10284 		return -ENOMEM;
10285 
10286 	while (done < count) {
10287 		size = count - done;
10288 
10289 		if (size >= WRITE_BUFSIZE)
10290 			size = WRITE_BUFSIZE - 1;
10291 
10292 		if (copy_from_user(kbuf, buffer + done, size)) {
10293 			ret = -EFAULT;
10294 			goto out;
10295 		}
10296 		kbuf[size] = '\0';
10297 		buf = kbuf;
10298 		do {
10299 			tmp = strchr(buf, '\n');
10300 			if (tmp) {
10301 				*tmp = '\0';
10302 				size = tmp - buf + 1;
10303 			} else {
10304 				size = strlen(buf);
10305 				if (done + size < count) {
10306 					if (buf != kbuf)
10307 						break;
10308 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10309 					pr_warn("Line length is too long: Should be less than %d\n",
10310 						WRITE_BUFSIZE - 2);
10311 					ret = -EINVAL;
10312 					goto out;
10313 				}
10314 			}
10315 			done += size;
10316 
10317 			/* Remove comments */
10318 			tmp = strchr(buf, '#');
10319 
10320 			if (tmp)
10321 				*tmp = '\0';
10322 
10323 			ret = createfn(buf);
10324 			if (ret)
10325 				goto out;
10326 			buf += size;
10327 
10328 		} while (done < count);
10329 	}
10330 	ret = done;
10331 
10332 out:
10333 	kfree(kbuf);
10334 
10335 	return ret;
10336 }
10337 
10338 #ifdef CONFIG_TRACER_MAX_TRACE
10339 __init static bool tr_needs_alloc_snapshot(const char *name)
10340 {
10341 	char *test;
10342 	int len = strlen(name);
10343 	bool ret;
10344 
10345 	if (!boot_snapshot_index)
10346 		return false;
10347 
10348 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10349 	    boot_snapshot_info[len] == '\t')
10350 		return true;
10351 
10352 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10353 	if (!test)
10354 		return false;
10355 
10356 	sprintf(test, "\t%s\t", name);
10357 	ret = strstr(boot_snapshot_info, test) == NULL;
10358 	kfree(test);
10359 	return ret;
10360 }
10361 
10362 __init static void do_allocate_snapshot(const char *name)
10363 {
10364 	if (!tr_needs_alloc_snapshot(name))
10365 		return;
10366 
10367 	/*
10368 	 * When allocate_snapshot is set, the next call to
10369 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10370 	 * will allocate the snapshot buffer. That will also clear
10371 	 * this flag.
10372 	 */
10373 	allocate_snapshot = true;
10374 }
10375 #else
10376 static inline void do_allocate_snapshot(const char *name) { }
10377 #endif
10378 
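/*
 * Create the instances requested on the kernel command line.  The
 * boot_instance_info string holds the tab separated "trace_instance="
 * arguments, each of the form name[,event...], for example
 * "trace_instance=foo,sched:sched_switch" (the exact event syntax is
 * whatever early_enable_events() accepts).
 */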
10379 __init static void enable_instances(void)
10380 {
10381 	struct trace_array *tr;
10382 	char *curr_str;
10383 	char *str;
10384 	char *tok;
10385 
10386 	/* A tab is always appended */
10387 	boot_instance_info[boot_instance_index - 1] = '\0';
10388 	str = boot_instance_info;
10389 
10390 	while ((curr_str = strsep(&str, "\t"))) {
10391 
10392 		tok = strsep(&curr_str, ",");
10393 
10394 		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10395 			do_allocate_snapshot(tok);
10396 
10397 		tr = trace_array_get_by_name(tok);
10398 		if (!tr) {
10399 			pr_warn("Failed to create instance buffer %s\n", curr_str);
10400 			continue;
10401 		}
10402 		/* Allow user space to delete it */
10403 		trace_array_put(tr);
10404 
10405 		while ((tok = strsep(&curr_str, ","))) {
10406 			early_enable_events(tr, tok, true);
10407 		}
10408 	}
10409 }
10410 
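/*
 * Allocate the global trace array's ring buffers and register the core
 * tracing infrastructure (nop tracer, CPU hotplug state, panic/die
 * notifiers).  Called from early_trace_init(), before tracefs exists.
 */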
10411 __init static int tracer_alloc_buffers(void)
10412 {
10413 	int ring_buf_size;
10414 	int ret = -ENOMEM;
10415 
10416 
10417 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10418 		pr_warn("Tracing disabled due to lockdown\n");
10419 		return -EPERM;
10420 	}
10421 
10422 	/*
10423 	 * Make sure we don't accidentally add more trace options
10424 	 * than we have bits for.
10425 	 */
10426 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10427 
10428 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10429 		goto out;
10430 
10431 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10432 		goto out_free_buffer_mask;
10433 
10434 	/* Only allocate trace_printk buffers if a trace_printk exists */
10435 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10436 		/* Must be called before global_trace.buffer is allocated */
10437 		trace_printk_init_buffers();
10438 
10439 	/* To save memory, keep the ring buffer size to its minimum */
10440 	if (ring_buffer_expanded)
10441 		ring_buf_size = trace_buf_size;
10442 	else
10443 		ring_buf_size = 1;
10444 
10445 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10446 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10447 
10448 	raw_spin_lock_init(&global_trace.start_lock);
10449 
10450 	/*
10451 	 * The prepare callback allocates some memory for the ring buffer. We
10452 	 * don't free the buffer if the CPU goes down. If we were to free
10453 	 * the buffer, then the user would lose any trace that was in the
10454 	 * buffer. The memory will be removed once the "instance" is removed.
10455 	 */
10456 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10457 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10458 				      NULL);
10459 	if (ret < 0)
10460 		goto out_free_cpumask;
10461 	/* Used for event triggers */
10462 	ret = -ENOMEM;
10463 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10464 	if (!temp_buffer)
10465 		goto out_rm_hp_state;
10466 
10467 	if (trace_create_savedcmd() < 0)
10468 		goto out_free_temp_buffer;
10469 
10470 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10471 		goto out_free_savedcmd;
10472 
10473 	/* TODO: make the number of buffers hot pluggable with CPUs */
10474 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10475 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10476 		goto out_free_pipe_cpumask;
10477 	}
10478 	if (global_trace.buffer_disabled)
10479 		tracing_off();
10480 
10481 	if (trace_boot_clock) {
10482 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10483 		if (ret < 0)
10484 			pr_warn("Trace clock %s not defined, going back to default\n",
10485 				trace_boot_clock);
10486 	}
10487 
10488 	/*
10489 	 * register_tracer() might reference current_trace, so it
10490 	 * needs to be set before we register anything. This is
10491 	 * just a bootstrap of current_trace anyway.
10492 	 */
10493 	global_trace.current_trace = &nop_trace;
10494 
10495 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10496 
10497 	ftrace_init_global_array_ops(&global_trace);
10498 
10499 	init_trace_flags_index(&global_trace);
10500 
10501 	register_tracer(&nop_trace);
10502 
10503 	/* Function tracing may start here (via kernel command line) */
10504 	init_function_trace();
10505 
10506 	/* All seems OK, enable tracing */
10507 	tracing_disabled = 0;
10508 
10509 	atomic_notifier_chain_register(&panic_notifier_list,
10510 				       &trace_panic_notifier);
10511 
10512 	register_die_notifier(&trace_die_notifier);
10513 
10514 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10515 
10516 	INIT_LIST_HEAD(&global_trace.systems);
10517 	INIT_LIST_HEAD(&global_trace.events);
10518 	INIT_LIST_HEAD(&global_trace.hist_vars);
10519 	INIT_LIST_HEAD(&global_trace.err_log);
10520 	list_add(&global_trace.list, &ftrace_trace_arrays);
10521 
10522 	apply_trace_boot_options();
10523 
10524 	register_snapshot_cmd();
10525 
10526 	test_can_verify();
10527 
10528 	return 0;
10529 
10530 out_free_pipe_cpumask:
10531 	free_cpumask_var(global_trace.pipe_cpumask);
10532 out_free_savedcmd:
10533 	free_saved_cmdlines_buffer(savedcmd);
10534 out_free_temp_buffer:
10535 	ring_buffer_free(temp_buffer);
10536 out_rm_hp_state:
10537 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10538 out_free_cpumask:
10539 	free_cpumask_var(global_trace.tracing_cpumask);
10540 out_free_buffer_mask:
10541 	free_cpumask_var(tracing_buffer_mask);
10542 out:
10543 	return ret;
10544 }
10545 
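/*
 * Take a boot-time snapshot of every instance that allocated a snapshot
 * buffer, when the "ftrace_boot_snapshot" command line option was given.
 */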
10546 void __init ftrace_boot_snapshot(void)
10547 {
10548 #ifdef CONFIG_TRACER_MAX_TRACE
10549 	struct trace_array *tr;
10550 
10551 	if (!snapshot_at_boot)
10552 		return;
10553 
10554 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10555 		if (!tr->allocated_snapshot)
10556 			continue;
10557 
10558 		tracing_snapshot_instance(tr);
10559 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10560 	}
10561 #endif
10562 }
10563 
10564 void __init early_trace_init(void)
10565 {
10566 	if (tracepoint_printk) {
10567 		tracepoint_print_iter =
10568 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10569 		if (MEM_FAIL(!tracepoint_print_iter,
10570 			     "Failed to allocate trace iterator\n"))
10571 			tracepoint_printk = 0;
10572 		else
10573 			static_key_enable(&tracepoint_printk_key.key);
10574 	}
10575 	tracer_alloc_buffers();
10576 
10577 	init_events();
10578 }
10579 
10580 void __init trace_init(void)
10581 {
10582 	trace_event_init();
10583 
10584 	if (boot_instance_index)
10585 		enable_instances();
10586 }
10587 
10588 __init static void clear_boot_tracer(void)
10589 {
10590 	/*
10591 	 * The default bootup tracer name points into an init section buffer.
10592 	 * This function is called at late_initcall time. If we did not
10593 	 * find the boot tracer, then clear it out, to prevent
10594 	 * later registration from accessing the buffer that is
10595 	 * about to be freed.
10596 	 */
10597 	if (!default_bootup_tracer)
10598 		return;
10599 
10600 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10601 	       default_bootup_tracer);
10602 	default_bootup_tracer = NULL;
10603 }
10604 
10605 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10606 __init static void tracing_set_default_clock(void)
10607 {
10608 	/* sched_clock_stable() is determined in late_initcall */
10609 	if (!trace_boot_clock && !sched_clock_stable()) {
10610 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10611 			pr_warn("Can not set tracing clock due to lockdown\n");
10612 			return;
10613 		}
10614 
10615 		printk(KERN_WARNING
10616 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10617 		       "If you want to keep using the local clock, then add:\n"
10618 		       "  \"trace_clock=local\"\n"
10619 		       "on the kernel command line\n");
10620 		tracing_set_clock(&global_trace, "global");
10621 	}
10622 }
10623 #else
10624 static inline void tracing_set_default_clock(void) { }
10625 #endif
10626 
10627 __init static int late_trace_init(void)
10628 {
10629 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10630 		static_key_disable(&tracepoint_printk_key.key);
10631 		tracepoint_printk = 0;
10632 	}
10633 
10634 	tracing_set_default_clock();
10635 	clear_boot_tracer();
10636 	return 0;
10637 }
10638 
10639 late_initcall_sync(late_trace_init);
10640