xref: /openbmc/linux/kernel/trace/trace.c (revision 5c816641)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include "trace.h"
54 #include "trace_output.h"
55 
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61 
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will lurk into the ring-buffer to count the
65  * entries inserted during the selftest although some concurrent
66  * insertions into the ring-buffer such as trace_printk could occurred
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92 
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 	{ }
96 };
97 
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 	return 0;
102 }
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputing it to a
128  * serial console.
129  *
130  * It is default off, but you can enable it with either specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  */
136 
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138 
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141 
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145 	struct module			*mod;
146 	unsigned long			length;
147 };
148 
149 union trace_eval_map_item;
150 
151 struct trace_eval_map_tail {
152 	/*
153 	 * "end" is first and points to NULL as it must be different
154 	 * than "mod" or "eval_string"
155 	 */
156 	union trace_eval_map_item	*next;
157 	const char			*end;	/* points to NULL */
158 };
159 
160 static DEFINE_MUTEX(trace_eval_mutex);
161 
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170 	struct trace_eval_map		map;
171 	struct trace_eval_map_head	head;
172 	struct trace_eval_map_tail	tail;
173 };
174 
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177 
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180 				   struct trace_buffer *buffer,
181 				   unsigned int trace_ctx);
182 
183 #define MAX_TRACER_SIZE		100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 
189 static int __init set_cmdline_ftrace(char *str)
190 {
191 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192 	default_bootup_tracer = bootup_tracer_buf;
193 	/* We are using ftrace early, expand it */
194 	ring_buffer_expanded = true;
195 	return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198 
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
202 		ftrace_dump_on_oops = DUMP_ALL;
203 		return 1;
204 	}
205 
206 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207 		ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210 
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214 
215 static int __init stop_trace_on_warning(char *str)
216 {
217 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218 		__disable_trace_on_warning = 1;
219 	return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222 
223 static int __init boot_alloc_snapshot(char *str)
224 {
225 	allocate_snapshot = true;
226 	/* We also need the main ring buffer expanded */
227 	ring_buffer_expanded = true;
228 	return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231 
232 
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234 
235 static int __init set_trace_boot_options(char *str)
236 {
237 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238 	return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241 
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244 
245 static int __init set_trace_boot_clock(char *str)
246 {
247 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248 	trace_boot_clock = trace_boot_clock_buf;
249 	return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252 
253 static int __init set_tracepoint_printk(char *str)
254 {
255 	/* Ignore the "tp_printk_stop_on_boot" param */
256 	if (*str == '_')
257 		return 0;
258 
259 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
260 		tracepoint_printk = 1;
261 	return 1;
262 }
263 __setup("tp_printk", set_tracepoint_printk);
264 
265 static int __init set_tracepoint_printk_stop(char *str)
266 {
267 	tracepoint_printk_stop_on_boot = true;
268 	return 1;
269 }
270 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
271 
272 unsigned long long ns2usecs(u64 nsec)
273 {
274 	nsec += 500;
275 	do_div(nsec, 1000);
276 	return nsec;
277 }
278 
279 static void
280 trace_process_export(struct trace_export *export,
281 	       struct ring_buffer_event *event, int flag)
282 {
283 	struct trace_entry *entry;
284 	unsigned int size = 0;
285 
286 	if (export->flags & flag) {
287 		entry = ring_buffer_event_data(event);
288 		size = ring_buffer_event_length(event);
289 		export->write(export, entry, size);
290 	}
291 }
292 
293 static DEFINE_MUTEX(ftrace_export_lock);
294 
295 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
296 
297 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
298 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
299 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
300 
301 static inline void ftrace_exports_enable(struct trace_export *export)
302 {
303 	if (export->flags & TRACE_EXPORT_FUNCTION)
304 		static_branch_inc(&trace_function_exports_enabled);
305 
306 	if (export->flags & TRACE_EXPORT_EVENT)
307 		static_branch_inc(&trace_event_exports_enabled);
308 
309 	if (export->flags & TRACE_EXPORT_MARKER)
310 		static_branch_inc(&trace_marker_exports_enabled);
311 }
312 
313 static inline void ftrace_exports_disable(struct trace_export *export)
314 {
315 	if (export->flags & TRACE_EXPORT_FUNCTION)
316 		static_branch_dec(&trace_function_exports_enabled);
317 
318 	if (export->flags & TRACE_EXPORT_EVENT)
319 		static_branch_dec(&trace_event_exports_enabled);
320 
321 	if (export->flags & TRACE_EXPORT_MARKER)
322 		static_branch_dec(&trace_marker_exports_enabled);
323 }
324 
325 static void ftrace_exports(struct ring_buffer_event *event, int flag)
326 {
327 	struct trace_export *export;
328 
329 	preempt_disable_notrace();
330 
331 	export = rcu_dereference_raw_check(ftrace_exports_list);
332 	while (export) {
333 		trace_process_export(export, event, flag);
334 		export = rcu_dereference_raw_check(export->next);
335 	}
336 
337 	preempt_enable_notrace();
338 }
339 
340 static inline void
341 add_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343 	rcu_assign_pointer(export->next, *list);
344 	/*
345 	 * We are entering export into the list but another
346 	 * CPU might be walking that list. We need to make sure
347 	 * the export->next pointer is valid before another CPU sees
348 	 * the export pointer included into the list.
349 	 */
350 	rcu_assign_pointer(*list, export);
351 }
352 
353 static inline int
354 rm_trace_export(struct trace_export **list, struct trace_export *export)
355 {
356 	struct trace_export **p;
357 
358 	for (p = list; *p != NULL; p = &(*p)->next)
359 		if (*p == export)
360 			break;
361 
362 	if (*p != export)
363 		return -1;
364 
365 	rcu_assign_pointer(*p, (*p)->next);
366 
367 	return 0;
368 }
369 
370 static inline void
371 add_ftrace_export(struct trace_export **list, struct trace_export *export)
372 {
373 	ftrace_exports_enable(export);
374 
375 	add_trace_export(list, export);
376 }
377 
378 static inline int
379 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
380 {
381 	int ret;
382 
383 	ret = rm_trace_export(list, export);
384 	ftrace_exports_disable(export);
385 
386 	return ret;
387 }
388 
389 int register_ftrace_export(struct trace_export *export)
390 {
391 	if (WARN_ON_ONCE(!export->write))
392 		return -1;
393 
394 	mutex_lock(&ftrace_export_lock);
395 
396 	add_ftrace_export(&ftrace_exports_list, export);
397 
398 	mutex_unlock(&ftrace_export_lock);
399 
400 	return 0;
401 }
402 EXPORT_SYMBOL_GPL(register_ftrace_export);
403 
404 int unregister_ftrace_export(struct trace_export *export)
405 {
406 	int ret;
407 
408 	mutex_lock(&ftrace_export_lock);
409 
410 	ret = rm_ftrace_export(&ftrace_exports_list, export);
411 
412 	mutex_unlock(&ftrace_export_lock);
413 
414 	return ret;
415 }
416 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
417 
418 /* trace_flags holds trace_options default values */
419 #define TRACE_DEFAULT_FLAGS						\
420 	(FUNCTION_DEFAULT_FLAGS |					\
421 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
422 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
423 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
424 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
425 	 TRACE_ITER_HASH_PTR)
426 
427 /* trace_options that are only supported by global_trace */
428 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
429 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
430 
431 /* trace_flags that are default zero for instances */
432 #define ZEROED_TRACE_FLAGS \
433 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
434 
435 /*
436  * The global_trace is the descriptor that holds the top-level tracing
437  * buffers for the live tracing.
438  */
439 static struct trace_array global_trace = {
440 	.trace_flags = TRACE_DEFAULT_FLAGS,
441 };
442 
443 LIST_HEAD(ftrace_trace_arrays);
444 
445 int trace_array_get(struct trace_array *this_tr)
446 {
447 	struct trace_array *tr;
448 	int ret = -ENODEV;
449 
450 	mutex_lock(&trace_types_lock);
451 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
452 		if (tr == this_tr) {
453 			tr->ref++;
454 			ret = 0;
455 			break;
456 		}
457 	}
458 	mutex_unlock(&trace_types_lock);
459 
460 	return ret;
461 }
462 
463 static void __trace_array_put(struct trace_array *this_tr)
464 {
465 	WARN_ON(!this_tr->ref);
466 	this_tr->ref--;
467 }
468 
469 /**
470  * trace_array_put - Decrement the reference counter for this trace array.
471  * @this_tr : pointer to the trace array
472  *
473  * NOTE: Use this when we no longer need the trace array returned by
474  * trace_array_get_by_name(). This ensures the trace array can be later
475  * destroyed.
476  *
477  */
478 void trace_array_put(struct trace_array *this_tr)
479 {
480 	if (!this_tr)
481 		return;
482 
483 	mutex_lock(&trace_types_lock);
484 	__trace_array_put(this_tr);
485 	mutex_unlock(&trace_types_lock);
486 }
487 EXPORT_SYMBOL_GPL(trace_array_put);
488 
489 int tracing_check_open_get_tr(struct trace_array *tr)
490 {
491 	int ret;
492 
493 	ret = security_locked_down(LOCKDOWN_TRACEFS);
494 	if (ret)
495 		return ret;
496 
497 	if (tracing_disabled)
498 		return -ENODEV;
499 
500 	if (tr && trace_array_get(tr) < 0)
501 		return -ENODEV;
502 
503 	return 0;
504 }
505 
506 int call_filter_check_discard(struct trace_event_call *call, void *rec,
507 			      struct trace_buffer *buffer,
508 			      struct ring_buffer_event *event)
509 {
510 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
511 	    !filter_match_preds(call->filter, rec)) {
512 		__trace_event_discard_commit(buffer, event);
513 		return 1;
514 	}
515 
516 	return 0;
517 }
518 
519 /**
520  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
521  * @filtered_pids: The list of pids to check
522  * @search_pid: The PID to find in @filtered_pids
523  *
524  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
525  */
526 bool
527 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
528 {
529 	return trace_pid_list_is_set(filtered_pids, search_pid);
530 }
531 
532 /**
533  * trace_ignore_this_task - should a task be ignored for tracing
534  * @filtered_pids: The list of pids to check
535  * @filtered_no_pids: The list of pids not to be traced
536  * @task: The task that should be ignored if not filtered
537  *
538  * Checks if @task should be traced or not from @filtered_pids.
539  * Returns true if @task should *NOT* be traced.
540  * Returns false if @task should be traced.
541  */
542 bool
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544 		       struct trace_pid_list *filtered_no_pids,
545 		       struct task_struct *task)
546 {
547 	/*
548 	 * If filtered_no_pids is not empty, and the task's pid is listed
549 	 * in filtered_no_pids, then return true.
550 	 * Otherwise, if filtered_pids is empty, that means we can
551 	 * trace all tasks. If it has content, then only trace pids
552 	 * within filtered_pids.
553 	 */
554 
555 	return (filtered_pids &&
556 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
557 		(filtered_no_pids &&
558 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
559 }
560 
561 /**
562  * trace_filter_add_remove_task - Add or remove a task from a pid_list
563  * @pid_list: The list to modify
564  * @self: The current task for fork or NULL for exit
565  * @task: The task to add or remove
566  *
567  * If adding a task, if @self is defined, the task is only added if @self
568  * is also included in @pid_list. This happens on fork and tasks should
569  * only be added when the parent is listed. If @self is NULL, then the
570  * @task pid will be removed from the list, which would happen on exit
571  * of a task.
572  */
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574 				  struct task_struct *self,
575 				  struct task_struct *task)
576 {
577 	if (!pid_list)
578 		return;
579 
580 	/* For forks, we only add if the forking task is listed */
581 	if (self) {
582 		if (!trace_find_filtered_pid(pid_list, self->pid))
583 			return;
584 	}
585 
586 	/* "self" is set for forks, and NULL for exits */
587 	if (self)
588 		trace_pid_list_set(pid_list, task->pid);
589 	else
590 		trace_pid_list_clear(pid_list, task->pid);
591 }
592 
593 /**
594  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
595  * @pid_list: The pid list to show
596  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
597  * @pos: The position of the file
598  *
599  * This is used by the seq_file "next" operation to iterate the pids
600  * listed in a trace_pid_list structure.
601  *
602  * Returns the pid+1 as we want to display pid of zero, but NULL would
603  * stop the iteration.
604  */
605 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
606 {
607 	long pid = (unsigned long)v;
608 	unsigned int next;
609 
610 	(*pos)++;
611 
612 	/* pid already is +1 of the actual previous bit */
613 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
614 		return NULL;
615 
616 	pid = next;
617 
618 	/* Return pid + 1 to allow zero to be represented */
619 	return (void *)(pid + 1);
620 }
621 
622 /**
623  * trace_pid_start - Used for seq_file to start reading pid lists
624  * @pid_list: The pid list to show
625  * @pos: The position of the file
626  *
627  * This is used by seq_file "start" operation to start the iteration
628  * of listing pids.
629  *
630  * Returns the pid+1 as we want to display pid of zero, but NULL would
631  * stop the iteration.
632  */
633 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
634 {
635 	unsigned long pid;
636 	unsigned int first;
637 	loff_t l = 0;
638 
639 	if (trace_pid_list_first(pid_list, &first) < 0)
640 		return NULL;
641 
642 	pid = first;
643 
644 	/* Return pid + 1 so that zero can be the exit value */
645 	for (pid++; pid && l < *pos;
646 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
647 		;
648 	return (void *)pid;
649 }
650 
651 /**
652  * trace_pid_show - show the current pid in seq_file processing
653  * @m: The seq_file structure to write into
654  * @v: A void pointer of the pid (+1) value to display
655  *
656  * Can be directly used by seq_file operations to display the current
657  * pid value.
658  */
659 int trace_pid_show(struct seq_file *m, void *v)
660 {
661 	unsigned long pid = (unsigned long)v - 1;
662 
663 	seq_printf(m, "%lu\n", pid);
664 	return 0;
665 }
666 
667 /* 128 should be much more than enough */
668 #define PID_BUF_SIZE		127
669 
670 int trace_pid_write(struct trace_pid_list *filtered_pids,
671 		    struct trace_pid_list **new_pid_list,
672 		    const char __user *ubuf, size_t cnt)
673 {
674 	struct trace_pid_list *pid_list;
675 	struct trace_parser parser;
676 	unsigned long val;
677 	int nr_pids = 0;
678 	ssize_t read = 0;
679 	ssize_t ret;
680 	loff_t pos;
681 	pid_t pid;
682 
683 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
684 		return -ENOMEM;
685 
686 	/*
687 	 * Always recreate a new array. The write is an all or nothing
688 	 * operation. Always create a new array when adding new pids by
689 	 * the user. If the operation fails, then the current list is
690 	 * not modified.
691 	 */
692 	pid_list = trace_pid_list_alloc();
693 	if (!pid_list) {
694 		trace_parser_put(&parser);
695 		return -ENOMEM;
696 	}
697 
698 	if (filtered_pids) {
699 		/* copy the current bits to the new max */
700 		ret = trace_pid_list_first(filtered_pids, &pid);
701 		while (!ret) {
702 			trace_pid_list_set(pid_list, pid);
703 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
704 			nr_pids++;
705 		}
706 	}
707 
708 	ret = 0;
709 	while (cnt > 0) {
710 
711 		pos = 0;
712 
713 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
714 		if (ret < 0 || !trace_parser_loaded(&parser))
715 			break;
716 
717 		read += ret;
718 		ubuf += ret;
719 		cnt -= ret;
720 
721 		ret = -EINVAL;
722 		if (kstrtoul(parser.buffer, 0, &val))
723 			break;
724 
725 		pid = (pid_t)val;
726 
727 		if (trace_pid_list_set(pid_list, pid) < 0) {
728 			ret = -1;
729 			break;
730 		}
731 		nr_pids++;
732 
733 		trace_parser_clear(&parser);
734 		ret = 0;
735 	}
736 	trace_parser_put(&parser);
737 
738 	if (ret < 0) {
739 		trace_pid_list_free(pid_list);
740 		return ret;
741 	}
742 
743 	if (!nr_pids) {
744 		/* Cleared the list of pids */
745 		trace_pid_list_free(pid_list);
746 		read = ret;
747 		pid_list = NULL;
748 	}
749 
750 	*new_pid_list = pid_list;
751 
752 	return read;
753 }
754 
755 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
756 {
757 	u64 ts;
758 
759 	/* Early boot up does not have a buffer yet */
760 	if (!buf->buffer)
761 		return trace_clock_local();
762 
763 	ts = ring_buffer_time_stamp(buf->buffer);
764 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
765 
766 	return ts;
767 }
768 
769 u64 ftrace_now(int cpu)
770 {
771 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
772 }
773 
774 /**
775  * tracing_is_enabled - Show if global_trace has been enabled
776  *
777  * Shows if the global trace has been enabled or not. It uses the
778  * mirror flag "buffer_disabled" to be used in fast paths such as for
779  * the irqsoff tracer. But it may be inaccurate due to races. If you
780  * need to know the accurate state, use tracing_is_on() which is a little
781  * slower, but accurate.
782  */
783 int tracing_is_enabled(void)
784 {
785 	/*
786 	 * For quick access (irqsoff uses this in fast path), just
787 	 * return the mirror variable of the state of the ring buffer.
788 	 * It's a little racy, but we don't really care.
789 	 */
790 	smp_rmb();
791 	return !global_trace.buffer_disabled;
792 }
793 
794 /*
795  * trace_buf_size is the size in bytes that is allocated
796  * for a buffer. Note, the number of bytes is always rounded
797  * to page size.
798  *
799  * This number is purposely set to a low number of 16384.
800  * If the dump on oops happens, it will be much appreciated
801  * to not have to wait for all that output. Anyway this can be
802  * boot time and run time configurable.
803  */
804 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
805 
806 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
807 
808 /* trace_types holds a link list of available tracers. */
809 static struct tracer		*trace_types __read_mostly;
810 
811 /*
812  * trace_types_lock is used to protect the trace_types list.
813  */
814 DEFINE_MUTEX(trace_types_lock);
815 
816 /*
817  * serialize the access of the ring buffer
818  *
819  * ring buffer serializes readers, but it is low level protection.
820  * The validity of the events (which returns by ring_buffer_peek() ..etc)
821  * are not protected by ring buffer.
822  *
823  * The content of events may become garbage if we allow other process consumes
824  * these events concurrently:
825  *   A) the page of the consumed events may become a normal page
826  *      (not reader page) in ring buffer, and this page will be rewritten
827  *      by events producer.
828  *   B) The page of the consumed events may become a page for splice_read,
829  *      and this page will be returned to system.
830  *
831  * These primitives allow multi process access to different cpu ring buffer
832  * concurrently.
833  *
834  * These primitives don't distinguish read-only and read-consume access.
835  * Multi read-only access are also serialized.
836  */
837 
838 #ifdef CONFIG_SMP
839 static DECLARE_RWSEM(all_cpu_access_lock);
840 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
841 
842 static inline void trace_access_lock(int cpu)
843 {
844 	if (cpu == RING_BUFFER_ALL_CPUS) {
845 		/* gain it for accessing the whole ring buffer. */
846 		down_write(&all_cpu_access_lock);
847 	} else {
848 		/* gain it for accessing a cpu ring buffer. */
849 
850 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
851 		down_read(&all_cpu_access_lock);
852 
853 		/* Secondly block other access to this @cpu ring buffer. */
854 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
855 	}
856 }
857 
858 static inline void trace_access_unlock(int cpu)
859 {
860 	if (cpu == RING_BUFFER_ALL_CPUS) {
861 		up_write(&all_cpu_access_lock);
862 	} else {
863 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
864 		up_read(&all_cpu_access_lock);
865 	}
866 }
867 
868 static inline void trace_access_lock_init(void)
869 {
870 	int cpu;
871 
872 	for_each_possible_cpu(cpu)
873 		mutex_init(&per_cpu(cpu_access_lock, cpu));
874 }
875 
876 #else
877 
878 static DEFINE_MUTEX(access_lock);
879 
880 static inline void trace_access_lock(int cpu)
881 {
882 	(void)cpu;
883 	mutex_lock(&access_lock);
884 }
885 
886 static inline void trace_access_unlock(int cpu)
887 {
888 	(void)cpu;
889 	mutex_unlock(&access_lock);
890 }
891 
892 static inline void trace_access_lock_init(void)
893 {
894 }
895 
896 #endif
897 
898 #ifdef CONFIG_STACKTRACE
899 static void __ftrace_trace_stack(struct trace_buffer *buffer,
900 				 unsigned int trace_ctx,
901 				 int skip, struct pt_regs *regs);
902 static inline void ftrace_trace_stack(struct trace_array *tr,
903 				      struct trace_buffer *buffer,
904 				      unsigned int trace_ctx,
905 				      int skip, struct pt_regs *regs);
906 
907 #else
908 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
909 					unsigned int trace_ctx,
910 					int skip, struct pt_regs *regs)
911 {
912 }
913 static inline void ftrace_trace_stack(struct trace_array *tr,
914 				      struct trace_buffer *buffer,
915 				      unsigned long trace_ctx,
916 				      int skip, struct pt_regs *regs)
917 {
918 }
919 
920 #endif
921 
922 static __always_inline void
923 trace_event_setup(struct ring_buffer_event *event,
924 		  int type, unsigned int trace_ctx)
925 {
926 	struct trace_entry *ent = ring_buffer_event_data(event);
927 
928 	tracing_generic_entry_update(ent, type, trace_ctx);
929 }
930 
931 static __always_inline struct ring_buffer_event *
932 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
933 			  int type,
934 			  unsigned long len,
935 			  unsigned int trace_ctx)
936 {
937 	struct ring_buffer_event *event;
938 
939 	event = ring_buffer_lock_reserve(buffer, len);
940 	if (event != NULL)
941 		trace_event_setup(event, type, trace_ctx);
942 
943 	return event;
944 }
945 
946 void tracer_tracing_on(struct trace_array *tr)
947 {
948 	if (tr->array_buffer.buffer)
949 		ring_buffer_record_on(tr->array_buffer.buffer);
950 	/*
951 	 * This flag is looked at when buffers haven't been allocated
952 	 * yet, or by some tracers (like irqsoff), that just want to
953 	 * know if the ring buffer has been disabled, but it can handle
954 	 * races of where it gets disabled but we still do a record.
955 	 * As the check is in the fast path of the tracers, it is more
956 	 * important to be fast than accurate.
957 	 */
958 	tr->buffer_disabled = 0;
959 	/* Make the flag seen by readers */
960 	smp_wmb();
961 }
962 
963 /**
964  * tracing_on - enable tracing buffers
965  *
966  * This function enables tracing buffers that may have been
967  * disabled with tracing_off.
968  */
969 void tracing_on(void)
970 {
971 	tracer_tracing_on(&global_trace);
972 }
973 EXPORT_SYMBOL_GPL(tracing_on);
974 
975 
976 static __always_inline void
977 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
978 {
979 	__this_cpu_write(trace_taskinfo_save, true);
980 
981 	/* If this is the temp buffer, we need to commit fully */
982 	if (this_cpu_read(trace_buffered_event) == event) {
983 		/* Length is in event->array[0] */
984 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
985 		/* Release the temp buffer */
986 		this_cpu_dec(trace_buffered_event_cnt);
987 		/* ring_buffer_unlock_commit() enables preemption */
988 		preempt_enable_notrace();
989 	} else
990 		ring_buffer_unlock_commit(buffer, event);
991 }
992 
993 /**
994  * __trace_puts - write a constant string into the trace buffer.
995  * @ip:	   The address of the caller
996  * @str:   The constant string to write
997  * @size:  The size of the string.
998  */
999 int __trace_puts(unsigned long ip, const char *str, int size)
1000 {
1001 	struct ring_buffer_event *event;
1002 	struct trace_buffer *buffer;
1003 	struct print_entry *entry;
1004 	unsigned int trace_ctx;
1005 	int alloc;
1006 
1007 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1008 		return 0;
1009 
1010 	if (unlikely(tracing_selftest_running || tracing_disabled))
1011 		return 0;
1012 
1013 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1014 
1015 	trace_ctx = tracing_gen_ctx();
1016 	buffer = global_trace.array_buffer.buffer;
1017 	ring_buffer_nest_start(buffer);
1018 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1019 					    trace_ctx);
1020 	if (!event) {
1021 		size = 0;
1022 		goto out;
1023 	}
1024 
1025 	entry = ring_buffer_event_data(event);
1026 	entry->ip = ip;
1027 
1028 	memcpy(&entry->buf, str, size);
1029 
1030 	/* Add a newline if necessary */
1031 	if (entry->buf[size - 1] != '\n') {
1032 		entry->buf[size] = '\n';
1033 		entry->buf[size + 1] = '\0';
1034 	} else
1035 		entry->buf[size] = '\0';
1036 
1037 	__buffer_unlock_commit(buffer, event);
1038 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1039  out:
1040 	ring_buffer_nest_end(buffer);
1041 	return size;
1042 }
1043 EXPORT_SYMBOL_GPL(__trace_puts);
1044 
1045 /**
1046  * __trace_bputs - write the pointer to a constant string into trace buffer
1047  * @ip:	   The address of the caller
1048  * @str:   The constant string to write to the buffer to
1049  */
1050 int __trace_bputs(unsigned long ip, const char *str)
1051 {
1052 	struct ring_buffer_event *event;
1053 	struct trace_buffer *buffer;
1054 	struct bputs_entry *entry;
1055 	unsigned int trace_ctx;
1056 	int size = sizeof(struct bputs_entry);
1057 	int ret = 0;
1058 
1059 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1060 		return 0;
1061 
1062 	if (unlikely(tracing_selftest_running || tracing_disabled))
1063 		return 0;
1064 
1065 	trace_ctx = tracing_gen_ctx();
1066 	buffer = global_trace.array_buffer.buffer;
1067 
1068 	ring_buffer_nest_start(buffer);
1069 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1070 					    trace_ctx);
1071 	if (!event)
1072 		goto out;
1073 
1074 	entry = ring_buffer_event_data(event);
1075 	entry->ip			= ip;
1076 	entry->str			= str;
1077 
1078 	__buffer_unlock_commit(buffer, event);
1079 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1080 
1081 	ret = 1;
1082  out:
1083 	ring_buffer_nest_end(buffer);
1084 	return ret;
1085 }
1086 EXPORT_SYMBOL_GPL(__trace_bputs);
1087 
1088 #ifdef CONFIG_TRACER_SNAPSHOT
1089 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1090 					   void *cond_data)
1091 {
1092 	struct tracer *tracer = tr->current_trace;
1093 	unsigned long flags;
1094 
1095 	if (in_nmi()) {
1096 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1097 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1098 		return;
1099 	}
1100 
1101 	if (!tr->allocated_snapshot) {
1102 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1103 		internal_trace_puts("*** stopping trace here!   ***\n");
1104 		tracing_off();
1105 		return;
1106 	}
1107 
1108 	/* Note, snapshot can not be used when the tracer uses it */
1109 	if (tracer->use_max_tr) {
1110 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1111 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1112 		return;
1113 	}
1114 
1115 	local_irq_save(flags);
1116 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1117 	local_irq_restore(flags);
1118 }
1119 
1120 void tracing_snapshot_instance(struct trace_array *tr)
1121 {
1122 	tracing_snapshot_instance_cond(tr, NULL);
1123 }
1124 
1125 /**
1126  * tracing_snapshot - take a snapshot of the current buffer.
1127  *
1128  * This causes a swap between the snapshot buffer and the current live
1129  * tracing buffer. You can use this to take snapshots of the live
1130  * trace when some condition is triggered, but continue to trace.
1131  *
1132  * Note, make sure to allocate the snapshot with either
1133  * a tracing_snapshot_alloc(), or by doing it manually
1134  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1135  *
1136  * If the snapshot buffer is not allocated, it will stop tracing.
1137  * Basically making a permanent snapshot.
1138  */
1139 void tracing_snapshot(void)
1140 {
1141 	struct trace_array *tr = &global_trace;
1142 
1143 	tracing_snapshot_instance(tr);
1144 }
1145 EXPORT_SYMBOL_GPL(tracing_snapshot);
1146 
1147 /**
1148  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1149  * @tr:		The tracing instance to snapshot
1150  * @cond_data:	The data to be tested conditionally, and possibly saved
1151  *
1152  * This is the same as tracing_snapshot() except that the snapshot is
1153  * conditional - the snapshot will only happen if the
1154  * cond_snapshot.update() implementation receiving the cond_data
1155  * returns true, which means that the trace array's cond_snapshot
1156  * update() operation used the cond_data to determine whether the
1157  * snapshot should be taken, and if it was, presumably saved it along
1158  * with the snapshot.
1159  */
1160 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1161 {
1162 	tracing_snapshot_instance_cond(tr, cond_data);
1163 }
1164 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1165 
1166 /**
1167  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1168  * @tr:		The tracing instance
1169  *
1170  * When the user enables a conditional snapshot using
1171  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1172  * with the snapshot.  This accessor is used to retrieve it.
1173  *
1174  * Should not be called from cond_snapshot.update(), since it takes
1175  * the tr->max_lock lock, which the code calling
1176  * cond_snapshot.update() has already done.
1177  *
1178  * Returns the cond_data associated with the trace array's snapshot.
1179  */
1180 void *tracing_cond_snapshot_data(struct trace_array *tr)
1181 {
1182 	void *cond_data = NULL;
1183 
1184 	arch_spin_lock(&tr->max_lock);
1185 
1186 	if (tr->cond_snapshot)
1187 		cond_data = tr->cond_snapshot->cond_data;
1188 
1189 	arch_spin_unlock(&tr->max_lock);
1190 
1191 	return cond_data;
1192 }
1193 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1194 
1195 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1196 					struct array_buffer *size_buf, int cpu_id);
1197 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1198 
1199 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1200 {
1201 	int ret;
1202 
1203 	if (!tr->allocated_snapshot) {
1204 
1205 		/* allocate spare buffer */
1206 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1207 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1208 		if (ret < 0)
1209 			return ret;
1210 
1211 		tr->allocated_snapshot = true;
1212 	}
1213 
1214 	return 0;
1215 }
1216 
1217 static void free_snapshot(struct trace_array *tr)
1218 {
1219 	/*
1220 	 * We don't free the ring buffer. instead, resize it because
1221 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1222 	 * we want preserve it.
1223 	 */
1224 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1225 	set_buffer_entries(&tr->max_buffer, 1);
1226 	tracing_reset_online_cpus(&tr->max_buffer);
1227 	tr->allocated_snapshot = false;
1228 }
1229 
1230 /**
1231  * tracing_alloc_snapshot - allocate snapshot buffer.
1232  *
1233  * This only allocates the snapshot buffer if it isn't already
1234  * allocated - it doesn't also take a snapshot.
1235  *
1236  * This is meant to be used in cases where the snapshot buffer needs
1237  * to be set up for events that can't sleep but need to be able to
1238  * trigger a snapshot.
1239  */
1240 int tracing_alloc_snapshot(void)
1241 {
1242 	struct trace_array *tr = &global_trace;
1243 	int ret;
1244 
1245 	ret = tracing_alloc_snapshot_instance(tr);
1246 	WARN_ON(ret < 0);
1247 
1248 	return ret;
1249 }
1250 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1251 
1252 /**
1253  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1254  *
1255  * This is similar to tracing_snapshot(), but it will allocate the
1256  * snapshot buffer if it isn't already allocated. Use this only
1257  * where it is safe to sleep, as the allocation may sleep.
1258  *
1259  * This causes a swap between the snapshot buffer and the current live
1260  * tracing buffer. You can use this to take snapshots of the live
1261  * trace when some condition is triggered, but continue to trace.
1262  */
1263 void tracing_snapshot_alloc(void)
1264 {
1265 	int ret;
1266 
1267 	ret = tracing_alloc_snapshot();
1268 	if (ret < 0)
1269 		return;
1270 
1271 	tracing_snapshot();
1272 }
1273 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1274 
1275 /**
1276  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1277  * @tr:		The tracing instance
1278  * @cond_data:	User data to associate with the snapshot
1279  * @update:	Implementation of the cond_snapshot update function
1280  *
1281  * Check whether the conditional snapshot for the given instance has
1282  * already been enabled, or if the current tracer is already using a
1283  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1284  * save the cond_data and update function inside.
1285  *
1286  * Returns 0 if successful, error otherwise.
1287  */
1288 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1289 				 cond_update_fn_t update)
1290 {
1291 	struct cond_snapshot *cond_snapshot;
1292 	int ret = 0;
1293 
1294 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1295 	if (!cond_snapshot)
1296 		return -ENOMEM;
1297 
1298 	cond_snapshot->cond_data = cond_data;
1299 	cond_snapshot->update = update;
1300 
1301 	mutex_lock(&trace_types_lock);
1302 
1303 	ret = tracing_alloc_snapshot_instance(tr);
1304 	if (ret)
1305 		goto fail_unlock;
1306 
1307 	if (tr->current_trace->use_max_tr) {
1308 		ret = -EBUSY;
1309 		goto fail_unlock;
1310 	}
1311 
1312 	/*
1313 	 * The cond_snapshot can only change to NULL without the
1314 	 * trace_types_lock. We don't care if we race with it going
1315 	 * to NULL, but we want to make sure that it's not set to
1316 	 * something other than NULL when we get here, which we can
1317 	 * do safely with only holding the trace_types_lock and not
1318 	 * having to take the max_lock.
1319 	 */
1320 	if (tr->cond_snapshot) {
1321 		ret = -EBUSY;
1322 		goto fail_unlock;
1323 	}
1324 
1325 	arch_spin_lock(&tr->max_lock);
1326 	tr->cond_snapshot = cond_snapshot;
1327 	arch_spin_unlock(&tr->max_lock);
1328 
1329 	mutex_unlock(&trace_types_lock);
1330 
1331 	return ret;
1332 
1333  fail_unlock:
1334 	mutex_unlock(&trace_types_lock);
1335 	kfree(cond_snapshot);
1336 	return ret;
1337 }
1338 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1339 
1340 /**
1341  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1342  * @tr:		The tracing instance
1343  *
1344  * Check whether the conditional snapshot for the given instance is
1345  * enabled; if so, free the cond_snapshot associated with it,
1346  * otherwise return -EINVAL.
1347  *
1348  * Returns 0 if successful, error otherwise.
1349  */
1350 int tracing_snapshot_cond_disable(struct trace_array *tr)
1351 {
1352 	int ret = 0;
1353 
1354 	arch_spin_lock(&tr->max_lock);
1355 
1356 	if (!tr->cond_snapshot)
1357 		ret = -EINVAL;
1358 	else {
1359 		kfree(tr->cond_snapshot);
1360 		tr->cond_snapshot = NULL;
1361 	}
1362 
1363 	arch_spin_unlock(&tr->max_lock);
1364 
1365 	return ret;
1366 }
1367 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1368 #else
1369 void tracing_snapshot(void)
1370 {
1371 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1372 }
1373 EXPORT_SYMBOL_GPL(tracing_snapshot);
1374 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1375 {
1376 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1377 }
1378 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1379 int tracing_alloc_snapshot(void)
1380 {
1381 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1382 	return -ENODEV;
1383 }
1384 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1385 void tracing_snapshot_alloc(void)
1386 {
1387 	/* Give warning */
1388 	tracing_snapshot();
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1391 void *tracing_cond_snapshot_data(struct trace_array *tr)
1392 {
1393 	return NULL;
1394 }
1395 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1396 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1397 {
1398 	return -ENODEV;
1399 }
1400 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1401 int tracing_snapshot_cond_disable(struct trace_array *tr)
1402 {
1403 	return false;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1406 #endif /* CONFIG_TRACER_SNAPSHOT */
1407 
1408 void tracer_tracing_off(struct trace_array *tr)
1409 {
1410 	if (tr->array_buffer.buffer)
1411 		ring_buffer_record_off(tr->array_buffer.buffer);
1412 	/*
1413 	 * This flag is looked at when buffers haven't been allocated
1414 	 * yet, or by some tracers (like irqsoff), that just want to
1415 	 * know if the ring buffer has been disabled, but it can handle
1416 	 * races of where it gets disabled but we still do a record.
1417 	 * As the check is in the fast path of the tracers, it is more
1418 	 * important to be fast than accurate.
1419 	 */
1420 	tr->buffer_disabled = 1;
1421 	/* Make the flag seen by readers */
1422 	smp_wmb();
1423 }
1424 
1425 /**
1426  * tracing_off - turn off tracing buffers
1427  *
1428  * This function stops the tracing buffers from recording data.
1429  * It does not disable any overhead the tracers themselves may
1430  * be causing. This function simply causes all recording to
1431  * the ring buffers to fail.
1432  */
1433 void tracing_off(void)
1434 {
1435 	tracer_tracing_off(&global_trace);
1436 }
1437 EXPORT_SYMBOL_GPL(tracing_off);
1438 
1439 void disable_trace_on_warning(void)
1440 {
1441 	if (__disable_trace_on_warning) {
1442 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1443 			"Disabling tracing due to warning\n");
1444 		tracing_off();
1445 	}
1446 }
1447 
1448 /**
1449  * tracer_tracing_is_on - show real state of ring buffer enabled
1450  * @tr : the trace array to know if ring buffer is enabled
1451  *
1452  * Shows real state of the ring buffer if it is enabled or not.
1453  */
1454 bool tracer_tracing_is_on(struct trace_array *tr)
1455 {
1456 	if (tr->array_buffer.buffer)
1457 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1458 	return !tr->buffer_disabled;
1459 }
1460 
1461 /**
1462  * tracing_is_on - show state of ring buffers enabled
1463  */
1464 int tracing_is_on(void)
1465 {
1466 	return tracer_tracing_is_on(&global_trace);
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_is_on);
1469 
1470 static int __init set_buf_size(char *str)
1471 {
1472 	unsigned long buf_size;
1473 
1474 	if (!str)
1475 		return 0;
1476 	buf_size = memparse(str, &str);
1477 	/* nr_entries can not be zero */
1478 	if (buf_size == 0)
1479 		return 0;
1480 	trace_buf_size = buf_size;
1481 	return 1;
1482 }
1483 __setup("trace_buf_size=", set_buf_size);
1484 
1485 static int __init set_tracing_thresh(char *str)
1486 {
1487 	unsigned long threshold;
1488 	int ret;
1489 
1490 	if (!str)
1491 		return 0;
1492 	ret = kstrtoul(str, 0, &threshold);
1493 	if (ret < 0)
1494 		return 0;
1495 	tracing_thresh = threshold * 1000;
1496 	return 1;
1497 }
1498 __setup("tracing_thresh=", set_tracing_thresh);
1499 
1500 unsigned long nsecs_to_usecs(unsigned long nsecs)
1501 {
1502 	return nsecs / 1000;
1503 }
1504 
1505 /*
1506  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1507  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1508  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1509  * of strings in the order that the evals (enum) were defined.
1510  */
1511 #undef C
1512 #define C(a, b) b
1513 
1514 /* These must match the bit positions in trace_iterator_flags */
1515 static const char *trace_options[] = {
1516 	TRACE_FLAGS
1517 	NULL
1518 };
1519 
1520 static struct {
1521 	u64 (*func)(void);
1522 	const char *name;
1523 	int in_ns;		/* is this clock in nanoseconds? */
1524 } trace_clocks[] = {
1525 	{ trace_clock_local,		"local",	1 },
1526 	{ trace_clock_global,		"global",	1 },
1527 	{ trace_clock_counter,		"counter",	0 },
1528 	{ trace_clock_jiffies,		"uptime",	0 },
1529 	{ trace_clock,			"perf",		1 },
1530 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1531 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1532 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1533 	ARCH_TRACE_CLOCKS
1534 };
1535 
1536 bool trace_clock_in_ns(struct trace_array *tr)
1537 {
1538 	if (trace_clocks[tr->clock_id].in_ns)
1539 		return true;
1540 
1541 	return false;
1542 }
1543 
1544 /*
1545  * trace_parser_get_init - gets the buffer for trace parser
1546  */
1547 int trace_parser_get_init(struct trace_parser *parser, int size)
1548 {
1549 	memset(parser, 0, sizeof(*parser));
1550 
1551 	parser->buffer = kmalloc(size, GFP_KERNEL);
1552 	if (!parser->buffer)
1553 		return 1;
1554 
1555 	parser->size = size;
1556 	return 0;
1557 }
1558 
1559 /*
1560  * trace_parser_put - frees the buffer for trace parser
1561  */
1562 void trace_parser_put(struct trace_parser *parser)
1563 {
1564 	kfree(parser->buffer);
1565 	parser->buffer = NULL;
1566 }
1567 
1568 /*
1569  * trace_get_user - reads the user input string separated by  space
1570  * (matched by isspace(ch))
1571  *
1572  * For each string found the 'struct trace_parser' is updated,
1573  * and the function returns.
1574  *
1575  * Returns number of bytes read.
1576  *
1577  * See kernel/trace/trace.h for 'struct trace_parser' details.
1578  */
1579 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1580 	size_t cnt, loff_t *ppos)
1581 {
1582 	char ch;
1583 	size_t read = 0;
1584 	ssize_t ret;
1585 
1586 	if (!*ppos)
1587 		trace_parser_clear(parser);
1588 
1589 	ret = get_user(ch, ubuf++);
1590 	if (ret)
1591 		goto out;
1592 
1593 	read++;
1594 	cnt--;
1595 
1596 	/*
1597 	 * The parser is not finished with the last write,
1598 	 * continue reading the user input without skipping spaces.
1599 	 */
1600 	if (!parser->cont) {
1601 		/* skip white space */
1602 		while (cnt && isspace(ch)) {
1603 			ret = get_user(ch, ubuf++);
1604 			if (ret)
1605 				goto out;
1606 			read++;
1607 			cnt--;
1608 		}
1609 
1610 		parser->idx = 0;
1611 
1612 		/* only spaces were written */
1613 		if (isspace(ch) || !ch) {
1614 			*ppos += read;
1615 			ret = read;
1616 			goto out;
1617 		}
1618 	}
1619 
1620 	/* read the non-space input */
1621 	while (cnt && !isspace(ch) && ch) {
1622 		if (parser->idx < parser->size - 1)
1623 			parser->buffer[parser->idx++] = ch;
1624 		else {
1625 			ret = -EINVAL;
1626 			goto out;
1627 		}
1628 		ret = get_user(ch, ubuf++);
1629 		if (ret)
1630 			goto out;
1631 		read++;
1632 		cnt--;
1633 	}
1634 
1635 	/* We either got finished input or we have to wait for another call. */
1636 	if (isspace(ch) || !ch) {
1637 		parser->buffer[parser->idx] = 0;
1638 		parser->cont = false;
1639 	} else if (parser->idx < parser->size - 1) {
1640 		parser->cont = true;
1641 		parser->buffer[parser->idx++] = ch;
1642 		/* Make sure the parsed string always terminates with '\0'. */
1643 		parser->buffer[parser->idx] = 0;
1644 	} else {
1645 		ret = -EINVAL;
1646 		goto out;
1647 	}
1648 
1649 	*ppos += read;
1650 	ret = read;
1651 
1652 out:
1653 	return ret;
1654 }
1655 
1656 /* TODO add a seq_buf_to_buffer() */
1657 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1658 {
1659 	int len;
1660 
1661 	if (trace_seq_used(s) <= s->seq.readpos)
1662 		return -EBUSY;
1663 
1664 	len = trace_seq_used(s) - s->seq.readpos;
1665 	if (cnt > len)
1666 		cnt = len;
1667 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1668 
1669 	s->seq.readpos += cnt;
1670 	return cnt;
1671 }
1672 
1673 unsigned long __read_mostly	tracing_thresh;
1674 static const struct file_operations tracing_max_lat_fops;
1675 
1676 #ifdef LATENCY_FS_NOTIFY
1677 
1678 static struct workqueue_struct *fsnotify_wq;
1679 
1680 static void latency_fsnotify_workfn(struct work_struct *work)
1681 {
1682 	struct trace_array *tr = container_of(work, struct trace_array,
1683 					      fsnotify_work);
1684 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1685 }
1686 
1687 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1688 {
1689 	struct trace_array *tr = container_of(iwork, struct trace_array,
1690 					      fsnotify_irqwork);
1691 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1692 }
1693 
1694 static void trace_create_maxlat_file(struct trace_array *tr,
1695 				     struct dentry *d_tracer)
1696 {
1697 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1698 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1699 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1700 					      TRACE_MODE_WRITE,
1701 					      d_tracer, &tr->max_latency,
1702 					      &tracing_max_lat_fops);
1703 }
1704 
1705 __init static int latency_fsnotify_init(void)
1706 {
1707 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1708 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1709 	if (!fsnotify_wq) {
1710 		pr_err("Unable to allocate tr_max_lat_wq\n");
1711 		return -ENOMEM;
1712 	}
1713 	return 0;
1714 }
1715 
1716 late_initcall_sync(latency_fsnotify_init);
1717 
1718 void latency_fsnotify(struct trace_array *tr)
1719 {
1720 	if (!fsnotify_wq)
1721 		return;
1722 	/*
1723 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1724 	 * possible that we are called from __schedule() or do_idle(), which
1725 	 * could cause a deadlock.
1726 	 */
1727 	irq_work_queue(&tr->fsnotify_irqwork);
1728 }
1729 
1730 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)	\
1731 	|| defined(CONFIG_OSNOISE_TRACER)
1732 
1733 #define trace_create_maxlat_file(tr, d_tracer)				\
1734 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1735 			  d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1736 
1737 #else
1738 #define trace_create_maxlat_file(tr, d_tracer)	 do { } while (0)
1739 #endif
1740 
1741 #ifdef CONFIG_TRACER_MAX_TRACE
1742 /*
1743  * Copy the new maximum trace into the separate maximum-trace
1744  * structure. (this way the maximum trace is permanently saved,
1745  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1746  */
1747 static void
1748 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1749 {
1750 	struct array_buffer *trace_buf = &tr->array_buffer;
1751 	struct array_buffer *max_buf = &tr->max_buffer;
1752 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1753 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1754 
1755 	max_buf->cpu = cpu;
1756 	max_buf->time_start = data->preempt_timestamp;
1757 
1758 	max_data->saved_latency = tr->max_latency;
1759 	max_data->critical_start = data->critical_start;
1760 	max_data->critical_end = data->critical_end;
1761 
1762 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1763 	max_data->pid = tsk->pid;
1764 	/*
1765 	 * If tsk == current, then use current_uid(), as that does not use
1766 	 * RCU. The irq tracer can be called out of RCU scope.
1767 	 */
1768 	if (tsk == current)
1769 		max_data->uid = current_uid();
1770 	else
1771 		max_data->uid = task_uid(tsk);
1772 
1773 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1774 	max_data->policy = tsk->policy;
1775 	max_data->rt_priority = tsk->rt_priority;
1776 
1777 	/* record this tasks comm */
1778 	tracing_record_cmdline(tsk);
1779 	latency_fsnotify(tr);
1780 }
1781 
1782 /**
1783  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1784  * @tr: tracer
1785  * @tsk: the task with the latency
1786  * @cpu: The cpu that initiated the trace.
1787  * @cond_data: User data associated with a conditional snapshot
1788  *
1789  * Flip the buffers between the @tr and the max_tr and record information
1790  * about which task was the cause of this latency.
1791  */
1792 void
1793 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1794 	      void *cond_data)
1795 {
1796 	if (tr->stop_count)
1797 		return;
1798 
1799 	WARN_ON_ONCE(!irqs_disabled());
1800 
1801 	if (!tr->allocated_snapshot) {
1802 		/* Only the nop tracer should hit this when disabling */
1803 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1804 		return;
1805 	}
1806 
1807 	arch_spin_lock(&tr->max_lock);
1808 
1809 	/* Inherit the recordable setting from array_buffer */
1810 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1811 		ring_buffer_record_on(tr->max_buffer.buffer);
1812 	else
1813 		ring_buffer_record_off(tr->max_buffer.buffer);
1814 
1815 #ifdef CONFIG_TRACER_SNAPSHOT
1816 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1817 		goto out_unlock;
1818 #endif
1819 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1820 
1821 	__update_max_tr(tr, tsk, cpu);
1822 
1823  out_unlock:
1824 	arch_spin_unlock(&tr->max_lock);
1825 }
1826 
1827 /**
1828  * update_max_tr_single - only copy one trace over, and reset the rest
1829  * @tr: tracer
1830  * @tsk: task with the latency
1831  * @cpu: the cpu of the buffer to copy.
1832  *
1833  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1834  */
1835 void
1836 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1837 {
1838 	int ret;
1839 
1840 	if (tr->stop_count)
1841 		return;
1842 
1843 	WARN_ON_ONCE(!irqs_disabled());
1844 	if (!tr->allocated_snapshot) {
1845 		/* Only the nop tracer should hit this when disabling */
1846 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1847 		return;
1848 	}
1849 
1850 	arch_spin_lock(&tr->max_lock);
1851 
1852 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1853 
1854 	if (ret == -EBUSY) {
1855 		/*
1856 		 * We failed to swap the buffer due to a commit taking
1857 		 * place on this CPU. We fail to record, but we reset
1858 		 * the max trace buffer (no one writes directly to it)
1859 		 * and flag that it failed.
1860 		 */
1861 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1862 			"Failed to swap buffers due to commit in progress\n");
1863 	}
1864 
1865 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1866 
1867 	__update_max_tr(tr, tsk, cpu);
1868 	arch_spin_unlock(&tr->max_lock);
1869 }
1870 #endif /* CONFIG_TRACER_MAX_TRACE */
1871 
1872 static int wait_on_pipe(struct trace_iterator *iter, int full)
1873 {
1874 	/* Iterators are static, they should be filled or empty */
1875 	if (trace_buffer_iter(iter, iter->cpu_file))
1876 		return 0;
1877 
1878 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1879 				full);
1880 }
1881 
1882 #ifdef CONFIG_FTRACE_STARTUP_TEST
1883 static bool selftests_can_run;
1884 
1885 struct trace_selftests {
1886 	struct list_head		list;
1887 	struct tracer			*type;
1888 };
1889 
1890 static LIST_HEAD(postponed_selftests);
1891 
1892 static int save_selftest(struct tracer *type)
1893 {
1894 	struct trace_selftests *selftest;
1895 
1896 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1897 	if (!selftest)
1898 		return -ENOMEM;
1899 
1900 	selftest->type = type;
1901 	list_add(&selftest->list, &postponed_selftests);
1902 	return 0;
1903 }
1904 
1905 static int run_tracer_selftest(struct tracer *type)
1906 {
1907 	struct trace_array *tr = &global_trace;
1908 	struct tracer *saved_tracer = tr->current_trace;
1909 	int ret;
1910 
1911 	if (!type->selftest || tracing_selftest_disabled)
1912 		return 0;
1913 
1914 	/*
1915 	 * If a tracer registers early in boot up (before scheduling is
1916 	 * initialized and such), then do not run its selftests yet.
1917 	 * Instead, run it a little later in the boot process.
1918 	 */
1919 	if (!selftests_can_run)
1920 		return save_selftest(type);
1921 
1922 	if (!tracing_is_on()) {
1923 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1924 			type->name);
1925 		return 0;
1926 	}
1927 
1928 	/*
1929 	 * Run a selftest on this tracer.
1930 	 * Here we reset the trace buffer, and set the current
1931 	 * tracer to be this tracer. The tracer can then run some
1932 	 * internal tracing to verify that everything is in order.
1933 	 * If we fail, we do not register this tracer.
1934 	 */
1935 	tracing_reset_online_cpus(&tr->array_buffer);
1936 
1937 	tr->current_trace = type;
1938 
1939 #ifdef CONFIG_TRACER_MAX_TRACE
1940 	if (type->use_max_tr) {
1941 		/* If we expanded the buffers, make sure the max is expanded too */
1942 		if (ring_buffer_expanded)
1943 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1944 					   RING_BUFFER_ALL_CPUS);
1945 		tr->allocated_snapshot = true;
1946 	}
1947 #endif
1948 
1949 	/* the test is responsible for initializing and enabling */
1950 	pr_info("Testing tracer %s: ", type->name);
1951 	ret = type->selftest(type, tr);
1952 	/* the test is responsible for resetting too */
1953 	tr->current_trace = saved_tracer;
1954 	if (ret) {
1955 		printk(KERN_CONT "FAILED!\n");
1956 		/* Add the warning after printing 'FAILED' */
1957 		WARN_ON(1);
1958 		return -1;
1959 	}
1960 	/* Only reset on passing, to avoid touching corrupted buffers */
1961 	tracing_reset_online_cpus(&tr->array_buffer);
1962 
1963 #ifdef CONFIG_TRACER_MAX_TRACE
1964 	if (type->use_max_tr) {
1965 		tr->allocated_snapshot = false;
1966 
1967 		/* Shrink the max buffer again */
1968 		if (ring_buffer_expanded)
1969 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1970 					   RING_BUFFER_ALL_CPUS);
1971 	}
1972 #endif
1973 
1974 	printk(KERN_CONT "PASSED\n");
1975 	return 0;
1976 }
1977 
1978 static __init int init_trace_selftests(void)
1979 {
1980 	struct trace_selftests *p, *n;
1981 	struct tracer *t, **last;
1982 	int ret;
1983 
1984 	selftests_can_run = true;
1985 
1986 	mutex_lock(&trace_types_lock);
1987 
1988 	if (list_empty(&postponed_selftests))
1989 		goto out;
1990 
1991 	pr_info("Running postponed tracer tests:\n");
1992 
1993 	tracing_selftest_running = true;
1994 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1995 		/* This loop can take minutes when sanitizers are enabled, so
1996 		 * lets make sure we allow RCU processing.
1997 		 */
1998 		cond_resched();
1999 		ret = run_tracer_selftest(p->type);
2000 		/* If the test fails, then warn and remove from available_tracers */
2001 		if (ret < 0) {
2002 			WARN(1, "tracer: %s failed selftest, disabling\n",
2003 			     p->type->name);
2004 			last = &trace_types;
2005 			for (t = trace_types; t; t = t->next) {
2006 				if (t == p->type) {
2007 					*last = t->next;
2008 					break;
2009 				}
2010 				last = &t->next;
2011 			}
2012 		}
2013 		list_del(&p->list);
2014 		kfree(p);
2015 	}
2016 	tracing_selftest_running = false;
2017 
2018  out:
2019 	mutex_unlock(&trace_types_lock);
2020 
2021 	return 0;
2022 }
2023 core_initcall(init_trace_selftests);
2024 #else
2025 static inline int run_tracer_selftest(struct tracer *type)
2026 {
2027 	return 0;
2028 }
2029 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2030 
2031 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2032 
2033 static void __init apply_trace_boot_options(void);
2034 
2035 /**
2036  * register_tracer - register a tracer with the ftrace system.
2037  * @type: the plugin for the tracer
2038  *
2039  * Register a new plugin tracer.
2040  */
2041 int __init register_tracer(struct tracer *type)
2042 {
2043 	struct tracer *t;
2044 	int ret = 0;
2045 
2046 	if (!type->name) {
2047 		pr_info("Tracer must have a name\n");
2048 		return -1;
2049 	}
2050 
2051 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2052 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2053 		return -1;
2054 	}
2055 
2056 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2057 		pr_warn("Can not register tracer %s due to lockdown\n",
2058 			   type->name);
2059 		return -EPERM;
2060 	}
2061 
2062 	mutex_lock(&trace_types_lock);
2063 
2064 	tracing_selftest_running = true;
2065 
2066 	for (t = trace_types; t; t = t->next) {
2067 		if (strcmp(type->name, t->name) == 0) {
2068 			/* already found */
2069 			pr_info("Tracer %s already registered\n",
2070 				type->name);
2071 			ret = -1;
2072 			goto out;
2073 		}
2074 	}
2075 
2076 	if (!type->set_flag)
2077 		type->set_flag = &dummy_set_flag;
2078 	if (!type->flags) {
2079 		/*allocate a dummy tracer_flags*/
2080 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2081 		if (!type->flags) {
2082 			ret = -ENOMEM;
2083 			goto out;
2084 		}
2085 		type->flags->val = 0;
2086 		type->flags->opts = dummy_tracer_opt;
2087 	} else
2088 		if (!type->flags->opts)
2089 			type->flags->opts = dummy_tracer_opt;
2090 
2091 	/* store the tracer for __set_tracer_option */
2092 	type->flags->trace = type;
2093 
2094 	ret = run_tracer_selftest(type);
2095 	if (ret < 0)
2096 		goto out;
2097 
2098 	type->next = trace_types;
2099 	trace_types = type;
2100 	add_tracer_options(&global_trace, type);
2101 
2102  out:
2103 	tracing_selftest_running = false;
2104 	mutex_unlock(&trace_types_lock);
2105 
2106 	if (ret || !default_bootup_tracer)
2107 		goto out_unlock;
2108 
2109 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2110 		goto out_unlock;
2111 
2112 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2113 	/* Do we want this tracer to start on bootup? */
2114 	tracing_set_tracer(&global_trace, type->name);
2115 	default_bootup_tracer = NULL;
2116 
2117 	apply_trace_boot_options();
2118 
2119 	/* disable other selftests, since this will break it. */
2120 	disable_tracing_selftest("running a tracer");
2121 
2122  out_unlock:
2123 	return ret;
2124 }
2125 
2126 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2127 {
2128 	struct trace_buffer *buffer = buf->buffer;
2129 
2130 	if (!buffer)
2131 		return;
2132 
2133 	ring_buffer_record_disable(buffer);
2134 
2135 	/* Make sure all commits have finished */
2136 	synchronize_rcu();
2137 	ring_buffer_reset_cpu(buffer, cpu);
2138 
2139 	ring_buffer_record_enable(buffer);
2140 }
2141 
2142 void tracing_reset_online_cpus(struct array_buffer *buf)
2143 {
2144 	struct trace_buffer *buffer = buf->buffer;
2145 
2146 	if (!buffer)
2147 		return;
2148 
2149 	ring_buffer_record_disable(buffer);
2150 
2151 	/* Make sure all commits have finished */
2152 	synchronize_rcu();
2153 
2154 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2155 
2156 	ring_buffer_reset_online_cpus(buffer);
2157 
2158 	ring_buffer_record_enable(buffer);
2159 }
2160 
2161 /* Must have trace_types_lock held */
2162 void tracing_reset_all_online_cpus(void)
2163 {
2164 	struct trace_array *tr;
2165 
2166 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2167 		if (!tr->clear_trace)
2168 			continue;
2169 		tr->clear_trace = false;
2170 		tracing_reset_online_cpus(&tr->array_buffer);
2171 #ifdef CONFIG_TRACER_MAX_TRACE
2172 		tracing_reset_online_cpus(&tr->max_buffer);
2173 #endif
2174 	}
2175 }
2176 
2177 /*
2178  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2179  * is the tgid last observed corresponding to pid=i.
2180  */
2181 static int *tgid_map;
2182 
2183 /* The maximum valid index into tgid_map. */
2184 static size_t tgid_map_max;
2185 
2186 #define SAVED_CMDLINES_DEFAULT 128
2187 #define NO_CMDLINE_MAP UINT_MAX
2188 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2189 struct saved_cmdlines_buffer {
2190 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2191 	unsigned *map_cmdline_to_pid;
2192 	unsigned cmdline_num;
2193 	int cmdline_idx;
2194 	char *saved_cmdlines;
2195 };
2196 static struct saved_cmdlines_buffer *savedcmd;
2197 
2198 static inline char *get_saved_cmdlines(int idx)
2199 {
2200 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2201 }
2202 
2203 static inline void set_cmdline(int idx, const char *cmdline)
2204 {
2205 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2206 }
2207 
2208 static int allocate_cmdlines_buffer(unsigned int val,
2209 				    struct saved_cmdlines_buffer *s)
2210 {
2211 	s->map_cmdline_to_pid = kmalloc_array(val,
2212 					      sizeof(*s->map_cmdline_to_pid),
2213 					      GFP_KERNEL);
2214 	if (!s->map_cmdline_to_pid)
2215 		return -ENOMEM;
2216 
2217 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2218 	if (!s->saved_cmdlines) {
2219 		kfree(s->map_cmdline_to_pid);
2220 		return -ENOMEM;
2221 	}
2222 
2223 	s->cmdline_idx = 0;
2224 	s->cmdline_num = val;
2225 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2226 	       sizeof(s->map_pid_to_cmdline));
2227 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2228 	       val * sizeof(*s->map_cmdline_to_pid));
2229 
2230 	return 0;
2231 }
2232 
2233 static int trace_create_savedcmd(void)
2234 {
2235 	int ret;
2236 
2237 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2238 	if (!savedcmd)
2239 		return -ENOMEM;
2240 
2241 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2242 	if (ret < 0) {
2243 		kfree(savedcmd);
2244 		savedcmd = NULL;
2245 		return -ENOMEM;
2246 	}
2247 
2248 	return 0;
2249 }
2250 
2251 int is_tracing_stopped(void)
2252 {
2253 	return global_trace.stop_count;
2254 }
2255 
2256 /**
2257  * tracing_start - quick start of the tracer
2258  *
2259  * If tracing is enabled but was stopped by tracing_stop,
2260  * this will start the tracer back up.
2261  */
2262 void tracing_start(void)
2263 {
2264 	struct trace_buffer *buffer;
2265 	unsigned long flags;
2266 
2267 	if (tracing_disabled)
2268 		return;
2269 
2270 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2271 	if (--global_trace.stop_count) {
2272 		if (global_trace.stop_count < 0) {
2273 			/* Someone screwed up their debugging */
2274 			WARN_ON_ONCE(1);
2275 			global_trace.stop_count = 0;
2276 		}
2277 		goto out;
2278 	}
2279 
2280 	/* Prevent the buffers from switching */
2281 	arch_spin_lock(&global_trace.max_lock);
2282 
2283 	buffer = global_trace.array_buffer.buffer;
2284 	if (buffer)
2285 		ring_buffer_record_enable(buffer);
2286 
2287 #ifdef CONFIG_TRACER_MAX_TRACE
2288 	buffer = global_trace.max_buffer.buffer;
2289 	if (buffer)
2290 		ring_buffer_record_enable(buffer);
2291 #endif
2292 
2293 	arch_spin_unlock(&global_trace.max_lock);
2294 
2295  out:
2296 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2297 }
2298 
2299 static void tracing_start_tr(struct trace_array *tr)
2300 {
2301 	struct trace_buffer *buffer;
2302 	unsigned long flags;
2303 
2304 	if (tracing_disabled)
2305 		return;
2306 
2307 	/* If global, we need to also start the max tracer */
2308 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2309 		return tracing_start();
2310 
2311 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2312 
2313 	if (--tr->stop_count) {
2314 		if (tr->stop_count < 0) {
2315 			/* Someone screwed up their debugging */
2316 			WARN_ON_ONCE(1);
2317 			tr->stop_count = 0;
2318 		}
2319 		goto out;
2320 	}
2321 
2322 	buffer = tr->array_buffer.buffer;
2323 	if (buffer)
2324 		ring_buffer_record_enable(buffer);
2325 
2326  out:
2327 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2328 }
2329 
2330 /**
2331  * tracing_stop - quick stop of the tracer
2332  *
2333  * Light weight way to stop tracing. Use in conjunction with
2334  * tracing_start.
2335  */
2336 void tracing_stop(void)
2337 {
2338 	struct trace_buffer *buffer;
2339 	unsigned long flags;
2340 
2341 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2342 	if (global_trace.stop_count++)
2343 		goto out;
2344 
2345 	/* Prevent the buffers from switching */
2346 	arch_spin_lock(&global_trace.max_lock);
2347 
2348 	buffer = global_trace.array_buffer.buffer;
2349 	if (buffer)
2350 		ring_buffer_record_disable(buffer);
2351 
2352 #ifdef CONFIG_TRACER_MAX_TRACE
2353 	buffer = global_trace.max_buffer.buffer;
2354 	if (buffer)
2355 		ring_buffer_record_disable(buffer);
2356 #endif
2357 
2358 	arch_spin_unlock(&global_trace.max_lock);
2359 
2360  out:
2361 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2362 }
2363 
2364 static void tracing_stop_tr(struct trace_array *tr)
2365 {
2366 	struct trace_buffer *buffer;
2367 	unsigned long flags;
2368 
2369 	/* If global, we need to also stop the max tracer */
2370 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2371 		return tracing_stop();
2372 
2373 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2374 	if (tr->stop_count++)
2375 		goto out;
2376 
2377 	buffer = tr->array_buffer.buffer;
2378 	if (buffer)
2379 		ring_buffer_record_disable(buffer);
2380 
2381  out:
2382 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2383 }
2384 
2385 static int trace_save_cmdline(struct task_struct *tsk)
2386 {
2387 	unsigned tpid, idx;
2388 
2389 	/* treat recording of idle task as a success */
2390 	if (!tsk->pid)
2391 		return 1;
2392 
2393 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2394 
2395 	/*
2396 	 * It's not the end of the world if we don't get
2397 	 * the lock, but we also don't want to spin
2398 	 * nor do we want to disable interrupts,
2399 	 * so if we miss here, then better luck next time.
2400 	 */
2401 	if (!arch_spin_trylock(&trace_cmdline_lock))
2402 		return 0;
2403 
2404 	idx = savedcmd->map_pid_to_cmdline[tpid];
2405 	if (idx == NO_CMDLINE_MAP) {
2406 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2407 
2408 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2409 		savedcmd->cmdline_idx = idx;
2410 	}
2411 
2412 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2413 	set_cmdline(idx, tsk->comm);
2414 
2415 	arch_spin_unlock(&trace_cmdline_lock);
2416 
2417 	return 1;
2418 }
2419 
2420 static void __trace_find_cmdline(int pid, char comm[])
2421 {
2422 	unsigned map;
2423 	int tpid;
2424 
2425 	if (!pid) {
2426 		strcpy(comm, "<idle>");
2427 		return;
2428 	}
2429 
2430 	if (WARN_ON_ONCE(pid < 0)) {
2431 		strcpy(comm, "<XXX>");
2432 		return;
2433 	}
2434 
2435 	tpid = pid & (PID_MAX_DEFAULT - 1);
2436 	map = savedcmd->map_pid_to_cmdline[tpid];
2437 	if (map != NO_CMDLINE_MAP) {
2438 		tpid = savedcmd->map_cmdline_to_pid[map];
2439 		if (tpid == pid) {
2440 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2441 			return;
2442 		}
2443 	}
2444 	strcpy(comm, "<...>");
2445 }
2446 
2447 void trace_find_cmdline(int pid, char comm[])
2448 {
2449 	preempt_disable();
2450 	arch_spin_lock(&trace_cmdline_lock);
2451 
2452 	__trace_find_cmdline(pid, comm);
2453 
2454 	arch_spin_unlock(&trace_cmdline_lock);
2455 	preempt_enable();
2456 }
2457 
2458 static int *trace_find_tgid_ptr(int pid)
2459 {
2460 	/*
2461 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2462 	 * if we observe a non-NULL tgid_map then we also observe the correct
2463 	 * tgid_map_max.
2464 	 */
2465 	int *map = smp_load_acquire(&tgid_map);
2466 
2467 	if (unlikely(!map || pid > tgid_map_max))
2468 		return NULL;
2469 
2470 	return &map[pid];
2471 }
2472 
2473 int trace_find_tgid(int pid)
2474 {
2475 	int *ptr = trace_find_tgid_ptr(pid);
2476 
2477 	return ptr ? *ptr : 0;
2478 }
2479 
2480 static int trace_save_tgid(struct task_struct *tsk)
2481 {
2482 	int *ptr;
2483 
2484 	/* treat recording of idle task as a success */
2485 	if (!tsk->pid)
2486 		return 1;
2487 
2488 	ptr = trace_find_tgid_ptr(tsk->pid);
2489 	if (!ptr)
2490 		return 0;
2491 
2492 	*ptr = tsk->tgid;
2493 	return 1;
2494 }
2495 
2496 static bool tracing_record_taskinfo_skip(int flags)
2497 {
2498 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2499 		return true;
2500 	if (!__this_cpu_read(trace_taskinfo_save))
2501 		return true;
2502 	return false;
2503 }
2504 
2505 /**
2506  * tracing_record_taskinfo - record the task info of a task
2507  *
2508  * @task:  task to record
2509  * @flags: TRACE_RECORD_CMDLINE for recording comm
2510  *         TRACE_RECORD_TGID for recording tgid
2511  */
2512 void tracing_record_taskinfo(struct task_struct *task, int flags)
2513 {
2514 	bool done;
2515 
2516 	if (tracing_record_taskinfo_skip(flags))
2517 		return;
2518 
2519 	/*
2520 	 * Record as much task information as possible. If some fail, continue
2521 	 * to try to record the others.
2522 	 */
2523 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2524 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2525 
2526 	/* If recording any information failed, retry again soon. */
2527 	if (!done)
2528 		return;
2529 
2530 	__this_cpu_write(trace_taskinfo_save, false);
2531 }
2532 
2533 /**
2534  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2535  *
2536  * @prev: previous task during sched_switch
2537  * @next: next task during sched_switch
2538  * @flags: TRACE_RECORD_CMDLINE for recording comm
2539  *         TRACE_RECORD_TGID for recording tgid
2540  */
2541 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2542 					  struct task_struct *next, int flags)
2543 {
2544 	bool done;
2545 
2546 	if (tracing_record_taskinfo_skip(flags))
2547 		return;
2548 
2549 	/*
2550 	 * Record as much task information as possible. If some fail, continue
2551 	 * to try to record the others.
2552 	 */
2553 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2554 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2555 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2556 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2557 
2558 	/* If recording any information failed, retry again soon. */
2559 	if (!done)
2560 		return;
2561 
2562 	__this_cpu_write(trace_taskinfo_save, false);
2563 }
2564 
2565 /* Helpers to record a specific task information */
2566 void tracing_record_cmdline(struct task_struct *task)
2567 {
2568 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2569 }
2570 
2571 void tracing_record_tgid(struct task_struct *task)
2572 {
2573 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2574 }
2575 
2576 /*
2577  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2578  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2579  * simplifies those functions and keeps them in sync.
2580  */
2581 enum print_line_t trace_handle_return(struct trace_seq *s)
2582 {
2583 	return trace_seq_has_overflowed(s) ?
2584 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2585 }
2586 EXPORT_SYMBOL_GPL(trace_handle_return);
2587 
2588 static unsigned short migration_disable_value(void)
2589 {
2590 #if defined(CONFIG_SMP)
2591 	return current->migration_disabled;
2592 #else
2593 	return 0;
2594 #endif
2595 }
2596 
2597 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2598 {
2599 	unsigned int trace_flags = irqs_status;
2600 	unsigned int pc;
2601 
2602 	pc = preempt_count();
2603 
2604 	if (pc & NMI_MASK)
2605 		trace_flags |= TRACE_FLAG_NMI;
2606 	if (pc & HARDIRQ_MASK)
2607 		trace_flags |= TRACE_FLAG_HARDIRQ;
2608 	if (in_serving_softirq())
2609 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2610 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2611 		trace_flags |= TRACE_FLAG_BH_OFF;
2612 
2613 	if (tif_need_resched())
2614 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2615 	if (test_preempt_need_resched())
2616 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2617 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2618 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2619 }
2620 
2621 struct ring_buffer_event *
2622 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2623 			  int type,
2624 			  unsigned long len,
2625 			  unsigned int trace_ctx)
2626 {
2627 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2628 }
2629 
2630 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2631 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2632 static int trace_buffered_event_ref;
2633 
2634 /**
2635  * trace_buffered_event_enable - enable buffering events
2636  *
2637  * When events are being filtered, it is quicker to use a temporary
2638  * buffer to write the event data into if there's a likely chance
2639  * that it will not be committed. The discard of the ring buffer
2640  * is not as fast as committing, and is much slower than copying
2641  * a commit.
2642  *
2643  * When an event is to be filtered, allocate per cpu buffers to
2644  * write the event data into, and if the event is filtered and discarded
2645  * it is simply dropped, otherwise, the entire data is to be committed
2646  * in one shot.
2647  */
2648 void trace_buffered_event_enable(void)
2649 {
2650 	struct ring_buffer_event *event;
2651 	struct page *page;
2652 	int cpu;
2653 
2654 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2655 
2656 	if (trace_buffered_event_ref++)
2657 		return;
2658 
2659 	for_each_tracing_cpu(cpu) {
2660 		page = alloc_pages_node(cpu_to_node(cpu),
2661 					GFP_KERNEL | __GFP_NORETRY, 0);
2662 		if (!page)
2663 			goto failed;
2664 
2665 		event = page_address(page);
2666 		memset(event, 0, sizeof(*event));
2667 
2668 		per_cpu(trace_buffered_event, cpu) = event;
2669 
2670 		preempt_disable();
2671 		if (cpu == smp_processor_id() &&
2672 		    __this_cpu_read(trace_buffered_event) !=
2673 		    per_cpu(trace_buffered_event, cpu))
2674 			WARN_ON_ONCE(1);
2675 		preempt_enable();
2676 	}
2677 
2678 	return;
2679  failed:
2680 	trace_buffered_event_disable();
2681 }
2682 
2683 static void enable_trace_buffered_event(void *data)
2684 {
2685 	/* Probably not needed, but do it anyway */
2686 	smp_rmb();
2687 	this_cpu_dec(trace_buffered_event_cnt);
2688 }
2689 
2690 static void disable_trace_buffered_event(void *data)
2691 {
2692 	this_cpu_inc(trace_buffered_event_cnt);
2693 }
2694 
2695 /**
2696  * trace_buffered_event_disable - disable buffering events
2697  *
2698  * When a filter is removed, it is faster to not use the buffered
2699  * events, and to commit directly into the ring buffer. Free up
2700  * the temp buffers when there are no more users. This requires
2701  * special synchronization with current events.
2702  */
2703 void trace_buffered_event_disable(void)
2704 {
2705 	int cpu;
2706 
2707 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2708 
2709 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2710 		return;
2711 
2712 	if (--trace_buffered_event_ref)
2713 		return;
2714 
2715 	preempt_disable();
2716 	/* For each CPU, set the buffer as used. */
2717 	smp_call_function_many(tracing_buffer_mask,
2718 			       disable_trace_buffered_event, NULL, 1);
2719 	preempt_enable();
2720 
2721 	/* Wait for all current users to finish */
2722 	synchronize_rcu();
2723 
2724 	for_each_tracing_cpu(cpu) {
2725 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2726 		per_cpu(trace_buffered_event, cpu) = NULL;
2727 	}
2728 	/*
2729 	 * Make sure trace_buffered_event is NULL before clearing
2730 	 * trace_buffered_event_cnt.
2731 	 */
2732 	smp_wmb();
2733 
2734 	preempt_disable();
2735 	/* Do the work on each cpu */
2736 	smp_call_function_many(tracing_buffer_mask,
2737 			       enable_trace_buffered_event, NULL, 1);
2738 	preempt_enable();
2739 }
2740 
2741 static struct trace_buffer *temp_buffer;
2742 
2743 struct ring_buffer_event *
2744 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2745 			  struct trace_event_file *trace_file,
2746 			  int type, unsigned long len,
2747 			  unsigned int trace_ctx)
2748 {
2749 	struct ring_buffer_event *entry;
2750 	struct trace_array *tr = trace_file->tr;
2751 	int val;
2752 
2753 	*current_rb = tr->array_buffer.buffer;
2754 
2755 	if (!tr->no_filter_buffering_ref &&
2756 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2757 		preempt_disable_notrace();
2758 		/*
2759 		 * Filtering is on, so try to use the per cpu buffer first.
2760 		 * This buffer will simulate a ring_buffer_event,
2761 		 * where the type_len is zero and the array[0] will
2762 		 * hold the full length.
2763 		 * (see include/linux/ring-buffer.h for details on
2764 		 *  how the ring_buffer_event is structured).
2765 		 *
2766 		 * Using a temp buffer during filtering and copying it
2767 		 * on a matched filter is quicker than writing directly
2768 		 * into the ring buffer and then discarding it when
2769 		 * it doesn't match. That is because the discard
2770 		 * requires several atomic operations to get right.
2771 		 * Copying on match and doing nothing on a failed match
2772 		 * is still quicker than no copy on match, but having
2773 		 * to discard out of the ring buffer on a failed match.
2774 		 */
2775 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2776 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2777 
2778 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2779 
2780 			/*
2781 			 * Preemption is disabled, but interrupts and NMIs
2782 			 * can still come in now. If that happens after
2783 			 * the above increment, then it will have to go
2784 			 * back to the old method of allocating the event
2785 			 * on the ring buffer, and if the filter fails, it
2786 			 * will have to call ring_buffer_discard_commit()
2787 			 * to remove it.
2788 			 *
2789 			 * Need to also check the unlikely case that the
2790 			 * length is bigger than the temp buffer size.
2791 			 * If that happens, then the reserve is pretty much
2792 			 * guaranteed to fail, as the ring buffer currently
2793 			 * only allows events less than a page. But that may
2794 			 * change in the future, so let the ring buffer reserve
2795 			 * handle the failure in that case.
2796 			 */
2797 			if (val == 1 && likely(len <= max_len)) {
2798 				trace_event_setup(entry, type, trace_ctx);
2799 				entry->array[0] = len;
2800 				/* Return with preemption disabled */
2801 				return entry;
2802 			}
2803 			this_cpu_dec(trace_buffered_event_cnt);
2804 		}
2805 		/* __trace_buffer_lock_reserve() disables preemption */
2806 		preempt_enable_notrace();
2807 	}
2808 
2809 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2810 					    trace_ctx);
2811 	/*
2812 	 * If tracing is off, but we have triggers enabled
2813 	 * we still need to look at the event data. Use the temp_buffer
2814 	 * to store the trace event for the trigger to use. It's recursive
2815 	 * safe and will not be recorded anywhere.
2816 	 */
2817 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2818 		*current_rb = temp_buffer;
2819 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2820 						    trace_ctx);
2821 	}
2822 	return entry;
2823 }
2824 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2825 
2826 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2827 static DEFINE_MUTEX(tracepoint_printk_mutex);
2828 
2829 static void output_printk(struct trace_event_buffer *fbuffer)
2830 {
2831 	struct trace_event_call *event_call;
2832 	struct trace_event_file *file;
2833 	struct trace_event *event;
2834 	unsigned long flags;
2835 	struct trace_iterator *iter = tracepoint_print_iter;
2836 
2837 	/* We should never get here if iter is NULL */
2838 	if (WARN_ON_ONCE(!iter))
2839 		return;
2840 
2841 	event_call = fbuffer->trace_file->event_call;
2842 	if (!event_call || !event_call->event.funcs ||
2843 	    !event_call->event.funcs->trace)
2844 		return;
2845 
2846 	file = fbuffer->trace_file;
2847 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2848 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2849 	     !filter_match_preds(file->filter, fbuffer->entry)))
2850 		return;
2851 
2852 	event = &fbuffer->trace_file->event_call->event;
2853 
2854 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2855 	trace_seq_init(&iter->seq);
2856 	iter->ent = fbuffer->entry;
2857 	event_call->event.funcs->trace(iter, 0, event);
2858 	trace_seq_putc(&iter->seq, 0);
2859 	printk("%s", iter->seq.buffer);
2860 
2861 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2862 }
2863 
2864 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2865 			     void *buffer, size_t *lenp,
2866 			     loff_t *ppos)
2867 {
2868 	int save_tracepoint_printk;
2869 	int ret;
2870 
2871 	mutex_lock(&tracepoint_printk_mutex);
2872 	save_tracepoint_printk = tracepoint_printk;
2873 
2874 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2875 
2876 	/*
2877 	 * This will force exiting early, as tracepoint_printk
2878 	 * is always zero when tracepoint_printk_iter is not allocated
2879 	 */
2880 	if (!tracepoint_print_iter)
2881 		tracepoint_printk = 0;
2882 
2883 	if (save_tracepoint_printk == tracepoint_printk)
2884 		goto out;
2885 
2886 	if (tracepoint_printk)
2887 		static_key_enable(&tracepoint_printk_key.key);
2888 	else
2889 		static_key_disable(&tracepoint_printk_key.key);
2890 
2891  out:
2892 	mutex_unlock(&tracepoint_printk_mutex);
2893 
2894 	return ret;
2895 }
2896 
2897 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2898 {
2899 	enum event_trigger_type tt = ETT_NONE;
2900 	struct trace_event_file *file = fbuffer->trace_file;
2901 
2902 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2903 			fbuffer->entry, &tt))
2904 		goto discard;
2905 
2906 	if (static_key_false(&tracepoint_printk_key.key))
2907 		output_printk(fbuffer);
2908 
2909 	if (static_branch_unlikely(&trace_event_exports_enabled))
2910 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2911 
2912 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2913 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2914 
2915 discard:
2916 	if (tt)
2917 		event_triggers_post_call(file, tt);
2918 
2919 }
2920 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2921 
2922 /*
2923  * Skip 3:
2924  *
2925  *   trace_buffer_unlock_commit_regs()
2926  *   trace_event_buffer_commit()
2927  *   trace_event_raw_event_xxx()
2928  */
2929 # define STACK_SKIP 3
2930 
2931 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2932 				     struct trace_buffer *buffer,
2933 				     struct ring_buffer_event *event,
2934 				     unsigned int trace_ctx,
2935 				     struct pt_regs *regs)
2936 {
2937 	__buffer_unlock_commit(buffer, event);
2938 
2939 	/*
2940 	 * If regs is not set, then skip the necessary functions.
2941 	 * Note, we can still get here via blktrace, wakeup tracer
2942 	 * and mmiotrace, but that's ok if they lose a function or
2943 	 * two. They are not that meaningful.
2944 	 */
2945 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2946 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2947 }
2948 
2949 /*
2950  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2951  */
2952 void
2953 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2954 				   struct ring_buffer_event *event)
2955 {
2956 	__buffer_unlock_commit(buffer, event);
2957 }
2958 
2959 void
2960 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2961 	       parent_ip, unsigned int trace_ctx)
2962 {
2963 	struct trace_event_call *call = &event_function;
2964 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2965 	struct ring_buffer_event *event;
2966 	struct ftrace_entry *entry;
2967 
2968 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2969 					    trace_ctx);
2970 	if (!event)
2971 		return;
2972 	entry	= ring_buffer_event_data(event);
2973 	entry->ip			= ip;
2974 	entry->parent_ip		= parent_ip;
2975 
2976 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2977 		if (static_branch_unlikely(&trace_function_exports_enabled))
2978 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2979 		__buffer_unlock_commit(buffer, event);
2980 	}
2981 }
2982 
2983 #ifdef CONFIG_STACKTRACE
2984 
2985 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2986 #define FTRACE_KSTACK_NESTING	4
2987 
2988 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2989 
2990 struct ftrace_stack {
2991 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2992 };
2993 
2994 
2995 struct ftrace_stacks {
2996 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2997 };
2998 
2999 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3000 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3001 
3002 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3003 				 unsigned int trace_ctx,
3004 				 int skip, struct pt_regs *regs)
3005 {
3006 	struct trace_event_call *call = &event_kernel_stack;
3007 	struct ring_buffer_event *event;
3008 	unsigned int size, nr_entries;
3009 	struct ftrace_stack *fstack;
3010 	struct stack_entry *entry;
3011 	int stackidx;
3012 
3013 	/*
3014 	 * Add one, for this function and the call to save_stack_trace()
3015 	 * If regs is set, then these functions will not be in the way.
3016 	 */
3017 #ifndef CONFIG_UNWINDER_ORC
3018 	if (!regs)
3019 		skip++;
3020 #endif
3021 
3022 	preempt_disable_notrace();
3023 
3024 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3025 
3026 	/* This should never happen. If it does, yell once and skip */
3027 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3028 		goto out;
3029 
3030 	/*
3031 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3032 	 * interrupt will either see the value pre increment or post
3033 	 * increment. If the interrupt happens pre increment it will have
3034 	 * restored the counter when it returns.  We just need a barrier to
3035 	 * keep gcc from moving things around.
3036 	 */
3037 	barrier();
3038 
3039 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3040 	size = ARRAY_SIZE(fstack->calls);
3041 
3042 	if (regs) {
3043 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3044 						   size, skip);
3045 	} else {
3046 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3047 	}
3048 
3049 	size = nr_entries * sizeof(unsigned long);
3050 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3051 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
3052 				    trace_ctx);
3053 	if (!event)
3054 		goto out;
3055 	entry = ring_buffer_event_data(event);
3056 
3057 	memcpy(&entry->caller, fstack->calls, size);
3058 	entry->size = nr_entries;
3059 
3060 	if (!call_filter_check_discard(call, entry, buffer, event))
3061 		__buffer_unlock_commit(buffer, event);
3062 
3063  out:
3064 	/* Again, don't let gcc optimize things here */
3065 	barrier();
3066 	__this_cpu_dec(ftrace_stack_reserve);
3067 	preempt_enable_notrace();
3068 
3069 }
3070 
3071 static inline void ftrace_trace_stack(struct trace_array *tr,
3072 				      struct trace_buffer *buffer,
3073 				      unsigned int trace_ctx,
3074 				      int skip, struct pt_regs *regs)
3075 {
3076 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3077 		return;
3078 
3079 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3080 }
3081 
3082 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3083 		   int skip)
3084 {
3085 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3086 
3087 	if (rcu_is_watching()) {
3088 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3089 		return;
3090 	}
3091 
3092 	/*
3093 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3094 	 * but if the above rcu_is_watching() failed, then the NMI
3095 	 * triggered someplace critical, and rcu_irq_enter() should
3096 	 * not be called from NMI.
3097 	 */
3098 	if (unlikely(in_nmi()))
3099 		return;
3100 
3101 	rcu_irq_enter_irqson();
3102 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3103 	rcu_irq_exit_irqson();
3104 }
3105 
3106 /**
3107  * trace_dump_stack - record a stack back trace in the trace buffer
3108  * @skip: Number of functions to skip (helper handlers)
3109  */
3110 void trace_dump_stack(int skip)
3111 {
3112 	if (tracing_disabled || tracing_selftest_running)
3113 		return;
3114 
3115 #ifndef CONFIG_UNWINDER_ORC
3116 	/* Skip 1 to skip this function. */
3117 	skip++;
3118 #endif
3119 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3120 			     tracing_gen_ctx(), skip, NULL);
3121 }
3122 EXPORT_SYMBOL_GPL(trace_dump_stack);
3123 
3124 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3125 static DEFINE_PER_CPU(int, user_stack_count);
3126 
3127 static void
3128 ftrace_trace_userstack(struct trace_array *tr,
3129 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3130 {
3131 	struct trace_event_call *call = &event_user_stack;
3132 	struct ring_buffer_event *event;
3133 	struct userstack_entry *entry;
3134 
3135 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3136 		return;
3137 
3138 	/*
3139 	 * NMIs can not handle page faults, even with fix ups.
3140 	 * The save user stack can (and often does) fault.
3141 	 */
3142 	if (unlikely(in_nmi()))
3143 		return;
3144 
3145 	/*
3146 	 * prevent recursion, since the user stack tracing may
3147 	 * trigger other kernel events.
3148 	 */
3149 	preempt_disable();
3150 	if (__this_cpu_read(user_stack_count))
3151 		goto out;
3152 
3153 	__this_cpu_inc(user_stack_count);
3154 
3155 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3156 					    sizeof(*entry), trace_ctx);
3157 	if (!event)
3158 		goto out_drop_count;
3159 	entry	= ring_buffer_event_data(event);
3160 
3161 	entry->tgid		= current->tgid;
3162 	memset(&entry->caller, 0, sizeof(entry->caller));
3163 
3164 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3165 	if (!call_filter_check_discard(call, entry, buffer, event))
3166 		__buffer_unlock_commit(buffer, event);
3167 
3168  out_drop_count:
3169 	__this_cpu_dec(user_stack_count);
3170  out:
3171 	preempt_enable();
3172 }
3173 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3174 static void ftrace_trace_userstack(struct trace_array *tr,
3175 				   struct trace_buffer *buffer,
3176 				   unsigned int trace_ctx)
3177 {
3178 }
3179 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3180 
3181 #endif /* CONFIG_STACKTRACE */
3182 
3183 static inline void
3184 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3185 			  unsigned long long delta)
3186 {
3187 	entry->bottom_delta_ts = delta & U32_MAX;
3188 	entry->top_delta_ts = (delta >> 32);
3189 }
3190 
3191 void trace_last_func_repeats(struct trace_array *tr,
3192 			     struct trace_func_repeats *last_info,
3193 			     unsigned int trace_ctx)
3194 {
3195 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3196 	struct func_repeats_entry *entry;
3197 	struct ring_buffer_event *event;
3198 	u64 delta;
3199 
3200 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3201 					    sizeof(*entry), trace_ctx);
3202 	if (!event)
3203 		return;
3204 
3205 	delta = ring_buffer_event_time_stamp(buffer, event) -
3206 		last_info->ts_last_call;
3207 
3208 	entry = ring_buffer_event_data(event);
3209 	entry->ip = last_info->ip;
3210 	entry->parent_ip = last_info->parent_ip;
3211 	entry->count = last_info->count;
3212 	func_repeats_set_delta_ts(entry, delta);
3213 
3214 	__buffer_unlock_commit(buffer, event);
3215 }
3216 
3217 /* created for use with alloc_percpu */
3218 struct trace_buffer_struct {
3219 	int nesting;
3220 	char buffer[4][TRACE_BUF_SIZE];
3221 };
3222 
3223 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3224 
3225 /*
3226  * This allows for lockless recording.  If we're nested too deeply, then
3227  * this returns NULL.
3228  */
3229 static char *get_trace_buf(void)
3230 {
3231 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3232 
3233 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3234 		return NULL;
3235 
3236 	buffer->nesting++;
3237 
3238 	/* Interrupts must see nesting incremented before we use the buffer */
3239 	barrier();
3240 	return &buffer->buffer[buffer->nesting - 1][0];
3241 }
3242 
3243 static void put_trace_buf(void)
3244 {
3245 	/* Don't let the decrement of nesting leak before this */
3246 	barrier();
3247 	this_cpu_dec(trace_percpu_buffer->nesting);
3248 }
3249 
3250 static int alloc_percpu_trace_buffer(void)
3251 {
3252 	struct trace_buffer_struct __percpu *buffers;
3253 
3254 	if (trace_percpu_buffer)
3255 		return 0;
3256 
3257 	buffers = alloc_percpu(struct trace_buffer_struct);
3258 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3259 		return -ENOMEM;
3260 
3261 	trace_percpu_buffer = buffers;
3262 	return 0;
3263 }
3264 
3265 static int buffers_allocated;
3266 
3267 void trace_printk_init_buffers(void)
3268 {
3269 	if (buffers_allocated)
3270 		return;
3271 
3272 	if (alloc_percpu_trace_buffer())
3273 		return;
3274 
3275 	/* trace_printk() is for debug use only. Don't use it in production. */
3276 
3277 	pr_warn("\n");
3278 	pr_warn("**********************************************************\n");
3279 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3280 	pr_warn("**                                                      **\n");
3281 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3282 	pr_warn("**                                                      **\n");
3283 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3284 	pr_warn("** unsafe for production use.                           **\n");
3285 	pr_warn("**                                                      **\n");
3286 	pr_warn("** If you see this message and you are not debugging    **\n");
3287 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3288 	pr_warn("**                                                      **\n");
3289 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3290 	pr_warn("**********************************************************\n");
3291 
3292 	/* Expand the buffers to set size */
3293 	tracing_update_buffers();
3294 
3295 	buffers_allocated = 1;
3296 
3297 	/*
3298 	 * trace_printk_init_buffers() can be called by modules.
3299 	 * If that happens, then we need to start cmdline recording
3300 	 * directly here. If the global_trace.buffer is already
3301 	 * allocated here, then this was called by module code.
3302 	 */
3303 	if (global_trace.array_buffer.buffer)
3304 		tracing_start_cmdline_record();
3305 }
3306 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3307 
3308 void trace_printk_start_comm(void)
3309 {
3310 	/* Start tracing comms if trace printk is set */
3311 	if (!buffers_allocated)
3312 		return;
3313 	tracing_start_cmdline_record();
3314 }
3315 
3316 static void trace_printk_start_stop_comm(int enabled)
3317 {
3318 	if (!buffers_allocated)
3319 		return;
3320 
3321 	if (enabled)
3322 		tracing_start_cmdline_record();
3323 	else
3324 		tracing_stop_cmdline_record();
3325 }
3326 
3327 /**
3328  * trace_vbprintk - write binary msg to tracing buffer
3329  * @ip:    The address of the caller
3330  * @fmt:   The string format to write to the buffer
3331  * @args:  Arguments for @fmt
3332  */
3333 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3334 {
3335 	struct trace_event_call *call = &event_bprint;
3336 	struct ring_buffer_event *event;
3337 	struct trace_buffer *buffer;
3338 	struct trace_array *tr = &global_trace;
3339 	struct bprint_entry *entry;
3340 	unsigned int trace_ctx;
3341 	char *tbuffer;
3342 	int len = 0, size;
3343 
3344 	if (unlikely(tracing_selftest_running || tracing_disabled))
3345 		return 0;
3346 
3347 	/* Don't pollute graph traces with trace_vprintk internals */
3348 	pause_graph_tracing();
3349 
3350 	trace_ctx = tracing_gen_ctx();
3351 	preempt_disable_notrace();
3352 
3353 	tbuffer = get_trace_buf();
3354 	if (!tbuffer) {
3355 		len = 0;
3356 		goto out_nobuffer;
3357 	}
3358 
3359 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3360 
3361 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3362 		goto out_put;
3363 
3364 	size = sizeof(*entry) + sizeof(u32) * len;
3365 	buffer = tr->array_buffer.buffer;
3366 	ring_buffer_nest_start(buffer);
3367 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3368 					    trace_ctx);
3369 	if (!event)
3370 		goto out;
3371 	entry = ring_buffer_event_data(event);
3372 	entry->ip			= ip;
3373 	entry->fmt			= fmt;
3374 
3375 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3376 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3377 		__buffer_unlock_commit(buffer, event);
3378 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3379 	}
3380 
3381 out:
3382 	ring_buffer_nest_end(buffer);
3383 out_put:
3384 	put_trace_buf();
3385 
3386 out_nobuffer:
3387 	preempt_enable_notrace();
3388 	unpause_graph_tracing();
3389 
3390 	return len;
3391 }
3392 EXPORT_SYMBOL_GPL(trace_vbprintk);
3393 
3394 __printf(3, 0)
3395 static int
3396 __trace_array_vprintk(struct trace_buffer *buffer,
3397 		      unsigned long ip, const char *fmt, va_list args)
3398 {
3399 	struct trace_event_call *call = &event_print;
3400 	struct ring_buffer_event *event;
3401 	int len = 0, size;
3402 	struct print_entry *entry;
3403 	unsigned int trace_ctx;
3404 	char *tbuffer;
3405 
3406 	if (tracing_disabled || tracing_selftest_running)
3407 		return 0;
3408 
3409 	/* Don't pollute graph traces with trace_vprintk internals */
3410 	pause_graph_tracing();
3411 
3412 	trace_ctx = tracing_gen_ctx();
3413 	preempt_disable_notrace();
3414 
3415 
3416 	tbuffer = get_trace_buf();
3417 	if (!tbuffer) {
3418 		len = 0;
3419 		goto out_nobuffer;
3420 	}
3421 
3422 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3423 
3424 	size = sizeof(*entry) + len + 1;
3425 	ring_buffer_nest_start(buffer);
3426 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3427 					    trace_ctx);
3428 	if (!event)
3429 		goto out;
3430 	entry = ring_buffer_event_data(event);
3431 	entry->ip = ip;
3432 
3433 	memcpy(&entry->buf, tbuffer, len + 1);
3434 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3435 		__buffer_unlock_commit(buffer, event);
3436 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3437 	}
3438 
3439 out:
3440 	ring_buffer_nest_end(buffer);
3441 	put_trace_buf();
3442 
3443 out_nobuffer:
3444 	preempt_enable_notrace();
3445 	unpause_graph_tracing();
3446 
3447 	return len;
3448 }
3449 
3450 __printf(3, 0)
3451 int trace_array_vprintk(struct trace_array *tr,
3452 			unsigned long ip, const char *fmt, va_list args)
3453 {
3454 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3455 }
3456 
3457 /**
3458  * trace_array_printk - Print a message to a specific instance
3459  * @tr: The instance trace_array descriptor
3460  * @ip: The instruction pointer that this is called from.
3461  * @fmt: The format to print (printf format)
3462  *
3463  * If a subsystem sets up its own instance, they have the right to
3464  * printk strings into their tracing instance buffer using this
3465  * function. Note, this function will not write into the top level
3466  * buffer (use trace_printk() for that), as writing into the top level
3467  * buffer should only have events that can be individually disabled.
3468  * trace_printk() is only used for debugging a kernel, and should not
3469  * be ever incorporated in normal use.
3470  *
3471  * trace_array_printk() can be used, as it will not add noise to the
3472  * top level tracing buffer.
3473  *
3474  * Note, trace_array_init_printk() must be called on @tr before this
3475  * can be used.
3476  */
3477 __printf(3, 0)
3478 int trace_array_printk(struct trace_array *tr,
3479 		       unsigned long ip, const char *fmt, ...)
3480 {
3481 	int ret;
3482 	va_list ap;
3483 
3484 	if (!tr)
3485 		return -ENOENT;
3486 
3487 	/* This is only allowed for created instances */
3488 	if (tr == &global_trace)
3489 		return 0;
3490 
3491 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3492 		return 0;
3493 
3494 	va_start(ap, fmt);
3495 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3496 	va_end(ap);
3497 	return ret;
3498 }
3499 EXPORT_SYMBOL_GPL(trace_array_printk);
3500 
3501 /**
3502  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3503  * @tr: The trace array to initialize the buffers for
3504  *
3505  * As trace_array_printk() only writes into instances, they are OK to
3506  * have in the kernel (unlike trace_printk()). This needs to be called
3507  * before trace_array_printk() can be used on a trace_array.
3508  */
3509 int trace_array_init_printk(struct trace_array *tr)
3510 {
3511 	if (!tr)
3512 		return -ENOENT;
3513 
3514 	/* This is only allowed for created instances */
3515 	if (tr == &global_trace)
3516 		return -EINVAL;
3517 
3518 	return alloc_percpu_trace_buffer();
3519 }
3520 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3521 
3522 __printf(3, 4)
3523 int trace_array_printk_buf(struct trace_buffer *buffer,
3524 			   unsigned long ip, const char *fmt, ...)
3525 {
3526 	int ret;
3527 	va_list ap;
3528 
3529 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3530 		return 0;
3531 
3532 	va_start(ap, fmt);
3533 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3534 	va_end(ap);
3535 	return ret;
3536 }
3537 
3538 __printf(2, 0)
3539 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3540 {
3541 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3542 }
3543 EXPORT_SYMBOL_GPL(trace_vprintk);
3544 
3545 static void trace_iterator_increment(struct trace_iterator *iter)
3546 {
3547 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3548 
3549 	iter->idx++;
3550 	if (buf_iter)
3551 		ring_buffer_iter_advance(buf_iter);
3552 }
3553 
3554 static struct trace_entry *
3555 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3556 		unsigned long *lost_events)
3557 {
3558 	struct ring_buffer_event *event;
3559 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3560 
3561 	if (buf_iter) {
3562 		event = ring_buffer_iter_peek(buf_iter, ts);
3563 		if (lost_events)
3564 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3565 				(unsigned long)-1 : 0;
3566 	} else {
3567 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3568 					 lost_events);
3569 	}
3570 
3571 	if (event) {
3572 		iter->ent_size = ring_buffer_event_length(event);
3573 		return ring_buffer_event_data(event);
3574 	}
3575 	iter->ent_size = 0;
3576 	return NULL;
3577 }
3578 
3579 static struct trace_entry *
3580 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3581 		  unsigned long *missing_events, u64 *ent_ts)
3582 {
3583 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3584 	struct trace_entry *ent, *next = NULL;
3585 	unsigned long lost_events = 0, next_lost = 0;
3586 	int cpu_file = iter->cpu_file;
3587 	u64 next_ts = 0, ts;
3588 	int next_cpu = -1;
3589 	int next_size = 0;
3590 	int cpu;
3591 
3592 	/*
3593 	 * If we are in a per_cpu trace file, don't bother by iterating over
3594 	 * all cpu and peek directly.
3595 	 */
3596 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3597 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3598 			return NULL;
3599 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3600 		if (ent_cpu)
3601 			*ent_cpu = cpu_file;
3602 
3603 		return ent;
3604 	}
3605 
3606 	for_each_tracing_cpu(cpu) {
3607 
3608 		if (ring_buffer_empty_cpu(buffer, cpu))
3609 			continue;
3610 
3611 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3612 
3613 		/*
3614 		 * Pick the entry with the smallest timestamp:
3615 		 */
3616 		if (ent && (!next || ts < next_ts)) {
3617 			next = ent;
3618 			next_cpu = cpu;
3619 			next_ts = ts;
3620 			next_lost = lost_events;
3621 			next_size = iter->ent_size;
3622 		}
3623 	}
3624 
3625 	iter->ent_size = next_size;
3626 
3627 	if (ent_cpu)
3628 		*ent_cpu = next_cpu;
3629 
3630 	if (ent_ts)
3631 		*ent_ts = next_ts;
3632 
3633 	if (missing_events)
3634 		*missing_events = next_lost;
3635 
3636 	return next;
3637 }
3638 
3639 #define STATIC_FMT_BUF_SIZE	128
3640 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3641 
3642 static char *trace_iter_expand_format(struct trace_iterator *iter)
3643 {
3644 	char *tmp;
3645 
3646 	/*
3647 	 * iter->tr is NULL when used with tp_printk, which makes
3648 	 * this get called where it is not safe to call krealloc().
3649 	 */
3650 	if (!iter->tr || iter->fmt == static_fmt_buf)
3651 		return NULL;
3652 
3653 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3654 		       GFP_KERNEL);
3655 	if (tmp) {
3656 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3657 		iter->fmt = tmp;
3658 	}
3659 
3660 	return tmp;
3661 }
3662 
3663 /* Returns true if the string is safe to dereference from an event */
3664 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3665 {
3666 	unsigned long addr = (unsigned long)str;
3667 	struct trace_event *trace_event;
3668 	struct trace_event_call *event;
3669 
3670 	/* OK if part of the event data */
3671 	if ((addr >= (unsigned long)iter->ent) &&
3672 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3673 		return true;
3674 
3675 	/* OK if part of the temp seq buffer */
3676 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3677 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3678 		return true;
3679 
3680 	/* Core rodata can not be freed */
3681 	if (is_kernel_rodata(addr))
3682 		return true;
3683 
3684 	if (trace_is_tracepoint_string(str))
3685 		return true;
3686 
3687 	/*
3688 	 * Now this could be a module event, referencing core module
3689 	 * data, which is OK.
3690 	 */
3691 	if (!iter->ent)
3692 		return false;
3693 
3694 	trace_event = ftrace_find_event(iter->ent->type);
3695 	if (!trace_event)
3696 		return false;
3697 
3698 	event = container_of(trace_event, struct trace_event_call, event);
3699 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3700 		return false;
3701 
3702 	/* Would rather have rodata, but this will suffice */
3703 	if (within_module_core(addr, event->module))
3704 		return true;
3705 
3706 	return false;
3707 }
3708 
3709 static const char *show_buffer(struct trace_seq *s)
3710 {
3711 	struct seq_buf *seq = &s->seq;
3712 
3713 	seq_buf_terminate(seq);
3714 
3715 	return seq->buffer;
3716 }
3717 
3718 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3719 
3720 static int test_can_verify_check(const char *fmt, ...)
3721 {
3722 	char buf[16];
3723 	va_list ap;
3724 	int ret;
3725 
3726 	/*
3727 	 * The verifier is dependent on vsnprintf() modifies the va_list
3728 	 * passed to it, where it is sent as a reference. Some architectures
3729 	 * (like x86_32) passes it by value, which means that vsnprintf()
3730 	 * does not modify the va_list passed to it, and the verifier
3731 	 * would then need to be able to understand all the values that
3732 	 * vsnprintf can use. If it is passed by value, then the verifier
3733 	 * is disabled.
3734 	 */
3735 	va_start(ap, fmt);
3736 	vsnprintf(buf, 16, "%d", ap);
3737 	ret = va_arg(ap, int);
3738 	va_end(ap);
3739 
3740 	return ret;
3741 }
3742 
3743 static void test_can_verify(void)
3744 {
3745 	if (!test_can_verify_check("%d %d", 0, 1)) {
3746 		pr_info("trace event string verifier disabled\n");
3747 		static_branch_inc(&trace_no_verify);
3748 	}
3749 }
3750 
3751 /**
3752  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3753  * @iter: The iterator that holds the seq buffer and the event being printed
3754  * @fmt: The format used to print the event
3755  * @ap: The va_list holding the data to print from @fmt.
3756  *
3757  * This writes the data into the @iter->seq buffer using the data from
3758  * @fmt and @ap. If the format has a %s, then the source of the string
3759  * is examined to make sure it is safe to print, otherwise it will
3760  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3761  * pointer.
3762  */
3763 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3764 			 va_list ap)
3765 {
3766 	const char *p = fmt;
3767 	const char *str;
3768 	int i, j;
3769 
3770 	if (WARN_ON_ONCE(!fmt))
3771 		return;
3772 
3773 	if (static_branch_unlikely(&trace_no_verify))
3774 		goto print;
3775 
3776 	/* Don't bother checking when doing a ftrace_dump() */
3777 	if (iter->fmt == static_fmt_buf)
3778 		goto print;
3779 
3780 	while (*p) {
3781 		bool star = false;
3782 		int len = 0;
3783 
3784 		j = 0;
3785 
3786 		/* We only care about %s and variants */
3787 		for (i = 0; p[i]; i++) {
3788 			if (i + 1 >= iter->fmt_size) {
3789 				/*
3790 				 * If we can't expand the copy buffer,
3791 				 * just print it.
3792 				 */
3793 				if (!trace_iter_expand_format(iter))
3794 					goto print;
3795 			}
3796 
3797 			if (p[i] == '\\' && p[i+1]) {
3798 				i++;
3799 				continue;
3800 			}
3801 			if (p[i] == '%') {
3802 				/* Need to test cases like %08.*s */
3803 				for (j = 1; p[i+j]; j++) {
3804 					if (isdigit(p[i+j]) ||
3805 					    p[i+j] == '.')
3806 						continue;
3807 					if (p[i+j] == '*') {
3808 						star = true;
3809 						continue;
3810 					}
3811 					break;
3812 				}
3813 				if (p[i+j] == 's')
3814 					break;
3815 				star = false;
3816 			}
3817 			j = 0;
3818 		}
3819 		/* If no %s found then just print normally */
3820 		if (!p[i])
3821 			break;
3822 
3823 		/* Copy up to the %s, and print that */
3824 		strncpy(iter->fmt, p, i);
3825 		iter->fmt[i] = '\0';
3826 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3827 
3828 		/*
3829 		 * If iter->seq is full, the above call no longer guarantees
3830 		 * that ap is in sync with fmt processing, and further calls
3831 		 * to va_arg() can return wrong positional arguments.
3832 		 *
3833 		 * Ensure that ap is no longer used in this case.
3834 		 */
3835 		if (iter->seq.full) {
3836 			p = "";
3837 			break;
3838 		}
3839 
3840 		if (star)
3841 			len = va_arg(ap, int);
3842 
3843 		/* The ap now points to the string data of the %s */
3844 		str = va_arg(ap, const char *);
3845 
3846 		/*
3847 		 * If you hit this warning, it is likely that the
3848 		 * trace event in question used %s on a string that
3849 		 * was saved at the time of the event, but may not be
3850 		 * around when the trace is read. Use __string(),
3851 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3852 		 * instead. See samples/trace_events/trace-events-sample.h
3853 		 * for reference.
3854 		 */
3855 		if (WARN_ONCE(!trace_safe_str(iter, str),
3856 			      "fmt: '%s' current_buffer: '%s'",
3857 			      fmt, show_buffer(&iter->seq))) {
3858 			int ret;
3859 
3860 			/* Try to safely read the string */
3861 			if (star) {
3862 				if (len + 1 > iter->fmt_size)
3863 					len = iter->fmt_size - 1;
3864 				if (len < 0)
3865 					len = 0;
3866 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3867 				iter->fmt[len] = 0;
3868 				star = false;
3869 			} else {
3870 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3871 								  iter->fmt_size);
3872 			}
3873 			if (ret < 0)
3874 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3875 			else
3876 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3877 						 str, iter->fmt);
3878 			str = "[UNSAFE-MEMORY]";
3879 			strcpy(iter->fmt, "%s");
3880 		} else {
3881 			strncpy(iter->fmt, p + i, j + 1);
3882 			iter->fmt[j+1] = '\0';
3883 		}
3884 		if (star)
3885 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3886 		else
3887 			trace_seq_printf(&iter->seq, iter->fmt, str);
3888 
3889 		p += i + j + 1;
3890 	}
3891  print:
3892 	if (*p)
3893 		trace_seq_vprintf(&iter->seq, p, ap);
3894 }
3895 
3896 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3897 {
3898 	const char *p, *new_fmt;
3899 	char *q;
3900 
3901 	if (WARN_ON_ONCE(!fmt))
3902 		return fmt;
3903 
3904 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3905 		return fmt;
3906 
3907 	p = fmt;
3908 	new_fmt = q = iter->fmt;
3909 	while (*p) {
3910 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3911 			if (!trace_iter_expand_format(iter))
3912 				return fmt;
3913 
3914 			q += iter->fmt - new_fmt;
3915 			new_fmt = iter->fmt;
3916 		}
3917 
3918 		*q++ = *p++;
3919 
3920 		/* Replace %p with %px */
3921 		if (p[-1] == '%') {
3922 			if (p[0] == '%') {
3923 				*q++ = *p++;
3924 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3925 				*q++ = *p++;
3926 				*q++ = 'x';
3927 			}
3928 		}
3929 	}
3930 	*q = '\0';
3931 
3932 	return new_fmt;
3933 }
3934 
3935 #define STATIC_TEMP_BUF_SIZE	128
3936 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3937 
3938 /* Find the next real entry, without updating the iterator itself */
3939 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3940 					  int *ent_cpu, u64 *ent_ts)
3941 {
3942 	/* __find_next_entry will reset ent_size */
3943 	int ent_size = iter->ent_size;
3944 	struct trace_entry *entry;
3945 
3946 	/*
3947 	 * If called from ftrace_dump(), then the iter->temp buffer
3948 	 * will be the static_temp_buf and not created from kmalloc.
3949 	 * If the entry size is greater than the buffer, we can
3950 	 * not save it. Just return NULL in that case. This is only
3951 	 * used to add markers when two consecutive events' time
3952 	 * stamps have a large delta. See trace_print_lat_context()
3953 	 */
3954 	if (iter->temp == static_temp_buf &&
3955 	    STATIC_TEMP_BUF_SIZE < ent_size)
3956 		return NULL;
3957 
3958 	/*
3959 	 * The __find_next_entry() may call peek_next_entry(), which may
3960 	 * call ring_buffer_peek() that may make the contents of iter->ent
3961 	 * undefined. Need to copy iter->ent now.
3962 	 */
3963 	if (iter->ent && iter->ent != iter->temp) {
3964 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3965 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3966 			void *temp;
3967 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3968 			if (!temp)
3969 				return NULL;
3970 			kfree(iter->temp);
3971 			iter->temp = temp;
3972 			iter->temp_size = iter->ent_size;
3973 		}
3974 		memcpy(iter->temp, iter->ent, iter->ent_size);
3975 		iter->ent = iter->temp;
3976 	}
3977 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3978 	/* Put back the original ent_size */
3979 	iter->ent_size = ent_size;
3980 
3981 	return entry;
3982 }
3983 
3984 /* Find the next real entry, and increment the iterator to the next entry */
3985 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3986 {
3987 	iter->ent = __find_next_entry(iter, &iter->cpu,
3988 				      &iter->lost_events, &iter->ts);
3989 
3990 	if (iter->ent)
3991 		trace_iterator_increment(iter);
3992 
3993 	return iter->ent ? iter : NULL;
3994 }
3995 
3996 static void trace_consume(struct trace_iterator *iter)
3997 {
3998 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3999 			    &iter->lost_events);
4000 }
4001 
4002 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4003 {
4004 	struct trace_iterator *iter = m->private;
4005 	int i = (int)*pos;
4006 	void *ent;
4007 
4008 	WARN_ON_ONCE(iter->leftover);
4009 
4010 	(*pos)++;
4011 
4012 	/* can't go backwards */
4013 	if (iter->idx > i)
4014 		return NULL;
4015 
4016 	if (iter->idx < 0)
4017 		ent = trace_find_next_entry_inc(iter);
4018 	else
4019 		ent = iter;
4020 
4021 	while (ent && iter->idx < i)
4022 		ent = trace_find_next_entry_inc(iter);
4023 
4024 	iter->pos = *pos;
4025 
4026 	return ent;
4027 }
4028 
4029 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4030 {
4031 	struct ring_buffer_iter *buf_iter;
4032 	unsigned long entries = 0;
4033 	u64 ts;
4034 
4035 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4036 
4037 	buf_iter = trace_buffer_iter(iter, cpu);
4038 	if (!buf_iter)
4039 		return;
4040 
4041 	ring_buffer_iter_reset(buf_iter);
4042 
4043 	/*
4044 	 * We could have the case with the max latency tracers
4045 	 * that a reset never took place on a cpu. This is evident
4046 	 * by the timestamp being before the start of the buffer.
4047 	 */
4048 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4049 		if (ts >= iter->array_buffer->time_start)
4050 			break;
4051 		entries++;
4052 		ring_buffer_iter_advance(buf_iter);
4053 	}
4054 
4055 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4056 }
4057 
4058 /*
4059  * The current tracer is copied to avoid a global locking
4060  * all around.
4061  */
4062 static void *s_start(struct seq_file *m, loff_t *pos)
4063 {
4064 	struct trace_iterator *iter = m->private;
4065 	struct trace_array *tr = iter->tr;
4066 	int cpu_file = iter->cpu_file;
4067 	void *p = NULL;
4068 	loff_t l = 0;
4069 	int cpu;
4070 
4071 	/*
4072 	 * copy the tracer to avoid using a global lock all around.
4073 	 * iter->trace is a copy of current_trace, the pointer to the
4074 	 * name may be used instead of a strcmp(), as iter->trace->name
4075 	 * will point to the same string as current_trace->name.
4076 	 */
4077 	mutex_lock(&trace_types_lock);
4078 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4079 		*iter->trace = *tr->current_trace;
4080 	mutex_unlock(&trace_types_lock);
4081 
4082 #ifdef CONFIG_TRACER_MAX_TRACE
4083 	if (iter->snapshot && iter->trace->use_max_tr)
4084 		return ERR_PTR(-EBUSY);
4085 #endif
4086 
4087 	if (*pos != iter->pos) {
4088 		iter->ent = NULL;
4089 		iter->cpu = 0;
4090 		iter->idx = -1;
4091 
4092 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4093 			for_each_tracing_cpu(cpu)
4094 				tracing_iter_reset(iter, cpu);
4095 		} else
4096 			tracing_iter_reset(iter, cpu_file);
4097 
4098 		iter->leftover = 0;
4099 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4100 			;
4101 
4102 	} else {
4103 		/*
4104 		 * If we overflowed the seq_file before, then we want
4105 		 * to just reuse the trace_seq buffer again.
4106 		 */
4107 		if (iter->leftover)
4108 			p = iter;
4109 		else {
4110 			l = *pos - 1;
4111 			p = s_next(m, p, &l);
4112 		}
4113 	}
4114 
4115 	trace_event_read_lock();
4116 	trace_access_lock(cpu_file);
4117 	return p;
4118 }
4119 
4120 static void s_stop(struct seq_file *m, void *p)
4121 {
4122 	struct trace_iterator *iter = m->private;
4123 
4124 #ifdef CONFIG_TRACER_MAX_TRACE
4125 	if (iter->snapshot && iter->trace->use_max_tr)
4126 		return;
4127 #endif
4128 
4129 	trace_access_unlock(iter->cpu_file);
4130 	trace_event_read_unlock();
4131 }
4132 
4133 static void
4134 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4135 		      unsigned long *entries, int cpu)
4136 {
4137 	unsigned long count;
4138 
4139 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4140 	/*
4141 	 * If this buffer has skipped entries, then we hold all
4142 	 * entries for the trace and we need to ignore the
4143 	 * ones before the time stamp.
4144 	 */
4145 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4146 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4147 		/* total is the same as the entries */
4148 		*total = count;
4149 	} else
4150 		*total = count +
4151 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4152 	*entries = count;
4153 }
4154 
4155 static void
4156 get_total_entries(struct array_buffer *buf,
4157 		  unsigned long *total, unsigned long *entries)
4158 {
4159 	unsigned long t, e;
4160 	int cpu;
4161 
4162 	*total = 0;
4163 	*entries = 0;
4164 
4165 	for_each_tracing_cpu(cpu) {
4166 		get_total_entries_cpu(buf, &t, &e, cpu);
4167 		*total += t;
4168 		*entries += e;
4169 	}
4170 }
4171 
4172 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4173 {
4174 	unsigned long total, entries;
4175 
4176 	if (!tr)
4177 		tr = &global_trace;
4178 
4179 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4180 
4181 	return entries;
4182 }
4183 
4184 unsigned long trace_total_entries(struct trace_array *tr)
4185 {
4186 	unsigned long total, entries;
4187 
4188 	if (!tr)
4189 		tr = &global_trace;
4190 
4191 	get_total_entries(&tr->array_buffer, &total, &entries);
4192 
4193 	return entries;
4194 }
4195 
4196 static void print_lat_help_header(struct seq_file *m)
4197 {
4198 	seq_puts(m, "#                    _------=> CPU#            \n"
4199 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4200 		    "#                  | / _----=> need-resched    \n"
4201 		    "#                  || / _---=> hardirq/softirq \n"
4202 		    "#                  ||| / _--=> preempt-depth   \n"
4203 		    "#                  |||| / _-=> migrate-disable \n"
4204 		    "#                  ||||| /     delay           \n"
4205 		    "#  cmd     pid     |||||| time  |   caller     \n"
4206 		    "#     \\   /        ||||||  \\    |    /       \n");
4207 }
4208 
4209 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4210 {
4211 	unsigned long total;
4212 	unsigned long entries;
4213 
4214 	get_total_entries(buf, &total, &entries);
4215 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4216 		   entries, total, num_online_cpus());
4217 	seq_puts(m, "#\n");
4218 }
4219 
4220 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4221 				   unsigned int flags)
4222 {
4223 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4224 
4225 	print_event_info(buf, m);
4226 
4227 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4228 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4229 }
4230 
4231 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4232 				       unsigned int flags)
4233 {
4234 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4235 	const char *space = "            ";
4236 	int prec = tgid ? 12 : 2;
4237 
4238 	print_event_info(buf, m);
4239 
4240 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4241 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4242 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4243 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4244 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4245 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4246 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4247 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4248 }
4249 
4250 void
4251 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4252 {
4253 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4254 	struct array_buffer *buf = iter->array_buffer;
4255 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4256 	struct tracer *type = iter->trace;
4257 	unsigned long entries;
4258 	unsigned long total;
4259 	const char *name = "preemption";
4260 
4261 	name = type->name;
4262 
4263 	get_total_entries(buf, &total, &entries);
4264 
4265 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4266 		   name, UTS_RELEASE);
4267 	seq_puts(m, "# -----------------------------------"
4268 		 "---------------------------------\n");
4269 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4270 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4271 		   nsecs_to_usecs(data->saved_latency),
4272 		   entries,
4273 		   total,
4274 		   buf->cpu,
4275 #if defined(CONFIG_PREEMPT_NONE)
4276 		   "server",
4277 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4278 		   "desktop",
4279 #elif defined(CONFIG_PREEMPT)
4280 		   "preempt",
4281 #elif defined(CONFIG_PREEMPT_RT)
4282 		   "preempt_rt",
4283 #else
4284 		   "unknown",
4285 #endif
4286 		   /* These are reserved for later use */
4287 		   0, 0, 0, 0);
4288 #ifdef CONFIG_SMP
4289 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4290 #else
4291 	seq_puts(m, ")\n");
4292 #endif
4293 	seq_puts(m, "#    -----------------\n");
4294 	seq_printf(m, "#    | task: %.16s-%d "
4295 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4296 		   data->comm, data->pid,
4297 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4298 		   data->policy, data->rt_priority);
4299 	seq_puts(m, "#    -----------------\n");
4300 
4301 	if (data->critical_start) {
4302 		seq_puts(m, "#  => started at: ");
4303 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4304 		trace_print_seq(m, &iter->seq);
4305 		seq_puts(m, "\n#  => ended at:   ");
4306 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4307 		trace_print_seq(m, &iter->seq);
4308 		seq_puts(m, "\n#\n");
4309 	}
4310 
4311 	seq_puts(m, "#\n");
4312 }
4313 
4314 static void test_cpu_buff_start(struct trace_iterator *iter)
4315 {
4316 	struct trace_seq *s = &iter->seq;
4317 	struct trace_array *tr = iter->tr;
4318 
4319 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4320 		return;
4321 
4322 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4323 		return;
4324 
4325 	if (cpumask_available(iter->started) &&
4326 	    cpumask_test_cpu(iter->cpu, iter->started))
4327 		return;
4328 
4329 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4330 		return;
4331 
4332 	if (cpumask_available(iter->started))
4333 		cpumask_set_cpu(iter->cpu, iter->started);
4334 
4335 	/* Don't print started cpu buffer for the first entry of the trace */
4336 	if (iter->idx > 1)
4337 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4338 				iter->cpu);
4339 }
4340 
4341 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4342 {
4343 	struct trace_array *tr = iter->tr;
4344 	struct trace_seq *s = &iter->seq;
4345 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4346 	struct trace_entry *entry;
4347 	struct trace_event *event;
4348 
4349 	entry = iter->ent;
4350 
4351 	test_cpu_buff_start(iter);
4352 
4353 	event = ftrace_find_event(entry->type);
4354 
4355 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4356 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4357 			trace_print_lat_context(iter);
4358 		else
4359 			trace_print_context(iter);
4360 	}
4361 
4362 	if (trace_seq_has_overflowed(s))
4363 		return TRACE_TYPE_PARTIAL_LINE;
4364 
4365 	if (event)
4366 		return event->funcs->trace(iter, sym_flags, event);
4367 
4368 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4369 
4370 	return trace_handle_return(s);
4371 }
4372 
4373 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4374 {
4375 	struct trace_array *tr = iter->tr;
4376 	struct trace_seq *s = &iter->seq;
4377 	struct trace_entry *entry;
4378 	struct trace_event *event;
4379 
4380 	entry = iter->ent;
4381 
4382 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4383 		trace_seq_printf(s, "%d %d %llu ",
4384 				 entry->pid, iter->cpu, iter->ts);
4385 
4386 	if (trace_seq_has_overflowed(s))
4387 		return TRACE_TYPE_PARTIAL_LINE;
4388 
4389 	event = ftrace_find_event(entry->type);
4390 	if (event)
4391 		return event->funcs->raw(iter, 0, event);
4392 
4393 	trace_seq_printf(s, "%d ?\n", entry->type);
4394 
4395 	return trace_handle_return(s);
4396 }
4397 
4398 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4399 {
4400 	struct trace_array *tr = iter->tr;
4401 	struct trace_seq *s = &iter->seq;
4402 	unsigned char newline = '\n';
4403 	struct trace_entry *entry;
4404 	struct trace_event *event;
4405 
4406 	entry = iter->ent;
4407 
4408 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4409 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4410 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4411 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4412 		if (trace_seq_has_overflowed(s))
4413 			return TRACE_TYPE_PARTIAL_LINE;
4414 	}
4415 
4416 	event = ftrace_find_event(entry->type);
4417 	if (event) {
4418 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4419 		if (ret != TRACE_TYPE_HANDLED)
4420 			return ret;
4421 	}
4422 
4423 	SEQ_PUT_FIELD(s, newline);
4424 
4425 	return trace_handle_return(s);
4426 }
4427 
4428 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4429 {
4430 	struct trace_array *tr = iter->tr;
4431 	struct trace_seq *s = &iter->seq;
4432 	struct trace_entry *entry;
4433 	struct trace_event *event;
4434 
4435 	entry = iter->ent;
4436 
4437 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4438 		SEQ_PUT_FIELD(s, entry->pid);
4439 		SEQ_PUT_FIELD(s, iter->cpu);
4440 		SEQ_PUT_FIELD(s, iter->ts);
4441 		if (trace_seq_has_overflowed(s))
4442 			return TRACE_TYPE_PARTIAL_LINE;
4443 	}
4444 
4445 	event = ftrace_find_event(entry->type);
4446 	return event ? event->funcs->binary(iter, 0, event) :
4447 		TRACE_TYPE_HANDLED;
4448 }
4449 
4450 int trace_empty(struct trace_iterator *iter)
4451 {
4452 	struct ring_buffer_iter *buf_iter;
4453 	int cpu;
4454 
4455 	/* If we are looking at one CPU buffer, only check that one */
4456 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4457 		cpu = iter->cpu_file;
4458 		buf_iter = trace_buffer_iter(iter, cpu);
4459 		if (buf_iter) {
4460 			if (!ring_buffer_iter_empty(buf_iter))
4461 				return 0;
4462 		} else {
4463 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4464 				return 0;
4465 		}
4466 		return 1;
4467 	}
4468 
4469 	for_each_tracing_cpu(cpu) {
4470 		buf_iter = trace_buffer_iter(iter, cpu);
4471 		if (buf_iter) {
4472 			if (!ring_buffer_iter_empty(buf_iter))
4473 				return 0;
4474 		} else {
4475 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4476 				return 0;
4477 		}
4478 	}
4479 
4480 	return 1;
4481 }
4482 
4483 /*  Called with trace_event_read_lock() held. */
4484 enum print_line_t print_trace_line(struct trace_iterator *iter)
4485 {
4486 	struct trace_array *tr = iter->tr;
4487 	unsigned long trace_flags = tr->trace_flags;
4488 	enum print_line_t ret;
4489 
4490 	if (iter->lost_events) {
4491 		if (iter->lost_events == (unsigned long)-1)
4492 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4493 					 iter->cpu);
4494 		else
4495 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4496 					 iter->cpu, iter->lost_events);
4497 		if (trace_seq_has_overflowed(&iter->seq))
4498 			return TRACE_TYPE_PARTIAL_LINE;
4499 	}
4500 
4501 	if (iter->trace && iter->trace->print_line) {
4502 		ret = iter->trace->print_line(iter);
4503 		if (ret != TRACE_TYPE_UNHANDLED)
4504 			return ret;
4505 	}
4506 
4507 	if (iter->ent->type == TRACE_BPUTS &&
4508 			trace_flags & TRACE_ITER_PRINTK &&
4509 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4510 		return trace_print_bputs_msg_only(iter);
4511 
4512 	if (iter->ent->type == TRACE_BPRINT &&
4513 			trace_flags & TRACE_ITER_PRINTK &&
4514 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4515 		return trace_print_bprintk_msg_only(iter);
4516 
4517 	if (iter->ent->type == TRACE_PRINT &&
4518 			trace_flags & TRACE_ITER_PRINTK &&
4519 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4520 		return trace_print_printk_msg_only(iter);
4521 
4522 	if (trace_flags & TRACE_ITER_BIN)
4523 		return print_bin_fmt(iter);
4524 
4525 	if (trace_flags & TRACE_ITER_HEX)
4526 		return print_hex_fmt(iter);
4527 
4528 	if (trace_flags & TRACE_ITER_RAW)
4529 		return print_raw_fmt(iter);
4530 
4531 	return print_trace_fmt(iter);
4532 }
4533 
4534 void trace_latency_header(struct seq_file *m)
4535 {
4536 	struct trace_iterator *iter = m->private;
4537 	struct trace_array *tr = iter->tr;
4538 
4539 	/* print nothing if the buffers are empty */
4540 	if (trace_empty(iter))
4541 		return;
4542 
4543 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4544 		print_trace_header(m, iter);
4545 
4546 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4547 		print_lat_help_header(m);
4548 }
4549 
4550 void trace_default_header(struct seq_file *m)
4551 {
4552 	struct trace_iterator *iter = m->private;
4553 	struct trace_array *tr = iter->tr;
4554 	unsigned long trace_flags = tr->trace_flags;
4555 
4556 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4557 		return;
4558 
4559 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4560 		/* print nothing if the buffers are empty */
4561 		if (trace_empty(iter))
4562 			return;
4563 		print_trace_header(m, iter);
4564 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4565 			print_lat_help_header(m);
4566 	} else {
4567 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4568 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4569 				print_func_help_header_irq(iter->array_buffer,
4570 							   m, trace_flags);
4571 			else
4572 				print_func_help_header(iter->array_buffer, m,
4573 						       trace_flags);
4574 		}
4575 	}
4576 }
4577 
4578 static void test_ftrace_alive(struct seq_file *m)
4579 {
4580 	if (!ftrace_is_dead())
4581 		return;
4582 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4583 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4584 }
4585 
4586 #ifdef CONFIG_TRACER_MAX_TRACE
4587 static void show_snapshot_main_help(struct seq_file *m)
4588 {
4589 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4590 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4591 		    "#                      Takes a snapshot of the main buffer.\n"
4592 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4593 		    "#                      (Doesn't have to be '2' works with any number that\n"
4594 		    "#                       is not a '0' or '1')\n");
4595 }
4596 
4597 static void show_snapshot_percpu_help(struct seq_file *m)
4598 {
4599 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4600 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4601 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4602 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4603 #else
4604 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4605 		    "#                     Must use main snapshot file to allocate.\n");
4606 #endif
4607 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4608 		    "#                      (Doesn't have to be '2' works with any number that\n"
4609 		    "#                       is not a '0' or '1')\n");
4610 }
4611 
4612 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4613 {
4614 	if (iter->tr->allocated_snapshot)
4615 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4616 	else
4617 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4618 
4619 	seq_puts(m, "# Snapshot commands:\n");
4620 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4621 		show_snapshot_main_help(m);
4622 	else
4623 		show_snapshot_percpu_help(m);
4624 }
4625 #else
4626 /* Should never be called */
4627 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4628 #endif
4629 
4630 static int s_show(struct seq_file *m, void *v)
4631 {
4632 	struct trace_iterator *iter = v;
4633 	int ret;
4634 
4635 	if (iter->ent == NULL) {
4636 		if (iter->tr) {
4637 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4638 			seq_puts(m, "#\n");
4639 			test_ftrace_alive(m);
4640 		}
4641 		if (iter->snapshot && trace_empty(iter))
4642 			print_snapshot_help(m, iter);
4643 		else if (iter->trace && iter->trace->print_header)
4644 			iter->trace->print_header(m);
4645 		else
4646 			trace_default_header(m);
4647 
4648 	} else if (iter->leftover) {
4649 		/*
4650 		 * If we filled the seq_file buffer earlier, we
4651 		 * want to just show it now.
4652 		 */
4653 		ret = trace_print_seq(m, &iter->seq);
4654 
4655 		/* ret should this time be zero, but you never know */
4656 		iter->leftover = ret;
4657 
4658 	} else {
4659 		print_trace_line(iter);
4660 		ret = trace_print_seq(m, &iter->seq);
4661 		/*
4662 		 * If we overflow the seq_file buffer, then it will
4663 		 * ask us for this data again at start up.
4664 		 * Use that instead.
4665 		 *  ret is 0 if seq_file write succeeded.
4666 		 *        -1 otherwise.
4667 		 */
4668 		iter->leftover = ret;
4669 	}
4670 
4671 	return 0;
4672 }
4673 
4674 /*
4675  * Should be used after trace_array_get(), trace_types_lock
4676  * ensures that i_cdev was already initialized.
4677  */
4678 static inline int tracing_get_cpu(struct inode *inode)
4679 {
4680 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4681 		return (long)inode->i_cdev - 1;
4682 	return RING_BUFFER_ALL_CPUS;
4683 }
4684 
4685 static const struct seq_operations tracer_seq_ops = {
4686 	.start		= s_start,
4687 	.next		= s_next,
4688 	.stop		= s_stop,
4689 	.show		= s_show,
4690 };
4691 
4692 static struct trace_iterator *
4693 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4694 {
4695 	struct trace_array *tr = inode->i_private;
4696 	struct trace_iterator *iter;
4697 	int cpu;
4698 
4699 	if (tracing_disabled)
4700 		return ERR_PTR(-ENODEV);
4701 
4702 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4703 	if (!iter)
4704 		return ERR_PTR(-ENOMEM);
4705 
4706 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4707 				    GFP_KERNEL);
4708 	if (!iter->buffer_iter)
4709 		goto release;
4710 
4711 	/*
4712 	 * trace_find_next_entry() may need to save off iter->ent.
4713 	 * It will place it into the iter->temp buffer. As most
4714 	 * events are less than 128, allocate a buffer of that size.
4715 	 * If one is greater, then trace_find_next_entry() will
4716 	 * allocate a new buffer to adjust for the bigger iter->ent.
4717 	 * It's not critical if it fails to get allocated here.
4718 	 */
4719 	iter->temp = kmalloc(128, GFP_KERNEL);
4720 	if (iter->temp)
4721 		iter->temp_size = 128;
4722 
4723 	/*
4724 	 * trace_event_printf() may need to modify given format
4725 	 * string to replace %p with %px so that it shows real address
4726 	 * instead of hash value. However, that is only for the event
4727 	 * tracing, other tracer may not need. Defer the allocation
4728 	 * until it is needed.
4729 	 */
4730 	iter->fmt = NULL;
4731 	iter->fmt_size = 0;
4732 
4733 	/*
4734 	 * We make a copy of the current tracer to avoid concurrent
4735 	 * changes on it while we are reading.
4736 	 */
4737 	mutex_lock(&trace_types_lock);
4738 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4739 	if (!iter->trace)
4740 		goto fail;
4741 
4742 	*iter->trace = *tr->current_trace;
4743 
4744 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4745 		goto fail;
4746 
4747 	iter->tr = tr;
4748 
4749 #ifdef CONFIG_TRACER_MAX_TRACE
4750 	/* Currently only the top directory has a snapshot */
4751 	if (tr->current_trace->print_max || snapshot)
4752 		iter->array_buffer = &tr->max_buffer;
4753 	else
4754 #endif
4755 		iter->array_buffer = &tr->array_buffer;
4756 	iter->snapshot = snapshot;
4757 	iter->pos = -1;
4758 	iter->cpu_file = tracing_get_cpu(inode);
4759 	mutex_init(&iter->mutex);
4760 
4761 	/* Notify the tracer early; before we stop tracing. */
4762 	if (iter->trace->open)
4763 		iter->trace->open(iter);
4764 
4765 	/* Annotate start of buffers if we had overruns */
4766 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4767 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4768 
4769 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4770 	if (trace_clocks[tr->clock_id].in_ns)
4771 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4772 
4773 	/*
4774 	 * If pause-on-trace is enabled, then stop the trace while
4775 	 * dumping, unless this is the "snapshot" file
4776 	 */
4777 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4778 		tracing_stop_tr(tr);
4779 
4780 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4781 		for_each_tracing_cpu(cpu) {
4782 			iter->buffer_iter[cpu] =
4783 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4784 							 cpu, GFP_KERNEL);
4785 		}
4786 		ring_buffer_read_prepare_sync();
4787 		for_each_tracing_cpu(cpu) {
4788 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4789 			tracing_iter_reset(iter, cpu);
4790 		}
4791 	} else {
4792 		cpu = iter->cpu_file;
4793 		iter->buffer_iter[cpu] =
4794 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4795 						 cpu, GFP_KERNEL);
4796 		ring_buffer_read_prepare_sync();
4797 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4798 		tracing_iter_reset(iter, cpu);
4799 	}
4800 
4801 	mutex_unlock(&trace_types_lock);
4802 
4803 	return iter;
4804 
4805  fail:
4806 	mutex_unlock(&trace_types_lock);
4807 	kfree(iter->trace);
4808 	kfree(iter->temp);
4809 	kfree(iter->buffer_iter);
4810 release:
4811 	seq_release_private(inode, file);
4812 	return ERR_PTR(-ENOMEM);
4813 }
4814 
4815 int tracing_open_generic(struct inode *inode, struct file *filp)
4816 {
4817 	int ret;
4818 
4819 	ret = tracing_check_open_get_tr(NULL);
4820 	if (ret)
4821 		return ret;
4822 
4823 	filp->private_data = inode->i_private;
4824 	return 0;
4825 }
4826 
4827 bool tracing_is_disabled(void)
4828 {
4829 	return (tracing_disabled) ? true: false;
4830 }
4831 
4832 /*
4833  * Open and update trace_array ref count.
4834  * Must have the current trace_array passed to it.
4835  */
4836 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4837 {
4838 	struct trace_array *tr = inode->i_private;
4839 	int ret;
4840 
4841 	ret = tracing_check_open_get_tr(tr);
4842 	if (ret)
4843 		return ret;
4844 
4845 	filp->private_data = inode->i_private;
4846 
4847 	return 0;
4848 }
4849 
4850 static int tracing_mark_open(struct inode *inode, struct file *filp)
4851 {
4852 	stream_open(inode, filp);
4853 	return tracing_open_generic_tr(inode, filp);
4854 }
4855 
4856 static int tracing_release(struct inode *inode, struct file *file)
4857 {
4858 	struct trace_array *tr = inode->i_private;
4859 	struct seq_file *m = file->private_data;
4860 	struct trace_iterator *iter;
4861 	int cpu;
4862 
4863 	if (!(file->f_mode & FMODE_READ)) {
4864 		trace_array_put(tr);
4865 		return 0;
4866 	}
4867 
4868 	/* Writes do not use seq_file */
4869 	iter = m->private;
4870 	mutex_lock(&trace_types_lock);
4871 
4872 	for_each_tracing_cpu(cpu) {
4873 		if (iter->buffer_iter[cpu])
4874 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4875 	}
4876 
4877 	if (iter->trace && iter->trace->close)
4878 		iter->trace->close(iter);
4879 
4880 	if (!iter->snapshot && tr->stop_count)
4881 		/* reenable tracing if it was previously enabled */
4882 		tracing_start_tr(tr);
4883 
4884 	__trace_array_put(tr);
4885 
4886 	mutex_unlock(&trace_types_lock);
4887 
4888 	mutex_destroy(&iter->mutex);
4889 	free_cpumask_var(iter->started);
4890 	kfree(iter->fmt);
4891 	kfree(iter->temp);
4892 	kfree(iter->trace);
4893 	kfree(iter->buffer_iter);
4894 	seq_release_private(inode, file);
4895 
4896 	return 0;
4897 }
4898 
4899 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4900 {
4901 	struct trace_array *tr = inode->i_private;
4902 
4903 	trace_array_put(tr);
4904 	return 0;
4905 }
4906 
4907 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4908 {
4909 	struct trace_array *tr = inode->i_private;
4910 
4911 	trace_array_put(tr);
4912 
4913 	return single_release(inode, file);
4914 }
4915 
4916 static int tracing_open(struct inode *inode, struct file *file)
4917 {
4918 	struct trace_array *tr = inode->i_private;
4919 	struct trace_iterator *iter;
4920 	int ret;
4921 
4922 	ret = tracing_check_open_get_tr(tr);
4923 	if (ret)
4924 		return ret;
4925 
4926 	/* If this file was open for write, then erase contents */
4927 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4928 		int cpu = tracing_get_cpu(inode);
4929 		struct array_buffer *trace_buf = &tr->array_buffer;
4930 
4931 #ifdef CONFIG_TRACER_MAX_TRACE
4932 		if (tr->current_trace->print_max)
4933 			trace_buf = &tr->max_buffer;
4934 #endif
4935 
4936 		if (cpu == RING_BUFFER_ALL_CPUS)
4937 			tracing_reset_online_cpus(trace_buf);
4938 		else
4939 			tracing_reset_cpu(trace_buf, cpu);
4940 	}
4941 
4942 	if (file->f_mode & FMODE_READ) {
4943 		iter = __tracing_open(inode, file, false);
4944 		if (IS_ERR(iter))
4945 			ret = PTR_ERR(iter);
4946 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4947 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4948 	}
4949 
4950 	if (ret < 0)
4951 		trace_array_put(tr);
4952 
4953 	return ret;
4954 }
4955 
4956 /*
4957  * Some tracers are not suitable for instance buffers.
4958  * A tracer is always available for the global array (toplevel)
4959  * or if it explicitly states that it is.
4960  */
4961 static bool
4962 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4963 {
4964 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4965 }
4966 
4967 /* Find the next tracer that this trace array may use */
4968 static struct tracer *
4969 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4970 {
4971 	while (t && !trace_ok_for_array(t, tr))
4972 		t = t->next;
4973 
4974 	return t;
4975 }
4976 
4977 static void *
4978 t_next(struct seq_file *m, void *v, loff_t *pos)
4979 {
4980 	struct trace_array *tr = m->private;
4981 	struct tracer *t = v;
4982 
4983 	(*pos)++;
4984 
4985 	if (t)
4986 		t = get_tracer_for_array(tr, t->next);
4987 
4988 	return t;
4989 }
4990 
4991 static void *t_start(struct seq_file *m, loff_t *pos)
4992 {
4993 	struct trace_array *tr = m->private;
4994 	struct tracer *t;
4995 	loff_t l = 0;
4996 
4997 	mutex_lock(&trace_types_lock);
4998 
4999 	t = get_tracer_for_array(tr, trace_types);
5000 	for (; t && l < *pos; t = t_next(m, t, &l))
5001 			;
5002 
5003 	return t;
5004 }
5005 
5006 static void t_stop(struct seq_file *m, void *p)
5007 {
5008 	mutex_unlock(&trace_types_lock);
5009 }
5010 
5011 static int t_show(struct seq_file *m, void *v)
5012 {
5013 	struct tracer *t = v;
5014 
5015 	if (!t)
5016 		return 0;
5017 
5018 	seq_puts(m, t->name);
5019 	if (t->next)
5020 		seq_putc(m, ' ');
5021 	else
5022 		seq_putc(m, '\n');
5023 
5024 	return 0;
5025 }
5026 
5027 static const struct seq_operations show_traces_seq_ops = {
5028 	.start		= t_start,
5029 	.next		= t_next,
5030 	.stop		= t_stop,
5031 	.show		= t_show,
5032 };
5033 
5034 static int show_traces_open(struct inode *inode, struct file *file)
5035 {
5036 	struct trace_array *tr = inode->i_private;
5037 	struct seq_file *m;
5038 	int ret;
5039 
5040 	ret = tracing_check_open_get_tr(tr);
5041 	if (ret)
5042 		return ret;
5043 
5044 	ret = seq_open(file, &show_traces_seq_ops);
5045 	if (ret) {
5046 		trace_array_put(tr);
5047 		return ret;
5048 	}
5049 
5050 	m = file->private_data;
5051 	m->private = tr;
5052 
5053 	return 0;
5054 }
5055 
5056 static int show_traces_release(struct inode *inode, struct file *file)
5057 {
5058 	struct trace_array *tr = inode->i_private;
5059 
5060 	trace_array_put(tr);
5061 	return seq_release(inode, file);
5062 }
5063 
5064 static ssize_t
5065 tracing_write_stub(struct file *filp, const char __user *ubuf,
5066 		   size_t count, loff_t *ppos)
5067 {
5068 	return count;
5069 }
5070 
5071 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5072 {
5073 	int ret;
5074 
5075 	if (file->f_mode & FMODE_READ)
5076 		ret = seq_lseek(file, offset, whence);
5077 	else
5078 		file->f_pos = ret = 0;
5079 
5080 	return ret;
5081 }
5082 
5083 static const struct file_operations tracing_fops = {
5084 	.open		= tracing_open,
5085 	.read		= seq_read,
5086 	.write		= tracing_write_stub,
5087 	.llseek		= tracing_lseek,
5088 	.release	= tracing_release,
5089 };
5090 
5091 static const struct file_operations show_traces_fops = {
5092 	.open		= show_traces_open,
5093 	.read		= seq_read,
5094 	.llseek		= seq_lseek,
5095 	.release	= show_traces_release,
5096 };
5097 
5098 static ssize_t
5099 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5100 		     size_t count, loff_t *ppos)
5101 {
5102 	struct trace_array *tr = file_inode(filp)->i_private;
5103 	char *mask_str;
5104 	int len;
5105 
5106 	len = snprintf(NULL, 0, "%*pb\n",
5107 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5108 	mask_str = kmalloc(len, GFP_KERNEL);
5109 	if (!mask_str)
5110 		return -ENOMEM;
5111 
5112 	len = snprintf(mask_str, len, "%*pb\n",
5113 		       cpumask_pr_args(tr->tracing_cpumask));
5114 	if (len >= count) {
5115 		count = -EINVAL;
5116 		goto out_err;
5117 	}
5118 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5119 
5120 out_err:
5121 	kfree(mask_str);
5122 
5123 	return count;
5124 }
5125 
5126 int tracing_set_cpumask(struct trace_array *tr,
5127 			cpumask_var_t tracing_cpumask_new)
5128 {
5129 	int cpu;
5130 
5131 	if (!tr)
5132 		return -EINVAL;
5133 
5134 	local_irq_disable();
5135 	arch_spin_lock(&tr->max_lock);
5136 	for_each_tracing_cpu(cpu) {
5137 		/*
5138 		 * Increase/decrease the disabled counter if we are
5139 		 * about to flip a bit in the cpumask:
5140 		 */
5141 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5142 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5143 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5144 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5145 		}
5146 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5147 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5148 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5149 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5150 		}
5151 	}
5152 	arch_spin_unlock(&tr->max_lock);
5153 	local_irq_enable();
5154 
5155 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5156 
5157 	return 0;
5158 }
5159 
5160 static ssize_t
5161 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5162 		      size_t count, loff_t *ppos)
5163 {
5164 	struct trace_array *tr = file_inode(filp)->i_private;
5165 	cpumask_var_t tracing_cpumask_new;
5166 	int err;
5167 
5168 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5169 		return -ENOMEM;
5170 
5171 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5172 	if (err)
5173 		goto err_free;
5174 
5175 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5176 	if (err)
5177 		goto err_free;
5178 
5179 	free_cpumask_var(tracing_cpumask_new);
5180 
5181 	return count;
5182 
5183 err_free:
5184 	free_cpumask_var(tracing_cpumask_new);
5185 
5186 	return err;
5187 }
5188 
5189 static const struct file_operations tracing_cpumask_fops = {
5190 	.open		= tracing_open_generic_tr,
5191 	.read		= tracing_cpumask_read,
5192 	.write		= tracing_cpumask_write,
5193 	.release	= tracing_release_generic_tr,
5194 	.llseek		= generic_file_llseek,
5195 };
5196 
5197 static int tracing_trace_options_show(struct seq_file *m, void *v)
5198 {
5199 	struct tracer_opt *trace_opts;
5200 	struct trace_array *tr = m->private;
5201 	u32 tracer_flags;
5202 	int i;
5203 
5204 	mutex_lock(&trace_types_lock);
5205 	tracer_flags = tr->current_trace->flags->val;
5206 	trace_opts = tr->current_trace->flags->opts;
5207 
5208 	for (i = 0; trace_options[i]; i++) {
5209 		if (tr->trace_flags & (1 << i))
5210 			seq_printf(m, "%s\n", trace_options[i]);
5211 		else
5212 			seq_printf(m, "no%s\n", trace_options[i]);
5213 	}
5214 
5215 	for (i = 0; trace_opts[i].name; i++) {
5216 		if (tracer_flags & trace_opts[i].bit)
5217 			seq_printf(m, "%s\n", trace_opts[i].name);
5218 		else
5219 			seq_printf(m, "no%s\n", trace_opts[i].name);
5220 	}
5221 	mutex_unlock(&trace_types_lock);
5222 
5223 	return 0;
5224 }
5225 
5226 static int __set_tracer_option(struct trace_array *tr,
5227 			       struct tracer_flags *tracer_flags,
5228 			       struct tracer_opt *opts, int neg)
5229 {
5230 	struct tracer *trace = tracer_flags->trace;
5231 	int ret;
5232 
5233 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5234 	if (ret)
5235 		return ret;
5236 
5237 	if (neg)
5238 		tracer_flags->val &= ~opts->bit;
5239 	else
5240 		tracer_flags->val |= opts->bit;
5241 	return 0;
5242 }
5243 
5244 /* Try to assign a tracer specific option */
5245 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5246 {
5247 	struct tracer *trace = tr->current_trace;
5248 	struct tracer_flags *tracer_flags = trace->flags;
5249 	struct tracer_opt *opts = NULL;
5250 	int i;
5251 
5252 	for (i = 0; tracer_flags->opts[i].name; i++) {
5253 		opts = &tracer_flags->opts[i];
5254 
5255 		if (strcmp(cmp, opts->name) == 0)
5256 			return __set_tracer_option(tr, trace->flags, opts, neg);
5257 	}
5258 
5259 	return -EINVAL;
5260 }
5261 
5262 /* Some tracers require overwrite to stay enabled */
5263 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5264 {
5265 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5266 		return -1;
5267 
5268 	return 0;
5269 }
5270 
5271 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5272 {
5273 	int *map;
5274 
5275 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5276 	    (mask == TRACE_ITER_RECORD_CMD))
5277 		lockdep_assert_held(&event_mutex);
5278 
5279 	/* do nothing if flag is already set */
5280 	if (!!(tr->trace_flags & mask) == !!enabled)
5281 		return 0;
5282 
5283 	/* Give the tracer a chance to approve the change */
5284 	if (tr->current_trace->flag_changed)
5285 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5286 			return -EINVAL;
5287 
5288 	if (enabled)
5289 		tr->trace_flags |= mask;
5290 	else
5291 		tr->trace_flags &= ~mask;
5292 
5293 	if (mask == TRACE_ITER_RECORD_CMD)
5294 		trace_event_enable_cmd_record(enabled);
5295 
5296 	if (mask == TRACE_ITER_RECORD_TGID) {
5297 		if (!tgid_map) {
5298 			tgid_map_max = pid_max;
5299 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5300 				       GFP_KERNEL);
5301 
5302 			/*
5303 			 * Pairs with smp_load_acquire() in
5304 			 * trace_find_tgid_ptr() to ensure that if it observes
5305 			 * the tgid_map we just allocated then it also observes
5306 			 * the corresponding tgid_map_max value.
5307 			 */
5308 			smp_store_release(&tgid_map, map);
5309 		}
5310 		if (!tgid_map) {
5311 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5312 			return -ENOMEM;
5313 		}
5314 
5315 		trace_event_enable_tgid_record(enabled);
5316 	}
5317 
5318 	if (mask == TRACE_ITER_EVENT_FORK)
5319 		trace_event_follow_fork(tr, enabled);
5320 
5321 	if (mask == TRACE_ITER_FUNC_FORK)
5322 		ftrace_pid_follow_fork(tr, enabled);
5323 
5324 	if (mask == TRACE_ITER_OVERWRITE) {
5325 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5326 #ifdef CONFIG_TRACER_MAX_TRACE
5327 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5328 #endif
5329 	}
5330 
5331 	if (mask == TRACE_ITER_PRINTK) {
5332 		trace_printk_start_stop_comm(enabled);
5333 		trace_printk_control(enabled);
5334 	}
5335 
5336 	return 0;
5337 }
5338 
5339 int trace_set_options(struct trace_array *tr, char *option)
5340 {
5341 	char *cmp;
5342 	int neg = 0;
5343 	int ret;
5344 	size_t orig_len = strlen(option);
5345 	int len;
5346 
5347 	cmp = strstrip(option);
5348 
5349 	len = str_has_prefix(cmp, "no");
5350 	if (len)
5351 		neg = 1;
5352 
5353 	cmp += len;
5354 
5355 	mutex_lock(&event_mutex);
5356 	mutex_lock(&trace_types_lock);
5357 
5358 	ret = match_string(trace_options, -1, cmp);
5359 	/* If no option could be set, test the specific tracer options */
5360 	if (ret < 0)
5361 		ret = set_tracer_option(tr, cmp, neg);
5362 	else
5363 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5364 
5365 	mutex_unlock(&trace_types_lock);
5366 	mutex_unlock(&event_mutex);
5367 
5368 	/*
5369 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5370 	 * turn it back into a space.
5371 	 */
5372 	if (orig_len > strlen(option))
5373 		option[strlen(option)] = ' ';
5374 
5375 	return ret;
5376 }
5377 
5378 static void __init apply_trace_boot_options(void)
5379 {
5380 	char *buf = trace_boot_options_buf;
5381 	char *option;
5382 
5383 	while (true) {
5384 		option = strsep(&buf, ",");
5385 
5386 		if (!option)
5387 			break;
5388 
5389 		if (*option)
5390 			trace_set_options(&global_trace, option);
5391 
5392 		/* Put back the comma to allow this to be called again */
5393 		if (buf)
5394 			*(buf - 1) = ',';
5395 	}
5396 }
5397 
5398 static ssize_t
5399 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5400 			size_t cnt, loff_t *ppos)
5401 {
5402 	struct seq_file *m = filp->private_data;
5403 	struct trace_array *tr = m->private;
5404 	char buf[64];
5405 	int ret;
5406 
5407 	if (cnt >= sizeof(buf))
5408 		return -EINVAL;
5409 
5410 	if (copy_from_user(buf, ubuf, cnt))
5411 		return -EFAULT;
5412 
5413 	buf[cnt] = 0;
5414 
5415 	ret = trace_set_options(tr, buf);
5416 	if (ret < 0)
5417 		return ret;
5418 
5419 	*ppos += cnt;
5420 
5421 	return cnt;
5422 }
5423 
5424 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5425 {
5426 	struct trace_array *tr = inode->i_private;
5427 	int ret;
5428 
5429 	ret = tracing_check_open_get_tr(tr);
5430 	if (ret)
5431 		return ret;
5432 
5433 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5434 	if (ret < 0)
5435 		trace_array_put(tr);
5436 
5437 	return ret;
5438 }
5439 
5440 static const struct file_operations tracing_iter_fops = {
5441 	.open		= tracing_trace_options_open,
5442 	.read		= seq_read,
5443 	.llseek		= seq_lseek,
5444 	.release	= tracing_single_release_tr,
5445 	.write		= tracing_trace_options_write,
5446 };
5447 
5448 static const char readme_msg[] =
5449 	"tracing mini-HOWTO:\n\n"
5450 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5451 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5452 	" Important files:\n"
5453 	"  trace\t\t\t- The static contents of the buffer\n"
5454 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5455 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5456 	"  current_tracer\t- function and latency tracers\n"
5457 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5458 	"  error_log\t- error log for failed commands (that support it)\n"
5459 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5460 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5461 	"  trace_clock\t\t-change the clock used to order events\n"
5462 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5463 	"      global:   Synced across CPUs but slows tracing down.\n"
5464 	"     counter:   Not a clock, but just an increment\n"
5465 	"      uptime:   Jiffy counter from time of boot\n"
5466 	"        perf:   Same clock that perf events use\n"
5467 #ifdef CONFIG_X86_64
5468 	"     x86-tsc:   TSC cycle counter\n"
5469 #endif
5470 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
5471 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5472 	"    absolute:   Absolute (standalone) timestamp\n"
5473 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5474 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5475 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5476 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5477 	"\t\t\t  Remove sub-buffer with rmdir\n"
5478 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5479 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5480 	"\t\t\t  option name\n"
5481 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5482 #ifdef CONFIG_DYNAMIC_FTRACE
5483 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5484 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5485 	"\t\t\t  functions\n"
5486 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5487 	"\t     modules: Can select a group via module\n"
5488 	"\t      Format: :mod:<module-name>\n"
5489 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5490 	"\t    triggers: a command to perform when function is hit\n"
5491 	"\t      Format: <function>:<trigger>[:count]\n"
5492 	"\t     trigger: traceon, traceoff\n"
5493 	"\t\t      enable_event:<system>:<event>\n"
5494 	"\t\t      disable_event:<system>:<event>\n"
5495 #ifdef CONFIG_STACKTRACE
5496 	"\t\t      stacktrace\n"
5497 #endif
5498 #ifdef CONFIG_TRACER_SNAPSHOT
5499 	"\t\t      snapshot\n"
5500 #endif
5501 	"\t\t      dump\n"
5502 	"\t\t      cpudump\n"
5503 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5504 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5505 	"\t     The first one will disable tracing every time do_fault is hit\n"
5506 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5507 	"\t       The first time do trap is hit and it disables tracing, the\n"
5508 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5509 	"\t       the counter will not decrement. It only decrements when the\n"
5510 	"\t       trigger did work\n"
5511 	"\t     To remove trigger without count:\n"
5512 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5513 	"\t     To remove trigger with a count:\n"
5514 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5515 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5516 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5517 	"\t    modules: Can select a group via module command :mod:\n"
5518 	"\t    Does not accept triggers\n"
5519 #endif /* CONFIG_DYNAMIC_FTRACE */
5520 #ifdef CONFIG_FUNCTION_TRACER
5521 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5522 	"\t\t    (function)\n"
5523 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5524 	"\t\t    (function)\n"
5525 #endif
5526 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5527 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5528 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5529 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5530 #endif
5531 #ifdef CONFIG_TRACER_SNAPSHOT
5532 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5533 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5534 	"\t\t\t  information\n"
5535 #endif
5536 #ifdef CONFIG_STACK_TRACER
5537 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5538 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5539 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5540 	"\t\t\t  new trace)\n"
5541 #ifdef CONFIG_DYNAMIC_FTRACE
5542 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5543 	"\t\t\t  traces\n"
5544 #endif
5545 #endif /* CONFIG_STACK_TRACER */
5546 #ifdef CONFIG_DYNAMIC_EVENTS
5547 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5548 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5549 #endif
5550 #ifdef CONFIG_KPROBE_EVENTS
5551 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5552 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5553 #endif
5554 #ifdef CONFIG_UPROBE_EVENTS
5555 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5556 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5557 #endif
5558 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5559 	"\t  accepts: event-definitions (one definition per line)\n"
5560 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5561 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5562 #ifdef CONFIG_HIST_TRIGGERS
5563 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5564 #endif
5565 	"\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5566 	"\t           -:[<group>/]<event>\n"
5567 #ifdef CONFIG_KPROBE_EVENTS
5568 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5569   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5570 #endif
5571 #ifdef CONFIG_UPROBE_EVENTS
5572   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5573 #endif
5574 	"\t     args: <name>=fetcharg[:type]\n"
5575 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5576 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5577 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5578 #else
5579 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5580 #endif
5581 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5582 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5583 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5584 	"\t           <type>\\[<array-size>\\]\n"
5585 #ifdef CONFIG_HIST_TRIGGERS
5586 	"\t    field: <stype> <name>;\n"
5587 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5588 	"\t           [unsigned] char/int/long\n"
5589 #endif
5590 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5591 	"\t            of the <attached-group>/<attached-event>.\n"
5592 #endif
5593 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5594 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5595 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5596 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5597 	"\t\t\t  events\n"
5598 	"      filter\t\t- If set, only events passing filter are traced\n"
5599 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5600 	"\t\t\t  <event>:\n"
5601 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5602 	"      filter\t\t- If set, only events passing filter are traced\n"
5603 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5604 	"\t    Format: <trigger>[:count][if <filter>]\n"
5605 	"\t   trigger: traceon, traceoff\n"
5606 	"\t            enable_event:<system>:<event>\n"
5607 	"\t            disable_event:<system>:<event>\n"
5608 #ifdef CONFIG_HIST_TRIGGERS
5609 	"\t            enable_hist:<system>:<event>\n"
5610 	"\t            disable_hist:<system>:<event>\n"
5611 #endif
5612 #ifdef CONFIG_STACKTRACE
5613 	"\t\t    stacktrace\n"
5614 #endif
5615 #ifdef CONFIG_TRACER_SNAPSHOT
5616 	"\t\t    snapshot\n"
5617 #endif
5618 #ifdef CONFIG_HIST_TRIGGERS
5619 	"\t\t    hist (see below)\n"
5620 #endif
5621 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5622 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5623 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5624 	"\t                  events/block/block_unplug/trigger\n"
5625 	"\t   The first disables tracing every time block_unplug is hit.\n"
5626 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5627 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5628 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5629 	"\t   Like function triggers, the counter is only decremented if it\n"
5630 	"\t    enabled or disabled tracing.\n"
5631 	"\t   To remove a trigger without a count:\n"
5632 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5633 	"\t   To remove a trigger with a count:\n"
5634 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5635 	"\t   Filters can be ignored when removing a trigger.\n"
5636 #ifdef CONFIG_HIST_TRIGGERS
5637 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5638 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5639 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5640 	"\t            [:values=<field1[,field2,...]>]\n"
5641 	"\t            [:sort=<field1[,field2,...]>]\n"
5642 	"\t            [:size=#entries]\n"
5643 	"\t            [:pause][:continue][:clear]\n"
5644 	"\t            [:name=histname1]\n"
5645 	"\t            [:<handler>.<action>]\n"
5646 	"\t            [if <filter>]\n\n"
5647 	"\t    Note, special fields can be used as well:\n"
5648 	"\t            common_timestamp - to record current timestamp\n"
5649 	"\t            common_cpu - to record the CPU the event happened on\n"
5650 	"\n"
5651 	"\t    A hist trigger variable can be:\n"
5652 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5653 	"\t        - a reference to another variable e.g. y=$x,\n"
5654 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5655 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5656 	"\n"
5657 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5658 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5659 	"\t    variable reference, field or numeric literal.\n"
5660 	"\n"
5661 	"\t    When a matching event is hit, an entry is added to a hash\n"
5662 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5663 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5664 	"\t    correspond to fields in the event's format description.  Keys\n"
5665 	"\t    can be any field, or the special string 'stacktrace'.\n"
5666 	"\t    Compound keys consisting of up to two fields can be specified\n"
5667 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5668 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5669 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5670 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5671 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5672 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5673 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5674 	"\t    its histogram data will be shared with other triggers of the\n"
5675 	"\t    same name, and trigger hits will update this common data.\n\n"
5676 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5677 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5678 	"\t    triggers attached to an event, there will be a table for each\n"
5679 	"\t    trigger in the output.  The table displayed for a named\n"
5680 	"\t    trigger will be the same as any other instance having the\n"
5681 	"\t    same name.  The default format used to display a given field\n"
5682 	"\t    can be modified by appending any of the following modifiers\n"
5683 	"\t    to the field name, as applicable:\n\n"
5684 	"\t            .hex        display a number as a hex value\n"
5685 	"\t            .sym        display an address as a symbol\n"
5686 	"\t            .sym-offset display an address as a symbol and offset\n"
5687 	"\t            .execname   display a common_pid as a program name\n"
5688 	"\t            .syscall    display a syscall id as a syscall name\n"
5689 	"\t            .log2       display log2 value rather than raw number\n"
5690 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5691 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5692 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5693 	"\t    trigger or to start a hist trigger but not log any events\n"
5694 	"\t    until told to do so.  'continue' can be used to start or\n"
5695 	"\t    restart a paused hist trigger.\n\n"
5696 	"\t    The 'clear' parameter will clear the contents of a running\n"
5697 	"\t    hist trigger and leave its current paused/active state\n"
5698 	"\t    unchanged.\n\n"
5699 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5700 	"\t    have one event conditionally start and stop another event's\n"
5701 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5702 	"\t    the enable_event and disable_event triggers.\n\n"
5703 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5704 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5705 	"\t        <handler>.<action>\n\n"
5706 	"\t    The available handlers are:\n\n"
5707 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5708 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5709 	"\t        onchange(var)            - invoke action if var changes\n\n"
5710 	"\t    The available actions are:\n\n"
5711 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5712 	"\t        save(field,...)                      - save current event fields\n"
5713 #ifdef CONFIG_TRACER_SNAPSHOT
5714 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5715 #endif
5716 #ifdef CONFIG_SYNTH_EVENTS
5717 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5718 	"\t  Write into this file to define/undefine new synthetic events.\n"
5719 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5720 #endif
5721 #endif
5722 ;
5723 
5724 static ssize_t
5725 tracing_readme_read(struct file *filp, char __user *ubuf,
5726 		       size_t cnt, loff_t *ppos)
5727 {
5728 	return simple_read_from_buffer(ubuf, cnt, ppos,
5729 					readme_msg, strlen(readme_msg));
5730 }
5731 
5732 static const struct file_operations tracing_readme_fops = {
5733 	.open		= tracing_open_generic,
5734 	.read		= tracing_readme_read,
5735 	.llseek		= generic_file_llseek,
5736 };
5737 
5738 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5739 {
5740 	int pid = ++(*pos);
5741 
5742 	return trace_find_tgid_ptr(pid);
5743 }
5744 
5745 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5746 {
5747 	int pid = *pos;
5748 
5749 	return trace_find_tgid_ptr(pid);
5750 }
5751 
5752 static void saved_tgids_stop(struct seq_file *m, void *v)
5753 {
5754 }
5755 
5756 static int saved_tgids_show(struct seq_file *m, void *v)
5757 {
5758 	int *entry = (int *)v;
5759 	int pid = entry - tgid_map;
5760 	int tgid = *entry;
5761 
5762 	if (tgid == 0)
5763 		return SEQ_SKIP;
5764 
5765 	seq_printf(m, "%d %d\n", pid, tgid);
5766 	return 0;
5767 }
5768 
5769 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5770 	.start		= saved_tgids_start,
5771 	.stop		= saved_tgids_stop,
5772 	.next		= saved_tgids_next,
5773 	.show		= saved_tgids_show,
5774 };
5775 
5776 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5777 {
5778 	int ret;
5779 
5780 	ret = tracing_check_open_get_tr(NULL);
5781 	if (ret)
5782 		return ret;
5783 
5784 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5785 }
5786 
5787 
5788 static const struct file_operations tracing_saved_tgids_fops = {
5789 	.open		= tracing_saved_tgids_open,
5790 	.read		= seq_read,
5791 	.llseek		= seq_lseek,
5792 	.release	= seq_release,
5793 };
5794 
5795 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5796 {
5797 	unsigned int *ptr = v;
5798 
5799 	if (*pos || m->count)
5800 		ptr++;
5801 
5802 	(*pos)++;
5803 
5804 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5805 	     ptr++) {
5806 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5807 			continue;
5808 
5809 		return ptr;
5810 	}
5811 
5812 	return NULL;
5813 }
5814 
5815 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5816 {
5817 	void *v;
5818 	loff_t l = 0;
5819 
5820 	preempt_disable();
5821 	arch_spin_lock(&trace_cmdline_lock);
5822 
5823 	v = &savedcmd->map_cmdline_to_pid[0];
5824 	while (l <= *pos) {
5825 		v = saved_cmdlines_next(m, v, &l);
5826 		if (!v)
5827 			return NULL;
5828 	}
5829 
5830 	return v;
5831 }
5832 
5833 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5834 {
5835 	arch_spin_unlock(&trace_cmdline_lock);
5836 	preempt_enable();
5837 }
5838 
5839 static int saved_cmdlines_show(struct seq_file *m, void *v)
5840 {
5841 	char buf[TASK_COMM_LEN];
5842 	unsigned int *pid = v;
5843 
5844 	__trace_find_cmdline(*pid, buf);
5845 	seq_printf(m, "%d %s\n", *pid, buf);
5846 	return 0;
5847 }
5848 
5849 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5850 	.start		= saved_cmdlines_start,
5851 	.next		= saved_cmdlines_next,
5852 	.stop		= saved_cmdlines_stop,
5853 	.show		= saved_cmdlines_show,
5854 };
5855 
5856 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5857 {
5858 	int ret;
5859 
5860 	ret = tracing_check_open_get_tr(NULL);
5861 	if (ret)
5862 		return ret;
5863 
5864 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5865 }
5866 
5867 static const struct file_operations tracing_saved_cmdlines_fops = {
5868 	.open		= tracing_saved_cmdlines_open,
5869 	.read		= seq_read,
5870 	.llseek		= seq_lseek,
5871 	.release	= seq_release,
5872 };
5873 
5874 static ssize_t
5875 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5876 				 size_t cnt, loff_t *ppos)
5877 {
5878 	char buf[64];
5879 	int r;
5880 
5881 	arch_spin_lock(&trace_cmdline_lock);
5882 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5883 	arch_spin_unlock(&trace_cmdline_lock);
5884 
5885 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5886 }
5887 
5888 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5889 {
5890 	kfree(s->saved_cmdlines);
5891 	kfree(s->map_cmdline_to_pid);
5892 	kfree(s);
5893 }
5894 
5895 static int tracing_resize_saved_cmdlines(unsigned int val)
5896 {
5897 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5898 
5899 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5900 	if (!s)
5901 		return -ENOMEM;
5902 
5903 	if (allocate_cmdlines_buffer(val, s) < 0) {
5904 		kfree(s);
5905 		return -ENOMEM;
5906 	}
5907 
5908 	arch_spin_lock(&trace_cmdline_lock);
5909 	savedcmd_temp = savedcmd;
5910 	savedcmd = s;
5911 	arch_spin_unlock(&trace_cmdline_lock);
5912 	free_saved_cmdlines_buffer(savedcmd_temp);
5913 
5914 	return 0;
5915 }
5916 
5917 static ssize_t
5918 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5919 				  size_t cnt, loff_t *ppos)
5920 {
5921 	unsigned long val;
5922 	int ret;
5923 
5924 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5925 	if (ret)
5926 		return ret;
5927 
5928 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
5929 	if (!val || val > PID_MAX_DEFAULT)
5930 		return -EINVAL;
5931 
5932 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5933 	if (ret < 0)
5934 		return ret;
5935 
5936 	*ppos += cnt;
5937 
5938 	return cnt;
5939 }
5940 
5941 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5942 	.open		= tracing_open_generic,
5943 	.read		= tracing_saved_cmdlines_size_read,
5944 	.write		= tracing_saved_cmdlines_size_write,
5945 };
5946 
5947 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5948 static union trace_eval_map_item *
5949 update_eval_map(union trace_eval_map_item *ptr)
5950 {
5951 	if (!ptr->map.eval_string) {
5952 		if (ptr->tail.next) {
5953 			ptr = ptr->tail.next;
5954 			/* Set ptr to the next real item (skip head) */
5955 			ptr++;
5956 		} else
5957 			return NULL;
5958 	}
5959 	return ptr;
5960 }
5961 
5962 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5963 {
5964 	union trace_eval_map_item *ptr = v;
5965 
5966 	/*
5967 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5968 	 * This really should never happen.
5969 	 */
5970 	(*pos)++;
5971 	ptr = update_eval_map(ptr);
5972 	if (WARN_ON_ONCE(!ptr))
5973 		return NULL;
5974 
5975 	ptr++;
5976 	ptr = update_eval_map(ptr);
5977 
5978 	return ptr;
5979 }
5980 
5981 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5982 {
5983 	union trace_eval_map_item *v;
5984 	loff_t l = 0;
5985 
5986 	mutex_lock(&trace_eval_mutex);
5987 
5988 	v = trace_eval_maps;
5989 	if (v)
5990 		v++;
5991 
5992 	while (v && l < *pos) {
5993 		v = eval_map_next(m, v, &l);
5994 	}
5995 
5996 	return v;
5997 }
5998 
5999 static void eval_map_stop(struct seq_file *m, void *v)
6000 {
6001 	mutex_unlock(&trace_eval_mutex);
6002 }
6003 
6004 static int eval_map_show(struct seq_file *m, void *v)
6005 {
6006 	union trace_eval_map_item *ptr = v;
6007 
6008 	seq_printf(m, "%s %ld (%s)\n",
6009 		   ptr->map.eval_string, ptr->map.eval_value,
6010 		   ptr->map.system);
6011 
6012 	return 0;
6013 }
6014 
6015 static const struct seq_operations tracing_eval_map_seq_ops = {
6016 	.start		= eval_map_start,
6017 	.next		= eval_map_next,
6018 	.stop		= eval_map_stop,
6019 	.show		= eval_map_show,
6020 };
6021 
6022 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6023 {
6024 	int ret;
6025 
6026 	ret = tracing_check_open_get_tr(NULL);
6027 	if (ret)
6028 		return ret;
6029 
6030 	return seq_open(filp, &tracing_eval_map_seq_ops);
6031 }
6032 
6033 static const struct file_operations tracing_eval_map_fops = {
6034 	.open		= tracing_eval_map_open,
6035 	.read		= seq_read,
6036 	.llseek		= seq_lseek,
6037 	.release	= seq_release,
6038 };
6039 
6040 static inline union trace_eval_map_item *
6041 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6042 {
6043 	/* Return tail of array given the head */
6044 	return ptr + ptr->head.length + 1;
6045 }
6046 
6047 static void
6048 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6049 			   int len)
6050 {
6051 	struct trace_eval_map **stop;
6052 	struct trace_eval_map **map;
6053 	union trace_eval_map_item *map_array;
6054 	union trace_eval_map_item *ptr;
6055 
6056 	stop = start + len;
6057 
6058 	/*
6059 	 * The trace_eval_maps contains the map plus a head and tail item,
6060 	 * where the head holds the module and length of array, and the
6061 	 * tail holds a pointer to the next list.
6062 	 */
6063 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6064 	if (!map_array) {
6065 		pr_warn("Unable to allocate trace eval mapping\n");
6066 		return;
6067 	}
6068 
6069 	mutex_lock(&trace_eval_mutex);
6070 
6071 	if (!trace_eval_maps)
6072 		trace_eval_maps = map_array;
6073 	else {
6074 		ptr = trace_eval_maps;
6075 		for (;;) {
6076 			ptr = trace_eval_jmp_to_tail(ptr);
6077 			if (!ptr->tail.next)
6078 				break;
6079 			ptr = ptr->tail.next;
6080 
6081 		}
6082 		ptr->tail.next = map_array;
6083 	}
6084 	map_array->head.mod = mod;
6085 	map_array->head.length = len;
6086 	map_array++;
6087 
6088 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6089 		map_array->map = **map;
6090 		map_array++;
6091 	}
6092 	memset(map_array, 0, sizeof(*map_array));
6093 
6094 	mutex_unlock(&trace_eval_mutex);
6095 }
6096 
6097 static void trace_create_eval_file(struct dentry *d_tracer)
6098 {
6099 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6100 			  NULL, &tracing_eval_map_fops);
6101 }
6102 
6103 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6104 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6105 static inline void trace_insert_eval_map_file(struct module *mod,
6106 			      struct trace_eval_map **start, int len) { }
6107 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6108 
6109 static void trace_insert_eval_map(struct module *mod,
6110 				  struct trace_eval_map **start, int len)
6111 {
6112 	struct trace_eval_map **map;
6113 
6114 	if (len <= 0)
6115 		return;
6116 
6117 	map = start;
6118 
6119 	trace_event_eval_update(map, len);
6120 
6121 	trace_insert_eval_map_file(mod, start, len);
6122 }
6123 
6124 static ssize_t
6125 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6126 		       size_t cnt, loff_t *ppos)
6127 {
6128 	struct trace_array *tr = filp->private_data;
6129 	char buf[MAX_TRACER_SIZE+2];
6130 	int r;
6131 
6132 	mutex_lock(&trace_types_lock);
6133 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6134 	mutex_unlock(&trace_types_lock);
6135 
6136 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6137 }
6138 
6139 int tracer_init(struct tracer *t, struct trace_array *tr)
6140 {
6141 	tracing_reset_online_cpus(&tr->array_buffer);
6142 	return t->init(tr);
6143 }
6144 
6145 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6146 {
6147 	int cpu;
6148 
6149 	for_each_tracing_cpu(cpu)
6150 		per_cpu_ptr(buf->data, cpu)->entries = val;
6151 }
6152 
6153 #ifdef CONFIG_TRACER_MAX_TRACE
6154 /* resize @tr's buffer to the size of @size_tr's entries */
6155 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6156 					struct array_buffer *size_buf, int cpu_id)
6157 {
6158 	int cpu, ret = 0;
6159 
6160 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6161 		for_each_tracing_cpu(cpu) {
6162 			ret = ring_buffer_resize(trace_buf->buffer,
6163 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6164 			if (ret < 0)
6165 				break;
6166 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6167 				per_cpu_ptr(size_buf->data, cpu)->entries;
6168 		}
6169 	} else {
6170 		ret = ring_buffer_resize(trace_buf->buffer,
6171 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6172 		if (ret == 0)
6173 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6174 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6175 	}
6176 
6177 	return ret;
6178 }
6179 #endif /* CONFIG_TRACER_MAX_TRACE */
6180 
6181 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6182 					unsigned long size, int cpu)
6183 {
6184 	int ret;
6185 
6186 	/*
6187 	 * If kernel or user changes the size of the ring buffer
6188 	 * we use the size that was given, and we can forget about
6189 	 * expanding it later.
6190 	 */
6191 	ring_buffer_expanded = true;
6192 
6193 	/* May be called before buffers are initialized */
6194 	if (!tr->array_buffer.buffer)
6195 		return 0;
6196 
6197 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6198 	if (ret < 0)
6199 		return ret;
6200 
6201 #ifdef CONFIG_TRACER_MAX_TRACE
6202 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6203 	    !tr->current_trace->use_max_tr)
6204 		goto out;
6205 
6206 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6207 	if (ret < 0) {
6208 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6209 						     &tr->array_buffer, cpu);
6210 		if (r < 0) {
6211 			/*
6212 			 * AARGH! We are left with different
6213 			 * size max buffer!!!!
6214 			 * The max buffer is our "snapshot" buffer.
6215 			 * When a tracer needs a snapshot (one of the
6216 			 * latency tracers), it swaps the max buffer
6217 			 * with the saved snap shot. We succeeded to
6218 			 * update the size of the main buffer, but failed to
6219 			 * update the size of the max buffer. But when we tried
6220 			 * to reset the main buffer to the original size, we
6221 			 * failed there too. This is very unlikely to
6222 			 * happen, but if it does, warn and kill all
6223 			 * tracing.
6224 			 */
6225 			WARN_ON(1);
6226 			tracing_disabled = 1;
6227 		}
6228 		return ret;
6229 	}
6230 
6231 	if (cpu == RING_BUFFER_ALL_CPUS)
6232 		set_buffer_entries(&tr->max_buffer, size);
6233 	else
6234 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6235 
6236  out:
6237 #endif /* CONFIG_TRACER_MAX_TRACE */
6238 
6239 	if (cpu == RING_BUFFER_ALL_CPUS)
6240 		set_buffer_entries(&tr->array_buffer, size);
6241 	else
6242 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6243 
6244 	return ret;
6245 }
6246 
6247 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6248 				  unsigned long size, int cpu_id)
6249 {
6250 	int ret;
6251 
6252 	mutex_lock(&trace_types_lock);
6253 
6254 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6255 		/* make sure, this cpu is enabled in the mask */
6256 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6257 			ret = -EINVAL;
6258 			goto out;
6259 		}
6260 	}
6261 
6262 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6263 	if (ret < 0)
6264 		ret = -ENOMEM;
6265 
6266 out:
6267 	mutex_unlock(&trace_types_lock);
6268 
6269 	return ret;
6270 }
6271 
6272 
6273 /**
6274  * tracing_update_buffers - used by tracing facility to expand ring buffers
6275  *
6276  * To save on memory when the tracing is never used on a system with it
6277  * configured in. The ring buffers are set to a minimum size. But once
6278  * a user starts to use the tracing facility, then they need to grow
6279  * to their default size.
6280  *
6281  * This function is to be called when a tracer is about to be used.
6282  */
6283 int tracing_update_buffers(void)
6284 {
6285 	int ret = 0;
6286 
6287 	mutex_lock(&trace_types_lock);
6288 	if (!ring_buffer_expanded)
6289 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6290 						RING_BUFFER_ALL_CPUS);
6291 	mutex_unlock(&trace_types_lock);
6292 
6293 	return ret;
6294 }
6295 
6296 struct trace_option_dentry;
6297 
6298 static void
6299 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6300 
6301 /*
6302  * Used to clear out the tracer before deletion of an instance.
6303  * Must have trace_types_lock held.
6304  */
6305 static void tracing_set_nop(struct trace_array *tr)
6306 {
6307 	if (tr->current_trace == &nop_trace)
6308 		return;
6309 
6310 	tr->current_trace->enabled--;
6311 
6312 	if (tr->current_trace->reset)
6313 		tr->current_trace->reset(tr);
6314 
6315 	tr->current_trace = &nop_trace;
6316 }
6317 
6318 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6319 {
6320 	/* Only enable if the directory has been created already. */
6321 	if (!tr->dir)
6322 		return;
6323 
6324 	create_trace_option_files(tr, t);
6325 }
6326 
6327 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6328 {
6329 	struct tracer *t;
6330 #ifdef CONFIG_TRACER_MAX_TRACE
6331 	bool had_max_tr;
6332 #endif
6333 	int ret = 0;
6334 
6335 	mutex_lock(&trace_types_lock);
6336 
6337 	if (!ring_buffer_expanded) {
6338 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6339 						RING_BUFFER_ALL_CPUS);
6340 		if (ret < 0)
6341 			goto out;
6342 		ret = 0;
6343 	}
6344 
6345 	for (t = trace_types; t; t = t->next) {
6346 		if (strcmp(t->name, buf) == 0)
6347 			break;
6348 	}
6349 	if (!t) {
6350 		ret = -EINVAL;
6351 		goto out;
6352 	}
6353 	if (t == tr->current_trace)
6354 		goto out;
6355 
6356 #ifdef CONFIG_TRACER_SNAPSHOT
6357 	if (t->use_max_tr) {
6358 		arch_spin_lock(&tr->max_lock);
6359 		if (tr->cond_snapshot)
6360 			ret = -EBUSY;
6361 		arch_spin_unlock(&tr->max_lock);
6362 		if (ret)
6363 			goto out;
6364 	}
6365 #endif
6366 	/* Some tracers won't work on kernel command line */
6367 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6368 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6369 			t->name);
6370 		goto out;
6371 	}
6372 
6373 	/* Some tracers are only allowed for the top level buffer */
6374 	if (!trace_ok_for_array(t, tr)) {
6375 		ret = -EINVAL;
6376 		goto out;
6377 	}
6378 
6379 	/* If trace pipe files are being read, we can't change the tracer */
6380 	if (tr->trace_ref) {
6381 		ret = -EBUSY;
6382 		goto out;
6383 	}
6384 
6385 	trace_branch_disable();
6386 
6387 	tr->current_trace->enabled--;
6388 
6389 	if (tr->current_trace->reset)
6390 		tr->current_trace->reset(tr);
6391 
6392 	/* Current trace needs to be nop_trace before synchronize_rcu */
6393 	tr->current_trace = &nop_trace;
6394 
6395 #ifdef CONFIG_TRACER_MAX_TRACE
6396 	had_max_tr = tr->allocated_snapshot;
6397 
6398 	if (had_max_tr && !t->use_max_tr) {
6399 		/*
6400 		 * We need to make sure that the update_max_tr sees that
6401 		 * current_trace changed to nop_trace to keep it from
6402 		 * swapping the buffers after we resize it.
6403 		 * The update_max_tr is called from interrupts disabled
6404 		 * so a synchronized_sched() is sufficient.
6405 		 */
6406 		synchronize_rcu();
6407 		free_snapshot(tr);
6408 	}
6409 #endif
6410 
6411 #ifdef CONFIG_TRACER_MAX_TRACE
6412 	if (t->use_max_tr && !had_max_tr) {
6413 		ret = tracing_alloc_snapshot_instance(tr);
6414 		if (ret < 0)
6415 			goto out;
6416 	}
6417 #endif
6418 
6419 	if (t->init) {
6420 		ret = tracer_init(t, tr);
6421 		if (ret)
6422 			goto out;
6423 	}
6424 
6425 	tr->current_trace = t;
6426 	tr->current_trace->enabled++;
6427 	trace_branch_enable(tr);
6428  out:
6429 	mutex_unlock(&trace_types_lock);
6430 
6431 	return ret;
6432 }
6433 
6434 static ssize_t
6435 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6436 			size_t cnt, loff_t *ppos)
6437 {
6438 	struct trace_array *tr = filp->private_data;
6439 	char buf[MAX_TRACER_SIZE+1];
6440 	int i;
6441 	size_t ret;
6442 	int err;
6443 
6444 	ret = cnt;
6445 
6446 	if (cnt > MAX_TRACER_SIZE)
6447 		cnt = MAX_TRACER_SIZE;
6448 
6449 	if (copy_from_user(buf, ubuf, cnt))
6450 		return -EFAULT;
6451 
6452 	buf[cnt] = 0;
6453 
6454 	/* strip ending whitespace. */
6455 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6456 		buf[i] = 0;
6457 
6458 	err = tracing_set_tracer(tr, buf);
6459 	if (err)
6460 		return err;
6461 
6462 	*ppos += ret;
6463 
6464 	return ret;
6465 }
6466 
6467 static ssize_t
6468 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6469 		   size_t cnt, loff_t *ppos)
6470 {
6471 	char buf[64];
6472 	int r;
6473 
6474 	r = snprintf(buf, sizeof(buf), "%ld\n",
6475 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6476 	if (r > sizeof(buf))
6477 		r = sizeof(buf);
6478 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6479 }
6480 
6481 static ssize_t
6482 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6483 		    size_t cnt, loff_t *ppos)
6484 {
6485 	unsigned long val;
6486 	int ret;
6487 
6488 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6489 	if (ret)
6490 		return ret;
6491 
6492 	*ptr = val * 1000;
6493 
6494 	return cnt;
6495 }
6496 
6497 static ssize_t
6498 tracing_thresh_read(struct file *filp, char __user *ubuf,
6499 		    size_t cnt, loff_t *ppos)
6500 {
6501 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6502 }
6503 
6504 static ssize_t
6505 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6506 		     size_t cnt, loff_t *ppos)
6507 {
6508 	struct trace_array *tr = filp->private_data;
6509 	int ret;
6510 
6511 	mutex_lock(&trace_types_lock);
6512 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6513 	if (ret < 0)
6514 		goto out;
6515 
6516 	if (tr->current_trace->update_thresh) {
6517 		ret = tr->current_trace->update_thresh(tr);
6518 		if (ret < 0)
6519 			goto out;
6520 	}
6521 
6522 	ret = cnt;
6523 out:
6524 	mutex_unlock(&trace_types_lock);
6525 
6526 	return ret;
6527 }
6528 
6529 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6530 
6531 static ssize_t
6532 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6533 		     size_t cnt, loff_t *ppos)
6534 {
6535 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6536 }
6537 
6538 static ssize_t
6539 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6540 		      size_t cnt, loff_t *ppos)
6541 {
6542 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6543 }
6544 
6545 #endif
6546 
6547 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6548 {
6549 	struct trace_array *tr = inode->i_private;
6550 	struct trace_iterator *iter;
6551 	int ret;
6552 
6553 	ret = tracing_check_open_get_tr(tr);
6554 	if (ret)
6555 		return ret;
6556 
6557 	mutex_lock(&trace_types_lock);
6558 
6559 	/* create a buffer to store the information to pass to userspace */
6560 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6561 	if (!iter) {
6562 		ret = -ENOMEM;
6563 		__trace_array_put(tr);
6564 		goto out;
6565 	}
6566 
6567 	trace_seq_init(&iter->seq);
6568 	iter->trace = tr->current_trace;
6569 
6570 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6571 		ret = -ENOMEM;
6572 		goto fail;
6573 	}
6574 
6575 	/* trace pipe does not show start of buffer */
6576 	cpumask_setall(iter->started);
6577 
6578 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6579 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6580 
6581 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6582 	if (trace_clocks[tr->clock_id].in_ns)
6583 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6584 
6585 	iter->tr = tr;
6586 	iter->array_buffer = &tr->array_buffer;
6587 	iter->cpu_file = tracing_get_cpu(inode);
6588 	mutex_init(&iter->mutex);
6589 	filp->private_data = iter;
6590 
6591 	if (iter->trace->pipe_open)
6592 		iter->trace->pipe_open(iter);
6593 
6594 	nonseekable_open(inode, filp);
6595 
6596 	tr->trace_ref++;
6597 out:
6598 	mutex_unlock(&trace_types_lock);
6599 	return ret;
6600 
6601 fail:
6602 	kfree(iter);
6603 	__trace_array_put(tr);
6604 	mutex_unlock(&trace_types_lock);
6605 	return ret;
6606 }
6607 
6608 static int tracing_release_pipe(struct inode *inode, struct file *file)
6609 {
6610 	struct trace_iterator *iter = file->private_data;
6611 	struct trace_array *tr = inode->i_private;
6612 
6613 	mutex_lock(&trace_types_lock);
6614 
6615 	tr->trace_ref--;
6616 
6617 	if (iter->trace->pipe_close)
6618 		iter->trace->pipe_close(iter);
6619 
6620 	mutex_unlock(&trace_types_lock);
6621 
6622 	free_cpumask_var(iter->started);
6623 	mutex_destroy(&iter->mutex);
6624 	kfree(iter);
6625 
6626 	trace_array_put(tr);
6627 
6628 	return 0;
6629 }
6630 
6631 static __poll_t
6632 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6633 {
6634 	struct trace_array *tr = iter->tr;
6635 
6636 	/* Iterators are static, they should be filled or empty */
6637 	if (trace_buffer_iter(iter, iter->cpu_file))
6638 		return EPOLLIN | EPOLLRDNORM;
6639 
6640 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6641 		/*
6642 		 * Always select as readable when in blocking mode
6643 		 */
6644 		return EPOLLIN | EPOLLRDNORM;
6645 	else
6646 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6647 					     filp, poll_table);
6648 }
6649 
6650 static __poll_t
6651 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6652 {
6653 	struct trace_iterator *iter = filp->private_data;
6654 
6655 	return trace_poll(iter, filp, poll_table);
6656 }
6657 
6658 /* Must be called with iter->mutex held. */
6659 static int tracing_wait_pipe(struct file *filp)
6660 {
6661 	struct trace_iterator *iter = filp->private_data;
6662 	int ret;
6663 
6664 	while (trace_empty(iter)) {
6665 
6666 		if ((filp->f_flags & O_NONBLOCK)) {
6667 			return -EAGAIN;
6668 		}
6669 
6670 		/*
6671 		 * We block until we read something and tracing is disabled.
6672 		 * We still block if tracing is disabled, but we have never
6673 		 * read anything. This allows a user to cat this file, and
6674 		 * then enable tracing. But after we have read something,
6675 		 * we give an EOF when tracing is again disabled.
6676 		 *
6677 		 * iter->pos will be 0 if we haven't read anything.
6678 		 */
6679 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6680 			break;
6681 
6682 		mutex_unlock(&iter->mutex);
6683 
6684 		ret = wait_on_pipe(iter, 0);
6685 
6686 		mutex_lock(&iter->mutex);
6687 
6688 		if (ret)
6689 			return ret;
6690 	}
6691 
6692 	return 1;
6693 }
6694 
6695 /*
6696  * Consumer reader.
6697  */
6698 static ssize_t
6699 tracing_read_pipe(struct file *filp, char __user *ubuf,
6700 		  size_t cnt, loff_t *ppos)
6701 {
6702 	struct trace_iterator *iter = filp->private_data;
6703 	ssize_t sret;
6704 
6705 	/*
6706 	 * Avoid more than one consumer on a single file descriptor
6707 	 * This is just a matter of traces coherency, the ring buffer itself
6708 	 * is protected.
6709 	 */
6710 	mutex_lock(&iter->mutex);
6711 
6712 	/* return any leftover data */
6713 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6714 	if (sret != -EBUSY)
6715 		goto out;
6716 
6717 	trace_seq_init(&iter->seq);
6718 
6719 	if (iter->trace->read) {
6720 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6721 		if (sret)
6722 			goto out;
6723 	}
6724 
6725 waitagain:
6726 	sret = tracing_wait_pipe(filp);
6727 	if (sret <= 0)
6728 		goto out;
6729 
6730 	/* stop when tracing is finished */
6731 	if (trace_empty(iter)) {
6732 		sret = 0;
6733 		goto out;
6734 	}
6735 
6736 	if (cnt >= PAGE_SIZE)
6737 		cnt = PAGE_SIZE - 1;
6738 
6739 	/* reset all but tr, trace, and overruns */
6740 	trace_iterator_reset(iter);
6741 	cpumask_clear(iter->started);
6742 	trace_seq_init(&iter->seq);
6743 
6744 	trace_event_read_lock();
6745 	trace_access_lock(iter->cpu_file);
6746 	while (trace_find_next_entry_inc(iter) != NULL) {
6747 		enum print_line_t ret;
6748 		int save_len = iter->seq.seq.len;
6749 
6750 		ret = print_trace_line(iter);
6751 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6752 			/* don't print partial lines */
6753 			iter->seq.seq.len = save_len;
6754 			break;
6755 		}
6756 		if (ret != TRACE_TYPE_NO_CONSUME)
6757 			trace_consume(iter);
6758 
6759 		if (trace_seq_used(&iter->seq) >= cnt)
6760 			break;
6761 
6762 		/*
6763 		 * Setting the full flag means we reached the trace_seq buffer
6764 		 * size and we should leave by partial output condition above.
6765 		 * One of the trace_seq_* functions is not used properly.
6766 		 */
6767 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6768 			  iter->ent->type);
6769 	}
6770 	trace_access_unlock(iter->cpu_file);
6771 	trace_event_read_unlock();
6772 
6773 	/* Now copy what we have to the user */
6774 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6775 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6776 		trace_seq_init(&iter->seq);
6777 
6778 	/*
6779 	 * If there was nothing to send to user, in spite of consuming trace
6780 	 * entries, go back to wait for more entries.
6781 	 */
6782 	if (sret == -EBUSY)
6783 		goto waitagain;
6784 
6785 out:
6786 	mutex_unlock(&iter->mutex);
6787 
6788 	return sret;
6789 }
6790 
6791 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6792 				     unsigned int idx)
6793 {
6794 	__free_page(spd->pages[idx]);
6795 }
6796 
6797 static size_t
6798 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6799 {
6800 	size_t count;
6801 	int save_len;
6802 	int ret;
6803 
6804 	/* Seq buffer is page-sized, exactly what we need. */
6805 	for (;;) {
6806 		save_len = iter->seq.seq.len;
6807 		ret = print_trace_line(iter);
6808 
6809 		if (trace_seq_has_overflowed(&iter->seq)) {
6810 			iter->seq.seq.len = save_len;
6811 			break;
6812 		}
6813 
6814 		/*
6815 		 * This should not be hit, because it should only
6816 		 * be set if the iter->seq overflowed. But check it
6817 		 * anyway to be safe.
6818 		 */
6819 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6820 			iter->seq.seq.len = save_len;
6821 			break;
6822 		}
6823 
6824 		count = trace_seq_used(&iter->seq) - save_len;
6825 		if (rem < count) {
6826 			rem = 0;
6827 			iter->seq.seq.len = save_len;
6828 			break;
6829 		}
6830 
6831 		if (ret != TRACE_TYPE_NO_CONSUME)
6832 			trace_consume(iter);
6833 		rem -= count;
6834 		if (!trace_find_next_entry_inc(iter))	{
6835 			rem = 0;
6836 			iter->ent = NULL;
6837 			break;
6838 		}
6839 	}
6840 
6841 	return rem;
6842 }
6843 
6844 static ssize_t tracing_splice_read_pipe(struct file *filp,
6845 					loff_t *ppos,
6846 					struct pipe_inode_info *pipe,
6847 					size_t len,
6848 					unsigned int flags)
6849 {
6850 	struct page *pages_def[PIPE_DEF_BUFFERS];
6851 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6852 	struct trace_iterator *iter = filp->private_data;
6853 	struct splice_pipe_desc spd = {
6854 		.pages		= pages_def,
6855 		.partial	= partial_def,
6856 		.nr_pages	= 0, /* This gets updated below. */
6857 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6858 		.ops		= &default_pipe_buf_ops,
6859 		.spd_release	= tracing_spd_release_pipe,
6860 	};
6861 	ssize_t ret;
6862 	size_t rem;
6863 	unsigned int i;
6864 
6865 	if (splice_grow_spd(pipe, &spd))
6866 		return -ENOMEM;
6867 
6868 	mutex_lock(&iter->mutex);
6869 
6870 	if (iter->trace->splice_read) {
6871 		ret = iter->trace->splice_read(iter, filp,
6872 					       ppos, pipe, len, flags);
6873 		if (ret)
6874 			goto out_err;
6875 	}
6876 
6877 	ret = tracing_wait_pipe(filp);
6878 	if (ret <= 0)
6879 		goto out_err;
6880 
6881 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6882 		ret = -EFAULT;
6883 		goto out_err;
6884 	}
6885 
6886 	trace_event_read_lock();
6887 	trace_access_lock(iter->cpu_file);
6888 
6889 	/* Fill as many pages as possible. */
6890 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6891 		spd.pages[i] = alloc_page(GFP_KERNEL);
6892 		if (!spd.pages[i])
6893 			break;
6894 
6895 		rem = tracing_fill_pipe_page(rem, iter);
6896 
6897 		/* Copy the data into the page, so we can start over. */
6898 		ret = trace_seq_to_buffer(&iter->seq,
6899 					  page_address(spd.pages[i]),
6900 					  trace_seq_used(&iter->seq));
6901 		if (ret < 0) {
6902 			__free_page(spd.pages[i]);
6903 			break;
6904 		}
6905 		spd.partial[i].offset = 0;
6906 		spd.partial[i].len = trace_seq_used(&iter->seq);
6907 
6908 		trace_seq_init(&iter->seq);
6909 	}
6910 
6911 	trace_access_unlock(iter->cpu_file);
6912 	trace_event_read_unlock();
6913 	mutex_unlock(&iter->mutex);
6914 
6915 	spd.nr_pages = i;
6916 
6917 	if (i)
6918 		ret = splice_to_pipe(pipe, &spd);
6919 	else
6920 		ret = 0;
6921 out:
6922 	splice_shrink_spd(&spd);
6923 	return ret;
6924 
6925 out_err:
6926 	mutex_unlock(&iter->mutex);
6927 	goto out;
6928 }
6929 
6930 static ssize_t
6931 tracing_entries_read(struct file *filp, char __user *ubuf,
6932 		     size_t cnt, loff_t *ppos)
6933 {
6934 	struct inode *inode = file_inode(filp);
6935 	struct trace_array *tr = inode->i_private;
6936 	int cpu = tracing_get_cpu(inode);
6937 	char buf[64];
6938 	int r = 0;
6939 	ssize_t ret;
6940 
6941 	mutex_lock(&trace_types_lock);
6942 
6943 	if (cpu == RING_BUFFER_ALL_CPUS) {
6944 		int cpu, buf_size_same;
6945 		unsigned long size;
6946 
6947 		size = 0;
6948 		buf_size_same = 1;
6949 		/* check if all cpu sizes are same */
6950 		for_each_tracing_cpu(cpu) {
6951 			/* fill in the size from first enabled cpu */
6952 			if (size == 0)
6953 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6954 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6955 				buf_size_same = 0;
6956 				break;
6957 			}
6958 		}
6959 
6960 		if (buf_size_same) {
6961 			if (!ring_buffer_expanded)
6962 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6963 					    size >> 10,
6964 					    trace_buf_size >> 10);
6965 			else
6966 				r = sprintf(buf, "%lu\n", size >> 10);
6967 		} else
6968 			r = sprintf(buf, "X\n");
6969 	} else
6970 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6971 
6972 	mutex_unlock(&trace_types_lock);
6973 
6974 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6975 	return ret;
6976 }
6977 
6978 static ssize_t
6979 tracing_entries_write(struct file *filp, const char __user *ubuf,
6980 		      size_t cnt, loff_t *ppos)
6981 {
6982 	struct inode *inode = file_inode(filp);
6983 	struct trace_array *tr = inode->i_private;
6984 	unsigned long val;
6985 	int ret;
6986 
6987 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6988 	if (ret)
6989 		return ret;
6990 
6991 	/* must have at least 1 entry */
6992 	if (!val)
6993 		return -EINVAL;
6994 
6995 	/* value is in KB */
6996 	val <<= 10;
6997 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6998 	if (ret < 0)
6999 		return ret;
7000 
7001 	*ppos += cnt;
7002 
7003 	return cnt;
7004 }
7005 
7006 static ssize_t
7007 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7008 				size_t cnt, loff_t *ppos)
7009 {
7010 	struct trace_array *tr = filp->private_data;
7011 	char buf[64];
7012 	int r, cpu;
7013 	unsigned long size = 0, expanded_size = 0;
7014 
7015 	mutex_lock(&trace_types_lock);
7016 	for_each_tracing_cpu(cpu) {
7017 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7018 		if (!ring_buffer_expanded)
7019 			expanded_size += trace_buf_size >> 10;
7020 	}
7021 	if (ring_buffer_expanded)
7022 		r = sprintf(buf, "%lu\n", size);
7023 	else
7024 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7025 	mutex_unlock(&trace_types_lock);
7026 
7027 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7028 }
7029 
7030 static ssize_t
7031 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7032 			  size_t cnt, loff_t *ppos)
7033 {
7034 	/*
7035 	 * There is no need to read what the user has written, this function
7036 	 * is just to make sure that there is no error when "echo" is used
7037 	 */
7038 
7039 	*ppos += cnt;
7040 
7041 	return cnt;
7042 }
7043 
7044 static int
7045 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7046 {
7047 	struct trace_array *tr = inode->i_private;
7048 
7049 	/* disable tracing ? */
7050 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7051 		tracer_tracing_off(tr);
7052 	/* resize the ring buffer to 0 */
7053 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7054 
7055 	trace_array_put(tr);
7056 
7057 	return 0;
7058 }
7059 
7060 static ssize_t
7061 tracing_mark_write(struct file *filp, const char __user *ubuf,
7062 					size_t cnt, loff_t *fpos)
7063 {
7064 	struct trace_array *tr = filp->private_data;
7065 	struct ring_buffer_event *event;
7066 	enum event_trigger_type tt = ETT_NONE;
7067 	struct trace_buffer *buffer;
7068 	struct print_entry *entry;
7069 	ssize_t written;
7070 	int size;
7071 	int len;
7072 
7073 /* Used in tracing_mark_raw_write() as well */
7074 #define FAULTED_STR "<faulted>"
7075 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7076 
7077 	if (tracing_disabled)
7078 		return -EINVAL;
7079 
7080 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7081 		return -EINVAL;
7082 
7083 	if (cnt > TRACE_BUF_SIZE)
7084 		cnt = TRACE_BUF_SIZE;
7085 
7086 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7087 
7088 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7089 
7090 	/* If less than "<faulted>", then make sure we can still add that */
7091 	if (cnt < FAULTED_SIZE)
7092 		size += FAULTED_SIZE - cnt;
7093 
7094 	buffer = tr->array_buffer.buffer;
7095 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7096 					    tracing_gen_ctx());
7097 	if (unlikely(!event))
7098 		/* Ring buffer disabled, return as if not open for write */
7099 		return -EBADF;
7100 
7101 	entry = ring_buffer_event_data(event);
7102 	entry->ip = _THIS_IP_;
7103 
7104 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7105 	if (len) {
7106 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7107 		cnt = FAULTED_SIZE;
7108 		written = -EFAULT;
7109 	} else
7110 		written = cnt;
7111 
7112 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7113 		/* do not add \n before testing triggers, but add \0 */
7114 		entry->buf[cnt] = '\0';
7115 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7116 	}
7117 
7118 	if (entry->buf[cnt - 1] != '\n') {
7119 		entry->buf[cnt] = '\n';
7120 		entry->buf[cnt + 1] = '\0';
7121 	} else
7122 		entry->buf[cnt] = '\0';
7123 
7124 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7125 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7126 	__buffer_unlock_commit(buffer, event);
7127 
7128 	if (tt)
7129 		event_triggers_post_call(tr->trace_marker_file, tt);
7130 
7131 	return written;
7132 }
7133 
7134 /* Limit it for now to 3K (including tag) */
7135 #define RAW_DATA_MAX_SIZE (1024*3)
7136 
7137 static ssize_t
7138 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7139 					size_t cnt, loff_t *fpos)
7140 {
7141 	struct trace_array *tr = filp->private_data;
7142 	struct ring_buffer_event *event;
7143 	struct trace_buffer *buffer;
7144 	struct raw_data_entry *entry;
7145 	ssize_t written;
7146 	int size;
7147 	int len;
7148 
7149 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7150 
7151 	if (tracing_disabled)
7152 		return -EINVAL;
7153 
7154 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7155 		return -EINVAL;
7156 
7157 	/* The marker must at least have a tag id */
7158 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7159 		return -EINVAL;
7160 
7161 	if (cnt > TRACE_BUF_SIZE)
7162 		cnt = TRACE_BUF_SIZE;
7163 
7164 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7165 
7166 	size = sizeof(*entry) + cnt;
7167 	if (cnt < FAULT_SIZE_ID)
7168 		size += FAULT_SIZE_ID - cnt;
7169 
7170 	buffer = tr->array_buffer.buffer;
7171 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7172 					    tracing_gen_ctx());
7173 	if (!event)
7174 		/* Ring buffer disabled, return as if not open for write */
7175 		return -EBADF;
7176 
7177 	entry = ring_buffer_event_data(event);
7178 
7179 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7180 	if (len) {
7181 		entry->id = -1;
7182 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7183 		written = -EFAULT;
7184 	} else
7185 		written = cnt;
7186 
7187 	__buffer_unlock_commit(buffer, event);
7188 
7189 	return written;
7190 }
7191 
7192 static int tracing_clock_show(struct seq_file *m, void *v)
7193 {
7194 	struct trace_array *tr = m->private;
7195 	int i;
7196 
7197 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7198 		seq_printf(m,
7199 			"%s%s%s%s", i ? " " : "",
7200 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7201 			i == tr->clock_id ? "]" : "");
7202 	seq_putc(m, '\n');
7203 
7204 	return 0;
7205 }
7206 
7207 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7208 {
7209 	int i;
7210 
7211 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7212 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7213 			break;
7214 	}
7215 	if (i == ARRAY_SIZE(trace_clocks))
7216 		return -EINVAL;
7217 
7218 	mutex_lock(&trace_types_lock);
7219 
7220 	tr->clock_id = i;
7221 
7222 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7223 
7224 	/*
7225 	 * New clock may not be consistent with the previous clock.
7226 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7227 	 */
7228 	tracing_reset_online_cpus(&tr->array_buffer);
7229 
7230 #ifdef CONFIG_TRACER_MAX_TRACE
7231 	if (tr->max_buffer.buffer)
7232 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7233 	tracing_reset_online_cpus(&tr->max_buffer);
7234 #endif
7235 
7236 	mutex_unlock(&trace_types_lock);
7237 
7238 	return 0;
7239 }
7240 
7241 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7242 				   size_t cnt, loff_t *fpos)
7243 {
7244 	struct seq_file *m = filp->private_data;
7245 	struct trace_array *tr = m->private;
7246 	char buf[64];
7247 	const char *clockstr;
7248 	int ret;
7249 
7250 	if (cnt >= sizeof(buf))
7251 		return -EINVAL;
7252 
7253 	if (copy_from_user(buf, ubuf, cnt))
7254 		return -EFAULT;
7255 
7256 	buf[cnt] = 0;
7257 
7258 	clockstr = strstrip(buf);
7259 
7260 	ret = tracing_set_clock(tr, clockstr);
7261 	if (ret)
7262 		return ret;
7263 
7264 	*fpos += cnt;
7265 
7266 	return cnt;
7267 }
7268 
7269 static int tracing_clock_open(struct inode *inode, struct file *file)
7270 {
7271 	struct trace_array *tr = inode->i_private;
7272 	int ret;
7273 
7274 	ret = tracing_check_open_get_tr(tr);
7275 	if (ret)
7276 		return ret;
7277 
7278 	ret = single_open(file, tracing_clock_show, inode->i_private);
7279 	if (ret < 0)
7280 		trace_array_put(tr);
7281 
7282 	return ret;
7283 }
7284 
7285 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7286 {
7287 	struct trace_array *tr = m->private;
7288 
7289 	mutex_lock(&trace_types_lock);
7290 
7291 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7292 		seq_puts(m, "delta [absolute]\n");
7293 	else
7294 		seq_puts(m, "[delta] absolute\n");
7295 
7296 	mutex_unlock(&trace_types_lock);
7297 
7298 	return 0;
7299 }
7300 
7301 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7302 {
7303 	struct trace_array *tr = inode->i_private;
7304 	int ret;
7305 
7306 	ret = tracing_check_open_get_tr(tr);
7307 	if (ret)
7308 		return ret;
7309 
7310 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7311 	if (ret < 0)
7312 		trace_array_put(tr);
7313 
7314 	return ret;
7315 }
7316 
7317 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7318 {
7319 	if (rbe == this_cpu_read(trace_buffered_event))
7320 		return ring_buffer_time_stamp(buffer);
7321 
7322 	return ring_buffer_event_time_stamp(buffer, rbe);
7323 }
7324 
7325 /*
7326  * Set or disable using the per CPU trace_buffer_event when possible.
7327  */
7328 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7329 {
7330 	int ret = 0;
7331 
7332 	mutex_lock(&trace_types_lock);
7333 
7334 	if (set && tr->no_filter_buffering_ref++)
7335 		goto out;
7336 
7337 	if (!set) {
7338 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7339 			ret = -EINVAL;
7340 			goto out;
7341 		}
7342 
7343 		--tr->no_filter_buffering_ref;
7344 	}
7345  out:
7346 	mutex_unlock(&trace_types_lock);
7347 
7348 	return ret;
7349 }
7350 
7351 struct ftrace_buffer_info {
7352 	struct trace_iterator	iter;
7353 	void			*spare;
7354 	unsigned int		spare_cpu;
7355 	unsigned int		read;
7356 };
7357 
7358 #ifdef CONFIG_TRACER_SNAPSHOT
7359 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7360 {
7361 	struct trace_array *tr = inode->i_private;
7362 	struct trace_iterator *iter;
7363 	struct seq_file *m;
7364 	int ret;
7365 
7366 	ret = tracing_check_open_get_tr(tr);
7367 	if (ret)
7368 		return ret;
7369 
7370 	if (file->f_mode & FMODE_READ) {
7371 		iter = __tracing_open(inode, file, true);
7372 		if (IS_ERR(iter))
7373 			ret = PTR_ERR(iter);
7374 	} else {
7375 		/* Writes still need the seq_file to hold the private data */
7376 		ret = -ENOMEM;
7377 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7378 		if (!m)
7379 			goto out;
7380 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7381 		if (!iter) {
7382 			kfree(m);
7383 			goto out;
7384 		}
7385 		ret = 0;
7386 
7387 		iter->tr = tr;
7388 		iter->array_buffer = &tr->max_buffer;
7389 		iter->cpu_file = tracing_get_cpu(inode);
7390 		m->private = iter;
7391 		file->private_data = m;
7392 	}
7393 out:
7394 	if (ret < 0)
7395 		trace_array_put(tr);
7396 
7397 	return ret;
7398 }
7399 
7400 static ssize_t
7401 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7402 		       loff_t *ppos)
7403 {
7404 	struct seq_file *m = filp->private_data;
7405 	struct trace_iterator *iter = m->private;
7406 	struct trace_array *tr = iter->tr;
7407 	unsigned long val;
7408 	int ret;
7409 
7410 	ret = tracing_update_buffers();
7411 	if (ret < 0)
7412 		return ret;
7413 
7414 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7415 	if (ret)
7416 		return ret;
7417 
7418 	mutex_lock(&trace_types_lock);
7419 
7420 	if (tr->current_trace->use_max_tr) {
7421 		ret = -EBUSY;
7422 		goto out;
7423 	}
7424 
7425 	arch_spin_lock(&tr->max_lock);
7426 	if (tr->cond_snapshot)
7427 		ret = -EBUSY;
7428 	arch_spin_unlock(&tr->max_lock);
7429 	if (ret)
7430 		goto out;
7431 
7432 	switch (val) {
7433 	case 0:
7434 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7435 			ret = -EINVAL;
7436 			break;
7437 		}
7438 		if (tr->allocated_snapshot)
7439 			free_snapshot(tr);
7440 		break;
7441 	case 1:
7442 /* Only allow per-cpu swap if the ring buffer supports it */
7443 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7444 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7445 			ret = -EINVAL;
7446 			break;
7447 		}
7448 #endif
7449 		if (tr->allocated_snapshot)
7450 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7451 					&tr->array_buffer, iter->cpu_file);
7452 		else
7453 			ret = tracing_alloc_snapshot_instance(tr);
7454 		if (ret < 0)
7455 			break;
7456 		local_irq_disable();
7457 		/* Now, we're going to swap */
7458 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7459 			update_max_tr(tr, current, smp_processor_id(), NULL);
7460 		else
7461 			update_max_tr_single(tr, current, iter->cpu_file);
7462 		local_irq_enable();
7463 		break;
7464 	default:
7465 		if (tr->allocated_snapshot) {
7466 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7467 				tracing_reset_online_cpus(&tr->max_buffer);
7468 			else
7469 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7470 		}
7471 		break;
7472 	}
7473 
7474 	if (ret >= 0) {
7475 		*ppos += cnt;
7476 		ret = cnt;
7477 	}
7478 out:
7479 	mutex_unlock(&trace_types_lock);
7480 	return ret;
7481 }
7482 
7483 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7484 {
7485 	struct seq_file *m = file->private_data;
7486 	int ret;
7487 
7488 	ret = tracing_release(inode, file);
7489 
7490 	if (file->f_mode & FMODE_READ)
7491 		return ret;
7492 
7493 	/* If write only, the seq_file is just a stub */
7494 	if (m)
7495 		kfree(m->private);
7496 	kfree(m);
7497 
7498 	return 0;
7499 }
7500 
7501 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7502 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7503 				    size_t count, loff_t *ppos);
7504 static int tracing_buffers_release(struct inode *inode, struct file *file);
7505 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7506 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7507 
7508 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7509 {
7510 	struct ftrace_buffer_info *info;
7511 	int ret;
7512 
7513 	/* The following checks for tracefs lockdown */
7514 	ret = tracing_buffers_open(inode, filp);
7515 	if (ret < 0)
7516 		return ret;
7517 
7518 	info = filp->private_data;
7519 
7520 	if (info->iter.trace->use_max_tr) {
7521 		tracing_buffers_release(inode, filp);
7522 		return -EBUSY;
7523 	}
7524 
7525 	info->iter.snapshot = true;
7526 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7527 
7528 	return ret;
7529 }
7530 
7531 #endif /* CONFIG_TRACER_SNAPSHOT */
7532 
7533 
7534 static const struct file_operations tracing_thresh_fops = {
7535 	.open		= tracing_open_generic,
7536 	.read		= tracing_thresh_read,
7537 	.write		= tracing_thresh_write,
7538 	.llseek		= generic_file_llseek,
7539 };
7540 
7541 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7542 static const struct file_operations tracing_max_lat_fops = {
7543 	.open		= tracing_open_generic,
7544 	.read		= tracing_max_lat_read,
7545 	.write		= tracing_max_lat_write,
7546 	.llseek		= generic_file_llseek,
7547 };
7548 #endif
7549 
7550 static const struct file_operations set_tracer_fops = {
7551 	.open		= tracing_open_generic,
7552 	.read		= tracing_set_trace_read,
7553 	.write		= tracing_set_trace_write,
7554 	.llseek		= generic_file_llseek,
7555 };
7556 
7557 static const struct file_operations tracing_pipe_fops = {
7558 	.open		= tracing_open_pipe,
7559 	.poll		= tracing_poll_pipe,
7560 	.read		= tracing_read_pipe,
7561 	.splice_read	= tracing_splice_read_pipe,
7562 	.release	= tracing_release_pipe,
7563 	.llseek		= no_llseek,
7564 };
7565 
7566 static const struct file_operations tracing_entries_fops = {
7567 	.open		= tracing_open_generic_tr,
7568 	.read		= tracing_entries_read,
7569 	.write		= tracing_entries_write,
7570 	.llseek		= generic_file_llseek,
7571 	.release	= tracing_release_generic_tr,
7572 };
7573 
7574 static const struct file_operations tracing_total_entries_fops = {
7575 	.open		= tracing_open_generic_tr,
7576 	.read		= tracing_total_entries_read,
7577 	.llseek		= generic_file_llseek,
7578 	.release	= tracing_release_generic_tr,
7579 };
7580 
7581 static const struct file_operations tracing_free_buffer_fops = {
7582 	.open		= tracing_open_generic_tr,
7583 	.write		= tracing_free_buffer_write,
7584 	.release	= tracing_free_buffer_release,
7585 };
7586 
7587 static const struct file_operations tracing_mark_fops = {
7588 	.open		= tracing_mark_open,
7589 	.write		= tracing_mark_write,
7590 	.release	= tracing_release_generic_tr,
7591 };
7592 
7593 static const struct file_operations tracing_mark_raw_fops = {
7594 	.open		= tracing_mark_open,
7595 	.write		= tracing_mark_raw_write,
7596 	.release	= tracing_release_generic_tr,
7597 };
7598 
7599 static const struct file_operations trace_clock_fops = {
7600 	.open		= tracing_clock_open,
7601 	.read		= seq_read,
7602 	.llseek		= seq_lseek,
7603 	.release	= tracing_single_release_tr,
7604 	.write		= tracing_clock_write,
7605 };
7606 
7607 static const struct file_operations trace_time_stamp_mode_fops = {
7608 	.open		= tracing_time_stamp_mode_open,
7609 	.read		= seq_read,
7610 	.llseek		= seq_lseek,
7611 	.release	= tracing_single_release_tr,
7612 };
7613 
7614 #ifdef CONFIG_TRACER_SNAPSHOT
7615 static const struct file_operations snapshot_fops = {
7616 	.open		= tracing_snapshot_open,
7617 	.read		= seq_read,
7618 	.write		= tracing_snapshot_write,
7619 	.llseek		= tracing_lseek,
7620 	.release	= tracing_snapshot_release,
7621 };
7622 
7623 static const struct file_operations snapshot_raw_fops = {
7624 	.open		= snapshot_raw_open,
7625 	.read		= tracing_buffers_read,
7626 	.release	= tracing_buffers_release,
7627 	.splice_read	= tracing_buffers_splice_read,
7628 	.llseek		= no_llseek,
7629 };
7630 
7631 #endif /* CONFIG_TRACER_SNAPSHOT */
7632 
7633 /*
7634  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7635  * @filp: The active open file structure
7636  * @ubuf: The userspace provided buffer to read value into
7637  * @cnt: The maximum number of bytes to read
7638  * @ppos: The current "file" position
7639  *
7640  * This function implements the write interface for a struct trace_min_max_param.
7641  * The filp->private_data must point to a trace_min_max_param structure that
7642  * defines where to write the value, the min and the max acceptable values,
7643  * and a lock to protect the write.
7644  */
7645 static ssize_t
7646 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7647 {
7648 	struct trace_min_max_param *param = filp->private_data;
7649 	u64 val;
7650 	int err;
7651 
7652 	if (!param)
7653 		return -EFAULT;
7654 
7655 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7656 	if (err)
7657 		return err;
7658 
7659 	if (param->lock)
7660 		mutex_lock(param->lock);
7661 
7662 	if (param->min && val < *param->min)
7663 		err = -EINVAL;
7664 
7665 	if (param->max && val > *param->max)
7666 		err = -EINVAL;
7667 
7668 	if (!err)
7669 		*param->val = val;
7670 
7671 	if (param->lock)
7672 		mutex_unlock(param->lock);
7673 
7674 	if (err)
7675 		return err;
7676 
7677 	return cnt;
7678 }
7679 
7680 /*
7681  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7682  * @filp: The active open file structure
7683  * @ubuf: The userspace provided buffer to read value into
7684  * @cnt: The maximum number of bytes to read
7685  * @ppos: The current "file" position
7686  *
7687  * This function implements the read interface for a struct trace_min_max_param.
7688  * The filp->private_data must point to a trace_min_max_param struct with valid
7689  * data.
7690  */
7691 static ssize_t
7692 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7693 {
7694 	struct trace_min_max_param *param = filp->private_data;
7695 	char buf[U64_STR_SIZE];
7696 	int len;
7697 	u64 val;
7698 
7699 	if (!param)
7700 		return -EFAULT;
7701 
7702 	val = *param->val;
7703 
7704 	if (cnt > sizeof(buf))
7705 		cnt = sizeof(buf);
7706 
7707 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7708 
7709 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7710 }
7711 
7712 const struct file_operations trace_min_max_fops = {
7713 	.open		= tracing_open_generic,
7714 	.read		= trace_min_max_read,
7715 	.write		= trace_min_max_write,
7716 };
7717 
7718 #define TRACING_LOG_ERRS_MAX	8
7719 #define TRACING_LOG_LOC_MAX	128
7720 
7721 #define CMD_PREFIX "  Command: "
7722 
7723 struct err_info {
7724 	const char	**errs;	/* ptr to loc-specific array of err strings */
7725 	u8		type;	/* index into errs -> specific err string */
7726 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7727 	u64		ts;
7728 };
7729 
7730 struct tracing_log_err {
7731 	struct list_head	list;
7732 	struct err_info		info;
7733 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7734 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7735 };
7736 
7737 static DEFINE_MUTEX(tracing_err_log_lock);
7738 
7739 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7740 {
7741 	struct tracing_log_err *err;
7742 
7743 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7744 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7745 		if (!err)
7746 			err = ERR_PTR(-ENOMEM);
7747 		else
7748 			tr->n_err_log_entries++;
7749 
7750 		return err;
7751 	}
7752 
7753 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7754 	list_del(&err->list);
7755 
7756 	return err;
7757 }
7758 
7759 /**
7760  * err_pos - find the position of a string within a command for error careting
7761  * @cmd: The tracing command that caused the error
7762  * @str: The string to position the caret at within @cmd
7763  *
7764  * Finds the position of the first occurrence of @str within @cmd.  The
7765  * return value can be passed to tracing_log_err() for caret placement
7766  * within @cmd.
7767  *
7768  * Returns the index within @cmd of the first occurrence of @str or 0
7769  * if @str was not found.
7770  */
7771 unsigned int err_pos(char *cmd, const char *str)
7772 {
7773 	char *found;
7774 
7775 	if (WARN_ON(!strlen(cmd)))
7776 		return 0;
7777 
7778 	found = strstr(cmd, str);
7779 	if (found)
7780 		return found - cmd;
7781 
7782 	return 0;
7783 }
7784 
7785 /**
7786  * tracing_log_err - write an error to the tracing error log
7787  * @tr: The associated trace array for the error (NULL for top level array)
7788  * @loc: A string describing where the error occurred
7789  * @cmd: The tracing command that caused the error
7790  * @errs: The array of loc-specific static error strings
7791  * @type: The index into errs[], which produces the specific static err string
7792  * @pos: The position the caret should be placed in the cmd
7793  *
7794  * Writes an error into tracing/error_log of the form:
7795  *
7796  * <loc>: error: <text>
7797  *   Command: <cmd>
7798  *              ^
7799  *
7800  * tracing/error_log is a small log file containing the last
7801  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7802  * unless there has been a tracing error, and the error log can be
7803  * cleared and have its memory freed by writing the empty string in
7804  * truncation mode to it i.e. echo > tracing/error_log.
7805  *
7806  * NOTE: the @errs array along with the @type param are used to
7807  * produce a static error string - this string is not copied and saved
7808  * when the error is logged - only a pointer to it is saved.  See
7809  * existing callers for examples of how static strings are typically
7810  * defined for use with tracing_log_err().
7811  */
7812 void tracing_log_err(struct trace_array *tr,
7813 		     const char *loc, const char *cmd,
7814 		     const char **errs, u8 type, u8 pos)
7815 {
7816 	struct tracing_log_err *err;
7817 
7818 	if (!tr)
7819 		tr = &global_trace;
7820 
7821 	mutex_lock(&tracing_err_log_lock);
7822 	err = get_tracing_log_err(tr);
7823 	if (PTR_ERR(err) == -ENOMEM) {
7824 		mutex_unlock(&tracing_err_log_lock);
7825 		return;
7826 	}
7827 
7828 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7829 	snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7830 
7831 	err->info.errs = errs;
7832 	err->info.type = type;
7833 	err->info.pos = pos;
7834 	err->info.ts = local_clock();
7835 
7836 	list_add_tail(&err->list, &tr->err_log);
7837 	mutex_unlock(&tracing_err_log_lock);
7838 }
7839 
7840 static void clear_tracing_err_log(struct trace_array *tr)
7841 {
7842 	struct tracing_log_err *err, *next;
7843 
7844 	mutex_lock(&tracing_err_log_lock);
7845 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7846 		list_del(&err->list);
7847 		kfree(err);
7848 	}
7849 
7850 	tr->n_err_log_entries = 0;
7851 	mutex_unlock(&tracing_err_log_lock);
7852 }
7853 
7854 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7855 {
7856 	struct trace_array *tr = m->private;
7857 
7858 	mutex_lock(&tracing_err_log_lock);
7859 
7860 	return seq_list_start(&tr->err_log, *pos);
7861 }
7862 
7863 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7864 {
7865 	struct trace_array *tr = m->private;
7866 
7867 	return seq_list_next(v, &tr->err_log, pos);
7868 }
7869 
7870 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7871 {
7872 	mutex_unlock(&tracing_err_log_lock);
7873 }
7874 
7875 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7876 {
7877 	u8 i;
7878 
7879 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7880 		seq_putc(m, ' ');
7881 	for (i = 0; i < pos; i++)
7882 		seq_putc(m, ' ');
7883 	seq_puts(m, "^\n");
7884 }
7885 
7886 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7887 {
7888 	struct tracing_log_err *err = v;
7889 
7890 	if (err) {
7891 		const char *err_text = err->info.errs[err->info.type];
7892 		u64 sec = err->info.ts;
7893 		u32 nsec;
7894 
7895 		nsec = do_div(sec, NSEC_PER_SEC);
7896 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7897 			   err->loc, err_text);
7898 		seq_printf(m, "%s", err->cmd);
7899 		tracing_err_log_show_pos(m, err->info.pos);
7900 	}
7901 
7902 	return 0;
7903 }
7904 
7905 static const struct seq_operations tracing_err_log_seq_ops = {
7906 	.start  = tracing_err_log_seq_start,
7907 	.next   = tracing_err_log_seq_next,
7908 	.stop   = tracing_err_log_seq_stop,
7909 	.show   = tracing_err_log_seq_show
7910 };
7911 
7912 static int tracing_err_log_open(struct inode *inode, struct file *file)
7913 {
7914 	struct trace_array *tr = inode->i_private;
7915 	int ret = 0;
7916 
7917 	ret = tracing_check_open_get_tr(tr);
7918 	if (ret)
7919 		return ret;
7920 
7921 	/* If this file was opened for write, then erase contents */
7922 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7923 		clear_tracing_err_log(tr);
7924 
7925 	if (file->f_mode & FMODE_READ) {
7926 		ret = seq_open(file, &tracing_err_log_seq_ops);
7927 		if (!ret) {
7928 			struct seq_file *m = file->private_data;
7929 			m->private = tr;
7930 		} else {
7931 			trace_array_put(tr);
7932 		}
7933 	}
7934 	return ret;
7935 }
7936 
7937 static ssize_t tracing_err_log_write(struct file *file,
7938 				     const char __user *buffer,
7939 				     size_t count, loff_t *ppos)
7940 {
7941 	return count;
7942 }
7943 
7944 static int tracing_err_log_release(struct inode *inode, struct file *file)
7945 {
7946 	struct trace_array *tr = inode->i_private;
7947 
7948 	trace_array_put(tr);
7949 
7950 	if (file->f_mode & FMODE_READ)
7951 		seq_release(inode, file);
7952 
7953 	return 0;
7954 }
7955 
7956 static const struct file_operations tracing_err_log_fops = {
7957 	.open           = tracing_err_log_open,
7958 	.write		= tracing_err_log_write,
7959 	.read           = seq_read,
7960 	.llseek         = seq_lseek,
7961 	.release        = tracing_err_log_release,
7962 };
7963 
7964 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7965 {
7966 	struct trace_array *tr = inode->i_private;
7967 	struct ftrace_buffer_info *info;
7968 	int ret;
7969 
7970 	ret = tracing_check_open_get_tr(tr);
7971 	if (ret)
7972 		return ret;
7973 
7974 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7975 	if (!info) {
7976 		trace_array_put(tr);
7977 		return -ENOMEM;
7978 	}
7979 
7980 	mutex_lock(&trace_types_lock);
7981 
7982 	info->iter.tr		= tr;
7983 	info->iter.cpu_file	= tracing_get_cpu(inode);
7984 	info->iter.trace	= tr->current_trace;
7985 	info->iter.array_buffer = &tr->array_buffer;
7986 	info->spare		= NULL;
7987 	/* Force reading ring buffer for first read */
7988 	info->read		= (unsigned int)-1;
7989 
7990 	filp->private_data = info;
7991 
7992 	tr->trace_ref++;
7993 
7994 	mutex_unlock(&trace_types_lock);
7995 
7996 	ret = nonseekable_open(inode, filp);
7997 	if (ret < 0)
7998 		trace_array_put(tr);
7999 
8000 	return ret;
8001 }
8002 
8003 static __poll_t
8004 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8005 {
8006 	struct ftrace_buffer_info *info = filp->private_data;
8007 	struct trace_iterator *iter = &info->iter;
8008 
8009 	return trace_poll(iter, filp, poll_table);
8010 }
8011 
8012 static ssize_t
8013 tracing_buffers_read(struct file *filp, char __user *ubuf,
8014 		     size_t count, loff_t *ppos)
8015 {
8016 	struct ftrace_buffer_info *info = filp->private_data;
8017 	struct trace_iterator *iter = &info->iter;
8018 	ssize_t ret = 0;
8019 	ssize_t size;
8020 
8021 	if (!count)
8022 		return 0;
8023 
8024 #ifdef CONFIG_TRACER_MAX_TRACE
8025 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8026 		return -EBUSY;
8027 #endif
8028 
8029 	if (!info->spare) {
8030 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8031 							  iter->cpu_file);
8032 		if (IS_ERR(info->spare)) {
8033 			ret = PTR_ERR(info->spare);
8034 			info->spare = NULL;
8035 		} else {
8036 			info->spare_cpu = iter->cpu_file;
8037 		}
8038 	}
8039 	if (!info->spare)
8040 		return ret;
8041 
8042 	/* Do we have previous read data to read? */
8043 	if (info->read < PAGE_SIZE)
8044 		goto read;
8045 
8046  again:
8047 	trace_access_lock(iter->cpu_file);
8048 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8049 				    &info->spare,
8050 				    count,
8051 				    iter->cpu_file, 0);
8052 	trace_access_unlock(iter->cpu_file);
8053 
8054 	if (ret < 0) {
8055 		if (trace_empty(iter)) {
8056 			if ((filp->f_flags & O_NONBLOCK))
8057 				return -EAGAIN;
8058 
8059 			ret = wait_on_pipe(iter, 0);
8060 			if (ret)
8061 				return ret;
8062 
8063 			goto again;
8064 		}
8065 		return 0;
8066 	}
8067 
8068 	info->read = 0;
8069  read:
8070 	size = PAGE_SIZE - info->read;
8071 	if (size > count)
8072 		size = count;
8073 
8074 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8075 	if (ret == size)
8076 		return -EFAULT;
8077 
8078 	size -= ret;
8079 
8080 	*ppos += size;
8081 	info->read += size;
8082 
8083 	return size;
8084 }
8085 
8086 static int tracing_buffers_release(struct inode *inode, struct file *file)
8087 {
8088 	struct ftrace_buffer_info *info = file->private_data;
8089 	struct trace_iterator *iter = &info->iter;
8090 
8091 	mutex_lock(&trace_types_lock);
8092 
8093 	iter->tr->trace_ref--;
8094 
8095 	__trace_array_put(iter->tr);
8096 
8097 	if (info->spare)
8098 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8099 					   info->spare_cpu, info->spare);
8100 	kvfree(info);
8101 
8102 	mutex_unlock(&trace_types_lock);
8103 
8104 	return 0;
8105 }
8106 
8107 struct buffer_ref {
8108 	struct trace_buffer	*buffer;
8109 	void			*page;
8110 	int			cpu;
8111 	refcount_t		refcount;
8112 };
8113 
8114 static void buffer_ref_release(struct buffer_ref *ref)
8115 {
8116 	if (!refcount_dec_and_test(&ref->refcount))
8117 		return;
8118 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8119 	kfree(ref);
8120 }
8121 
8122 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8123 				    struct pipe_buffer *buf)
8124 {
8125 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8126 
8127 	buffer_ref_release(ref);
8128 	buf->private = 0;
8129 }
8130 
8131 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8132 				struct pipe_buffer *buf)
8133 {
8134 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8135 
8136 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8137 		return false;
8138 
8139 	refcount_inc(&ref->refcount);
8140 	return true;
8141 }
8142 
8143 /* Pipe buffer operations for a buffer. */
8144 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8145 	.release		= buffer_pipe_buf_release,
8146 	.get			= buffer_pipe_buf_get,
8147 };
8148 
8149 /*
8150  * Callback from splice_to_pipe(), if we need to release some pages
8151  * at the end of the spd in case we error'ed out in filling the pipe.
8152  */
8153 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8154 {
8155 	struct buffer_ref *ref =
8156 		(struct buffer_ref *)spd->partial[i].private;
8157 
8158 	buffer_ref_release(ref);
8159 	spd->partial[i].private = 0;
8160 }
8161 
8162 static ssize_t
8163 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8164 			    struct pipe_inode_info *pipe, size_t len,
8165 			    unsigned int flags)
8166 {
8167 	struct ftrace_buffer_info *info = file->private_data;
8168 	struct trace_iterator *iter = &info->iter;
8169 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8170 	struct page *pages_def[PIPE_DEF_BUFFERS];
8171 	struct splice_pipe_desc spd = {
8172 		.pages		= pages_def,
8173 		.partial	= partial_def,
8174 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8175 		.ops		= &buffer_pipe_buf_ops,
8176 		.spd_release	= buffer_spd_release,
8177 	};
8178 	struct buffer_ref *ref;
8179 	int entries, i;
8180 	ssize_t ret = 0;
8181 
8182 #ifdef CONFIG_TRACER_MAX_TRACE
8183 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8184 		return -EBUSY;
8185 #endif
8186 
8187 	if (*ppos & (PAGE_SIZE - 1))
8188 		return -EINVAL;
8189 
8190 	if (len & (PAGE_SIZE - 1)) {
8191 		if (len < PAGE_SIZE)
8192 			return -EINVAL;
8193 		len &= PAGE_MASK;
8194 	}
8195 
8196 	if (splice_grow_spd(pipe, &spd))
8197 		return -ENOMEM;
8198 
8199  again:
8200 	trace_access_lock(iter->cpu_file);
8201 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8202 
8203 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8204 		struct page *page;
8205 		int r;
8206 
8207 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8208 		if (!ref) {
8209 			ret = -ENOMEM;
8210 			break;
8211 		}
8212 
8213 		refcount_set(&ref->refcount, 1);
8214 		ref->buffer = iter->array_buffer->buffer;
8215 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8216 		if (IS_ERR(ref->page)) {
8217 			ret = PTR_ERR(ref->page);
8218 			ref->page = NULL;
8219 			kfree(ref);
8220 			break;
8221 		}
8222 		ref->cpu = iter->cpu_file;
8223 
8224 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8225 					  len, iter->cpu_file, 1);
8226 		if (r < 0) {
8227 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8228 						   ref->page);
8229 			kfree(ref);
8230 			break;
8231 		}
8232 
8233 		page = virt_to_page(ref->page);
8234 
8235 		spd.pages[i] = page;
8236 		spd.partial[i].len = PAGE_SIZE;
8237 		spd.partial[i].offset = 0;
8238 		spd.partial[i].private = (unsigned long)ref;
8239 		spd.nr_pages++;
8240 		*ppos += PAGE_SIZE;
8241 
8242 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8243 	}
8244 
8245 	trace_access_unlock(iter->cpu_file);
8246 	spd.nr_pages = i;
8247 
8248 	/* did we read anything? */
8249 	if (!spd.nr_pages) {
8250 		if (ret)
8251 			goto out;
8252 
8253 		ret = -EAGAIN;
8254 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8255 			goto out;
8256 
8257 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8258 		if (ret)
8259 			goto out;
8260 
8261 		goto again;
8262 	}
8263 
8264 	ret = splice_to_pipe(pipe, &spd);
8265 out:
8266 	splice_shrink_spd(&spd);
8267 
8268 	return ret;
8269 }
8270 
8271 static const struct file_operations tracing_buffers_fops = {
8272 	.open		= tracing_buffers_open,
8273 	.read		= tracing_buffers_read,
8274 	.poll		= tracing_buffers_poll,
8275 	.release	= tracing_buffers_release,
8276 	.splice_read	= tracing_buffers_splice_read,
8277 	.llseek		= no_llseek,
8278 };
8279 
8280 static ssize_t
8281 tracing_stats_read(struct file *filp, char __user *ubuf,
8282 		   size_t count, loff_t *ppos)
8283 {
8284 	struct inode *inode = file_inode(filp);
8285 	struct trace_array *tr = inode->i_private;
8286 	struct array_buffer *trace_buf = &tr->array_buffer;
8287 	int cpu = tracing_get_cpu(inode);
8288 	struct trace_seq *s;
8289 	unsigned long cnt;
8290 	unsigned long long t;
8291 	unsigned long usec_rem;
8292 
8293 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8294 	if (!s)
8295 		return -ENOMEM;
8296 
8297 	trace_seq_init(s);
8298 
8299 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8300 	trace_seq_printf(s, "entries: %ld\n", cnt);
8301 
8302 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8303 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8304 
8305 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8306 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8307 
8308 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8309 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8310 
8311 	if (trace_clocks[tr->clock_id].in_ns) {
8312 		/* local or global for trace_clock */
8313 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8314 		usec_rem = do_div(t, USEC_PER_SEC);
8315 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8316 								t, usec_rem);
8317 
8318 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8319 		usec_rem = do_div(t, USEC_PER_SEC);
8320 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8321 	} else {
8322 		/* counter or tsc mode for trace_clock */
8323 		trace_seq_printf(s, "oldest event ts: %llu\n",
8324 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8325 
8326 		trace_seq_printf(s, "now ts: %llu\n",
8327 				ring_buffer_time_stamp(trace_buf->buffer));
8328 	}
8329 
8330 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8331 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8332 
8333 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8334 	trace_seq_printf(s, "read events: %ld\n", cnt);
8335 
8336 	count = simple_read_from_buffer(ubuf, count, ppos,
8337 					s->buffer, trace_seq_used(s));
8338 
8339 	kfree(s);
8340 
8341 	return count;
8342 }
8343 
8344 static const struct file_operations tracing_stats_fops = {
8345 	.open		= tracing_open_generic_tr,
8346 	.read		= tracing_stats_read,
8347 	.llseek		= generic_file_llseek,
8348 	.release	= tracing_release_generic_tr,
8349 };
8350 
8351 #ifdef CONFIG_DYNAMIC_FTRACE
8352 
8353 static ssize_t
8354 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8355 		  size_t cnt, loff_t *ppos)
8356 {
8357 	ssize_t ret;
8358 	char *buf;
8359 	int r;
8360 
8361 	/* 256 should be plenty to hold the amount needed */
8362 	buf = kmalloc(256, GFP_KERNEL);
8363 	if (!buf)
8364 		return -ENOMEM;
8365 
8366 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8367 		      ftrace_update_tot_cnt,
8368 		      ftrace_number_of_pages,
8369 		      ftrace_number_of_groups);
8370 
8371 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8372 	kfree(buf);
8373 	return ret;
8374 }
8375 
8376 static const struct file_operations tracing_dyn_info_fops = {
8377 	.open		= tracing_open_generic,
8378 	.read		= tracing_read_dyn_info,
8379 	.llseek		= generic_file_llseek,
8380 };
8381 #endif /* CONFIG_DYNAMIC_FTRACE */
8382 
8383 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8384 static void
8385 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8386 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8387 		void *data)
8388 {
8389 	tracing_snapshot_instance(tr);
8390 }
8391 
8392 static void
8393 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8394 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8395 		      void *data)
8396 {
8397 	struct ftrace_func_mapper *mapper = data;
8398 	long *count = NULL;
8399 
8400 	if (mapper)
8401 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8402 
8403 	if (count) {
8404 
8405 		if (*count <= 0)
8406 			return;
8407 
8408 		(*count)--;
8409 	}
8410 
8411 	tracing_snapshot_instance(tr);
8412 }
8413 
8414 static int
8415 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8416 		      struct ftrace_probe_ops *ops, void *data)
8417 {
8418 	struct ftrace_func_mapper *mapper = data;
8419 	long *count = NULL;
8420 
8421 	seq_printf(m, "%ps:", (void *)ip);
8422 
8423 	seq_puts(m, "snapshot");
8424 
8425 	if (mapper)
8426 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8427 
8428 	if (count)
8429 		seq_printf(m, ":count=%ld\n", *count);
8430 	else
8431 		seq_puts(m, ":unlimited\n");
8432 
8433 	return 0;
8434 }
8435 
8436 static int
8437 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8438 		     unsigned long ip, void *init_data, void **data)
8439 {
8440 	struct ftrace_func_mapper *mapper = *data;
8441 
8442 	if (!mapper) {
8443 		mapper = allocate_ftrace_func_mapper();
8444 		if (!mapper)
8445 			return -ENOMEM;
8446 		*data = mapper;
8447 	}
8448 
8449 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8450 }
8451 
8452 static void
8453 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8454 		     unsigned long ip, void *data)
8455 {
8456 	struct ftrace_func_mapper *mapper = data;
8457 
8458 	if (!ip) {
8459 		if (!mapper)
8460 			return;
8461 		free_ftrace_func_mapper(mapper, NULL);
8462 		return;
8463 	}
8464 
8465 	ftrace_func_mapper_remove_ip(mapper, ip);
8466 }
8467 
8468 static struct ftrace_probe_ops snapshot_probe_ops = {
8469 	.func			= ftrace_snapshot,
8470 	.print			= ftrace_snapshot_print,
8471 };
8472 
8473 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8474 	.func			= ftrace_count_snapshot,
8475 	.print			= ftrace_snapshot_print,
8476 	.init			= ftrace_snapshot_init,
8477 	.free			= ftrace_snapshot_free,
8478 };
8479 
8480 static int
8481 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8482 			       char *glob, char *cmd, char *param, int enable)
8483 {
8484 	struct ftrace_probe_ops *ops;
8485 	void *count = (void *)-1;
8486 	char *number;
8487 	int ret;
8488 
8489 	if (!tr)
8490 		return -ENODEV;
8491 
8492 	/* hash funcs only work with set_ftrace_filter */
8493 	if (!enable)
8494 		return -EINVAL;
8495 
8496 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8497 
8498 	if (glob[0] == '!')
8499 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8500 
8501 	if (!param)
8502 		goto out_reg;
8503 
8504 	number = strsep(&param, ":");
8505 
8506 	if (!strlen(number))
8507 		goto out_reg;
8508 
8509 	/*
8510 	 * We use the callback data field (which is a pointer)
8511 	 * as our counter.
8512 	 */
8513 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8514 	if (ret)
8515 		return ret;
8516 
8517  out_reg:
8518 	ret = tracing_alloc_snapshot_instance(tr);
8519 	if (ret < 0)
8520 		goto out;
8521 
8522 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8523 
8524  out:
8525 	return ret < 0 ? ret : 0;
8526 }
8527 
8528 static struct ftrace_func_command ftrace_snapshot_cmd = {
8529 	.name			= "snapshot",
8530 	.func			= ftrace_trace_snapshot_callback,
8531 };
8532 
8533 static __init int register_snapshot_cmd(void)
8534 {
8535 	return register_ftrace_command(&ftrace_snapshot_cmd);
8536 }
8537 #else
8538 static inline __init int register_snapshot_cmd(void) { return 0; }
8539 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8540 
8541 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8542 {
8543 	if (WARN_ON(!tr->dir))
8544 		return ERR_PTR(-ENODEV);
8545 
8546 	/* Top directory uses NULL as the parent */
8547 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8548 		return NULL;
8549 
8550 	/* All sub buffers have a descriptor */
8551 	return tr->dir;
8552 }
8553 
8554 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8555 {
8556 	struct dentry *d_tracer;
8557 
8558 	if (tr->percpu_dir)
8559 		return tr->percpu_dir;
8560 
8561 	d_tracer = tracing_get_dentry(tr);
8562 	if (IS_ERR(d_tracer))
8563 		return NULL;
8564 
8565 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8566 
8567 	MEM_FAIL(!tr->percpu_dir,
8568 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8569 
8570 	return tr->percpu_dir;
8571 }
8572 
8573 static struct dentry *
8574 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8575 		      void *data, long cpu, const struct file_operations *fops)
8576 {
8577 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8578 
8579 	if (ret) /* See tracing_get_cpu() */
8580 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8581 	return ret;
8582 }
8583 
8584 static void
8585 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8586 {
8587 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8588 	struct dentry *d_cpu;
8589 	char cpu_dir[30]; /* 30 characters should be more than enough */
8590 
8591 	if (!d_percpu)
8592 		return;
8593 
8594 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8595 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8596 	if (!d_cpu) {
8597 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8598 		return;
8599 	}
8600 
8601 	/* per cpu trace_pipe */
8602 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8603 				tr, cpu, &tracing_pipe_fops);
8604 
8605 	/* per cpu trace */
8606 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8607 				tr, cpu, &tracing_fops);
8608 
8609 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8610 				tr, cpu, &tracing_buffers_fops);
8611 
8612 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8613 				tr, cpu, &tracing_stats_fops);
8614 
8615 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8616 				tr, cpu, &tracing_entries_fops);
8617 
8618 #ifdef CONFIG_TRACER_SNAPSHOT
8619 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8620 				tr, cpu, &snapshot_fops);
8621 
8622 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8623 				tr, cpu, &snapshot_raw_fops);
8624 #endif
8625 }
8626 
8627 #ifdef CONFIG_FTRACE_SELFTEST
8628 /* Let selftest have access to static functions in this file */
8629 #include "trace_selftest.c"
8630 #endif
8631 
8632 static ssize_t
8633 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8634 			loff_t *ppos)
8635 {
8636 	struct trace_option_dentry *topt = filp->private_data;
8637 	char *buf;
8638 
8639 	if (topt->flags->val & topt->opt->bit)
8640 		buf = "1\n";
8641 	else
8642 		buf = "0\n";
8643 
8644 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8645 }
8646 
8647 static ssize_t
8648 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8649 			 loff_t *ppos)
8650 {
8651 	struct trace_option_dentry *topt = filp->private_data;
8652 	unsigned long val;
8653 	int ret;
8654 
8655 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8656 	if (ret)
8657 		return ret;
8658 
8659 	if (val != 0 && val != 1)
8660 		return -EINVAL;
8661 
8662 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8663 		mutex_lock(&trace_types_lock);
8664 		ret = __set_tracer_option(topt->tr, topt->flags,
8665 					  topt->opt, !val);
8666 		mutex_unlock(&trace_types_lock);
8667 		if (ret)
8668 			return ret;
8669 	}
8670 
8671 	*ppos += cnt;
8672 
8673 	return cnt;
8674 }
8675 
8676 
8677 static const struct file_operations trace_options_fops = {
8678 	.open = tracing_open_generic,
8679 	.read = trace_options_read,
8680 	.write = trace_options_write,
8681 	.llseek	= generic_file_llseek,
8682 };
8683 
8684 /*
8685  * In order to pass in both the trace_array descriptor as well as the index
8686  * to the flag that the trace option file represents, the trace_array
8687  * has a character array of trace_flags_index[], which holds the index
8688  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8689  * The address of this character array is passed to the flag option file
8690  * read/write callbacks.
8691  *
8692  * In order to extract both the index and the trace_array descriptor,
8693  * get_tr_index() uses the following algorithm.
8694  *
8695  *   idx = *ptr;
8696  *
8697  * As the pointer itself contains the address of the index (remember
8698  * index[1] == 1).
8699  *
8700  * Then to get the trace_array descriptor, by subtracting that index
8701  * from the ptr, we get to the start of the index itself.
8702  *
8703  *   ptr - idx == &index[0]
8704  *
8705  * Then a simple container_of() from that pointer gets us to the
8706  * trace_array descriptor.
8707  */
8708 static void get_tr_index(void *data, struct trace_array **ptr,
8709 			 unsigned int *pindex)
8710 {
8711 	*pindex = *(unsigned char *)data;
8712 
8713 	*ptr = container_of(data - *pindex, struct trace_array,
8714 			    trace_flags_index);
8715 }
8716 
8717 static ssize_t
8718 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8719 			loff_t *ppos)
8720 {
8721 	void *tr_index = filp->private_data;
8722 	struct trace_array *tr;
8723 	unsigned int index;
8724 	char *buf;
8725 
8726 	get_tr_index(tr_index, &tr, &index);
8727 
8728 	if (tr->trace_flags & (1 << index))
8729 		buf = "1\n";
8730 	else
8731 		buf = "0\n";
8732 
8733 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8734 }
8735 
8736 static ssize_t
8737 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8738 			 loff_t *ppos)
8739 {
8740 	void *tr_index = filp->private_data;
8741 	struct trace_array *tr;
8742 	unsigned int index;
8743 	unsigned long val;
8744 	int ret;
8745 
8746 	get_tr_index(tr_index, &tr, &index);
8747 
8748 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8749 	if (ret)
8750 		return ret;
8751 
8752 	if (val != 0 && val != 1)
8753 		return -EINVAL;
8754 
8755 	mutex_lock(&event_mutex);
8756 	mutex_lock(&trace_types_lock);
8757 	ret = set_tracer_flag(tr, 1 << index, val);
8758 	mutex_unlock(&trace_types_lock);
8759 	mutex_unlock(&event_mutex);
8760 
8761 	if (ret < 0)
8762 		return ret;
8763 
8764 	*ppos += cnt;
8765 
8766 	return cnt;
8767 }
8768 
8769 static const struct file_operations trace_options_core_fops = {
8770 	.open = tracing_open_generic,
8771 	.read = trace_options_core_read,
8772 	.write = trace_options_core_write,
8773 	.llseek = generic_file_llseek,
8774 };
8775 
8776 struct dentry *trace_create_file(const char *name,
8777 				 umode_t mode,
8778 				 struct dentry *parent,
8779 				 void *data,
8780 				 const struct file_operations *fops)
8781 {
8782 	struct dentry *ret;
8783 
8784 	ret = tracefs_create_file(name, mode, parent, data, fops);
8785 	if (!ret)
8786 		pr_warn("Could not create tracefs '%s' entry\n", name);
8787 
8788 	return ret;
8789 }
8790 
8791 
8792 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8793 {
8794 	struct dentry *d_tracer;
8795 
8796 	if (tr->options)
8797 		return tr->options;
8798 
8799 	d_tracer = tracing_get_dentry(tr);
8800 	if (IS_ERR(d_tracer))
8801 		return NULL;
8802 
8803 	tr->options = tracefs_create_dir("options", d_tracer);
8804 	if (!tr->options) {
8805 		pr_warn("Could not create tracefs directory 'options'\n");
8806 		return NULL;
8807 	}
8808 
8809 	return tr->options;
8810 }
8811 
8812 static void
8813 create_trace_option_file(struct trace_array *tr,
8814 			 struct trace_option_dentry *topt,
8815 			 struct tracer_flags *flags,
8816 			 struct tracer_opt *opt)
8817 {
8818 	struct dentry *t_options;
8819 
8820 	t_options = trace_options_init_dentry(tr);
8821 	if (!t_options)
8822 		return;
8823 
8824 	topt->flags = flags;
8825 	topt->opt = opt;
8826 	topt->tr = tr;
8827 
8828 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8829 					t_options, topt, &trace_options_fops);
8830 
8831 }
8832 
8833 static void
8834 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8835 {
8836 	struct trace_option_dentry *topts;
8837 	struct trace_options *tr_topts;
8838 	struct tracer_flags *flags;
8839 	struct tracer_opt *opts;
8840 	int cnt;
8841 	int i;
8842 
8843 	if (!tracer)
8844 		return;
8845 
8846 	flags = tracer->flags;
8847 
8848 	if (!flags || !flags->opts)
8849 		return;
8850 
8851 	/*
8852 	 * If this is an instance, only create flags for tracers
8853 	 * the instance may have.
8854 	 */
8855 	if (!trace_ok_for_array(tracer, tr))
8856 		return;
8857 
8858 	for (i = 0; i < tr->nr_topts; i++) {
8859 		/* Make sure there's no duplicate flags. */
8860 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8861 			return;
8862 	}
8863 
8864 	opts = flags->opts;
8865 
8866 	for (cnt = 0; opts[cnt].name; cnt++)
8867 		;
8868 
8869 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8870 	if (!topts)
8871 		return;
8872 
8873 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8874 			    GFP_KERNEL);
8875 	if (!tr_topts) {
8876 		kfree(topts);
8877 		return;
8878 	}
8879 
8880 	tr->topts = tr_topts;
8881 	tr->topts[tr->nr_topts].tracer = tracer;
8882 	tr->topts[tr->nr_topts].topts = topts;
8883 	tr->nr_topts++;
8884 
8885 	for (cnt = 0; opts[cnt].name; cnt++) {
8886 		create_trace_option_file(tr, &topts[cnt], flags,
8887 					 &opts[cnt]);
8888 		MEM_FAIL(topts[cnt].entry == NULL,
8889 			  "Failed to create trace option: %s",
8890 			  opts[cnt].name);
8891 	}
8892 }
8893 
8894 static struct dentry *
8895 create_trace_option_core_file(struct trace_array *tr,
8896 			      const char *option, long index)
8897 {
8898 	struct dentry *t_options;
8899 
8900 	t_options = trace_options_init_dentry(tr);
8901 	if (!t_options)
8902 		return NULL;
8903 
8904 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8905 				 (void *)&tr->trace_flags_index[index],
8906 				 &trace_options_core_fops);
8907 }
8908 
8909 static void create_trace_options_dir(struct trace_array *tr)
8910 {
8911 	struct dentry *t_options;
8912 	bool top_level = tr == &global_trace;
8913 	int i;
8914 
8915 	t_options = trace_options_init_dentry(tr);
8916 	if (!t_options)
8917 		return;
8918 
8919 	for (i = 0; trace_options[i]; i++) {
8920 		if (top_level ||
8921 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8922 			create_trace_option_core_file(tr, trace_options[i], i);
8923 	}
8924 }
8925 
8926 static ssize_t
8927 rb_simple_read(struct file *filp, char __user *ubuf,
8928 	       size_t cnt, loff_t *ppos)
8929 {
8930 	struct trace_array *tr = filp->private_data;
8931 	char buf[64];
8932 	int r;
8933 
8934 	r = tracer_tracing_is_on(tr);
8935 	r = sprintf(buf, "%d\n", r);
8936 
8937 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8938 }
8939 
8940 static ssize_t
8941 rb_simple_write(struct file *filp, const char __user *ubuf,
8942 		size_t cnt, loff_t *ppos)
8943 {
8944 	struct trace_array *tr = filp->private_data;
8945 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8946 	unsigned long val;
8947 	int ret;
8948 
8949 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8950 	if (ret)
8951 		return ret;
8952 
8953 	if (buffer) {
8954 		mutex_lock(&trace_types_lock);
8955 		if (!!val == tracer_tracing_is_on(tr)) {
8956 			val = 0; /* do nothing */
8957 		} else if (val) {
8958 			tracer_tracing_on(tr);
8959 			if (tr->current_trace->start)
8960 				tr->current_trace->start(tr);
8961 		} else {
8962 			tracer_tracing_off(tr);
8963 			if (tr->current_trace->stop)
8964 				tr->current_trace->stop(tr);
8965 		}
8966 		mutex_unlock(&trace_types_lock);
8967 	}
8968 
8969 	(*ppos)++;
8970 
8971 	return cnt;
8972 }
8973 
8974 static const struct file_operations rb_simple_fops = {
8975 	.open		= tracing_open_generic_tr,
8976 	.read		= rb_simple_read,
8977 	.write		= rb_simple_write,
8978 	.release	= tracing_release_generic_tr,
8979 	.llseek		= default_llseek,
8980 };
8981 
8982 static ssize_t
8983 buffer_percent_read(struct file *filp, char __user *ubuf,
8984 		    size_t cnt, loff_t *ppos)
8985 {
8986 	struct trace_array *tr = filp->private_data;
8987 	char buf[64];
8988 	int r;
8989 
8990 	r = tr->buffer_percent;
8991 	r = sprintf(buf, "%d\n", r);
8992 
8993 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8994 }
8995 
8996 static ssize_t
8997 buffer_percent_write(struct file *filp, const char __user *ubuf,
8998 		     size_t cnt, loff_t *ppos)
8999 {
9000 	struct trace_array *tr = filp->private_data;
9001 	unsigned long val;
9002 	int ret;
9003 
9004 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9005 	if (ret)
9006 		return ret;
9007 
9008 	if (val > 100)
9009 		return -EINVAL;
9010 
9011 	if (!val)
9012 		val = 1;
9013 
9014 	tr->buffer_percent = val;
9015 
9016 	(*ppos)++;
9017 
9018 	return cnt;
9019 }
9020 
9021 static const struct file_operations buffer_percent_fops = {
9022 	.open		= tracing_open_generic_tr,
9023 	.read		= buffer_percent_read,
9024 	.write		= buffer_percent_write,
9025 	.release	= tracing_release_generic_tr,
9026 	.llseek		= default_llseek,
9027 };
9028 
9029 static struct dentry *trace_instance_dir;
9030 
9031 static void
9032 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9033 
9034 static int
9035 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9036 {
9037 	enum ring_buffer_flags rb_flags;
9038 
9039 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9040 
9041 	buf->tr = tr;
9042 
9043 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9044 	if (!buf->buffer)
9045 		return -ENOMEM;
9046 
9047 	buf->data = alloc_percpu(struct trace_array_cpu);
9048 	if (!buf->data) {
9049 		ring_buffer_free(buf->buffer);
9050 		buf->buffer = NULL;
9051 		return -ENOMEM;
9052 	}
9053 
9054 	/* Allocate the first page for all buffers */
9055 	set_buffer_entries(&tr->array_buffer,
9056 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9057 
9058 	return 0;
9059 }
9060 
9061 static int allocate_trace_buffers(struct trace_array *tr, int size)
9062 {
9063 	int ret;
9064 
9065 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9066 	if (ret)
9067 		return ret;
9068 
9069 #ifdef CONFIG_TRACER_MAX_TRACE
9070 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9071 				    allocate_snapshot ? size : 1);
9072 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9073 		ring_buffer_free(tr->array_buffer.buffer);
9074 		tr->array_buffer.buffer = NULL;
9075 		free_percpu(tr->array_buffer.data);
9076 		tr->array_buffer.data = NULL;
9077 		return -ENOMEM;
9078 	}
9079 	tr->allocated_snapshot = allocate_snapshot;
9080 
9081 	/*
9082 	 * Only the top level trace array gets its snapshot allocated
9083 	 * from the kernel command line.
9084 	 */
9085 	allocate_snapshot = false;
9086 #endif
9087 
9088 	return 0;
9089 }
9090 
9091 static void free_trace_buffer(struct array_buffer *buf)
9092 {
9093 	if (buf->buffer) {
9094 		ring_buffer_free(buf->buffer);
9095 		buf->buffer = NULL;
9096 		free_percpu(buf->data);
9097 		buf->data = NULL;
9098 	}
9099 }
9100 
9101 static void free_trace_buffers(struct trace_array *tr)
9102 {
9103 	if (!tr)
9104 		return;
9105 
9106 	free_trace_buffer(&tr->array_buffer);
9107 
9108 #ifdef CONFIG_TRACER_MAX_TRACE
9109 	free_trace_buffer(&tr->max_buffer);
9110 #endif
9111 }
9112 
9113 static void init_trace_flags_index(struct trace_array *tr)
9114 {
9115 	int i;
9116 
9117 	/* Used by the trace options files */
9118 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9119 		tr->trace_flags_index[i] = i;
9120 }
9121 
9122 static void __update_tracer_options(struct trace_array *tr)
9123 {
9124 	struct tracer *t;
9125 
9126 	for (t = trace_types; t; t = t->next)
9127 		add_tracer_options(tr, t);
9128 }
9129 
9130 static void update_tracer_options(struct trace_array *tr)
9131 {
9132 	mutex_lock(&trace_types_lock);
9133 	__update_tracer_options(tr);
9134 	mutex_unlock(&trace_types_lock);
9135 }
9136 
9137 /* Must have trace_types_lock held */
9138 struct trace_array *trace_array_find(const char *instance)
9139 {
9140 	struct trace_array *tr, *found = NULL;
9141 
9142 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9143 		if (tr->name && strcmp(tr->name, instance) == 0) {
9144 			found = tr;
9145 			break;
9146 		}
9147 	}
9148 
9149 	return found;
9150 }
9151 
9152 struct trace_array *trace_array_find_get(const char *instance)
9153 {
9154 	struct trace_array *tr;
9155 
9156 	mutex_lock(&trace_types_lock);
9157 	tr = trace_array_find(instance);
9158 	if (tr)
9159 		tr->ref++;
9160 	mutex_unlock(&trace_types_lock);
9161 
9162 	return tr;
9163 }
9164 
9165 static int trace_array_create_dir(struct trace_array *tr)
9166 {
9167 	int ret;
9168 
9169 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9170 	if (!tr->dir)
9171 		return -EINVAL;
9172 
9173 	ret = event_trace_add_tracer(tr->dir, tr);
9174 	if (ret) {
9175 		tracefs_remove(tr->dir);
9176 		return ret;
9177 	}
9178 
9179 	init_tracer_tracefs(tr, tr->dir);
9180 	__update_tracer_options(tr);
9181 
9182 	return ret;
9183 }
9184 
9185 static struct trace_array *trace_array_create(const char *name)
9186 {
9187 	struct trace_array *tr;
9188 	int ret;
9189 
9190 	ret = -ENOMEM;
9191 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9192 	if (!tr)
9193 		return ERR_PTR(ret);
9194 
9195 	tr->name = kstrdup(name, GFP_KERNEL);
9196 	if (!tr->name)
9197 		goto out_free_tr;
9198 
9199 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9200 		goto out_free_tr;
9201 
9202 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9203 
9204 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9205 
9206 	raw_spin_lock_init(&tr->start_lock);
9207 
9208 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9209 
9210 	tr->current_trace = &nop_trace;
9211 
9212 	INIT_LIST_HEAD(&tr->systems);
9213 	INIT_LIST_HEAD(&tr->events);
9214 	INIT_LIST_HEAD(&tr->hist_vars);
9215 	INIT_LIST_HEAD(&tr->err_log);
9216 
9217 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9218 		goto out_free_tr;
9219 
9220 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9221 		goto out_free_tr;
9222 
9223 	ftrace_init_trace_array(tr);
9224 
9225 	init_trace_flags_index(tr);
9226 
9227 	if (trace_instance_dir) {
9228 		ret = trace_array_create_dir(tr);
9229 		if (ret)
9230 			goto out_free_tr;
9231 	} else
9232 		__trace_early_add_events(tr);
9233 
9234 	list_add(&tr->list, &ftrace_trace_arrays);
9235 
9236 	tr->ref++;
9237 
9238 	return tr;
9239 
9240  out_free_tr:
9241 	ftrace_free_ftrace_ops(tr);
9242 	free_trace_buffers(tr);
9243 	free_cpumask_var(tr->tracing_cpumask);
9244 	kfree(tr->name);
9245 	kfree(tr);
9246 
9247 	return ERR_PTR(ret);
9248 }
9249 
9250 static int instance_mkdir(const char *name)
9251 {
9252 	struct trace_array *tr;
9253 	int ret;
9254 
9255 	mutex_lock(&event_mutex);
9256 	mutex_lock(&trace_types_lock);
9257 
9258 	ret = -EEXIST;
9259 	if (trace_array_find(name))
9260 		goto out_unlock;
9261 
9262 	tr = trace_array_create(name);
9263 
9264 	ret = PTR_ERR_OR_ZERO(tr);
9265 
9266 out_unlock:
9267 	mutex_unlock(&trace_types_lock);
9268 	mutex_unlock(&event_mutex);
9269 	return ret;
9270 }
9271 
9272 /**
9273  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9274  * @name: The name of the trace array to be looked up/created.
9275  *
9276  * Returns pointer to trace array with given name.
9277  * NULL, if it cannot be created.
9278  *
9279  * NOTE: This function increments the reference counter associated with the
9280  * trace array returned. This makes sure it cannot be freed while in use.
9281  * Use trace_array_put() once the trace array is no longer needed.
9282  * If the trace_array is to be freed, trace_array_destroy() needs to
9283  * be called after the trace_array_put(), or simply let user space delete
9284  * it from the tracefs instances directory. But until the
9285  * trace_array_put() is called, user space can not delete it.
9286  *
9287  */
9288 struct trace_array *trace_array_get_by_name(const char *name)
9289 {
9290 	struct trace_array *tr;
9291 
9292 	mutex_lock(&event_mutex);
9293 	mutex_lock(&trace_types_lock);
9294 
9295 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9296 		if (tr->name && strcmp(tr->name, name) == 0)
9297 			goto out_unlock;
9298 	}
9299 
9300 	tr = trace_array_create(name);
9301 
9302 	if (IS_ERR(tr))
9303 		tr = NULL;
9304 out_unlock:
9305 	if (tr)
9306 		tr->ref++;
9307 
9308 	mutex_unlock(&trace_types_lock);
9309 	mutex_unlock(&event_mutex);
9310 	return tr;
9311 }
9312 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9313 
9314 static int __remove_instance(struct trace_array *tr)
9315 {
9316 	int i;
9317 
9318 	/* Reference counter for a newly created trace array = 1. */
9319 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9320 		return -EBUSY;
9321 
9322 	list_del(&tr->list);
9323 
9324 	/* Disable all the flags that were enabled coming in */
9325 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9326 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9327 			set_tracer_flag(tr, 1 << i, 0);
9328 	}
9329 
9330 	tracing_set_nop(tr);
9331 	clear_ftrace_function_probes(tr);
9332 	event_trace_del_tracer(tr);
9333 	ftrace_clear_pids(tr);
9334 	ftrace_destroy_function_files(tr);
9335 	tracefs_remove(tr->dir);
9336 	free_percpu(tr->last_func_repeats);
9337 	free_trace_buffers(tr);
9338 
9339 	for (i = 0; i < tr->nr_topts; i++) {
9340 		kfree(tr->topts[i].topts);
9341 	}
9342 	kfree(tr->topts);
9343 
9344 	free_cpumask_var(tr->tracing_cpumask);
9345 	kfree(tr->name);
9346 	kfree(tr);
9347 
9348 	return 0;
9349 }
9350 
9351 int trace_array_destroy(struct trace_array *this_tr)
9352 {
9353 	struct trace_array *tr;
9354 	int ret;
9355 
9356 	if (!this_tr)
9357 		return -EINVAL;
9358 
9359 	mutex_lock(&event_mutex);
9360 	mutex_lock(&trace_types_lock);
9361 
9362 	ret = -ENODEV;
9363 
9364 	/* Making sure trace array exists before destroying it. */
9365 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9366 		if (tr == this_tr) {
9367 			ret = __remove_instance(tr);
9368 			break;
9369 		}
9370 	}
9371 
9372 	mutex_unlock(&trace_types_lock);
9373 	mutex_unlock(&event_mutex);
9374 
9375 	return ret;
9376 }
9377 EXPORT_SYMBOL_GPL(trace_array_destroy);
9378 
9379 static int instance_rmdir(const char *name)
9380 {
9381 	struct trace_array *tr;
9382 	int ret;
9383 
9384 	mutex_lock(&event_mutex);
9385 	mutex_lock(&trace_types_lock);
9386 
9387 	ret = -ENODEV;
9388 	tr = trace_array_find(name);
9389 	if (tr)
9390 		ret = __remove_instance(tr);
9391 
9392 	mutex_unlock(&trace_types_lock);
9393 	mutex_unlock(&event_mutex);
9394 
9395 	return ret;
9396 }
9397 
9398 static __init void create_trace_instances(struct dentry *d_tracer)
9399 {
9400 	struct trace_array *tr;
9401 
9402 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9403 							 instance_mkdir,
9404 							 instance_rmdir);
9405 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9406 		return;
9407 
9408 	mutex_lock(&event_mutex);
9409 	mutex_lock(&trace_types_lock);
9410 
9411 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9412 		if (!tr->name)
9413 			continue;
9414 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9415 			     "Failed to create instance directory\n"))
9416 			break;
9417 	}
9418 
9419 	mutex_unlock(&trace_types_lock);
9420 	mutex_unlock(&event_mutex);
9421 }
9422 
9423 static void
9424 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9425 {
9426 	struct trace_event_file *file;
9427 	int cpu;
9428 
9429 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9430 			tr, &show_traces_fops);
9431 
9432 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9433 			tr, &set_tracer_fops);
9434 
9435 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9436 			  tr, &tracing_cpumask_fops);
9437 
9438 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9439 			  tr, &tracing_iter_fops);
9440 
9441 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9442 			  tr, &tracing_fops);
9443 
9444 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9445 			  tr, &tracing_pipe_fops);
9446 
9447 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9448 			  tr, &tracing_entries_fops);
9449 
9450 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9451 			  tr, &tracing_total_entries_fops);
9452 
9453 	trace_create_file("free_buffer", 0200, d_tracer,
9454 			  tr, &tracing_free_buffer_fops);
9455 
9456 	trace_create_file("trace_marker", 0220, d_tracer,
9457 			  tr, &tracing_mark_fops);
9458 
9459 	file = __find_event_file(tr, "ftrace", "print");
9460 	if (file && file->dir)
9461 		trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9462 				  file, &event_trigger_fops);
9463 	tr->trace_marker_file = file;
9464 
9465 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9466 			  tr, &tracing_mark_raw_fops);
9467 
9468 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9469 			  &trace_clock_fops);
9470 
9471 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9472 			  tr, &rb_simple_fops);
9473 
9474 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9475 			  &trace_time_stamp_mode_fops);
9476 
9477 	tr->buffer_percent = 50;
9478 
9479 	trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9480 			tr, &buffer_percent_fops);
9481 
9482 	create_trace_options_dir(tr);
9483 
9484 	trace_create_maxlat_file(tr, d_tracer);
9485 
9486 	if (ftrace_create_function_files(tr, d_tracer))
9487 		MEM_FAIL(1, "Could not allocate function filter files");
9488 
9489 #ifdef CONFIG_TRACER_SNAPSHOT
9490 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9491 			  tr, &snapshot_fops);
9492 #endif
9493 
9494 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9495 			  tr, &tracing_err_log_fops);
9496 
9497 	for_each_tracing_cpu(cpu)
9498 		tracing_init_tracefs_percpu(tr, cpu);
9499 
9500 	ftrace_init_tracefs(tr, d_tracer);
9501 }
9502 
9503 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9504 {
9505 	struct vfsmount *mnt;
9506 	struct file_system_type *type;
9507 
9508 	/*
9509 	 * To maintain backward compatibility for tools that mount
9510 	 * debugfs to get to the tracing facility, tracefs is automatically
9511 	 * mounted to the debugfs/tracing directory.
9512 	 */
9513 	type = get_fs_type("tracefs");
9514 	if (!type)
9515 		return NULL;
9516 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9517 	put_filesystem(type);
9518 	if (IS_ERR(mnt))
9519 		return NULL;
9520 	mntget(mnt);
9521 
9522 	return mnt;
9523 }
9524 
9525 /**
9526  * tracing_init_dentry - initialize top level trace array
9527  *
9528  * This is called when creating files or directories in the tracing
9529  * directory. It is called via fs_initcall() by any of the boot up code
9530  * and expects to return the dentry of the top level tracing directory.
9531  */
9532 int tracing_init_dentry(void)
9533 {
9534 	struct trace_array *tr = &global_trace;
9535 
9536 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9537 		pr_warn("Tracing disabled due to lockdown\n");
9538 		return -EPERM;
9539 	}
9540 
9541 	/* The top level trace array uses  NULL as parent */
9542 	if (tr->dir)
9543 		return 0;
9544 
9545 	if (WARN_ON(!tracefs_initialized()))
9546 		return -ENODEV;
9547 
9548 	/*
9549 	 * As there may still be users that expect the tracing
9550 	 * files to exist in debugfs/tracing, we must automount
9551 	 * the tracefs file system there, so older tools still
9552 	 * work with the newer kernel.
9553 	 */
9554 	tr->dir = debugfs_create_automount("tracing", NULL,
9555 					   trace_automount, NULL);
9556 
9557 	return 0;
9558 }
9559 
9560 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9561 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9562 
9563 static struct workqueue_struct *eval_map_wq __initdata;
9564 static struct work_struct eval_map_work __initdata;
9565 
9566 static void __init eval_map_work_func(struct work_struct *work)
9567 {
9568 	int len;
9569 
9570 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9571 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9572 }
9573 
9574 static int __init trace_eval_init(void)
9575 {
9576 	INIT_WORK(&eval_map_work, eval_map_work_func);
9577 
9578 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9579 	if (!eval_map_wq) {
9580 		pr_err("Unable to allocate eval_map_wq\n");
9581 		/* Do work here */
9582 		eval_map_work_func(&eval_map_work);
9583 		return -ENOMEM;
9584 	}
9585 
9586 	queue_work(eval_map_wq, &eval_map_work);
9587 	return 0;
9588 }
9589 
9590 static int __init trace_eval_sync(void)
9591 {
9592 	/* Make sure the eval map updates are finished */
9593 	if (eval_map_wq)
9594 		destroy_workqueue(eval_map_wq);
9595 	return 0;
9596 }
9597 
9598 late_initcall_sync(trace_eval_sync);
9599 
9600 
9601 #ifdef CONFIG_MODULES
9602 static void trace_module_add_evals(struct module *mod)
9603 {
9604 	if (!mod->num_trace_evals)
9605 		return;
9606 
9607 	/*
9608 	 * Modules with bad taint do not have events created, do
9609 	 * not bother with enums either.
9610 	 */
9611 	if (trace_module_has_bad_taint(mod))
9612 		return;
9613 
9614 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9615 }
9616 
9617 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9618 static void trace_module_remove_evals(struct module *mod)
9619 {
9620 	union trace_eval_map_item *map;
9621 	union trace_eval_map_item **last = &trace_eval_maps;
9622 
9623 	if (!mod->num_trace_evals)
9624 		return;
9625 
9626 	mutex_lock(&trace_eval_mutex);
9627 
9628 	map = trace_eval_maps;
9629 
9630 	while (map) {
9631 		if (map->head.mod == mod)
9632 			break;
9633 		map = trace_eval_jmp_to_tail(map);
9634 		last = &map->tail.next;
9635 		map = map->tail.next;
9636 	}
9637 	if (!map)
9638 		goto out;
9639 
9640 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9641 	kfree(map);
9642  out:
9643 	mutex_unlock(&trace_eval_mutex);
9644 }
9645 #else
9646 static inline void trace_module_remove_evals(struct module *mod) { }
9647 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9648 
9649 static int trace_module_notify(struct notifier_block *self,
9650 			       unsigned long val, void *data)
9651 {
9652 	struct module *mod = data;
9653 
9654 	switch (val) {
9655 	case MODULE_STATE_COMING:
9656 		trace_module_add_evals(mod);
9657 		break;
9658 	case MODULE_STATE_GOING:
9659 		trace_module_remove_evals(mod);
9660 		break;
9661 	}
9662 
9663 	return NOTIFY_OK;
9664 }
9665 
9666 static struct notifier_block trace_module_nb = {
9667 	.notifier_call = trace_module_notify,
9668 	.priority = 0,
9669 };
9670 #endif /* CONFIG_MODULES */
9671 
9672 static __init int tracer_init_tracefs(void)
9673 {
9674 	int ret;
9675 
9676 	trace_access_lock_init();
9677 
9678 	ret = tracing_init_dentry();
9679 	if (ret)
9680 		return 0;
9681 
9682 	event_trace_init();
9683 
9684 	init_tracer_tracefs(&global_trace, NULL);
9685 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9686 
9687 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9688 			&global_trace, &tracing_thresh_fops);
9689 
9690 	trace_create_file("README", TRACE_MODE_READ, NULL,
9691 			NULL, &tracing_readme_fops);
9692 
9693 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9694 			NULL, &tracing_saved_cmdlines_fops);
9695 
9696 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9697 			  NULL, &tracing_saved_cmdlines_size_fops);
9698 
9699 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9700 			NULL, &tracing_saved_tgids_fops);
9701 
9702 	trace_eval_init();
9703 
9704 	trace_create_eval_file(NULL);
9705 
9706 #ifdef CONFIG_MODULES
9707 	register_module_notifier(&trace_module_nb);
9708 #endif
9709 
9710 #ifdef CONFIG_DYNAMIC_FTRACE
9711 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9712 			NULL, &tracing_dyn_info_fops);
9713 #endif
9714 
9715 	create_trace_instances(NULL);
9716 
9717 	update_tracer_options(&global_trace);
9718 
9719 	return 0;
9720 }
9721 
9722 fs_initcall(tracer_init_tracefs);
9723 
9724 static int trace_panic_handler(struct notifier_block *this,
9725 			       unsigned long event, void *unused)
9726 {
9727 	if (ftrace_dump_on_oops)
9728 		ftrace_dump(ftrace_dump_on_oops);
9729 	return NOTIFY_OK;
9730 }
9731 
9732 static struct notifier_block trace_panic_notifier = {
9733 	.notifier_call  = trace_panic_handler,
9734 	.next           = NULL,
9735 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9736 };
9737 
9738 static int trace_die_handler(struct notifier_block *self,
9739 			     unsigned long val,
9740 			     void *data)
9741 {
9742 	switch (val) {
9743 	case DIE_OOPS:
9744 		if (ftrace_dump_on_oops)
9745 			ftrace_dump(ftrace_dump_on_oops);
9746 		break;
9747 	default:
9748 		break;
9749 	}
9750 	return NOTIFY_OK;
9751 }
9752 
9753 static struct notifier_block trace_die_notifier = {
9754 	.notifier_call = trace_die_handler,
9755 	.priority = 200
9756 };
9757 
9758 /*
9759  * printk is set to max of 1024, we really don't need it that big.
9760  * Nothing should be printing 1000 characters anyway.
9761  */
9762 #define TRACE_MAX_PRINT		1000
9763 
9764 /*
9765  * Define here KERN_TRACE so that we have one place to modify
9766  * it if we decide to change what log level the ftrace dump
9767  * should be at.
9768  */
9769 #define KERN_TRACE		KERN_EMERG
9770 
9771 void
9772 trace_printk_seq(struct trace_seq *s)
9773 {
9774 	/* Probably should print a warning here. */
9775 	if (s->seq.len >= TRACE_MAX_PRINT)
9776 		s->seq.len = TRACE_MAX_PRINT;
9777 
9778 	/*
9779 	 * More paranoid code. Although the buffer size is set to
9780 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9781 	 * an extra layer of protection.
9782 	 */
9783 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9784 		s->seq.len = s->seq.size - 1;
9785 
9786 	/* should be zero ended, but we are paranoid. */
9787 	s->buffer[s->seq.len] = 0;
9788 
9789 	printk(KERN_TRACE "%s", s->buffer);
9790 
9791 	trace_seq_init(s);
9792 }
9793 
9794 void trace_init_global_iter(struct trace_iterator *iter)
9795 {
9796 	iter->tr = &global_trace;
9797 	iter->trace = iter->tr->current_trace;
9798 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9799 	iter->array_buffer = &global_trace.array_buffer;
9800 
9801 	if (iter->trace && iter->trace->open)
9802 		iter->trace->open(iter);
9803 
9804 	/* Annotate start of buffers if we had overruns */
9805 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9806 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9807 
9808 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9809 	if (trace_clocks[iter->tr->clock_id].in_ns)
9810 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9811 }
9812 
9813 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9814 {
9815 	/* use static because iter can be a bit big for the stack */
9816 	static struct trace_iterator iter;
9817 	static atomic_t dump_running;
9818 	struct trace_array *tr = &global_trace;
9819 	unsigned int old_userobj;
9820 	unsigned long flags;
9821 	int cnt = 0, cpu;
9822 
9823 	/* Only allow one dump user at a time. */
9824 	if (atomic_inc_return(&dump_running) != 1) {
9825 		atomic_dec(&dump_running);
9826 		return;
9827 	}
9828 
9829 	/*
9830 	 * Always turn off tracing when we dump.
9831 	 * We don't need to show trace output of what happens
9832 	 * between multiple crashes.
9833 	 *
9834 	 * If the user does a sysrq-z, then they can re-enable
9835 	 * tracing with echo 1 > tracing_on.
9836 	 */
9837 	tracing_off();
9838 
9839 	local_irq_save(flags);
9840 
9841 	/* Simulate the iterator */
9842 	trace_init_global_iter(&iter);
9843 	/* Can not use kmalloc for iter.temp and iter.fmt */
9844 	iter.temp = static_temp_buf;
9845 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9846 	iter.fmt = static_fmt_buf;
9847 	iter.fmt_size = STATIC_FMT_BUF_SIZE;
9848 
9849 	for_each_tracing_cpu(cpu) {
9850 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9851 	}
9852 
9853 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9854 
9855 	/* don't look at user memory in panic mode */
9856 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9857 
9858 	switch (oops_dump_mode) {
9859 	case DUMP_ALL:
9860 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9861 		break;
9862 	case DUMP_ORIG:
9863 		iter.cpu_file = raw_smp_processor_id();
9864 		break;
9865 	case DUMP_NONE:
9866 		goto out_enable;
9867 	default:
9868 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9869 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9870 	}
9871 
9872 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9873 
9874 	/* Did function tracer already get disabled? */
9875 	if (ftrace_is_dead()) {
9876 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9877 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9878 	}
9879 
9880 	/*
9881 	 * We need to stop all tracing on all CPUS to read
9882 	 * the next buffer. This is a bit expensive, but is
9883 	 * not done often. We fill all what we can read,
9884 	 * and then release the locks again.
9885 	 */
9886 
9887 	while (!trace_empty(&iter)) {
9888 
9889 		if (!cnt)
9890 			printk(KERN_TRACE "---------------------------------\n");
9891 
9892 		cnt++;
9893 
9894 		trace_iterator_reset(&iter);
9895 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9896 
9897 		if (trace_find_next_entry_inc(&iter) != NULL) {
9898 			int ret;
9899 
9900 			ret = print_trace_line(&iter);
9901 			if (ret != TRACE_TYPE_NO_CONSUME)
9902 				trace_consume(&iter);
9903 		}
9904 		touch_nmi_watchdog();
9905 
9906 		trace_printk_seq(&iter.seq);
9907 	}
9908 
9909 	if (!cnt)
9910 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9911 	else
9912 		printk(KERN_TRACE "---------------------------------\n");
9913 
9914  out_enable:
9915 	tr->trace_flags |= old_userobj;
9916 
9917 	for_each_tracing_cpu(cpu) {
9918 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9919 	}
9920 	atomic_dec(&dump_running);
9921 	local_irq_restore(flags);
9922 }
9923 EXPORT_SYMBOL_GPL(ftrace_dump);
9924 
9925 #define WRITE_BUFSIZE  4096
9926 
9927 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9928 				size_t count, loff_t *ppos,
9929 				int (*createfn)(const char *))
9930 {
9931 	char *kbuf, *buf, *tmp;
9932 	int ret = 0;
9933 	size_t done = 0;
9934 	size_t size;
9935 
9936 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9937 	if (!kbuf)
9938 		return -ENOMEM;
9939 
9940 	while (done < count) {
9941 		size = count - done;
9942 
9943 		if (size >= WRITE_BUFSIZE)
9944 			size = WRITE_BUFSIZE - 1;
9945 
9946 		if (copy_from_user(kbuf, buffer + done, size)) {
9947 			ret = -EFAULT;
9948 			goto out;
9949 		}
9950 		kbuf[size] = '\0';
9951 		buf = kbuf;
9952 		do {
9953 			tmp = strchr(buf, '\n');
9954 			if (tmp) {
9955 				*tmp = '\0';
9956 				size = tmp - buf + 1;
9957 			} else {
9958 				size = strlen(buf);
9959 				if (done + size < count) {
9960 					if (buf != kbuf)
9961 						break;
9962 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9963 					pr_warn("Line length is too long: Should be less than %d\n",
9964 						WRITE_BUFSIZE - 2);
9965 					ret = -EINVAL;
9966 					goto out;
9967 				}
9968 			}
9969 			done += size;
9970 
9971 			/* Remove comments */
9972 			tmp = strchr(buf, '#');
9973 
9974 			if (tmp)
9975 				*tmp = '\0';
9976 
9977 			ret = createfn(buf);
9978 			if (ret)
9979 				goto out;
9980 			buf += size;
9981 
9982 		} while (done < count);
9983 	}
9984 	ret = done;
9985 
9986 out:
9987 	kfree(kbuf);
9988 
9989 	return ret;
9990 }
9991 
9992 __init static int tracer_alloc_buffers(void)
9993 {
9994 	int ring_buf_size;
9995 	int ret = -ENOMEM;
9996 
9997 
9998 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9999 		pr_warn("Tracing disabled due to lockdown\n");
10000 		return -EPERM;
10001 	}
10002 
10003 	/*
10004 	 * Make sure we don't accidentally add more trace options
10005 	 * than we have bits for.
10006 	 */
10007 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10008 
10009 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10010 		goto out;
10011 
10012 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10013 		goto out_free_buffer_mask;
10014 
10015 	/* Only allocate trace_printk buffers if a trace_printk exists */
10016 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10017 		/* Must be called before global_trace.buffer is allocated */
10018 		trace_printk_init_buffers();
10019 
10020 	/* To save memory, keep the ring buffer size to its minimum */
10021 	if (ring_buffer_expanded)
10022 		ring_buf_size = trace_buf_size;
10023 	else
10024 		ring_buf_size = 1;
10025 
10026 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10027 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10028 
10029 	raw_spin_lock_init(&global_trace.start_lock);
10030 
10031 	/*
10032 	 * The prepare callbacks allocates some memory for the ring buffer. We
10033 	 * don't free the buffer if the CPU goes down. If we were to free
10034 	 * the buffer, then the user would lose any trace that was in the
10035 	 * buffer. The memory will be removed once the "instance" is removed.
10036 	 */
10037 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10038 				      "trace/RB:preapre", trace_rb_cpu_prepare,
10039 				      NULL);
10040 	if (ret < 0)
10041 		goto out_free_cpumask;
10042 	/* Used for event triggers */
10043 	ret = -ENOMEM;
10044 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10045 	if (!temp_buffer)
10046 		goto out_rm_hp_state;
10047 
10048 	if (trace_create_savedcmd() < 0)
10049 		goto out_free_temp_buffer;
10050 
10051 	/* TODO: make the number of buffers hot pluggable with CPUS */
10052 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10053 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10054 		goto out_free_savedcmd;
10055 	}
10056 
10057 	if (global_trace.buffer_disabled)
10058 		tracing_off();
10059 
10060 	if (trace_boot_clock) {
10061 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10062 		if (ret < 0)
10063 			pr_warn("Trace clock %s not defined, going back to default\n",
10064 				trace_boot_clock);
10065 	}
10066 
10067 	/*
10068 	 * register_tracer() might reference current_trace, so it
10069 	 * needs to be set before we register anything. This is
10070 	 * just a bootstrap of current_trace anyway.
10071 	 */
10072 	global_trace.current_trace = &nop_trace;
10073 
10074 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10075 
10076 	ftrace_init_global_array_ops(&global_trace);
10077 
10078 	init_trace_flags_index(&global_trace);
10079 
10080 	register_tracer(&nop_trace);
10081 
10082 	/* Function tracing may start here (via kernel command line) */
10083 	init_function_trace();
10084 
10085 	/* All seems OK, enable tracing */
10086 	tracing_disabled = 0;
10087 
10088 	atomic_notifier_chain_register(&panic_notifier_list,
10089 				       &trace_panic_notifier);
10090 
10091 	register_die_notifier(&trace_die_notifier);
10092 
10093 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10094 
10095 	INIT_LIST_HEAD(&global_trace.systems);
10096 	INIT_LIST_HEAD(&global_trace.events);
10097 	INIT_LIST_HEAD(&global_trace.hist_vars);
10098 	INIT_LIST_HEAD(&global_trace.err_log);
10099 	list_add(&global_trace.list, &ftrace_trace_arrays);
10100 
10101 	apply_trace_boot_options();
10102 
10103 	register_snapshot_cmd();
10104 
10105 	test_can_verify();
10106 
10107 	return 0;
10108 
10109 out_free_savedcmd:
10110 	free_saved_cmdlines_buffer(savedcmd);
10111 out_free_temp_buffer:
10112 	ring_buffer_free(temp_buffer);
10113 out_rm_hp_state:
10114 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10115 out_free_cpumask:
10116 	free_cpumask_var(global_trace.tracing_cpumask);
10117 out_free_buffer_mask:
10118 	free_cpumask_var(tracing_buffer_mask);
10119 out:
10120 	return ret;
10121 }
10122 
10123 void __init early_trace_init(void)
10124 {
10125 	if (tracepoint_printk) {
10126 		tracepoint_print_iter =
10127 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10128 		if (MEM_FAIL(!tracepoint_print_iter,
10129 			     "Failed to allocate trace iterator\n"))
10130 			tracepoint_printk = 0;
10131 		else
10132 			static_key_enable(&tracepoint_printk_key.key);
10133 	}
10134 	tracer_alloc_buffers();
10135 }
10136 
10137 void __init trace_init(void)
10138 {
10139 	trace_event_init();
10140 }
10141 
10142 __init static void clear_boot_tracer(void)
10143 {
10144 	/*
10145 	 * The default tracer at boot buffer is an init section.
10146 	 * This function is called in lateinit. If we did not
10147 	 * find the boot tracer, then clear it out, to prevent
10148 	 * later registration from accessing the buffer that is
10149 	 * about to be freed.
10150 	 */
10151 	if (!default_bootup_tracer)
10152 		return;
10153 
10154 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10155 	       default_bootup_tracer);
10156 	default_bootup_tracer = NULL;
10157 }
10158 
10159 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10160 __init static void tracing_set_default_clock(void)
10161 {
10162 	/* sched_clock_stable() is determined in late_initcall */
10163 	if (!trace_boot_clock && !sched_clock_stable()) {
10164 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10165 			pr_warn("Can not set tracing clock due to lockdown\n");
10166 			return;
10167 		}
10168 
10169 		printk(KERN_WARNING
10170 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10171 		       "If you want to keep using the local clock, then add:\n"
10172 		       "  \"trace_clock=local\"\n"
10173 		       "on the kernel command line\n");
10174 		tracing_set_clock(&global_trace, "global");
10175 	}
10176 }
10177 #else
10178 static inline void tracing_set_default_clock(void) { }
10179 #endif
10180 
10181 __init static int late_trace_init(void)
10182 {
10183 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10184 		static_key_disable(&tracepoint_printk_key.key);
10185 		tracepoint_printk = 0;
10186 	}
10187 
10188 	tracing_set_default_clock();
10189 	clear_boot_tracer();
10190 	return 0;
10191 }
10192 
10193 late_initcall_sync(late_trace_init);
10194