xref: /openbmc/linux/kernel/trace/trace.c (revision 2c64e9cb)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47 
48 #include "trace.h"
49 #include "trace_output.h"
50 
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56 
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will look into the ring buffer to count the
60  * entries inserted during the selftest, although concurrent
61  * insertions into the ring buffer, such as trace_printk(), could occur
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65 
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70 
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75 
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78 	{ }
79 };
80 
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84 	return 0;
85 }
86 
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93 
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but will turn to zero if the initialization
97  * of the tracer is successful. But that is the only place that sets
98  * this back to zero.
99  */
100 static int tracing_disabled = 1;
101 
102 cpumask_var_t __read_mostly	tracing_buffer_mask;
103 
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputting them to a
111  * serial console.
112  *
113  * It is off by default, but you can enable it by either specifying
114  * "ftrace_dump_on_oops" on the kernel command line, or setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set it to 1 to dump the buffers of all CPUs.
117  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
119 
120 enum ftrace_dump_mode ftrace_dump_on_oops;
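/*
 * Illustrative usage (not part of the original source): with the parsing
 * done by set_ftrace_dump_on_oops() below, either of the following boot
 * parameters enables the dump, and the sysctl mentioned above can be used
 * at run time:
 *
 *   ftrace_dump_on_oops            // dump all CPU buffers (DUMP_ALL)
 *   ftrace_dump_on_oops=orig_cpu   // dump only the oopsing CPU (DUMP_ORIG)
 *
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */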
121 
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124 
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128 	struct module			*mod;
129 	unsigned long			length;
130 };
131 
132 union trace_eval_map_item;
133 
134 struct trace_eval_map_tail {
135 	/*
136 	 * "end" is first and points to NULL as it must be different
137 	 * than "mod" or "eval_string"
138 	 */
139 	union trace_eval_map_item	*next;
140 	const char			*end;	/* points to NULL */
141 };
142 
143 static DEFINE_MUTEX(trace_eval_mutex);
144 
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153 	struct trace_eval_map		map;
154 	struct trace_eval_map_head	head;
155 	struct trace_eval_map_tail	tail;
156 };
157 
158 static union trace_eval_map_item *trace_eval_maps;
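/*
 * Illustrative layout (not from the original source), following the
 * description above, for an array holding N saved maps:
 *
 *   item[0]       head  (head.length = N, head.mod = owning module if not built in)
 *   item[1..N]    map   (the saved trace_eval_map entries)
 *   item[N + 1]   tail  (tail.next -> the next saved array, or NULL at the end)
 *
 * Walking all saved maps is then a matter of skipping the head, stepping
 * through N map items, and following tail.next to the next array.
 */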
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160 
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 static void ftrace_trace_userstack(struct ring_buffer *buffer,
163 				   unsigned long flags, int pc);
164 
165 #define MAX_TRACER_SIZE		100
166 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
167 static char *default_bootup_tracer;
168 
169 static bool allocate_snapshot;
170 
171 static int __init set_cmdline_ftrace(char *str)
172 {
173 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
174 	default_bootup_tracer = bootup_tracer_buf;
175 	/* We are using ftrace early, expand it */
176 	ring_buffer_expanded = true;
177 	return 1;
178 }
179 __setup("ftrace=", set_cmdline_ftrace);
180 
181 static int __init set_ftrace_dump_on_oops(char *str)
182 {
183 	if (*str++ != '=' || !*str) {
184 		ftrace_dump_on_oops = DUMP_ALL;
185 		return 1;
186 	}
187 
188 	if (!strcmp("orig_cpu", str)) {
189 		ftrace_dump_on_oops = DUMP_ORIG;
190 		return 1;
191 	}
192 
193 	return 0;
194 }
195 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
196 
197 static int __init stop_trace_on_warning(char *str)
198 {
199 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
200 		__disable_trace_on_warning = 1;
201 	return 1;
202 }
203 __setup("traceoff_on_warning", stop_trace_on_warning);
204 
205 static int __init boot_alloc_snapshot(char *str)
206 {
207 	allocate_snapshot = true;
208 	/* We also need the main ring buffer expanded */
209 	ring_buffer_expanded = true;
210 	return 1;
211 }
212 __setup("alloc_snapshot", boot_alloc_snapshot);
213 
214 
215 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
216 
217 static int __init set_trace_boot_options(char *str)
218 {
219 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
220 	return 0;
221 }
222 __setup("trace_options=", set_trace_boot_options);
223 
224 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
225 static char *trace_boot_clock __initdata;
226 
227 static int __init set_trace_boot_clock(char *str)
228 {
229 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
230 	trace_boot_clock = trace_boot_clock_buf;
231 	return 0;
232 }
233 __setup("trace_clock=", set_trace_boot_clock);
234 
235 static int __init set_tracepoint_printk(char *str)
236 {
237 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
238 		tracepoint_printk = 1;
239 	return 1;
240 }
241 __setup("tp_printk", set_tracepoint_printk);
242 
243 unsigned long long ns2usecs(u64 nsec)
244 {
245 	nsec += 500;
246 	do_div(nsec, 1000);
247 	return nsec;
248 }
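/*
 * Example (illustrative only): the "+ 500" rounds to the nearest
 * microsecond rather than truncating, e.g.
 *
 *   ns2usecs(1499) == 1    // (1499 + 500) / 1000
 *   ns2usecs(1500) == 2    // (1500 + 500) / 1000
 */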
249 
250 /* trace_flags holds trace_options default values */
251 #define TRACE_DEFAULT_FLAGS						\
252 	(FUNCTION_DEFAULT_FLAGS |					\
253 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
254 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
255 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
256 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
257 
258 /* trace_options that are only supported by global_trace */
259 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
260 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
261 
262 /* trace_flags that are default zero for instances */
263 #define ZEROED_TRACE_FLAGS \
264 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
265 
266 /*
267  * The global_trace is the descriptor that holds the top-level tracing
268  * buffers for the live tracing.
269  */
270 static struct trace_array global_trace = {
271 	.trace_flags = TRACE_DEFAULT_FLAGS,
272 };
273 
274 LIST_HEAD(ftrace_trace_arrays);
275 
276 int trace_array_get(struct trace_array *this_tr)
277 {
278 	struct trace_array *tr;
279 	int ret = -ENODEV;
280 
281 	mutex_lock(&trace_types_lock);
282 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
283 		if (tr == this_tr) {
284 			tr->ref++;
285 			ret = 0;
286 			break;
287 		}
288 	}
289 	mutex_unlock(&trace_types_lock);
290 
291 	return ret;
292 }
293 
294 static void __trace_array_put(struct trace_array *this_tr)
295 {
296 	WARN_ON(!this_tr->ref);
297 	this_tr->ref--;
298 }
299 
300 void trace_array_put(struct trace_array *this_tr)
301 {
302 	mutex_lock(&trace_types_lock);
303 	__trace_array_put(this_tr);
304 	mutex_unlock(&trace_types_lock);
305 }
306 
307 int call_filter_check_discard(struct trace_event_call *call, void *rec,
308 			      struct ring_buffer *buffer,
309 			      struct ring_buffer_event *event)
310 {
311 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
312 	    !filter_match_preds(call->filter, rec)) {
313 		__trace_event_discard_commit(buffer, event);
314 		return 1;
315 	}
316 
317 	return 0;
318 }
319 
320 void trace_free_pid_list(struct trace_pid_list *pid_list)
321 {
322 	vfree(pid_list->pids);
323 	kfree(pid_list);
324 }
325 
326 /**
327  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
328  * @filtered_pids: The list of pids to check
329  * @search_pid: The PID to find in @filtered_pids
330  *
331  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
332  */
333 bool
334 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
335 {
336 	/*
337 	 * If pid_max changed after filtered_pids was created, we
338 	 * by default ignore all pids greater than the previous pid_max.
339 	 */
340 	if (search_pid >= filtered_pids->pid_max)
341 		return false;
342 
343 	return test_bit(search_pid, filtered_pids->pids);
344 }
345 
346 /**
347  * trace_ignore_this_task - should a task be ignored for tracing
348  * @filtered_pids: The list of pids to check
349  * @task: The task that should be ignored if not filtered
350  *
351  * Checks if @task should be traced or not from @filtered_pids.
352  * Returns true if @task should *NOT* be traced.
353  * Returns false if @task should be traced.
354  */
355 bool
356 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
357 {
358 	/*
359 	 * Return false, because if filtered_pids does not exist,
360 	 * all pids are good to trace.
361 	 */
362 	if (!filtered_pids)
363 		return false;
364 
365 	return !trace_find_filtered_pid(filtered_pids, task->pid);
366 }
367 
368 /**
369  * trace_filter_add_remove_task - Add or remove a task from a pid_list
370  * @pid_list: The list to modify
371  * @self: The current task for fork or NULL for exit
372  * @task: The task to add or remove
373  *
374  * When adding a task and @self is defined, the task is only added if @self
375  * is also included in @pid_list. This happens on fork, where tasks should
376  * only be added when the parent is listed. If @self is NULL, then the
377  * @task pid will be removed from the list, which would happen on exit
378  * of a task.
379  */
380 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
381 				  struct task_struct *self,
382 				  struct task_struct *task)
383 {
384 	if (!pid_list)
385 		return;
386 
387 	/* For forks, we only add if the forking task is listed */
388 	if (self) {
389 		if (!trace_find_filtered_pid(pid_list, self->pid))
390 			return;
391 	}
392 
393 	/* Sorry, but we don't support pid_max changing after setting */
394 	if (task->pid >= pid_list->pid_max)
395 		return;
396 
397 	/* "self" is set for forks, and NULL for exits */
398 	if (self)
399 		set_bit(task->pid, pid_list->pids);
400 	else
401 		clear_bit(task->pid, pid_list->pids);
402 }
403 
404 /**
405  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
406  * @pid_list: The pid list to show
407  * @v: The last pid that was shown (the actual pid plus 1, so that zero can be displayed)
408  * @pos: The position of the file
409  *
410  * This is used by the seq_file "next" operation to iterate the pids
411  * listed in a trace_pid_list structure.
412  *
413  * Returns the pid+1 as we want to display pid of zero, but NULL would
414  * stop the iteration.
415  */
416 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
417 {
418 	unsigned long pid = (unsigned long)v;
419 
420 	(*pos)++;
421 
422 	/* pid is already +1 of the actual previous bit */
423 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
424 
425 	/* Return pid + 1 to allow zero to be represented */
426 	if (pid < pid_list->pid_max)
427 		return (void *)(pid + 1);
428 
429 	return NULL;
430 }
431 
432 /**
433  * trace_pid_start - Used for seq_file to start reading pid lists
434  * @pid_list: The pid list to show
435  * @pos: The position of the file
436  *
437  * This is used by seq_file "start" operation to start the iteration
438  * of listing pids.
439  *
440  * Returns the pid+1 as we want to display pid of zero, but NULL would
441  * stop the iteration.
442  */
443 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
444 {
445 	unsigned long pid;
446 	loff_t l = 0;
447 
448 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
449 	if (pid >= pid_list->pid_max)
450 		return NULL;
451 
452 	/* Return pid + 1 so that zero can be the exit value */
453 	for (pid++; pid && l < *pos;
454 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
455 		;
456 	return (void *)pid;
457 }
458 
459 /**
460  * trace_pid_show - show the current pid in seq_file processing
461  * @m: The seq_file structure to write into
462  * @v: A void pointer of the pid (+1) value to display
463  *
464  * Can be directly used by seq_file operations to display the current
465  * pid value.
466  */
467 int trace_pid_show(struct seq_file *m, void *v)
468 {
469 	unsigned long pid = (unsigned long)v - 1;
470 
471 	seq_printf(m, "%lu\n", pid);
472 	return 0;
473 }
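/*
 * Sketch (not part of the original source) of how a seq_file user might
 * wire up the three helpers above; "foo_pid_list" and the foo_* wrappers
 * are hypothetical:
 *
 *   static void *foo_pids_start(struct seq_file *m, loff_t *pos)
 *   {
 *           return trace_pid_start(foo_pid_list, pos);
 *   }
 *
 *   static void *foo_pids_next(struct seq_file *m, void *v, loff_t *pos)
 *   {
 *           return trace_pid_next(foo_pid_list, v, pos);
 *   }
 *
 *   static void foo_pids_stop(struct seq_file *m, void *v)
 *   {
 *   }
 *
 *   static const struct seq_operations foo_pids_seq_ops = {
 *           .start = foo_pids_start,
 *           .next  = foo_pids_next,
 *           .stop  = foo_pids_stop,
 *           .show  = trace_pid_show,
 *   };
 */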
474 
475 /* 128 should be much more than enough */
476 #define PID_BUF_SIZE		127
477 
478 int trace_pid_write(struct trace_pid_list *filtered_pids,
479 		    struct trace_pid_list **new_pid_list,
480 		    const char __user *ubuf, size_t cnt)
481 {
482 	struct trace_pid_list *pid_list;
483 	struct trace_parser parser;
484 	unsigned long val;
485 	int nr_pids = 0;
486 	ssize_t read = 0;
487 	ssize_t ret = 0;
488 	loff_t pos;
489 	pid_t pid;
490 
491 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
492 		return -ENOMEM;
493 
494 	/*
495 	 * Always create a new array; the write is an all or nothing
496 	 * operation. When the user adds new pids, a fresh array is built
497 	 * first, so if the operation fails, the current list is
498 	 * not modified.
499 	 */
500 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
501 	if (!pid_list) {
502 		trace_parser_put(&parser);
503 		return -ENOMEM;
504 	}
505 
506 	pid_list->pid_max = READ_ONCE(pid_max);
507 
508 	/* Only truncating will shrink pid_max */
509 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510 		pid_list->pid_max = filtered_pids->pid_max;
511 
512 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513 	if (!pid_list->pids) {
514 		trace_parser_put(&parser);
515 		kfree(pid_list);
516 		return -ENOMEM;
517 	}
518 
519 	if (filtered_pids) {
520 		/* copy the current bits to the new max */
521 		for_each_set_bit(pid, filtered_pids->pids,
522 				 filtered_pids->pid_max) {
523 			set_bit(pid, pid_list->pids);
524 			nr_pids++;
525 		}
526 	}
527 
528 	while (cnt > 0) {
529 
530 		pos = 0;
531 
532 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
533 		if (ret < 0 || !trace_parser_loaded(&parser))
534 			break;
535 
536 		read += ret;
537 		ubuf += ret;
538 		cnt -= ret;
539 
540 		ret = -EINVAL;
541 		if (kstrtoul(parser.buffer, 0, &val))
542 			break;
543 		if (val >= pid_list->pid_max)
544 			break;
545 
546 		pid = (pid_t)val;
547 
548 		set_bit(pid, pid_list->pids);
549 		nr_pids++;
550 
551 		trace_parser_clear(&parser);
552 		ret = 0;
553 	}
554 	trace_parser_put(&parser);
555 
556 	if (ret < 0) {
557 		trace_free_pid_list(pid_list);
558 		return ret;
559 	}
560 
561 	if (!nr_pids) {
562 		/* Cleared the list of pids */
563 		trace_free_pid_list(pid_list);
564 		read = ret;
565 		pid_list = NULL;
566 	}
567 
568 	*new_pid_list = pid_list;
569 
570 	return read;
571 }
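/*
 * Illustrative use (not part of the original source): pid-filter files in
 * tracefs (for example set_event_pid, where available) feed user writes
 * through trace_pid_write().  A write such as
 *
 *   echo 123 456 > set_event_pid
 *
 * parses the space-separated pids into a freshly allocated pid_list and
 * only then publishes it via *new_pid_list, so a failed write leaves the
 * current list untouched.
 */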
572 
573 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
574 {
575 	u64 ts;
576 
577 	/* Early boot up does not have a buffer yet */
578 	if (!buf->buffer)
579 		return trace_clock_local();
580 
581 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
582 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
583 
584 	return ts;
585 }
586 
587 u64 ftrace_now(int cpu)
588 {
589 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
590 }
591 
592 /**
593  * tracing_is_enabled - Show if global_trace has been disabled
594  *
595  * Shows if the global trace has been enabled or not. It uses the
596  * mirror flag "buffer_disabled" to be used in fast paths such as for
597  * the irqsoff tracer. But it may be inaccurate due to races. If you
598  * need to know the accurate state, use tracing_is_on() which is a little
599  * slower, but accurate.
600  */
601 int tracing_is_enabled(void)
602 {
603 	/*
604 	 * For quick access (irqsoff uses this in fast path), just
605 	 * return the mirror variable of the state of the ring buffer.
606 	 * It's a little racy, but we don't really care.
607 	 */
608 	smp_rmb();
609 	return !global_trace.buffer_disabled;
610 }
611 
612 /*
613  * trace_buf_size is the size in bytes that is allocated
614  * for a buffer. Note, the number of bytes is always rounded
615  * to page size.
616  *
617  * This number is purposely set to a low number of 16384.
618  * If the dump on oops happens, it will be much appreciated
619  * to not have to wait for all that output. In any case, this is
620  * configurable at both boot time and run time.
621  */
622 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
623 
624 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
625 
626 /* trace_types holds a link list of available tracers. */
627 static struct tracer		*trace_types __read_mostly;
628 
629 /*
630  * trace_types_lock is used to protect the trace_types list.
631  */
632 DEFINE_MUTEX(trace_types_lock);
633 
634 /*
635  * serialize the access of the ring buffer
636  *
637  * The ring buffer serializes readers, but that is only low-level protection.
638  * The validity of the events (returned by ring_buffer_peek() etc.)
639  * is not protected by the ring buffer.
640  *
641  * The content of events may become garbage if we allow other processes to
642  * consume these events concurrently:
643  *   A) the page of the consumed events may become a normal page
644  *      (not a reader page) in the ring buffer, and this page will be rewritten
645  *      by the event producer.
646  *   B) the page of the consumed events may become a page for splice_read,
647  *      and this page will be returned to the system.
648  *
649  * These primitives allow multiple processes to access different per-cpu
650  * ring buffers concurrently.
651  *
652  * These primitives don't distinguish read-only and read-consume access.
653  * Multiple read-only accesses are also serialized.
654  */
655 
656 #ifdef CONFIG_SMP
657 static DECLARE_RWSEM(all_cpu_access_lock);
658 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
659 
660 static inline void trace_access_lock(int cpu)
661 {
662 	if (cpu == RING_BUFFER_ALL_CPUS) {
663 		/* gain it for accessing the whole ring buffer. */
664 		down_write(&all_cpu_access_lock);
665 	} else {
666 		/* gain it for accessing a cpu ring buffer. */
667 
668 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
669 		down_read(&all_cpu_access_lock);
670 
671 		/* Secondly block other access to this @cpu ring buffer. */
672 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
673 	}
674 }
675 
676 static inline void trace_access_unlock(int cpu)
677 {
678 	if (cpu == RING_BUFFER_ALL_CPUS) {
679 		up_write(&all_cpu_access_lock);
680 	} else {
681 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
682 		up_read(&all_cpu_access_lock);
683 	}
684 }
685 
686 static inline void trace_access_lock_init(void)
687 {
688 	int cpu;
689 
690 	for_each_possible_cpu(cpu)
691 		mutex_init(&per_cpu(cpu_access_lock, cpu));
692 }
693 
694 #else
695 
696 static DEFINE_MUTEX(access_lock);
697 
698 static inline void trace_access_lock(int cpu)
699 {
700 	(void)cpu;
701 	mutex_lock(&access_lock);
702 }
703 
704 static inline void trace_access_unlock(int cpu)
705 {
706 	(void)cpu;
707 	mutex_unlock(&access_lock);
708 }
709 
710 static inline void trace_access_lock_init(void)
711 {
712 }
713 
714 #endif
715 
716 #ifdef CONFIG_STACKTRACE
717 static void __ftrace_trace_stack(struct ring_buffer *buffer,
718 				 unsigned long flags,
719 				 int skip, int pc, struct pt_regs *regs);
720 static inline void ftrace_trace_stack(struct trace_array *tr,
721 				      struct ring_buffer *buffer,
722 				      unsigned long flags,
723 				      int skip, int pc, struct pt_regs *regs);
724 
725 #else
726 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
727 					unsigned long flags,
728 					int skip, int pc, struct pt_regs *regs)
729 {
730 }
731 static inline void ftrace_trace_stack(struct trace_array *tr,
732 				      struct ring_buffer *buffer,
733 				      unsigned long flags,
734 				      int skip, int pc, struct pt_regs *regs)
735 {
736 }
737 
738 #endif
739 
740 static __always_inline void
741 trace_event_setup(struct ring_buffer_event *event,
742 		  int type, unsigned long flags, int pc)
743 {
744 	struct trace_entry *ent = ring_buffer_event_data(event);
745 
746 	tracing_generic_entry_update(ent, flags, pc);
747 	ent->type = type;
748 }
749 
750 static __always_inline struct ring_buffer_event *
751 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
752 			  int type,
753 			  unsigned long len,
754 			  unsigned long flags, int pc)
755 {
756 	struct ring_buffer_event *event;
757 
758 	event = ring_buffer_lock_reserve(buffer, len);
759 	if (event != NULL)
760 		trace_event_setup(event, type, flags, pc);
761 
762 	return event;
763 }
764 
765 void tracer_tracing_on(struct trace_array *tr)
766 {
767 	if (tr->trace_buffer.buffer)
768 		ring_buffer_record_on(tr->trace_buffer.buffer);
769 	/*
770 	 * This flag is looked at when buffers haven't been allocated
771 	 * yet, or by some tracers (like irqsoff) that just want to
772 	 * know if the ring buffer has been disabled, but it can handle
773 	 * races where it gets disabled but we still do a record.
774 	 * As the check is in the fast path of the tracers, it is more
775 	 * important to be fast than accurate.
776 	 */
777 	tr->buffer_disabled = 0;
778 	/* Make the flag seen by readers */
779 	smp_wmb();
780 }
781 
782 /**
783  * tracing_on - enable tracing buffers
784  *
785  * This function enables tracing buffers that may have been
786  * disabled with tracing_off.
787  */
788 void tracing_on(void)
789 {
790 	tracer_tracing_on(&global_trace);
791 }
792 EXPORT_SYMBOL_GPL(tracing_on);
793 
794 
795 static __always_inline void
796 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
797 {
798 	__this_cpu_write(trace_taskinfo_save, true);
799 
800 	/* If this is the temp buffer, we need to commit fully */
801 	if (this_cpu_read(trace_buffered_event) == event) {
802 		/* Length is in event->array[0] */
803 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
804 		/* Release the temp buffer */
805 		this_cpu_dec(trace_buffered_event_cnt);
806 	} else
807 		ring_buffer_unlock_commit(buffer, event);
808 }
809 
810 /**
811  * __trace_puts - write a constant string into the trace buffer.
812  * @ip:	   The address of the caller
813  * @str:   The constant string to write
814  * @size:  The size of the string.
815  */
816 int __trace_puts(unsigned long ip, const char *str, int size)
817 {
818 	struct ring_buffer_event *event;
819 	struct ring_buffer *buffer;
820 	struct print_entry *entry;
821 	unsigned long irq_flags;
822 	int alloc;
823 	int pc;
824 
825 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
826 		return 0;
827 
828 	pc = preempt_count();
829 
830 	if (unlikely(tracing_selftest_running || tracing_disabled))
831 		return 0;
832 
833 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
834 
835 	local_save_flags(irq_flags);
836 	buffer = global_trace.trace_buffer.buffer;
837 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
838 					    irq_flags, pc);
839 	if (!event)
840 		return 0;
841 
842 	entry = ring_buffer_event_data(event);
843 	entry->ip = ip;
844 
845 	memcpy(&entry->buf, str, size);
846 
847 	/* Add a newline if necessary */
848 	if (entry->buf[size - 1] != '\n') {
849 		entry->buf[size] = '\n';
850 		entry->buf[size + 1] = '\0';
851 	} else
852 		entry->buf[size] = '\0';
853 
854 	__buffer_unlock_commit(buffer, event);
855 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
856 
857 	return size;
858 }
859 EXPORT_SYMBOL_GPL(__trace_puts);
860 
861 /**
862  * __trace_bputs - write the pointer to a constant string into trace buffer
863  * @ip:	   The address of the caller
864  * @str:   The constant string to write to the buffer
865  */
866 int __trace_bputs(unsigned long ip, const char *str)
867 {
868 	struct ring_buffer_event *event;
869 	struct ring_buffer *buffer;
870 	struct bputs_entry *entry;
871 	unsigned long irq_flags;
872 	int size = sizeof(struct bputs_entry);
873 	int pc;
874 
875 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
876 		return 0;
877 
878 	pc = preempt_count();
879 
880 	if (unlikely(tracing_selftest_running || tracing_disabled))
881 		return 0;
882 
883 	local_save_flags(irq_flags);
884 	buffer = global_trace.trace_buffer.buffer;
885 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
886 					    irq_flags, pc);
887 	if (!event)
888 		return 0;
889 
890 	entry = ring_buffer_event_data(event);
891 	entry->ip			= ip;
892 	entry->str			= str;
893 
894 	__buffer_unlock_commit(buffer, event);
895 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
896 
897 	return 1;
898 }
899 EXPORT_SYMBOL_GPL(__trace_bputs);
900 
901 #ifdef CONFIG_TRACER_SNAPSHOT
902 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
903 {
904 	struct tracer *tracer = tr->current_trace;
905 	unsigned long flags;
906 
907 	if (in_nmi()) {
908 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
909 		internal_trace_puts("*** snapshot is being ignored        ***\n");
910 		return;
911 	}
912 
913 	if (!tr->allocated_snapshot) {
914 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
915 		internal_trace_puts("*** stopping trace here!   ***\n");
916 		tracing_off();
917 		return;
918 	}
919 
920 	/* Note, snapshot can not be used when the tracer uses it */
921 	if (tracer->use_max_tr) {
922 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
923 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
924 		return;
925 	}
926 
927 	local_irq_save(flags);
928 	update_max_tr(tr, current, smp_processor_id(), cond_data);
929 	local_irq_restore(flags);
930 }
931 
932 void tracing_snapshot_instance(struct trace_array *tr)
933 {
934 	tracing_snapshot_instance_cond(tr, NULL);
935 }
936 
937 /**
938  * tracing_snapshot - take a snapshot of the current buffer.
939  *
940  * This causes a swap between the snapshot buffer and the current live
941  * tracing buffer. You can use this to take snapshots of the live
942  * trace when some condition is triggered, but continue to trace.
943  *
944  * Note, make sure to allocate the snapshot with either
945  * a tracing_snapshot_alloc(), or by doing it manually
946  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
947  *
948  * If the snapshot buffer is not allocated, it will stop tracing.
949  * Basically making a permanent snapshot.
950  */
951 void tracing_snapshot(void)
952 {
953 	struct trace_array *tr = &global_trace;
954 
955 	tracing_snapshot_instance(tr);
956 }
957 EXPORT_SYMBOL_GPL(tracing_snapshot);
958 
959 /**
960  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
961  * @tr:		The tracing instance to snapshot
962  * @cond_data:	The data to be tested conditionally, and possibly saved
963  *
964  * This is the same as tracing_snapshot() except that the snapshot is
965  * conditional - the snapshot will only happen if the
966  * cond_snapshot.update() implementation receiving the cond_data
967  * returns true, which means that the trace array's cond_snapshot
968  * update() operation used the cond_data to determine whether the
969  * snapshot should be taken, and if it was, presumably saved it along
970  * with the snapshot.
971  */
972 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
973 {
974 	tracing_snapshot_instance_cond(tr, cond_data);
975 }
976 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
977 
978 /**
979  * tracing_snapshot_cond_data - get the user data associated with a snapshot
980  * @tr:		The tracing instance
981  *
982  * When the user enables a conditional snapshot using
983  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
984  * with the snapshot.  This accessor is used to retrieve it.
985  *
986  * Should not be called from cond_snapshot.update(), since it takes
987  * the tr->max_lock lock, which the code calling
988  * cond_snapshot.update() has already done.
989  *
990  * Returns the cond_data associated with the trace array's snapshot.
991  */
992 void *tracing_cond_snapshot_data(struct trace_array *tr)
993 {
994 	void *cond_data = NULL;
995 
996 	arch_spin_lock(&tr->max_lock);
997 
998 	if (tr->cond_snapshot)
999 		cond_data = tr->cond_snapshot->cond_data;
1000 
1001 	arch_spin_unlock(&tr->max_lock);
1002 
1003 	return cond_data;
1004 }
1005 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1006 
1007 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1008 					struct trace_buffer *size_buf, int cpu_id);
1009 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1010 
1011 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1012 {
1013 	int ret;
1014 
1015 	if (!tr->allocated_snapshot) {
1016 
1017 		/* allocate spare buffer */
1018 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1019 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1020 		if (ret < 0)
1021 			return ret;
1022 
1023 		tr->allocated_snapshot = true;
1024 	}
1025 
1026 	return 0;
1027 }
1028 
1029 static void free_snapshot(struct trace_array *tr)
1030 {
1031 	/*
1032 	 * We don't free the ring buffer; instead, we resize it because
1033 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1034 	 * we want to preserve it.
1035 	 */
1036 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1037 	set_buffer_entries(&tr->max_buffer, 1);
1038 	tracing_reset_online_cpus(&tr->max_buffer);
1039 	tr->allocated_snapshot = false;
1040 }
1041 
1042 /**
1043  * tracing_alloc_snapshot - allocate snapshot buffer.
1044  *
1045  * This only allocates the snapshot buffer if it isn't already
1046  * allocated - it doesn't also take a snapshot.
1047  *
1048  * This is meant to be used in cases where the snapshot buffer needs
1049  * to be set up for events that can't sleep but need to be able to
1050  * trigger a snapshot.
1051  */
1052 int tracing_alloc_snapshot(void)
1053 {
1054 	struct trace_array *tr = &global_trace;
1055 	int ret;
1056 
1057 	ret = tracing_alloc_snapshot_instance(tr);
1058 	WARN_ON(ret < 0);
1059 
1060 	return ret;
1061 }
1062 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1063 
1064 /**
1065  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1066  *
1067  * This is similar to tracing_snapshot(), but it will allocate the
1068  * snapshot buffer if it isn't already allocated. Use this only
1069  * where it is safe to sleep, as the allocation may sleep.
1070  *
1071  * This causes a swap between the snapshot buffer and the current live
1072  * tracing buffer. You can use this to take snapshots of the live
1073  * trace when some condition is triggered, but continue to trace.
1074  */
1075 void tracing_snapshot_alloc(void)
1076 {
1077 	int ret;
1078 
1079 	ret = tracing_alloc_snapshot();
1080 	if (ret < 0)
1081 		return;
1082 
1083 	tracing_snapshot();
1084 }
1085 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
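/*
 * Illustrative use from kernel code (not part of the original source),
 * combining the helpers above:
 *
 *   // in a context that may sleep, e.g. module init
 *   tracing_snapshot_alloc();       // allocate the spare buffer and snapshot
 *
 *   // or, if allocation and the trigger points are separate:
 *   if (tracing_alloc_snapshot() == 0) {
 *           // ... later, possibly from a context that cannot sleep ...
 *           tracing_snapshot();     // swap the live buffer with the spare one
 *   }
 */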
1086 
1087 /**
1088  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1089  * @tr:		The tracing instance
1090  * @cond_data:	User data to associate with the snapshot
1091  * @update:	Implementation of the cond_snapshot update function
1092  *
1093  * Check whether the conditional snapshot for the given instance has
1094  * already been enabled, or if the current tracer is already using a
1095  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1096  * save the cond_data and update function inside.
1097  *
1098  * Returns 0 if successful, error otherwise.
1099  */
1100 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1101 				 cond_update_fn_t update)
1102 {
1103 	struct cond_snapshot *cond_snapshot;
1104 	int ret = 0;
1105 
1106 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1107 	if (!cond_snapshot)
1108 		return -ENOMEM;
1109 
1110 	cond_snapshot->cond_data = cond_data;
1111 	cond_snapshot->update = update;
1112 
1113 	mutex_lock(&trace_types_lock);
1114 
1115 	ret = tracing_alloc_snapshot_instance(tr);
1116 	if (ret)
1117 		goto fail_unlock;
1118 
1119 	if (tr->current_trace->use_max_tr) {
1120 		ret = -EBUSY;
1121 		goto fail_unlock;
1122 	}
1123 
1124 	/*
1125 	 * The cond_snapshot can only change to NULL without the
1126 	 * trace_types_lock. We don't care if we race with it going
1127 	 * to NULL, but we want to make sure that it's not set to
1128 	 * something other than NULL when we get here, which we can
1129 	 * do safely with only holding the trace_types_lock and not
1130 	 * having to take the max_lock.
1131 	 */
1132 	if (tr->cond_snapshot) {
1133 		ret = -EBUSY;
1134 		goto fail_unlock;
1135 	}
1136 
1137 	arch_spin_lock(&tr->max_lock);
1138 	tr->cond_snapshot = cond_snapshot;
1139 	arch_spin_unlock(&tr->max_lock);
1140 
1141 	mutex_unlock(&trace_types_lock);
1142 
1143 	return ret;
1144 
1145  fail_unlock:
1146 	mutex_unlock(&trace_types_lock);
1147 	kfree(cond_snapshot);
1148 	return ret;
1149 }
1150 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1151 
1152 /**
1153  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1154  * @tr:		The tracing instance
1155  *
1156  * Check whether the conditional snapshot for the given instance is
1157  * enabled; if so, free the cond_snapshot associated with it,
1158  * otherwise return -EINVAL.
1159  *
1160  * Returns 0 if successful, error otherwise.
1161  */
1162 int tracing_snapshot_cond_disable(struct trace_array *tr)
1163 {
1164 	int ret = 0;
1165 
1166 	arch_spin_lock(&tr->max_lock);
1167 
1168 	if (!tr->cond_snapshot)
1169 		ret = -EINVAL;
1170 	else {
1171 		kfree(tr->cond_snapshot);
1172 		tr->cond_snapshot = NULL;
1173 	}
1174 
1175 	arch_spin_unlock(&tr->max_lock);
1176 
1177 	return ret;
1178 }
1179 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
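/*
 * Sketch of a conditional-snapshot user (not part of the original source);
 * the my_* names and "measured_latency" are hypothetical:
 *
 *   static bool my_update(struct trace_array *tr, void *cond_data)
 *   {
 *           u64 *latency = cond_data;
 *
 *           // take the snapshot only for latencies above 100 usecs
 *           return *latency > 100000;
 *   }
 *
 *   ret = tracing_snapshot_cond_enable(tr, NULL, my_update);
 *   ...
 *   tracing_snapshot_cond(tr, &measured_latency);  // snapshots iff my_update() returns true
 *   ...
 *   tracing_snapshot_cond_disable(tr);
 */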
1180 #else
1181 void tracing_snapshot(void)
1182 {
1183 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1184 }
1185 EXPORT_SYMBOL_GPL(tracing_snapshot);
1186 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1187 {
1188 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1189 }
1190 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1191 int tracing_alloc_snapshot(void)
1192 {
1193 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1194 	return -ENODEV;
1195 }
1196 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1197 void tracing_snapshot_alloc(void)
1198 {
1199 	/* Give warning */
1200 	tracing_snapshot();
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1203 void *tracing_cond_snapshot_data(struct trace_array *tr)
1204 {
1205 	return NULL;
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1208 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1209 {
1210 	return -ENODEV;
1211 }
1212 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1213 int tracing_snapshot_cond_disable(struct trace_array *tr)
1214 {
1215 	return false;
1216 }
1217 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1218 #endif /* CONFIG_TRACER_SNAPSHOT */
1219 
1220 void tracer_tracing_off(struct trace_array *tr)
1221 {
1222 	if (tr->trace_buffer.buffer)
1223 		ring_buffer_record_off(tr->trace_buffer.buffer);
1224 	/*
1225 	 * This flag is looked at when buffers haven't been allocated
1226 	 * yet, or by some tracers (like irqsoff) that just want to
1227 	 * know if the ring buffer has been disabled, but it can handle
1228 	 * races where it gets disabled but we still do a record.
1229 	 * As the check is in the fast path of the tracers, it is more
1230 	 * important to be fast than accurate.
1231 	 */
1232 	tr->buffer_disabled = 1;
1233 	/* Make the flag seen by readers */
1234 	smp_wmb();
1235 }
1236 
1237 /**
1238  * tracing_off - turn off tracing buffers
1239  *
1240  * This function stops the tracing buffers from recording data.
1241  * It does not disable any overhead the tracers themselves may
1242  * be causing. This function simply causes all recording to
1243  * the ring buffers to fail.
1244  */
1245 void tracing_off(void)
1246 {
1247 	tracer_tracing_off(&global_trace);
1248 }
1249 EXPORT_SYMBOL_GPL(tracing_off);
1250 
1251 void disable_trace_on_warning(void)
1252 {
1253 	if (__disable_trace_on_warning)
1254 		tracing_off();
1255 }
1256 
1257 /**
1258  * tracer_tracing_is_on - show real state of ring buffer enabled
1259  * @tr: the trace array to check whether the ring buffer is enabled
1260  *
1261  * Shows real state of the ring buffer if it is enabled or not.
1262  */
1263 bool tracer_tracing_is_on(struct trace_array *tr)
1264 {
1265 	if (tr->trace_buffer.buffer)
1266 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1267 	return !tr->buffer_disabled;
1268 }
1269 
1270 /**
1271  * tracing_is_on - show state of ring buffers enabled
1272  */
1273 int tracing_is_on(void)
1274 {
1275 	return tracer_tracing_is_on(&global_trace);
1276 }
1277 EXPORT_SYMBOL_GPL(tracing_is_on);
1278 
1279 static int __init set_buf_size(char *str)
1280 {
1281 	unsigned long buf_size;
1282 
1283 	if (!str)
1284 		return 0;
1285 	buf_size = memparse(str, &str);
1286 	/* nr_entries can not be zero */
1287 	if (buf_size == 0)
1288 		return 0;
1289 	trace_buf_size = buf_size;
1290 	return 1;
1291 }
1292 __setup("trace_buf_size=", set_buf_size);
1293 
1294 static int __init set_tracing_thresh(char *str)
1295 {
1296 	unsigned long threshold;
1297 	int ret;
1298 
1299 	if (!str)
1300 		return 0;
1301 	ret = kstrtoul(str, 0, &threshold);
1302 	if (ret < 0)
1303 		return 0;
1304 	tracing_thresh = threshold * 1000;
1305 	return 1;
1306 }
1307 __setup("tracing_thresh=", set_tracing_thresh);
1308 
1309 unsigned long nsecs_to_usecs(unsigned long nsecs)
1310 {
1311 	return nsecs / 1000;
1312 }
1313 
1314 /*
1315  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1316  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1317  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1318  * of strings in the order that the evals (enum) were defined.
1319  */
1320 #undef C
1321 #define C(a, b) b
1322 
1323 /* These must match the bit positions in trace_iterator_flags */
1324 static const char *trace_options[] = {
1325 	TRACE_FLAGS
1326 	NULL
1327 };
1328 
1329 static struct {
1330 	u64 (*func)(void);
1331 	const char *name;
1332 	int in_ns;		/* is this clock in nanoseconds? */
1333 } trace_clocks[] = {
1334 	{ trace_clock_local,		"local",	1 },
1335 	{ trace_clock_global,		"global",	1 },
1336 	{ trace_clock_counter,		"counter",	0 },
1337 	{ trace_clock_jiffies,		"uptime",	0 },
1338 	{ trace_clock,			"perf",		1 },
1339 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1340 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1341 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1342 	ARCH_TRACE_CLOCKS
1343 };
1344 
1345 bool trace_clock_in_ns(struct trace_array *tr)
1346 {
1347 	if (trace_clocks[tr->clock_id].in_ns)
1348 		return true;
1349 
1350 	return false;
1351 }
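/*
 * Illustrative selection of a clock from the table above (not part of the
 * original source): using the "trace_clock=" boot parameter handled
 * earlier in this file, e.g.
 *
 *   trace_clock=global
 *
 * picks trace_clock_global; "counter" and "uptime" are the two entries
 * whose in_ns field is 0, so trace_clock_in_ns() returns false for them.
 */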
1352 
1353 /*
1354  * trace_parser_get_init - gets the buffer for trace parser
1355  */
1356 int trace_parser_get_init(struct trace_parser *parser, int size)
1357 {
1358 	memset(parser, 0, sizeof(*parser));
1359 
1360 	parser->buffer = kmalloc(size, GFP_KERNEL);
1361 	if (!parser->buffer)
1362 		return 1;
1363 
1364 	parser->size = size;
1365 	return 0;
1366 }
1367 
1368 /*
1369  * trace_parser_put - frees the buffer for trace parser
1370  */
1371 void trace_parser_put(struct trace_parser *parser)
1372 {
1373 	kfree(parser->buffer);
1374 	parser->buffer = NULL;
1375 }
1376 
1377 /*
1378  * trace_get_user - reads the user input string separated by  space
1379  * (matched by isspace(ch))
1380  *
1381  * For each string found the 'struct trace_parser' is updated,
1382  * and the function returns.
1383  *
1384  * Returns number of bytes read.
1385  *
1386  * See kernel/trace/trace.h for 'struct trace_parser' details.
1387  */
1388 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1389 	size_t cnt, loff_t *ppos)
1390 {
1391 	char ch;
1392 	size_t read = 0;
1393 	ssize_t ret;
1394 
1395 	if (!*ppos)
1396 		trace_parser_clear(parser);
1397 
1398 	ret = get_user(ch, ubuf++);
1399 	if (ret)
1400 		goto out;
1401 
1402 	read++;
1403 	cnt--;
1404 
1405 	/*
1406 	 * The parser is not finished with the last write,
1407 	 * continue reading the user input without skipping spaces.
1408 	 */
1409 	if (!parser->cont) {
1410 		/* skip white space */
1411 		while (cnt && isspace(ch)) {
1412 			ret = get_user(ch, ubuf++);
1413 			if (ret)
1414 				goto out;
1415 			read++;
1416 			cnt--;
1417 		}
1418 
1419 		parser->idx = 0;
1420 
1421 		/* only spaces were written */
1422 		if (isspace(ch) || !ch) {
1423 			*ppos += read;
1424 			ret = read;
1425 			goto out;
1426 		}
1427 	}
1428 
1429 	/* read the non-space input */
1430 	while (cnt && !isspace(ch) && ch) {
1431 		if (parser->idx < parser->size - 1)
1432 			parser->buffer[parser->idx++] = ch;
1433 		else {
1434 			ret = -EINVAL;
1435 			goto out;
1436 		}
1437 		ret = get_user(ch, ubuf++);
1438 		if (ret)
1439 			goto out;
1440 		read++;
1441 		cnt--;
1442 	}
1443 
1444 	/* We either got finished input or we have to wait for another call. */
1445 	if (isspace(ch) || !ch) {
1446 		parser->buffer[parser->idx] = 0;
1447 		parser->cont = false;
1448 	} else if (parser->idx < parser->size - 1) {
1449 		parser->cont = true;
1450 		parser->buffer[parser->idx++] = ch;
1451 		/* Make sure the parsed string always terminates with '\0'. */
1452 		parser->buffer[parser->idx] = 0;
1453 	} else {
1454 		ret = -EINVAL;
1455 		goto out;
1456 	}
1457 
1458 	*ppos += read;
1459 	ret = read;
1460 
1461 out:
1462 	return ret;
1463 }
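/*
 * Worked example (illustrative only): for a user write of "abc def",
 * the first call skips any leading spaces, copies "abc" into
 * parser->buffer, sees the following space and returns with
 * parser->cont == false.  If the write had ended in the middle of a word
 * (e.g. "abc de" with more to come), parser->cont would be set to true
 * and the next call would continue the same token without skipping
 * spaces.
 */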
1464 
1465 /* TODO add a seq_buf_to_buffer() */
1466 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1467 {
1468 	int len;
1469 
1470 	if (trace_seq_used(s) <= s->seq.readpos)
1471 		return -EBUSY;
1472 
1473 	len = trace_seq_used(s) - s->seq.readpos;
1474 	if (cnt > len)
1475 		cnt = len;
1476 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1477 
1478 	s->seq.readpos += cnt;
1479 	return cnt;
1480 }
1481 
1482 unsigned long __read_mostly	tracing_thresh;
1483 
1484 #ifdef CONFIG_TRACER_MAX_TRACE
1485 /*
1486  * Copy the new maximum trace into the separate maximum-trace
1487  * structure. (this way the maximum trace is permanently saved,
1488  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1489  */
1490 static void
1491 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1492 {
1493 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1494 	struct trace_buffer *max_buf = &tr->max_buffer;
1495 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1496 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1497 
1498 	max_buf->cpu = cpu;
1499 	max_buf->time_start = data->preempt_timestamp;
1500 
1501 	max_data->saved_latency = tr->max_latency;
1502 	max_data->critical_start = data->critical_start;
1503 	max_data->critical_end = data->critical_end;
1504 
1505 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1506 	max_data->pid = tsk->pid;
1507 	/*
1508 	 * If tsk == current, then use current_uid(), as that does not use
1509 	 * RCU. The irq tracer can be called out of RCU scope.
1510 	 */
1511 	if (tsk == current)
1512 		max_data->uid = current_uid();
1513 	else
1514 		max_data->uid = task_uid(tsk);
1515 
1516 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1517 	max_data->policy = tsk->policy;
1518 	max_data->rt_priority = tsk->rt_priority;
1519 
1520 	/* record this task's comm */
1521 	tracing_record_cmdline(tsk);
1522 }
1523 
1524 /**
1525  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1526  * @tr: tracer
1527  * @tsk: the task with the latency
1528  * @cpu: The cpu that initiated the trace.
1529  * @cond_data: User data associated with a conditional snapshot
1530  *
1531  * Flip the buffers between the @tr and the max_tr and record information
1532  * about which task was the cause of this latency.
1533  */
1534 void
1535 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1536 	      void *cond_data)
1537 {
1538 	if (tr->stop_count)
1539 		return;
1540 
1541 	WARN_ON_ONCE(!irqs_disabled());
1542 
1543 	if (!tr->allocated_snapshot) {
1544 		/* Only the nop tracer should hit this when disabling */
1545 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1546 		return;
1547 	}
1548 
1549 	arch_spin_lock(&tr->max_lock);
1550 
1551 	/* Inherit the recordable setting from trace_buffer */
1552 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1553 		ring_buffer_record_on(tr->max_buffer.buffer);
1554 	else
1555 		ring_buffer_record_off(tr->max_buffer.buffer);
1556 
1557 #ifdef CONFIG_TRACER_SNAPSHOT
1558 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1559 		goto out_unlock;
1560 #endif
1561 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1562 
1563 	__update_max_tr(tr, tsk, cpu);
1564 
1565  out_unlock:
1566 	arch_spin_unlock(&tr->max_lock);
1567 }
1568 
1569 /**
1570  * update_max_tr_single - only copy one trace over, and reset the rest
1571  * @tr: tracer
1572  * @tsk: task with the latency
1573  * @cpu: the cpu of the buffer to copy.
1574  *
1575  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1576  */
1577 void
1578 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1579 {
1580 	int ret;
1581 
1582 	if (tr->stop_count)
1583 		return;
1584 
1585 	WARN_ON_ONCE(!irqs_disabled());
1586 	if (!tr->allocated_snapshot) {
1587 		/* Only the nop tracer should hit this when disabling */
1588 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1589 		return;
1590 	}
1591 
1592 	arch_spin_lock(&tr->max_lock);
1593 
1594 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1595 
1596 	if (ret == -EBUSY) {
1597 		/*
1598 		 * We failed to swap the buffer due to a commit taking
1599 		 * place on this CPU. We fail to record, but we reset
1600 		 * the max trace buffer (no one writes directly to it)
1601 		 * and flag that it failed.
1602 		 */
1603 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1604 			"Failed to swap buffers due to commit in progress\n");
1605 	}
1606 
1607 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1608 
1609 	__update_max_tr(tr, tsk, cpu);
1610 	arch_spin_unlock(&tr->max_lock);
1611 }
1612 #endif /* CONFIG_TRACER_MAX_TRACE */
1613 
1614 static int wait_on_pipe(struct trace_iterator *iter, int full)
1615 {
1616 	/* Iterators are static, they should be filled or empty */
1617 	if (trace_buffer_iter(iter, iter->cpu_file))
1618 		return 0;
1619 
1620 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1621 				full);
1622 }
1623 
1624 #ifdef CONFIG_FTRACE_STARTUP_TEST
1625 static bool selftests_can_run;
1626 
1627 struct trace_selftests {
1628 	struct list_head		list;
1629 	struct tracer			*type;
1630 };
1631 
1632 static LIST_HEAD(postponed_selftests);
1633 
1634 static int save_selftest(struct tracer *type)
1635 {
1636 	struct trace_selftests *selftest;
1637 
1638 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1639 	if (!selftest)
1640 		return -ENOMEM;
1641 
1642 	selftest->type = type;
1643 	list_add(&selftest->list, &postponed_selftests);
1644 	return 0;
1645 }
1646 
1647 static int run_tracer_selftest(struct tracer *type)
1648 {
1649 	struct trace_array *tr = &global_trace;
1650 	struct tracer *saved_tracer = tr->current_trace;
1651 	int ret;
1652 
1653 	if (!type->selftest || tracing_selftest_disabled)
1654 		return 0;
1655 
1656 	/*
1657 	 * If a tracer registers early in boot up (before scheduling is
1658 	 * initialized and such), then do not run its selftests yet.
1659 	 * Instead, run it a little later in the boot process.
1660 	 */
1661 	if (!selftests_can_run)
1662 		return save_selftest(type);
1663 
1664 	/*
1665 	 * Run a selftest on this tracer.
1666 	 * Here we reset the trace buffer, and set the current
1667 	 * tracer to be this tracer. The tracer can then run some
1668 	 * internal tracing to verify that everything is in order.
1669 	 * If we fail, we do not register this tracer.
1670 	 */
1671 	tracing_reset_online_cpus(&tr->trace_buffer);
1672 
1673 	tr->current_trace = type;
1674 
1675 #ifdef CONFIG_TRACER_MAX_TRACE
1676 	if (type->use_max_tr) {
1677 		/* If we expanded the buffers, make sure the max is expanded too */
1678 		if (ring_buffer_expanded)
1679 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1680 					   RING_BUFFER_ALL_CPUS);
1681 		tr->allocated_snapshot = true;
1682 	}
1683 #endif
1684 
1685 	/* the test is responsible for initializing and enabling */
1686 	pr_info("Testing tracer %s: ", type->name);
1687 	ret = type->selftest(type, tr);
1688 	/* the test is responsible for resetting too */
1689 	tr->current_trace = saved_tracer;
1690 	if (ret) {
1691 		printk(KERN_CONT "FAILED!\n");
1692 		/* Add the warning after printing 'FAILED' */
1693 		WARN_ON(1);
1694 		return -1;
1695 	}
1696 	/* Only reset on passing, to avoid touching corrupted buffers */
1697 	tracing_reset_online_cpus(&tr->trace_buffer);
1698 
1699 #ifdef CONFIG_TRACER_MAX_TRACE
1700 	if (type->use_max_tr) {
1701 		tr->allocated_snapshot = false;
1702 
1703 		/* Shrink the max buffer again */
1704 		if (ring_buffer_expanded)
1705 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1706 					   RING_BUFFER_ALL_CPUS);
1707 	}
1708 #endif
1709 
1710 	printk(KERN_CONT "PASSED\n");
1711 	return 0;
1712 }
1713 
1714 static __init int init_trace_selftests(void)
1715 {
1716 	struct trace_selftests *p, *n;
1717 	struct tracer *t, **last;
1718 	int ret;
1719 
1720 	selftests_can_run = true;
1721 
1722 	mutex_lock(&trace_types_lock);
1723 
1724 	if (list_empty(&postponed_selftests))
1725 		goto out;
1726 
1727 	pr_info("Running postponed tracer tests:\n");
1728 
1729 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1730 		ret = run_tracer_selftest(p->type);
1731 		/* If the test fails, then warn and remove from available_tracers */
1732 		if (ret < 0) {
1733 			WARN(1, "tracer: %s failed selftest, disabling\n",
1734 			     p->type->name);
1735 			last = &trace_types;
1736 			for (t = trace_types; t; t = t->next) {
1737 				if (t == p->type) {
1738 					*last = t->next;
1739 					break;
1740 				}
1741 				last = &t->next;
1742 			}
1743 		}
1744 		list_del(&p->list);
1745 		kfree(p);
1746 	}
1747 
1748  out:
1749 	mutex_unlock(&trace_types_lock);
1750 
1751 	return 0;
1752 }
1753 core_initcall(init_trace_selftests);
1754 #else
1755 static inline int run_tracer_selftest(struct tracer *type)
1756 {
1757 	return 0;
1758 }
1759 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1760 
1761 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1762 
1763 static void __init apply_trace_boot_options(void);
1764 
1765 /**
1766  * register_tracer - register a tracer with the ftrace system.
1767  * @type: the plugin for the tracer
1768  *
1769  * Register a new plugin tracer.
1770  */
1771 int __init register_tracer(struct tracer *type)
1772 {
1773 	struct tracer *t;
1774 	int ret = 0;
1775 
1776 	if (!type->name) {
1777 		pr_info("Tracer must have a name\n");
1778 		return -1;
1779 	}
1780 
1781 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1782 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1783 		return -1;
1784 	}
1785 
1786 	mutex_lock(&trace_types_lock);
1787 
1788 	tracing_selftest_running = true;
1789 
1790 	for (t = trace_types; t; t = t->next) {
1791 		if (strcmp(type->name, t->name) == 0) {
1792 			/* already found */
1793 			pr_info("Tracer %s already registered\n",
1794 				type->name);
1795 			ret = -1;
1796 			goto out;
1797 		}
1798 	}
1799 
1800 	if (!type->set_flag)
1801 		type->set_flag = &dummy_set_flag;
1802 	if (!type->flags) {
1803 		/* allocate a dummy tracer_flags */
1804 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1805 		if (!type->flags) {
1806 			ret = -ENOMEM;
1807 			goto out;
1808 		}
1809 		type->flags->val = 0;
1810 		type->flags->opts = dummy_tracer_opt;
1811 	} else
1812 		if (!type->flags->opts)
1813 			type->flags->opts = dummy_tracer_opt;
1814 
1815 	/* store the tracer for __set_tracer_option */
1816 	type->flags->trace = type;
1817 
1818 	ret = run_tracer_selftest(type);
1819 	if (ret < 0)
1820 		goto out;
1821 
1822 	type->next = trace_types;
1823 	trace_types = type;
1824 	add_tracer_options(&global_trace, type);
1825 
1826  out:
1827 	tracing_selftest_running = false;
1828 	mutex_unlock(&trace_types_lock);
1829 
1830 	if (ret || !default_bootup_tracer)
1831 		goto out_unlock;
1832 
1833 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1834 		goto out_unlock;
1835 
1836 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1837 	/* Do we want this tracer to start on bootup? */
1838 	tracing_set_tracer(&global_trace, type->name);
1839 	default_bootup_tracer = NULL;
1840 
1841 	apply_trace_boot_options();
1842 
1843 	/* disable other selftests, since this will break them. */
1844 	tracing_selftest_disabled = true;
1845 #ifdef CONFIG_FTRACE_STARTUP_TEST
1846 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1847 	       type->name);
1848 #endif
1849 
1850  out_unlock:
1851 	return ret;
1852 }
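/*
 * Sketch of a registration site (not part of the original source); the
 * my_tracer names are hypothetical, and the .init callback is assumed to
 * be part of struct tracer (it is not referenced in this excerpt):
 *
 *   static int my_tracer_init(struct trace_array *tr)
 *   {
 *           return 0;
 *   }
 *
 *   static struct tracer my_tracer __read_mostly = {
 *           .name = "my_tracer",
 *           .init = my_tracer_init,
 *   };
 *
 *   static __init int init_my_tracer(void)
 *   {
 *           return register_tracer(&my_tracer);
 *   }
 *   core_initcall(init_my_tracer);
 */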
1853 
1854 void tracing_reset(struct trace_buffer *buf, int cpu)
1855 {
1856 	struct ring_buffer *buffer = buf->buffer;
1857 
1858 	if (!buffer)
1859 		return;
1860 
1861 	ring_buffer_record_disable(buffer);
1862 
1863 	/* Make sure all commits have finished */
1864 	synchronize_rcu();
1865 	ring_buffer_reset_cpu(buffer, cpu);
1866 
1867 	ring_buffer_record_enable(buffer);
1868 }
1869 
1870 void tracing_reset_online_cpus(struct trace_buffer *buf)
1871 {
1872 	struct ring_buffer *buffer = buf->buffer;
1873 	int cpu;
1874 
1875 	if (!buffer)
1876 		return;
1877 
1878 	ring_buffer_record_disable(buffer);
1879 
1880 	/* Make sure all commits have finished */
1881 	synchronize_rcu();
1882 
1883 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1884 
1885 	for_each_online_cpu(cpu)
1886 		ring_buffer_reset_cpu(buffer, cpu);
1887 
1888 	ring_buffer_record_enable(buffer);
1889 }
1890 
1891 /* Must have trace_types_lock held */
1892 void tracing_reset_all_online_cpus(void)
1893 {
1894 	struct trace_array *tr;
1895 
1896 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1897 		if (!tr->clear_trace)
1898 			continue;
1899 		tr->clear_trace = false;
1900 		tracing_reset_online_cpus(&tr->trace_buffer);
1901 #ifdef CONFIG_TRACER_MAX_TRACE
1902 		tracing_reset_online_cpus(&tr->max_buffer);
1903 #endif
1904 	}
1905 }
1906 
1907 static int *tgid_map;
1908 
1909 #define SAVED_CMDLINES_DEFAULT 128
1910 #define NO_CMDLINE_MAP UINT_MAX
1911 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1912 struct saved_cmdlines_buffer {
1913 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1914 	unsigned *map_cmdline_to_pid;
1915 	unsigned cmdline_num;
1916 	int cmdline_idx;
1917 	char *saved_cmdlines;
1918 };
1919 static struct saved_cmdlines_buffer *savedcmd;
1920 
1921 /* temporarily disable recording */
1922 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1923 
1924 static inline char *get_saved_cmdlines(int idx)
1925 {
1926 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1927 }
1928 
1929 static inline void set_cmdline(int idx, const char *cmdline)
1930 {
1931 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1932 }
1933 
1934 static int allocate_cmdlines_buffer(unsigned int val,
1935 				    struct saved_cmdlines_buffer *s)
1936 {
1937 	s->map_cmdline_to_pid = kmalloc_array(val,
1938 					      sizeof(*s->map_cmdline_to_pid),
1939 					      GFP_KERNEL);
1940 	if (!s->map_cmdline_to_pid)
1941 		return -ENOMEM;
1942 
1943 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1944 	if (!s->saved_cmdlines) {
1945 		kfree(s->map_cmdline_to_pid);
1946 		return -ENOMEM;
1947 	}
1948 
1949 	s->cmdline_idx = 0;
1950 	s->cmdline_num = val;
1951 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1952 	       sizeof(s->map_pid_to_cmdline));
1953 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1954 	       val * sizeof(*s->map_cmdline_to_pid));
1955 
1956 	return 0;
1957 }
1958 
1959 static int trace_create_savedcmd(void)
1960 {
1961 	int ret;
1962 
1963 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1964 	if (!savedcmd)
1965 		return -ENOMEM;
1966 
1967 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1968 	if (ret < 0) {
1969 		kfree(savedcmd);
1970 		savedcmd = NULL;
1971 		return -ENOMEM;
1972 	}
1973 
1974 	return 0;
1975 }
1976 
1977 int is_tracing_stopped(void)
1978 {
1979 	return global_trace.stop_count;
1980 }
1981 
1982 /**
1983  * tracing_start - quick start of the tracer
1984  *
1985  * If tracing is enabled but was stopped by tracing_stop,
1986  * this will start the tracer back up.
1987  */
1988 void tracing_start(void)
1989 {
1990 	struct ring_buffer *buffer;
1991 	unsigned long flags;
1992 
1993 	if (tracing_disabled)
1994 		return;
1995 
1996 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1997 	if (--global_trace.stop_count) {
1998 		if (global_trace.stop_count < 0) {
1999 			/* Someone screwed up their debugging */
2000 			WARN_ON_ONCE(1);
2001 			global_trace.stop_count = 0;
2002 		}
2003 		goto out;
2004 	}
2005 
2006 	/* Prevent the buffers from switching */
2007 	arch_spin_lock(&global_trace.max_lock);
2008 
2009 	buffer = global_trace.trace_buffer.buffer;
2010 	if (buffer)
2011 		ring_buffer_record_enable(buffer);
2012 
2013 #ifdef CONFIG_TRACER_MAX_TRACE
2014 	buffer = global_trace.max_buffer.buffer;
2015 	if (buffer)
2016 		ring_buffer_record_enable(buffer);
2017 #endif
2018 
2019 	arch_spin_unlock(&global_trace.max_lock);
2020 
2021  out:
2022 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2023 }
2024 
2025 static void tracing_start_tr(struct trace_array *tr)
2026 {
2027 	struct ring_buffer *buffer;
2028 	unsigned long flags;
2029 
2030 	if (tracing_disabled)
2031 		return;
2032 
2033 	/* If global, we need to also start the max tracer */
2034 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2035 		return tracing_start();
2036 
2037 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2038 
2039 	if (--tr->stop_count) {
2040 		if (tr->stop_count < 0) {
2041 			/* Someone screwed up their debugging */
2042 			WARN_ON_ONCE(1);
2043 			tr->stop_count = 0;
2044 		}
2045 		goto out;
2046 	}
2047 
2048 	buffer = tr->trace_buffer.buffer;
2049 	if (buffer)
2050 		ring_buffer_record_enable(buffer);
2051 
2052  out:
2053 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2054 }
2055 
2056 /**
2057  * tracing_stop - quick stop of the tracer
2058  *
2059  * Light weight way to stop tracing. Use in conjunction with
2060  * tracing_start.
2061  */
2062 void tracing_stop(void)
2063 {
2064 	struct ring_buffer *buffer;
2065 	unsigned long flags;
2066 
2067 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2068 	if (global_trace.stop_count++)
2069 		goto out;
2070 
2071 	/* Prevent the buffers from switching */
2072 	arch_spin_lock(&global_trace.max_lock);
2073 
2074 	buffer = global_trace.trace_buffer.buffer;
2075 	if (buffer)
2076 		ring_buffer_record_disable(buffer);
2077 
2078 #ifdef CONFIG_TRACER_MAX_TRACE
2079 	buffer = global_trace.max_buffer.buffer;
2080 	if (buffer)
2081 		ring_buffer_record_disable(buffer);
2082 #endif
2083 
2084 	arch_spin_unlock(&global_trace.max_lock);
2085 
2086  out:
2087 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2088 }
2089 
2090 static void tracing_stop_tr(struct trace_array *tr)
2091 {
2092 	struct ring_buffer *buffer;
2093 	unsigned long flags;
2094 
2095 	/* If global, we need to also stop the max tracer */
2096 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2097 		return tracing_stop();
2098 
2099 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2100 	if (tr->stop_count++)
2101 		goto out;
2102 
2103 	buffer = tr->trace_buffer.buffer;
2104 	if (buffer)
2105 		ring_buffer_record_disable(buffer);
2106 
2107  out:
2108 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2109 }
2110 
2111 static int trace_save_cmdline(struct task_struct *tsk)
2112 {
2113 	unsigned pid, idx;
2114 
2115 	/* treat recording of idle task as a success */
2116 	if (!tsk->pid)
2117 		return 1;
2118 
2119 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2120 		return 0;
2121 
2122 	/*
2123 	 * It's not the end of the world if we don't get
2124 	 * the lock, but we also don't want to spin
2125 	 * nor do we want to disable interrupts,
2126 	 * so if we miss here, then better luck next time.
2127 	 */
2128 	if (!arch_spin_trylock(&trace_cmdline_lock))
2129 		return 0;
2130 
2131 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2132 	if (idx == NO_CMDLINE_MAP) {
2133 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2134 
2135 		/*
2136 		 * Check whether the cmdline buffer at idx has a pid
2137 		 * mapped. We are going to overwrite that entry so we
2138 		 * need to clear the map_pid_to_cmdline. Otherwise we
2139 		 * would read the new comm for the old pid.
2140 		 */
2141 		pid = savedcmd->map_cmdline_to_pid[idx];
2142 		if (pid != NO_CMDLINE_MAP)
2143 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2144 
2145 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2146 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2147 
2148 		savedcmd->cmdline_idx = idx;
2149 	}
2150 
2151 	set_cmdline(idx, tsk->comm);
2152 
2153 	arch_spin_unlock(&trace_cmdline_lock);
2154 
2155 	return 1;
2156 }
2157 
2158 static void __trace_find_cmdline(int pid, char comm[])
2159 {
2160 	unsigned map;
2161 
2162 	if (!pid) {
2163 		strcpy(comm, "<idle>");
2164 		return;
2165 	}
2166 
2167 	if (WARN_ON_ONCE(pid < 0)) {
2168 		strcpy(comm, "<XXX>");
2169 		return;
2170 	}
2171 
2172 	if (pid > PID_MAX_DEFAULT) {
2173 		strcpy(comm, "<...>");
2174 		return;
2175 	}
2176 
2177 	map = savedcmd->map_pid_to_cmdline[pid];
2178 	if (map != NO_CMDLINE_MAP)
2179 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2180 	else
2181 		strcpy(comm, "<...>");
2182 }
2183 
2184 void trace_find_cmdline(int pid, char comm[])
2185 {
2186 	preempt_disable();
2187 	arch_spin_lock(&trace_cmdline_lock);
2188 
2189 	__trace_find_cmdline(pid, comm);
2190 
2191 	arch_spin_unlock(&trace_cmdline_lock);
2192 	preempt_enable();
2193 }
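
/*
 * Illustrative sketch (not part of this file): output code typically
 * resolves a recorded pid back to a comm with a TASK_COMM_LEN stack
 * buffer, e.g.:
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-5d\n", comm, entry->pid);
 */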
2194 
2195 int trace_find_tgid(int pid)
2196 {
2197 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2198 		return 0;
2199 
2200 	return tgid_map[pid];
2201 }
2202 
2203 static int trace_save_tgid(struct task_struct *tsk)
2204 {
2205 	/* treat recording of idle task as a success */
2206 	if (!tsk->pid)
2207 		return 1;
2208 
2209 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2210 		return 0;
2211 
2212 	tgid_map[tsk->pid] = tsk->tgid;
2213 	return 1;
2214 }
2215 
2216 static bool tracing_record_taskinfo_skip(int flags)
2217 {
2218 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2219 		return true;
2220 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2221 		return true;
2222 	if (!__this_cpu_read(trace_taskinfo_save))
2223 		return true;
2224 	return false;
2225 }
2226 
2227 /**
2228  * tracing_record_taskinfo - record the task info of a task
2229  *
2230  * @task:  task to record
2231  * @flags: TRACE_RECORD_CMDLINE for recording comm
2232  *         TRACE_RECORD_TGID for recording tgid
2233  */
2234 void tracing_record_taskinfo(struct task_struct *task, int flags)
2235 {
2236 	bool done;
2237 
2238 	if (tracing_record_taskinfo_skip(flags))
2239 		return;
2240 
2241 	/*
2242 	 * Record as much task information as possible. If some fail, continue
2243 	 * to try to record the others.
2244 	 */
2245 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2246 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2247 
2248 	/* If recording any information failed, retry again soon. */
2249 	if (!done)
2250 		return;
2251 
2252 	__this_cpu_write(trace_taskinfo_save, false);
2253 }
2254 
2255 /**
2256  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2257  *
2258  * @prev:  previous task during sched_switch
2259  * @next:  next task during sched_switch
2260  * @flags: TRACE_RECORD_CMDLINE for recording comm
2261  *         TRACE_RECORD_TGID for recording tgid
2262  */
2263 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2264 					  struct task_struct *next, int flags)
2265 {
2266 	bool done;
2267 
2268 	if (tracing_record_taskinfo_skip(flags))
2269 		return;
2270 
2271 	/*
2272 	 * Record as much task information as possible. If some fail, continue
2273 	 * to try to record the others.
2274 	 */
2275 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2276 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2277 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2278 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2279 
2280 	/* If recording any information failed, retry again soon. */
2281 	if (!done)
2282 		return;
2283 
2284 	__this_cpu_write(trace_taskinfo_save, false);
2285 }
2286 
2287 /* Helpers to record a specific task information */
2288 void tracing_record_cmdline(struct task_struct *task)
2289 {
2290 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2291 }
2292 
2293 void tracing_record_tgid(struct task_struct *task)
2294 {
2295 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2296 }
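
/*
 * Illustrative sketch (not part of this file): a caller that wants both
 * the comm and the tgid recorded in a single call can OR the flags:
 *
 *	tracing_record_taskinfo(current, TRACE_RECORD_CMDLINE |
 *					 TRACE_RECORD_TGID);
 */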
2297 
2298 /*
2299  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2300  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2301  * simplifies those functions and keeps them in sync.
2302  */
2303 enum print_line_t trace_handle_return(struct trace_seq *s)
2304 {
2305 	return trace_seq_has_overflowed(s) ?
2306 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2307 }
2308 EXPORT_SYMBOL_GPL(trace_handle_return);
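
/*
 * Illustrative sketch (not part of this file): a typical event output
 * callback writes into the trace_seq and lets trace_handle_return()
 * pick the right return code.  trace_print_example() is a made-up name;
 * the callback signature follows struct trace_event_functions::trace.
 *
 *	static enum print_line_t
 *	trace_print_example(struct trace_iterator *iter, int flags,
 *			    struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "example: cpu=%d ts=%llu\n", iter->cpu,
 *				 (unsigned long long)iter->ts);
 *		return trace_handle_return(s);
 *	}
 */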
2309 
2310 void
2311 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2312 			     int pc)
2313 {
2314 	struct task_struct *tsk = current;
2315 
2316 	entry->preempt_count		= pc & 0xff;
2317 	entry->pid			= (tsk) ? tsk->pid : 0;
2318 	entry->flags =
2319 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2320 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2321 #else
2322 		TRACE_FLAG_IRQS_NOSUPPORT |
2323 #endif
2324 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2325 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2326 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2327 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2328 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2329 }
2330 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2331 
2332 struct ring_buffer_event *
2333 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2334 			  int type,
2335 			  unsigned long len,
2336 			  unsigned long flags, int pc)
2337 {
2338 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2339 }
2340 
2341 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2342 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2343 static int trace_buffered_event_ref;
2344 
2345 /**
2346  * trace_buffered_event_enable - enable buffering events
2347  *
2348  * When events are being filtered, it is quicker to write the event
2349  * data into a temporary buffer when there is a likely chance that it
2350  * will not be committed. Discarding an event from the ring buffer
2351  * is not as fast as committing one, and is much slower than copying
2352  * the data into a commit.
2353  *
2354  * When events are to be filtered, allocate per CPU buffers to write
2355  * the event data into. If the event is filtered and discarded, it is
2356  * simply dropped; otherwise the entire data is committed to the ring
2357  * buffer in one shot.
2358  */
2359 void trace_buffered_event_enable(void)
2360 {
2361 	struct ring_buffer_event *event;
2362 	struct page *page;
2363 	int cpu;
2364 
2365 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2366 
2367 	if (trace_buffered_event_ref++)
2368 		return;
2369 
2370 	for_each_tracing_cpu(cpu) {
2371 		page = alloc_pages_node(cpu_to_node(cpu),
2372 					GFP_KERNEL | __GFP_NORETRY, 0);
2373 		if (!page)
2374 			goto failed;
2375 
2376 		event = page_address(page);
2377 		memset(event, 0, sizeof(*event));
2378 
2379 		per_cpu(trace_buffered_event, cpu) = event;
2380 
2381 		preempt_disable();
2382 		if (cpu == smp_processor_id() &&
2383 		    this_cpu_read(trace_buffered_event) !=
2384 		    per_cpu(trace_buffered_event, cpu))
2385 			WARN_ON_ONCE(1);
2386 		preempt_enable();
2387 	}
2388 
2389 	return;
2390  failed:
2391 	trace_buffered_event_disable();
2392 }
2393 
2394 static void enable_trace_buffered_event(void *data)
2395 {
2396 	/* Probably not needed, but do it anyway */
2397 	smp_rmb();
2398 	this_cpu_dec(trace_buffered_event_cnt);
2399 }
2400 
2401 static void disable_trace_buffered_event(void *data)
2402 {
2403 	this_cpu_inc(trace_buffered_event_cnt);
2404 }
2405 
2406 /**
2407  * trace_buffered_event_disable - disable buffering events
2408  *
2409  * When a filter is removed, it is faster to not use the buffered
2410  * events, and to commit directly into the ring buffer. Free up
2411  * the temp buffers when there are no more users. This requires
2412  * special synchronization with current events.
2413  */
2414 void trace_buffered_event_disable(void)
2415 {
2416 	int cpu;
2417 
2418 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2419 
2420 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2421 		return;
2422 
2423 	if (--trace_buffered_event_ref)
2424 		return;
2425 
2426 	preempt_disable();
2427 	/* For each CPU, set the buffer as used. */
2428 	smp_call_function_many(tracing_buffer_mask,
2429 			       disable_trace_buffered_event, NULL, 1);
2430 	preempt_enable();
2431 
2432 	/* Wait for all current users to finish */
2433 	synchronize_rcu();
2434 
2435 	for_each_tracing_cpu(cpu) {
2436 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2437 		per_cpu(trace_buffered_event, cpu) = NULL;
2438 	}
2439 	/*
2440 	 * Make sure trace_buffered_event is NULL before clearing
2441 	 * trace_buffered_event_cnt.
2442 	 */
2443 	smp_wmb();
2444 
2445 	preempt_disable();
2446 	/* Do the work on each cpu */
2447 	smp_call_function_many(tracing_buffer_mask,
2448 			       enable_trace_buffered_event, NULL, 1);
2449 	preempt_enable();
2450 }
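
/*
 * Illustrative sketch (not part of this file): both helpers above are
 * reference counted and expect event_mutex to be held, so a filter
 * setup/teardown path pairs them roughly like this:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install the event filter ...
 *	mutex_unlock(&event_mutex);
 *
 *	and later, when the filter is removed:
 *
 *	mutex_lock(&event_mutex);
 *	... remove the event filter ...
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */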
2451 
2452 static struct ring_buffer *temp_buffer;
2453 
2454 struct ring_buffer_event *
2455 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2456 			  struct trace_event_file *trace_file,
2457 			  int type, unsigned long len,
2458 			  unsigned long flags, int pc)
2459 {
2460 	struct ring_buffer_event *entry;
2461 	int val;
2462 
2463 	*current_rb = trace_file->tr->trace_buffer.buffer;
2464 
2465 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2466 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2467 	    (entry = this_cpu_read(trace_buffered_event))) {
2468 		/* Try to use the per cpu buffer first */
2469 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2470 		if (val == 1) {
2471 			trace_event_setup(entry, type, flags, pc);
2472 			entry->array[0] = len;
2473 			return entry;
2474 		}
2475 		this_cpu_dec(trace_buffered_event_cnt);
2476 	}
2477 
2478 	entry = __trace_buffer_lock_reserve(*current_rb,
2479 					    type, len, flags, pc);
2480 	/*
2481 	 * If tracing is off, but we have triggers enabled
2482 	 * we still need to look at the event data. Use the temp_buffer
2483 	 * to store the trace event for the trigger to use. It is
2484 	 * recursion safe and will not be recorded anywhere.
2485 	 */
2486 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2487 		*current_rb = temp_buffer;
2488 		entry = __trace_buffer_lock_reserve(*current_rb,
2489 						    type, len, flags, pc);
2490 	}
2491 	return entry;
2492 }
2493 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2494 
2495 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2496 static DEFINE_MUTEX(tracepoint_printk_mutex);
2497 
2498 static void output_printk(struct trace_event_buffer *fbuffer)
2499 {
2500 	struct trace_event_call *event_call;
2501 	struct trace_event *event;
2502 	unsigned long flags;
2503 	struct trace_iterator *iter = tracepoint_print_iter;
2504 
2505 	/* We should never get here if iter is NULL */
2506 	if (WARN_ON_ONCE(!iter))
2507 		return;
2508 
2509 	event_call = fbuffer->trace_file->event_call;
2510 	if (!event_call || !event_call->event.funcs ||
2511 	    !event_call->event.funcs->trace)
2512 		return;
2513 
2514 	event = &fbuffer->trace_file->event_call->event;
2515 
2516 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2517 	trace_seq_init(&iter->seq);
2518 	iter->ent = fbuffer->entry;
2519 	event_call->event.funcs->trace(iter, 0, event);
2520 	trace_seq_putc(&iter->seq, 0);
2521 	printk("%s", iter->seq.buffer);
2522 
2523 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2524 }
2525 
2526 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2527 			     void __user *buffer, size_t *lenp,
2528 			     loff_t *ppos)
2529 {
2530 	int save_tracepoint_printk;
2531 	int ret;
2532 
2533 	mutex_lock(&tracepoint_printk_mutex);
2534 	save_tracepoint_printk = tracepoint_printk;
2535 
2536 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2537 
2538 	/*
2539 	 * This will force exiting early, as tracepoint_printk
2540 	 * is always zero when tracepoint_print_iter is not allocated
2541 	 */
2542 	if (!tracepoint_print_iter)
2543 		tracepoint_printk = 0;
2544 
2545 	if (save_tracepoint_printk == tracepoint_printk)
2546 		goto out;
2547 
2548 	if (tracepoint_printk)
2549 		static_key_enable(&tracepoint_printk_key.key);
2550 	else
2551 		static_key_disable(&tracepoint_printk_key.key);
2552 
2553  out:
2554 	mutex_unlock(&tracepoint_printk_mutex);
2555 
2556 	return ret;
2557 }
2558 
2559 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2560 {
2561 	if (static_key_false(&tracepoint_printk_key.key))
2562 		output_printk(fbuffer);
2563 
2564 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2565 				    fbuffer->event, fbuffer->entry,
2566 				    fbuffer->flags, fbuffer->pc);
2567 }
2568 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2569 
2570 /*
2571  * Skip 3:
2572  *
2573  *   trace_buffer_unlock_commit_regs()
2574  *   trace_event_buffer_commit()
2575  *   trace_event_raw_event_xxx()
2576  */
2577 # define STACK_SKIP 3
2578 
2579 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2580 				     struct ring_buffer *buffer,
2581 				     struct ring_buffer_event *event,
2582 				     unsigned long flags, int pc,
2583 				     struct pt_regs *regs)
2584 {
2585 	__buffer_unlock_commit(buffer, event);
2586 
2587 	/*
2588 	 * If regs is not set, then skip the necessary functions.
2589 	 * Note, we can still get here via blktrace, wakeup tracer
2590 	 * and mmiotrace, but that's ok if they lose a function or
2591 	 * two. They are not that meaningful.
2592 	 */
2593 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2594 	ftrace_trace_userstack(buffer, flags, pc);
2595 }
2596 
2597 /*
2598  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2599  */
2600 void
2601 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2602 				   struct ring_buffer_event *event)
2603 {
2604 	__buffer_unlock_commit(buffer, event);
2605 }
2606 
2607 static void
2608 trace_process_export(struct trace_export *export,
2609 	       struct ring_buffer_event *event)
2610 {
2611 	struct trace_entry *entry;
2612 	unsigned int size = 0;
2613 
2614 	entry = ring_buffer_event_data(event);
2615 	size = ring_buffer_event_length(event);
2616 	export->write(export, entry, size);
2617 }
2618 
2619 static DEFINE_MUTEX(ftrace_export_lock);
2620 
2621 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2622 
2623 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2624 
2625 static inline void ftrace_exports_enable(void)
2626 {
2627 	static_branch_enable(&ftrace_exports_enabled);
2628 }
2629 
2630 static inline void ftrace_exports_disable(void)
2631 {
2632 	static_branch_disable(&ftrace_exports_enabled);
2633 }
2634 
2635 static void ftrace_exports(struct ring_buffer_event *event)
2636 {
2637 	struct trace_export *export;
2638 
2639 	preempt_disable_notrace();
2640 
2641 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2642 	while (export) {
2643 		trace_process_export(export, event);
2644 		export = rcu_dereference_raw_notrace(export->next);
2645 	}
2646 
2647 	preempt_enable_notrace();
2648 }
2649 
2650 static inline void
2651 add_trace_export(struct trace_export **list, struct trace_export *export)
2652 {
2653 	rcu_assign_pointer(export->next, *list);
2654 	/*
2655 	 * We are adding the export to the list, but another
2656 	 * CPU might be walking that list. We need to make sure
2657 	 * the export->next pointer is valid before another CPU sees
2658 	 * the export pointer added to the list.
2659 	 */
2660 	rcu_assign_pointer(*list, export);
2661 }
2662 
2663 static inline int
2664 rm_trace_export(struct trace_export **list, struct trace_export *export)
2665 {
2666 	struct trace_export **p;
2667 
2668 	for (p = list; *p != NULL; p = &(*p)->next)
2669 		if (*p == export)
2670 			break;
2671 
2672 	if (*p != export)
2673 		return -1;
2674 
2675 	rcu_assign_pointer(*p, (*p)->next);
2676 
2677 	return 0;
2678 }
2679 
2680 static inline void
2681 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2682 {
2683 	if (*list == NULL)
2684 		ftrace_exports_enable();
2685 
2686 	add_trace_export(list, export);
2687 }
2688 
2689 static inline int
2690 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2691 {
2692 	int ret;
2693 
2694 	ret = rm_trace_export(list, export);
2695 	if (*list == NULL)
2696 		ftrace_exports_disable();
2697 
2698 	return ret;
2699 }
2700 
2701 int register_ftrace_export(struct trace_export *export)
2702 {
2703 	if (WARN_ON_ONCE(!export->write))
2704 		return -1;
2705 
2706 	mutex_lock(&ftrace_export_lock);
2707 
2708 	add_ftrace_export(&ftrace_exports_list, export);
2709 
2710 	mutex_unlock(&ftrace_export_lock);
2711 
2712 	return 0;
2713 }
2714 EXPORT_SYMBOL_GPL(register_ftrace_export);
2715 
2716 int unregister_ftrace_export(struct trace_export *export)
2717 {
2718 	int ret;
2719 
2720 	mutex_lock(&ftrace_export_lock);
2721 
2722 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2723 
2724 	mutex_unlock(&ftrace_export_lock);
2725 
2726 	return ret;
2727 }
2728 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
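
/*
 * Illustrative sketch (not part of this file): a consumer that wants a
 * copy of every function trace entry registers a struct trace_export
 * with a write() callback.  The names below are made up; the callback
 * arguments are assumed to match the trace_process_export() call above.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward the raw entry to some out-of-band channel
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */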
2729 
2730 void
2731 trace_function(struct trace_array *tr,
2732 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2733 	       int pc)
2734 {
2735 	struct trace_event_call *call = &event_function;
2736 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2737 	struct ring_buffer_event *event;
2738 	struct ftrace_entry *entry;
2739 
2740 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2741 					    flags, pc);
2742 	if (!event)
2743 		return;
2744 	entry	= ring_buffer_event_data(event);
2745 	entry->ip			= ip;
2746 	entry->parent_ip		= parent_ip;
2747 
2748 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2749 		if (static_branch_unlikely(&ftrace_exports_enabled))
2750 			ftrace_exports(event);
2751 		__buffer_unlock_commit(buffer, event);
2752 	}
2753 }
2754 
2755 #ifdef CONFIG_STACKTRACE
2756 
2757 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2758 #define FTRACE_KSTACK_NESTING	4
2759 
2760 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2761 
2762 struct ftrace_stack {
2763 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2764 };
2765 
2766 
2767 struct ftrace_stacks {
2768 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2769 };
2770 
2771 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2772 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2773 
2774 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2775 				 unsigned long flags,
2776 				 int skip, int pc, struct pt_regs *regs)
2777 {
2778 	struct trace_event_call *call = &event_kernel_stack;
2779 	struct ring_buffer_event *event;
2780 	unsigned int size, nr_entries;
2781 	struct ftrace_stack *fstack;
2782 	struct stack_entry *entry;
2783 	int stackidx;
2784 
2785 	/*
2786 	 * Add one, for this function and the call to stack_trace_save().
2787 	 * If regs is set, then these functions will not be in the way.
2788 	 */
2789 #ifndef CONFIG_UNWINDER_ORC
2790 	if (!regs)
2791 		skip++;
2792 #endif
2793 
2794 	/*
2795 	 * Since events can happen in NMIs there's no safe way to
2796 	 * use the per cpu ftrace_stacks directly. We reserve a nesting
2797 	 * level, and if an interrupt or NMI comes in, it will just
2798 	 * use the next level of the per cpu ftrace_stacks.
2799 	 */
2800 	preempt_disable_notrace();
2801 
2802 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2803 
2804 	/* This should never happen. If it does, yell once and skip */
2805 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2806 		goto out;
2807 
2808 	/*
2809 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2810 	 * interrupt will either see the value pre increment or post
2811 	 * increment. If the interrupt happens pre increment it will have
2812 	 * restored the counter when it returns.  We just need a barrier to
2813 	 * keep gcc from moving things around.
2814 	 */
2815 	barrier();
2816 
2817 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2818 	size = ARRAY_SIZE(fstack->calls);
2819 
2820 	if (regs) {
2821 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2822 						   size, skip);
2823 	} else {
2824 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2825 	}
2826 
2827 	size = nr_entries * sizeof(unsigned long);
2828 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2829 					    sizeof(*entry) + size, flags, pc);
2830 	if (!event)
2831 		goto out;
2832 	entry = ring_buffer_event_data(event);
2833 
2834 	memcpy(&entry->caller, fstack->calls, size);
2835 	entry->size = nr_entries;
2836 
2837 	if (!call_filter_check_discard(call, entry, buffer, event))
2838 		__buffer_unlock_commit(buffer, event);
2839 
2840  out:
2841 	/* Again, don't let gcc optimize things here */
2842 	barrier();
2843 	__this_cpu_dec(ftrace_stack_reserve);
2844 	preempt_enable_notrace();
2845 
2846 }
2847 
2848 static inline void ftrace_trace_stack(struct trace_array *tr,
2849 				      struct ring_buffer *buffer,
2850 				      unsigned long flags,
2851 				      int skip, int pc, struct pt_regs *regs)
2852 {
2853 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2854 		return;
2855 
2856 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2857 }
2858 
2859 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2860 		   int pc)
2861 {
2862 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2863 
2864 	if (rcu_is_watching()) {
2865 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2866 		return;
2867 	}
2868 
2869 	/*
2870 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2871 	 * but if the above rcu_is_watching() failed, then the NMI
2872 	 * triggered someplace critical, and rcu_irq_enter() should
2873 	 * not be called from NMI.
2874 	 */
2875 	if (unlikely(in_nmi()))
2876 		return;
2877 
2878 	rcu_irq_enter_irqson();
2879 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2880 	rcu_irq_exit_irqson();
2881 }
2882 
2883 /**
2884  * trace_dump_stack - record a stack back trace in the trace buffer
2885  * @skip: Number of functions to skip (helper handlers)
2886  */
2887 void trace_dump_stack(int skip)
2888 {
2889 	unsigned long flags;
2890 
2891 	if (tracing_disabled || tracing_selftest_running)
2892 		return;
2893 
2894 	local_save_flags(flags);
2895 
2896 #ifndef CONFIG_UNWINDER_ORC
2897 	/* Skip 1 to skip this function. */
2898 	skip++;
2899 #endif
2900 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2901 			     flags, skip, preempt_count(), NULL);
2902 }
2903 EXPORT_SYMBOL_GPL(trace_dump_stack);
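
/*
 * Illustrative sketch (not part of this file): sprinkling
 * trace_dump_stack(0) into a code path under investigation records the
 * backtrace of every hit in the trace buffer instead of the console:
 *
 *	if (suspicious_condition)	// made-up condition
 *		trace_dump_stack(0);
 */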
2904 
2905 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
2906 static DEFINE_PER_CPU(int, user_stack_count);
2907 
2908 static void
2909 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2910 {
2911 	struct trace_event_call *call = &event_user_stack;
2912 	struct ring_buffer_event *event;
2913 	struct userstack_entry *entry;
2914 
2915 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2916 		return;
2917 
2918 	/*
2919 	 * NMIs can not handle page faults, even with fix ups.
2920 	 * Saving the user stack can (and often does) fault.
2921 	 */
2922 	if (unlikely(in_nmi()))
2923 		return;
2924 
2925 	/*
2926 	 * prevent recursion, since the user stack tracing may
2927 	 * trigger other kernel events.
2928 	 */
2929 	preempt_disable();
2930 	if (__this_cpu_read(user_stack_count))
2931 		goto out;
2932 
2933 	__this_cpu_inc(user_stack_count);
2934 
2935 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2936 					    sizeof(*entry), flags, pc);
2937 	if (!event)
2938 		goto out_drop_count;
2939 	entry	= ring_buffer_event_data(event);
2940 
2941 	entry->tgid		= current->tgid;
2942 	memset(&entry->caller, 0, sizeof(entry->caller));
2943 
2944 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
2945 	if (!call_filter_check_discard(call, entry, buffer, event))
2946 		__buffer_unlock_commit(buffer, event);
2947 
2948  out_drop_count:
2949 	__this_cpu_dec(user_stack_count);
2950  out:
2951 	preempt_enable();
2952 }
2953 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
2954 static void ftrace_trace_userstack(struct ring_buffer *buffer,
2955 				   unsigned long flags, int pc)
2956 {
2957 }
2958 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2959 
2960 #endif /* CONFIG_STACKTRACE */
2961 
2962 /* created for use with alloc_percpu */
2963 struct trace_buffer_struct {
2964 	int nesting;
2965 	char buffer[4][TRACE_BUF_SIZE];
2966 };
2967 
2968 static struct trace_buffer_struct *trace_percpu_buffer;
2969 
2970 /*
2971  * This allows for lockless recording.  If we're nested too deeply, then
2972  * this returns NULL.
2973  */
2974 static char *get_trace_buf(void)
2975 {
2976 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2977 
2978 	if (!buffer || buffer->nesting >= 4)
2979 		return NULL;
2980 
2981 	buffer->nesting++;
2982 
2983 	/* Interrupts must see nesting incremented before we use the buffer */
2984 	barrier();
2985 	return &buffer->buffer[buffer->nesting][0];
2986 }
2987 
2988 static void put_trace_buf(void)
2989 {
2990 	/* Don't let the decrement of nesting leak before this */
2991 	barrier();
2992 	this_cpu_dec(trace_percpu_buffer->nesting);
2993 }
2994 
2995 static int alloc_percpu_trace_buffer(void)
2996 {
2997 	struct trace_buffer_struct *buffers;
2998 
2999 	buffers = alloc_percpu(struct trace_buffer_struct);
3000 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3001 		return -ENOMEM;
3002 
3003 	trace_percpu_buffer = buffers;
3004 	return 0;
3005 }
3006 
3007 static int buffers_allocated;
3008 
3009 void trace_printk_init_buffers(void)
3010 {
3011 	if (buffers_allocated)
3012 		return;
3013 
3014 	if (alloc_percpu_trace_buffer())
3015 		return;
3016 
3017 	/* trace_printk() is for debug use only. Don't use it in production. */
3018 
3019 	pr_warn("\n");
3020 	pr_warn("**********************************************************\n");
3021 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3022 	pr_warn("**                                                      **\n");
3023 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3024 	pr_warn("**                                                      **\n");
3025 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3026 	pr_warn("** unsafe for production use.                           **\n");
3027 	pr_warn("**                                                      **\n");
3028 	pr_warn("** If you see this message and you are not debugging    **\n");
3029 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3030 	pr_warn("**                                                      **\n");
3031 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3032 	pr_warn("**********************************************************\n");
3033 
3034 	/* Expand the buffers to set size */
3035 	tracing_update_buffers();
3036 
3037 	buffers_allocated = 1;
3038 
3039 	/*
3040 	 * trace_printk_init_buffers() can be called by modules.
3041 	 * If that happens, then we need to start cmdline recording
3042 	 * directly here. If the global_trace.buffer is already
3043 	 * allocated here, then this was called by module code.
3044 	 */
3045 	if (global_trace.trace_buffer.buffer)
3046 		tracing_start_cmdline_record();
3047 }
3048 
3049 void trace_printk_start_comm(void)
3050 {
3051 	/* Start tracing comms if trace printk is set */
3052 	if (!buffers_allocated)
3053 		return;
3054 	tracing_start_cmdline_record();
3055 }
3056 
3057 static void trace_printk_start_stop_comm(int enabled)
3058 {
3059 	if (!buffers_allocated)
3060 		return;
3061 
3062 	if (enabled)
3063 		tracing_start_cmdline_record();
3064 	else
3065 		tracing_stop_cmdline_record();
3066 }
3067 
3068 /**
3069  * trace_vbprintk - write binary msg to tracing buffer
3070  *
3071  */
3072 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3073 {
3074 	struct trace_event_call *call = &event_bprint;
3075 	struct ring_buffer_event *event;
3076 	struct ring_buffer *buffer;
3077 	struct trace_array *tr = &global_trace;
3078 	struct bprint_entry *entry;
3079 	unsigned long flags;
3080 	char *tbuffer;
3081 	int len = 0, size, pc;
3082 
3083 	if (unlikely(tracing_selftest_running || tracing_disabled))
3084 		return 0;
3085 
3086 	/* Don't pollute graph traces with trace_vprintk internals */
3087 	pause_graph_tracing();
3088 
3089 	pc = preempt_count();
3090 	preempt_disable_notrace();
3091 
3092 	tbuffer = get_trace_buf();
3093 	if (!tbuffer) {
3094 		len = 0;
3095 		goto out_nobuffer;
3096 	}
3097 
3098 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3099 
3100 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3101 		goto out;
3102 
3103 	local_save_flags(flags);
3104 	size = sizeof(*entry) + sizeof(u32) * len;
3105 	buffer = tr->trace_buffer.buffer;
3106 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3107 					    flags, pc);
3108 	if (!event)
3109 		goto out;
3110 	entry = ring_buffer_event_data(event);
3111 	entry->ip			= ip;
3112 	entry->fmt			= fmt;
3113 
3114 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3115 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3116 		__buffer_unlock_commit(buffer, event);
3117 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3118 	}
3119 
3120 out:
3121 	put_trace_buf();
3122 
3123 out_nobuffer:
3124 	preempt_enable_notrace();
3125 	unpause_graph_tracing();
3126 
3127 	return len;
3128 }
3129 EXPORT_SYMBOL_GPL(trace_vbprintk);
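
/*
 * Illustrative sketch (not part of this file): this is the backend that
 * trace_printk() normally lands in when its format string is a
 * compile-time constant with arguments, e.g.:
 *
 *	trace_printk("request %d completed with status %d\n", id, status);
 *
 * Only the format pointer and the binary argument values are recorded;
 * the string is formatted later at read time.
 */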
3130 
3131 __printf(3, 0)
3132 static int
3133 __trace_array_vprintk(struct ring_buffer *buffer,
3134 		      unsigned long ip, const char *fmt, va_list args)
3135 {
3136 	struct trace_event_call *call = &event_print;
3137 	struct ring_buffer_event *event;
3138 	int len = 0, size, pc;
3139 	struct print_entry *entry;
3140 	unsigned long flags;
3141 	char *tbuffer;
3142 
3143 	if (tracing_disabled || tracing_selftest_running)
3144 		return 0;
3145 
3146 	/* Don't pollute graph traces with trace_vprintk internals */
3147 	pause_graph_tracing();
3148 
3149 	pc = preempt_count();
3150 	preempt_disable_notrace();
3151 
3152 
3153 	tbuffer = get_trace_buf();
3154 	if (!tbuffer) {
3155 		len = 0;
3156 		goto out_nobuffer;
3157 	}
3158 
3159 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3160 
3161 	local_save_flags(flags);
3162 	size = sizeof(*entry) + len + 1;
3163 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3164 					    flags, pc);
3165 	if (!event)
3166 		goto out;
3167 	entry = ring_buffer_event_data(event);
3168 	entry->ip = ip;
3169 
3170 	memcpy(&entry->buf, tbuffer, len + 1);
3171 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3172 		__buffer_unlock_commit(buffer, event);
3173 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3174 	}
3175 
3176 out:
3177 	put_trace_buf();
3178 
3179 out_nobuffer:
3180 	preempt_enable_notrace();
3181 	unpause_graph_tracing();
3182 
3183 	return len;
3184 }
3185 
3186 __printf(3, 0)
3187 int trace_array_vprintk(struct trace_array *tr,
3188 			unsigned long ip, const char *fmt, va_list args)
3189 {
3190 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3191 }
3192 
3193 __printf(3, 0)
3194 int trace_array_printk(struct trace_array *tr,
3195 		       unsigned long ip, const char *fmt, ...)
3196 {
3197 	int ret;
3198 	va_list ap;
3199 
3200 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3201 		return 0;
3202 
3203 	va_start(ap, fmt);
3204 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3205 	va_end(ap);
3206 	return ret;
3207 }
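
/*
 * Illustrative sketch (not part of this file): writing into a specific
 * trace instance's buffer rather than the global one, where my_tr is a
 * previously obtained struct trace_array pointer (made-up name):
 *
 *	trace_array_printk(my_tr, _THIS_IP_, "state=%d\n", state);
 */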
3208 
3209 __printf(3, 4)
3210 int trace_array_printk_buf(struct ring_buffer *buffer,
3211 			   unsigned long ip, const char *fmt, ...)
3212 {
3213 	int ret;
3214 	va_list ap;
3215 
3216 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3217 		return 0;
3218 
3219 	va_start(ap, fmt);
3220 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3221 	va_end(ap);
3222 	return ret;
3223 }
3224 
3225 __printf(2, 0)
3226 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3227 {
3228 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3229 }
3230 EXPORT_SYMBOL_GPL(trace_vprintk);
3231 
3232 static void trace_iterator_increment(struct trace_iterator *iter)
3233 {
3234 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3235 
3236 	iter->idx++;
3237 	if (buf_iter)
3238 		ring_buffer_read(buf_iter, NULL);
3239 }
3240 
3241 static struct trace_entry *
3242 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3243 		unsigned long *lost_events)
3244 {
3245 	struct ring_buffer_event *event;
3246 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3247 
3248 	if (buf_iter)
3249 		event = ring_buffer_iter_peek(buf_iter, ts);
3250 	else
3251 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3252 					 lost_events);
3253 
3254 	if (event) {
3255 		iter->ent_size = ring_buffer_event_length(event);
3256 		return ring_buffer_event_data(event);
3257 	}
3258 	iter->ent_size = 0;
3259 	return NULL;
3260 }
3261 
3262 static struct trace_entry *
3263 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3264 		  unsigned long *missing_events, u64 *ent_ts)
3265 {
3266 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3267 	struct trace_entry *ent, *next = NULL;
3268 	unsigned long lost_events = 0, next_lost = 0;
3269 	int cpu_file = iter->cpu_file;
3270 	u64 next_ts = 0, ts;
3271 	int next_cpu = -1;
3272 	int next_size = 0;
3273 	int cpu;
3274 
3275 	/*
3276 	 * If we are in a per_cpu trace file, don't bother iterating over
3277 	 * all the CPUs; peek at that CPU directly.
3278 	 */
3279 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3280 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3281 			return NULL;
3282 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3283 		if (ent_cpu)
3284 			*ent_cpu = cpu_file;
3285 
3286 		return ent;
3287 	}
3288 
3289 	for_each_tracing_cpu(cpu) {
3290 
3291 		if (ring_buffer_empty_cpu(buffer, cpu))
3292 			continue;
3293 
3294 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3295 
3296 		/*
3297 		 * Pick the entry with the smallest timestamp:
3298 		 */
3299 		if (ent && (!next || ts < next_ts)) {
3300 			next = ent;
3301 			next_cpu = cpu;
3302 			next_ts = ts;
3303 			next_lost = lost_events;
3304 			next_size = iter->ent_size;
3305 		}
3306 	}
3307 
3308 	iter->ent_size = next_size;
3309 
3310 	if (ent_cpu)
3311 		*ent_cpu = next_cpu;
3312 
3313 	if (ent_ts)
3314 		*ent_ts = next_ts;
3315 
3316 	if (missing_events)
3317 		*missing_events = next_lost;
3318 
3319 	return next;
3320 }
3321 
3322 /* Find the next real entry, without updating the iterator itself */
3323 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3324 					  int *ent_cpu, u64 *ent_ts)
3325 {
3326 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3327 }
3328 
3329 /* Find the next real entry, and increment the iterator to the next entry */
3330 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3331 {
3332 	iter->ent = __find_next_entry(iter, &iter->cpu,
3333 				      &iter->lost_events, &iter->ts);
3334 
3335 	if (iter->ent)
3336 		trace_iterator_increment(iter);
3337 
3338 	return iter->ent ? iter : NULL;
3339 }
3340 
3341 static void trace_consume(struct trace_iterator *iter)
3342 {
3343 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3344 			    &iter->lost_events);
3345 }
3346 
3347 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3348 {
3349 	struct trace_iterator *iter = m->private;
3350 	int i = (int)*pos;
3351 	void *ent;
3352 
3353 	WARN_ON_ONCE(iter->leftover);
3354 
3355 	(*pos)++;
3356 
3357 	/* can't go backwards */
3358 	if (iter->idx > i)
3359 		return NULL;
3360 
3361 	if (iter->idx < 0)
3362 		ent = trace_find_next_entry_inc(iter);
3363 	else
3364 		ent = iter;
3365 
3366 	while (ent && iter->idx < i)
3367 		ent = trace_find_next_entry_inc(iter);
3368 
3369 	iter->pos = *pos;
3370 
3371 	return ent;
3372 }
3373 
3374 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3375 {
3376 	struct ring_buffer_event *event;
3377 	struct ring_buffer_iter *buf_iter;
3378 	unsigned long entries = 0;
3379 	u64 ts;
3380 
3381 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3382 
3383 	buf_iter = trace_buffer_iter(iter, cpu);
3384 	if (!buf_iter)
3385 		return;
3386 
3387 	ring_buffer_iter_reset(buf_iter);
3388 
3389 	/*
3390 	 * We could have the case with the max latency tracers
3391 	 * that a reset never took place on a cpu. This is evident
3392 	 * by the timestamp being before the start of the buffer.
3393 	 */
3394 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3395 		if (ts >= iter->trace_buffer->time_start)
3396 			break;
3397 		entries++;
3398 		ring_buffer_read(buf_iter, NULL);
3399 	}
3400 
3401 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3402 }
3403 
3404 /*
3405  * The current tracer is copied to avoid taking a global lock
3406  * all around.
3407  */
3408 static void *s_start(struct seq_file *m, loff_t *pos)
3409 {
3410 	struct trace_iterator *iter = m->private;
3411 	struct trace_array *tr = iter->tr;
3412 	int cpu_file = iter->cpu_file;
3413 	void *p = NULL;
3414 	loff_t l = 0;
3415 	int cpu;
3416 
3417 	/*
3418 	 * copy the tracer to avoid using a global lock all around.
3419 	 * iter->trace is a copy of current_trace, the pointer to the
3420 	 * name may be used instead of a strcmp(), as iter->trace->name
3421 	 * will point to the same string as current_trace->name.
3422 	 */
3423 	mutex_lock(&trace_types_lock);
3424 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3425 		*iter->trace = *tr->current_trace;
3426 	mutex_unlock(&trace_types_lock);
3427 
3428 #ifdef CONFIG_TRACER_MAX_TRACE
3429 	if (iter->snapshot && iter->trace->use_max_tr)
3430 		return ERR_PTR(-EBUSY);
3431 #endif
3432 
3433 	if (!iter->snapshot)
3434 		atomic_inc(&trace_record_taskinfo_disabled);
3435 
3436 	if (*pos != iter->pos) {
3437 		iter->ent = NULL;
3438 		iter->cpu = 0;
3439 		iter->idx = -1;
3440 
3441 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3442 			for_each_tracing_cpu(cpu)
3443 				tracing_iter_reset(iter, cpu);
3444 		} else
3445 			tracing_iter_reset(iter, cpu_file);
3446 
3447 		iter->leftover = 0;
3448 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3449 			;
3450 
3451 	} else {
3452 		/*
3453 		 * If we overflowed the seq_file before, then we want
3454 		 * to just reuse the trace_seq buffer again.
3455 		 */
3456 		if (iter->leftover)
3457 			p = iter;
3458 		else {
3459 			l = *pos - 1;
3460 			p = s_next(m, p, &l);
3461 		}
3462 	}
3463 
3464 	trace_event_read_lock();
3465 	trace_access_lock(cpu_file);
3466 	return p;
3467 }
3468 
3469 static void s_stop(struct seq_file *m, void *p)
3470 {
3471 	struct trace_iterator *iter = m->private;
3472 
3473 #ifdef CONFIG_TRACER_MAX_TRACE
3474 	if (iter->snapshot && iter->trace->use_max_tr)
3475 		return;
3476 #endif
3477 
3478 	if (!iter->snapshot)
3479 		atomic_dec(&trace_record_taskinfo_disabled);
3480 
3481 	trace_access_unlock(iter->cpu_file);
3482 	trace_event_read_unlock();
3483 }
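
/*
 * Illustrative sketch (not part of this file): s_start/s_next/s_stop
 * follow the usual seq_file iterator contract and are wired, together
 * with a show callback, into a struct seq_operations roughly like:
 *
 *	static const struct seq_operations tracer_seq_ops = {
 *		.start	= s_start,
 *		.next	= s_next,
 *		.stop	= s_stop,
 *		.show	= s_show,
 *	};
 */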
3484 
3485 static void
3486 get_total_entries(struct trace_buffer *buf,
3487 		  unsigned long *total, unsigned long *entries)
3488 {
3489 	unsigned long count;
3490 	int cpu;
3491 
3492 	*total = 0;
3493 	*entries = 0;
3494 
3495 	for_each_tracing_cpu(cpu) {
3496 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3497 		/*
3498 		 * If this buffer has skipped entries, then we hold all
3499 		 * entries for the trace and we need to ignore the
3500 		 * ones before the time stamp.
3501 		 */
3502 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3503 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3504 			/* total is the same as the entries */
3505 			*total += count;
3506 		} else
3507 			*total += count +
3508 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3509 		*entries += count;
3510 	}
3511 }
3512 
3513 static void print_lat_help_header(struct seq_file *m)
3514 {
3515 	seq_puts(m, "#                  _------=> CPU#            \n"
3516 		    "#                 / _-----=> irqs-off        \n"
3517 		    "#                | / _----=> need-resched    \n"
3518 		    "#                || / _---=> hardirq/softirq \n"
3519 		    "#                ||| / _--=> preempt-depth   \n"
3520 		    "#                |||| /     delay            \n"
3521 		    "#  cmd     pid   ||||| time  |   caller      \n"
3522 		    "#     \\   /      |||||  \\    |   /         \n");
3523 }
3524 
3525 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3526 {
3527 	unsigned long total;
3528 	unsigned long entries;
3529 
3530 	get_total_entries(buf, &total, &entries);
3531 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3532 		   entries, total, num_online_cpus());
3533 	seq_puts(m, "#\n");
3534 }
3535 
3536 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3537 				   unsigned int flags)
3538 {
3539 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3540 
3541 	print_event_info(buf, m);
3542 
3543 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3544 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3545 }
3546 
3547 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3548 				       unsigned int flags)
3549 {
3550 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3551 	const char tgid_space[] = "          ";
3552 	const char space[] = "  ";
3553 
3554 	print_event_info(buf, m);
3555 
3556 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3557 		   tgid ? tgid_space : space);
3558 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3559 		   tgid ? tgid_space : space);
3560 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3561 		   tgid ? tgid_space : space);
3562 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3563 		   tgid ? tgid_space : space);
3564 	seq_printf(m, "#                          %s||| /     delay\n",
3565 		   tgid ? tgid_space : space);
3566 	seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3567 		   tgid ? "   TGID   " : space);
3568 	seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3569 		   tgid ? "     |    " : space);
3570 }
3571 
3572 void
3573 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3574 {
3575 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3576 	struct trace_buffer *buf = iter->trace_buffer;
3577 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3578 	struct tracer *type = iter->trace;
3579 	unsigned long entries;
3580 	unsigned long total;
3581 	const char *name = "preemption";
3582 
3583 	name = type->name;
3584 
3585 	get_total_entries(buf, &total, &entries);
3586 
3587 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3588 		   name, UTS_RELEASE);
3589 	seq_puts(m, "# -----------------------------------"
3590 		 "---------------------------------\n");
3591 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3592 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3593 		   nsecs_to_usecs(data->saved_latency),
3594 		   entries,
3595 		   total,
3596 		   buf->cpu,
3597 #if defined(CONFIG_PREEMPT_NONE)
3598 		   "server",
3599 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3600 		   "desktop",
3601 #elif defined(CONFIG_PREEMPT)
3602 		   "preempt",
3603 #else
3604 		   "unknown",
3605 #endif
3606 		   /* These are reserved for later use */
3607 		   0, 0, 0, 0);
3608 #ifdef CONFIG_SMP
3609 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3610 #else
3611 	seq_puts(m, ")\n");
3612 #endif
3613 	seq_puts(m, "#    -----------------\n");
3614 	seq_printf(m, "#    | task: %.16s-%d "
3615 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3616 		   data->comm, data->pid,
3617 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3618 		   data->policy, data->rt_priority);
3619 	seq_puts(m, "#    -----------------\n");
3620 
3621 	if (data->critical_start) {
3622 		seq_puts(m, "#  => started at: ");
3623 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3624 		trace_print_seq(m, &iter->seq);
3625 		seq_puts(m, "\n#  => ended at:   ");
3626 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3627 		trace_print_seq(m, &iter->seq);
3628 		seq_puts(m, "\n#\n");
3629 	}
3630 
3631 	seq_puts(m, "#\n");
3632 }
3633 
3634 static void test_cpu_buff_start(struct trace_iterator *iter)
3635 {
3636 	struct trace_seq *s = &iter->seq;
3637 	struct trace_array *tr = iter->tr;
3638 
3639 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3640 		return;
3641 
3642 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3643 		return;
3644 
3645 	if (cpumask_available(iter->started) &&
3646 	    cpumask_test_cpu(iter->cpu, iter->started))
3647 		return;
3648 
3649 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3650 		return;
3651 
3652 	if (cpumask_available(iter->started))
3653 		cpumask_set_cpu(iter->cpu, iter->started);
3654 
3655 	/* Don't print started cpu buffer for the first entry of the trace */
3656 	if (iter->idx > 1)
3657 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3658 				iter->cpu);
3659 }
3660 
3661 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3662 {
3663 	struct trace_array *tr = iter->tr;
3664 	struct trace_seq *s = &iter->seq;
3665 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3666 	struct trace_entry *entry;
3667 	struct trace_event *event;
3668 
3669 	entry = iter->ent;
3670 
3671 	test_cpu_buff_start(iter);
3672 
3673 	event = ftrace_find_event(entry->type);
3674 
3675 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3676 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3677 			trace_print_lat_context(iter);
3678 		else
3679 			trace_print_context(iter);
3680 	}
3681 
3682 	if (trace_seq_has_overflowed(s))
3683 		return TRACE_TYPE_PARTIAL_LINE;
3684 
3685 	if (event)
3686 		return event->funcs->trace(iter, sym_flags, event);
3687 
3688 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3689 
3690 	return trace_handle_return(s);
3691 }
3692 
3693 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3694 {
3695 	struct trace_array *tr = iter->tr;
3696 	struct trace_seq *s = &iter->seq;
3697 	struct trace_entry *entry;
3698 	struct trace_event *event;
3699 
3700 	entry = iter->ent;
3701 
3702 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3703 		trace_seq_printf(s, "%d %d %llu ",
3704 				 entry->pid, iter->cpu, iter->ts);
3705 
3706 	if (trace_seq_has_overflowed(s))
3707 		return TRACE_TYPE_PARTIAL_LINE;
3708 
3709 	event = ftrace_find_event(entry->type);
3710 	if (event)
3711 		return event->funcs->raw(iter, 0, event);
3712 
3713 	trace_seq_printf(s, "%d ?\n", entry->type);
3714 
3715 	return trace_handle_return(s);
3716 }
3717 
3718 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3719 {
3720 	struct trace_array *tr = iter->tr;
3721 	struct trace_seq *s = &iter->seq;
3722 	unsigned char newline = '\n';
3723 	struct trace_entry *entry;
3724 	struct trace_event *event;
3725 
3726 	entry = iter->ent;
3727 
3728 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3729 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3730 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3731 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3732 		if (trace_seq_has_overflowed(s))
3733 			return TRACE_TYPE_PARTIAL_LINE;
3734 	}
3735 
3736 	event = ftrace_find_event(entry->type);
3737 	if (event) {
3738 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3739 		if (ret != TRACE_TYPE_HANDLED)
3740 			return ret;
3741 	}
3742 
3743 	SEQ_PUT_FIELD(s, newline);
3744 
3745 	return trace_handle_return(s);
3746 }
3747 
3748 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3749 {
3750 	struct trace_array *tr = iter->tr;
3751 	struct trace_seq *s = &iter->seq;
3752 	struct trace_entry *entry;
3753 	struct trace_event *event;
3754 
3755 	entry = iter->ent;
3756 
3757 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3758 		SEQ_PUT_FIELD(s, entry->pid);
3759 		SEQ_PUT_FIELD(s, iter->cpu);
3760 		SEQ_PUT_FIELD(s, iter->ts);
3761 		if (trace_seq_has_overflowed(s))
3762 			return TRACE_TYPE_PARTIAL_LINE;
3763 	}
3764 
3765 	event = ftrace_find_event(entry->type);
3766 	return event ? event->funcs->binary(iter, 0, event) :
3767 		TRACE_TYPE_HANDLED;
3768 }
3769 
3770 int trace_empty(struct trace_iterator *iter)
3771 {
3772 	struct ring_buffer_iter *buf_iter;
3773 	int cpu;
3774 
3775 	/* If we are looking at one CPU buffer, only check that one */
3776 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3777 		cpu = iter->cpu_file;
3778 		buf_iter = trace_buffer_iter(iter, cpu);
3779 		if (buf_iter) {
3780 			if (!ring_buffer_iter_empty(buf_iter))
3781 				return 0;
3782 		} else {
3783 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3784 				return 0;
3785 		}
3786 		return 1;
3787 	}
3788 
3789 	for_each_tracing_cpu(cpu) {
3790 		buf_iter = trace_buffer_iter(iter, cpu);
3791 		if (buf_iter) {
3792 			if (!ring_buffer_iter_empty(buf_iter))
3793 				return 0;
3794 		} else {
3795 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3796 				return 0;
3797 		}
3798 	}
3799 
3800 	return 1;
3801 }
3802 
3803 /*  Called with trace_event_read_lock() held. */
3804 enum print_line_t print_trace_line(struct trace_iterator *iter)
3805 {
3806 	struct trace_array *tr = iter->tr;
3807 	unsigned long trace_flags = tr->trace_flags;
3808 	enum print_line_t ret;
3809 
3810 	if (iter->lost_events) {
3811 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3812 				 iter->cpu, iter->lost_events);
3813 		if (trace_seq_has_overflowed(&iter->seq))
3814 			return TRACE_TYPE_PARTIAL_LINE;
3815 	}
3816 
3817 	if (iter->trace && iter->trace->print_line) {
3818 		ret = iter->trace->print_line(iter);
3819 		if (ret != TRACE_TYPE_UNHANDLED)
3820 			return ret;
3821 	}
3822 
3823 	if (iter->ent->type == TRACE_BPUTS &&
3824 			trace_flags & TRACE_ITER_PRINTK &&
3825 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3826 		return trace_print_bputs_msg_only(iter);
3827 
3828 	if (iter->ent->type == TRACE_BPRINT &&
3829 			trace_flags & TRACE_ITER_PRINTK &&
3830 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3831 		return trace_print_bprintk_msg_only(iter);
3832 
3833 	if (iter->ent->type == TRACE_PRINT &&
3834 			trace_flags & TRACE_ITER_PRINTK &&
3835 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3836 		return trace_print_printk_msg_only(iter);
3837 
3838 	if (trace_flags & TRACE_ITER_BIN)
3839 		return print_bin_fmt(iter);
3840 
3841 	if (trace_flags & TRACE_ITER_HEX)
3842 		return print_hex_fmt(iter);
3843 
3844 	if (trace_flags & TRACE_ITER_RAW)
3845 		return print_raw_fmt(iter);
3846 
3847 	return print_trace_fmt(iter);
3848 }
3849 
3850 void trace_latency_header(struct seq_file *m)
3851 {
3852 	struct trace_iterator *iter = m->private;
3853 	struct trace_array *tr = iter->tr;
3854 
3855 	/* print nothing if the buffers are empty */
3856 	if (trace_empty(iter))
3857 		return;
3858 
3859 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3860 		print_trace_header(m, iter);
3861 
3862 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3863 		print_lat_help_header(m);
3864 }
3865 
3866 void trace_default_header(struct seq_file *m)
3867 {
3868 	struct trace_iterator *iter = m->private;
3869 	struct trace_array *tr = iter->tr;
3870 	unsigned long trace_flags = tr->trace_flags;
3871 
3872 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3873 		return;
3874 
3875 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3876 		/* print nothing if the buffers are empty */
3877 		if (trace_empty(iter))
3878 			return;
3879 		print_trace_header(m, iter);
3880 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3881 			print_lat_help_header(m);
3882 	} else {
3883 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3884 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3885 				print_func_help_header_irq(iter->trace_buffer,
3886 							   m, trace_flags);
3887 			else
3888 				print_func_help_header(iter->trace_buffer, m,
3889 						       trace_flags);
3890 		}
3891 	}
3892 }
3893 
3894 static void test_ftrace_alive(struct seq_file *m)
3895 {
3896 	if (!ftrace_is_dead())
3897 		return;
3898 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3899 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3900 }
3901 
3902 #ifdef CONFIG_TRACER_MAX_TRACE
3903 static void show_snapshot_main_help(struct seq_file *m)
3904 {
3905 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3906 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3907 		    "#                      Takes a snapshot of the main buffer.\n"
3908 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3909 		    "#                      (Doesn't have to be '2', works with any number that\n"
3910 		    "#                       is not a '0' or '1')\n");
3911 }
3912 
3913 static void show_snapshot_percpu_help(struct seq_file *m)
3914 {
3915 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3916 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3917 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3918 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3919 #else
3920 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3921 		    "#                     Must use main snapshot file to allocate.\n");
3922 #endif
3923 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3924 		    "#                      (Doesn't have to be '2', works with any number that\n"
3925 		    "#                       is not a '0' or '1')\n");
3926 }
3927 
3928 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3929 {
3930 	if (iter->tr->allocated_snapshot)
3931 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3932 	else
3933 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3934 
3935 	seq_puts(m, "# Snapshot commands:\n");
3936 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3937 		show_snapshot_main_help(m);
3938 	else
3939 		show_snapshot_percpu_help(m);
3940 }
3941 #else
3942 /* Should never be called */
3943 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3944 #endif
3945 
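/*
 * seq_file ->show() callback for the "trace" file. With no current
 * entry it prints the header block (tracer name, warnings, snapshot
 * help or the default column headers). Otherwise it formats the next
 * entry, or re-emits a line that previously overflowed the seq_file
 * buffer (tracked via iter->leftover).
 */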
3946 static int s_show(struct seq_file *m, void *v)
3947 {
3948 	struct trace_iterator *iter = v;
3949 	int ret;
3950 
3951 	if (iter->ent == NULL) {
3952 		if (iter->tr) {
3953 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3954 			seq_puts(m, "#\n");
3955 			test_ftrace_alive(m);
3956 		}
3957 		if (iter->snapshot && trace_empty(iter))
3958 			print_snapshot_help(m, iter);
3959 		else if (iter->trace && iter->trace->print_header)
3960 			iter->trace->print_header(m);
3961 		else
3962 			trace_default_header(m);
3963 
3964 	} else if (iter->leftover) {
3965 		/*
3966 		 * If we filled the seq_file buffer earlier, we
3967 		 * want to just show it now.
3968 		 */
3969 		ret = trace_print_seq(m, &iter->seq);
3970 
3971 		/* ret should this time be zero, but you never know */
3972 		iter->leftover = ret;
3973 
3974 	} else {
3975 		print_trace_line(iter);
3976 		ret = trace_print_seq(m, &iter->seq);
3977 		/*
3978 		 * If we overflow the seq_file buffer, then it will
3979 		 * ask us for this data again at start up.
3980 		 * Use that instead.
3981 		 *  ret is 0 if seq_file write succeeded.
3982 		 *        -1 otherwise.
3983 		 */
3984 		iter->leftover = ret;
3985 	}
3986 
3987 	return 0;
3988 }
3989 
3990 /*
3991  * Should be used after trace_array_get(), trace_types_lock
3992  * ensures that i_cdev was already initialized.
3993  */
3994 static inline int tracing_get_cpu(struct inode *inode)
3995 {
3996 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3997 		return (long)inode->i_cdev - 1;
3998 	return RING_BUFFER_ALL_CPUS;
3999 }
4000 
4001 static const struct seq_operations tracer_seq_ops = {
4002 	.start		= s_start,
4003 	.next		= s_next,
4004 	.stop		= s_stop,
4005 	.show		= s_show,
4006 };
4007 
4008 static struct trace_iterator *
4009 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4010 {
4011 	struct trace_array *tr = inode->i_private;
4012 	struct trace_iterator *iter;
4013 	int cpu;
4014 
4015 	if (tracing_disabled)
4016 		return ERR_PTR(-ENODEV);
4017 
4018 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4019 	if (!iter)
4020 		return ERR_PTR(-ENOMEM);
4021 
4022 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4023 				    GFP_KERNEL);
4024 	if (!iter->buffer_iter)
4025 		goto release;
4026 
4027 	/*
4028 	 * We make a copy of the current tracer to avoid concurrent
4029 	 * changes on it while we are reading.
4030 	 */
4031 	mutex_lock(&trace_types_lock);
4032 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4033 	if (!iter->trace)
4034 		goto fail;
4035 
4036 	*iter->trace = *tr->current_trace;
4037 
4038 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4039 		goto fail;
4040 
4041 	iter->tr = tr;
4042 
4043 #ifdef CONFIG_TRACER_MAX_TRACE
4044 	/* Currently only the top directory has a snapshot */
4045 	if (tr->current_trace->print_max || snapshot)
4046 		iter->trace_buffer = &tr->max_buffer;
4047 	else
4048 #endif
4049 		iter->trace_buffer = &tr->trace_buffer;
4050 	iter->snapshot = snapshot;
4051 	iter->pos = -1;
4052 	iter->cpu_file = tracing_get_cpu(inode);
4053 	mutex_init(&iter->mutex);
4054 
4055 	/* Notify the tracer early, before we stop tracing. */
4056 	if (iter->trace && iter->trace->open)
4057 		iter->trace->open(iter);
4058 
4059 	/* Annotate start of buffers if we had overruns */
4060 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
4061 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4062 
4063 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4064 	if (trace_clocks[tr->clock_id].in_ns)
4065 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4066 
4067 	/* stop the trace while dumping if we are not opening "snapshot" */
4068 	if (!iter->snapshot)
4069 		tracing_stop_tr(tr);
4070 
4071 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4072 		for_each_tracing_cpu(cpu) {
4073 			iter->buffer_iter[cpu] =
4074 				ring_buffer_read_prepare(iter->trace_buffer->buffer,
4075 							 cpu, GFP_KERNEL);
4076 		}
4077 		ring_buffer_read_prepare_sync();
4078 		for_each_tracing_cpu(cpu) {
4079 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4080 			tracing_iter_reset(iter, cpu);
4081 		}
4082 	} else {
4083 		cpu = iter->cpu_file;
4084 		iter->buffer_iter[cpu] =
4085 			ring_buffer_read_prepare(iter->trace_buffer->buffer,
4086 						 cpu, GFP_KERNEL);
4087 		ring_buffer_read_prepare_sync();
4088 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4089 		tracing_iter_reset(iter, cpu);
4090 	}
4091 
4092 	mutex_unlock(&trace_types_lock);
4093 
4094 	return iter;
4095 
4096  fail:
4097 	mutex_unlock(&trace_types_lock);
4098 	kfree(iter->trace);
4099 	kfree(iter->buffer_iter);
4100 release:
4101 	seq_release_private(inode, file);
4102 	return ERR_PTR(-ENOMEM);
4103 }
4104 
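/*
 * Simple open helper for files that only need inode->i_private stashed
 * in file->private_data. Fails with -ENODEV once tracing is disabled.
 */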
4105 int tracing_open_generic(struct inode *inode, struct file *filp)
4106 {
4107 	if (tracing_disabled)
4108 		return -ENODEV;
4109 
4110 	filp->private_data = inode->i_private;
4111 	return 0;
4112 }
4113 
4114 bool tracing_is_disabled(void)
4115 {
4116 	return tracing_disabled ? true : false;
4117 }
4118 
4119 /*
4120  * Open and update trace_array ref count.
4121  * Must have the current trace_array passed to it.
4122  */
4123 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4124 {
4125 	struct trace_array *tr = inode->i_private;
4126 
4127 	if (tracing_disabled)
4128 		return -ENODEV;
4129 
4130 	if (trace_array_get(tr) < 0)
4131 		return -ENODEV;
4132 
4133 	filp->private_data = inode->i_private;
4134 
4135 	return 0;
4136 }
4137 
4138 static int tracing_release(struct inode *inode, struct file *file)
4139 {
4140 	struct trace_array *tr = inode->i_private;
4141 	struct seq_file *m = file->private_data;
4142 	struct trace_iterator *iter;
4143 	int cpu;
4144 
4145 	if (!(file->f_mode & FMODE_READ)) {
4146 		trace_array_put(tr);
4147 		return 0;
4148 	}
4149 
4150 	/* Writes do not use seq_file */
4151 	iter = m->private;
4152 	mutex_lock(&trace_types_lock);
4153 
4154 	for_each_tracing_cpu(cpu) {
4155 		if (iter->buffer_iter[cpu])
4156 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4157 	}
4158 
4159 	if (iter->trace && iter->trace->close)
4160 		iter->trace->close(iter);
4161 
4162 	if (!iter->snapshot)
4163 		/* reenable tracing if it was previously enabled */
4164 		tracing_start_tr(tr);
4165 
4166 	__trace_array_put(tr);
4167 
4168 	mutex_unlock(&trace_types_lock);
4169 
4170 	mutex_destroy(&iter->mutex);
4171 	free_cpumask_var(iter->started);
4172 	kfree(iter->trace);
4173 	kfree(iter->buffer_iter);
4174 	seq_release_private(inode, file);
4175 
4176 	return 0;
4177 }
4178 
4179 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4180 {
4181 	struct trace_array *tr = inode->i_private;
4182 
4183 	trace_array_put(tr);
4184 	return 0;
4185 }
4186 
4187 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4188 {
4189 	struct trace_array *tr = inode->i_private;
4190 
4191 	trace_array_put(tr);
4192 
4193 	return single_release(inode, file);
4194 }
4195 
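/*
 * Open the "trace" file. Opening with O_TRUNC for writing clears the
 * buffer (all CPUs or just the CPU of a per-cpu file); opening for
 * reading builds a full iterator via __tracing_open().
 */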
4196 static int tracing_open(struct inode *inode, struct file *file)
4197 {
4198 	struct trace_array *tr = inode->i_private;
4199 	struct trace_iterator *iter;
4200 	int ret = 0;
4201 
4202 	if (trace_array_get(tr) < 0)
4203 		return -ENODEV;
4204 
4205 	/* If this file was open for write, then erase contents */
4206 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4207 		int cpu = tracing_get_cpu(inode);
4208 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4209 
4210 #ifdef CONFIG_TRACER_MAX_TRACE
4211 		if (tr->current_trace->print_max)
4212 			trace_buf = &tr->max_buffer;
4213 #endif
4214 
4215 		if (cpu == RING_BUFFER_ALL_CPUS)
4216 			tracing_reset_online_cpus(trace_buf);
4217 		else
4218 			tracing_reset(trace_buf, cpu);
4219 	}
4220 
4221 	if (file->f_mode & FMODE_READ) {
4222 		iter = __tracing_open(inode, file, false);
4223 		if (IS_ERR(iter))
4224 			ret = PTR_ERR(iter);
4225 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4226 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4227 	}
4228 
4229 	if (ret < 0)
4230 		trace_array_put(tr);
4231 
4232 	return ret;
4233 }
4234 
4235 /*
4236  * Some tracers are not suitable for instance buffers.
4237  * A tracer is always available for the global array (toplevel)
4238  * or if it explicitly states that it is.
4239  */
4240 static bool
4241 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4242 {
4243 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4244 }
4245 
4246 /* Find the next tracer that this trace array may use */
4247 static struct tracer *
4248 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4249 {
4250 	while (t && !trace_ok_for_array(t, tr))
4251 		t = t->next;
4252 
4253 	return t;
4254 }
4255 
4256 static void *
4257 t_next(struct seq_file *m, void *v, loff_t *pos)
4258 {
4259 	struct trace_array *tr = m->private;
4260 	struct tracer *t = v;
4261 
4262 	(*pos)++;
4263 
4264 	if (t)
4265 		t = get_tracer_for_array(tr, t->next);
4266 
4267 	return t;
4268 }
4269 
4270 static void *t_start(struct seq_file *m, loff_t *pos)
4271 {
4272 	struct trace_array *tr = m->private;
4273 	struct tracer *t;
4274 	loff_t l = 0;
4275 
4276 	mutex_lock(&trace_types_lock);
4277 
4278 	t = get_tracer_for_array(tr, trace_types);
4279 	for (; t && l < *pos; t = t_next(m, t, &l))
4280 			;
4281 
4282 	return t;
4283 }
4284 
4285 static void t_stop(struct seq_file *m, void *p)
4286 {
4287 	mutex_unlock(&trace_types_lock);
4288 }
4289 
4290 static int t_show(struct seq_file *m, void *v)
4291 {
4292 	struct tracer *t = v;
4293 
4294 	if (!t)
4295 		return 0;
4296 
4297 	seq_puts(m, t->name);
4298 	if (t->next)
4299 		seq_putc(m, ' ');
4300 	else
4301 		seq_putc(m, '\n');
4302 
4303 	return 0;
4304 }
4305 
4306 static const struct seq_operations show_traces_seq_ops = {
4307 	.start		= t_start,
4308 	.next		= t_next,
4309 	.stop		= t_stop,
4310 	.show		= t_show,
4311 };
4312 
4313 static int show_traces_open(struct inode *inode, struct file *file)
4314 {
4315 	struct trace_array *tr = inode->i_private;
4316 	struct seq_file *m;
4317 	int ret;
4318 
4319 	if (tracing_disabled)
4320 		return -ENODEV;
4321 
4322 	ret = seq_open(file, &show_traces_seq_ops);
4323 	if (ret)
4324 		return ret;
4325 
4326 	m = file->private_data;
4327 	m->private = tr;
4328 
4329 	return 0;
4330 }
4331 
4332 static ssize_t
4333 tracing_write_stub(struct file *filp, const char __user *ubuf,
4334 		   size_t count, loff_t *ppos)
4335 {
4336 	return count;
4337 }
4338 
4339 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4340 {
4341 	int ret;
4342 
4343 	if (file->f_mode & FMODE_READ)
4344 		ret = seq_lseek(file, offset, whence);
4345 	else
4346 		file->f_pos = ret = 0;
4347 
4348 	return ret;
4349 }
4350 
4351 static const struct file_operations tracing_fops = {
4352 	.open		= tracing_open,
4353 	.read		= seq_read,
4354 	.write		= tracing_write_stub,
4355 	.llseek		= tracing_lseek,
4356 	.release	= tracing_release,
4357 };
4358 
4359 static const struct file_operations show_traces_fops = {
4360 	.open		= show_traces_open,
4361 	.read		= seq_read,
4362 	.release	= seq_release,
4363 	.llseek		= seq_lseek,
4364 };
4365 
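/*
 * Read handler for "tracing_cpumask": formats the current cpumask of
 * the trace array into a temporary buffer and copies it to user space.
 */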
4366 static ssize_t
4367 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4368 		     size_t count, loff_t *ppos)
4369 {
4370 	struct trace_array *tr = file_inode(filp)->i_private;
4371 	char *mask_str;
4372 	int len;
4373 
4374 	len = snprintf(NULL, 0, "%*pb\n",
4375 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4376 	mask_str = kmalloc(len, GFP_KERNEL);
4377 	if (!mask_str)
4378 		return -ENOMEM;
4379 
4380 	len = snprintf(mask_str, len, "%*pb\n",
4381 		       cpumask_pr_args(tr->tracing_cpumask));
4382 	if (len >= count) {
4383 		count = -EINVAL;
4384 		goto out_err;
4385 	}
4386 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4387 
4388 out_err:
4389 	kfree(mask_str);
4390 
4391 	return count;
4392 }
4393 
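/*
 * Write handler for "tracing_cpumask": parses the new mask and, under
 * max_lock with interrupts disabled, stops recording on CPUs being
 * removed and restarts it on CPUs being added before committing the
 * new mask.
 */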
4394 static ssize_t
4395 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4396 		      size_t count, loff_t *ppos)
4397 {
4398 	struct trace_array *tr = file_inode(filp)->i_private;
4399 	cpumask_var_t tracing_cpumask_new;
4400 	int err, cpu;
4401 
4402 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4403 		return -ENOMEM;
4404 
4405 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4406 	if (err)
4407 		goto err_unlock;
4408 
4409 	local_irq_disable();
4410 	arch_spin_lock(&tr->max_lock);
4411 	for_each_tracing_cpu(cpu) {
4412 		/*
4413 		 * Increase/decrease the disabled counter if we are
4414 		 * about to flip a bit in the cpumask:
4415 		 */
4416 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4417 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4418 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4419 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4420 		}
4421 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4422 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4423 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4424 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4425 		}
4426 	}
4427 	arch_spin_unlock(&tr->max_lock);
4428 	local_irq_enable();
4429 
4430 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4431 	free_cpumask_var(tracing_cpumask_new);
4432 
4433 	return count;
4434 
4435 err_unlock:
4436 	free_cpumask_var(tracing_cpumask_new);
4437 
4438 	return err;
4439 }
4440 
4441 static const struct file_operations tracing_cpumask_fops = {
4442 	.open		= tracing_open_generic_tr,
4443 	.read		= tracing_cpumask_read,
4444 	.write		= tracing_cpumask_write,
4445 	.release	= tracing_release_generic_tr,
4446 	.llseek		= generic_file_llseek,
4447 };
4448 
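/*
 * seq_file ->show() for "trace_options": lists every core option and
 * every option of the current tracer, prefixing cleared ones with "no".
 */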
4449 static int tracing_trace_options_show(struct seq_file *m, void *v)
4450 {
4451 	struct tracer_opt *trace_opts;
4452 	struct trace_array *tr = m->private;
4453 	u32 tracer_flags;
4454 	int i;
4455 
4456 	mutex_lock(&trace_types_lock);
4457 	tracer_flags = tr->current_trace->flags->val;
4458 	trace_opts = tr->current_trace->flags->opts;
4459 
4460 	for (i = 0; trace_options[i]; i++) {
4461 		if (tr->trace_flags & (1 << i))
4462 			seq_printf(m, "%s\n", trace_options[i]);
4463 		else
4464 			seq_printf(m, "no%s\n", trace_options[i]);
4465 	}
4466 
4467 	for (i = 0; trace_opts[i].name; i++) {
4468 		if (tracer_flags & trace_opts[i].bit)
4469 			seq_printf(m, "%s\n", trace_opts[i].name);
4470 		else
4471 			seq_printf(m, "no%s\n", trace_opts[i].name);
4472 	}
4473 	mutex_unlock(&trace_types_lock);
4474 
4475 	return 0;
4476 }
4477 
4478 static int __set_tracer_option(struct trace_array *tr,
4479 			       struct tracer_flags *tracer_flags,
4480 			       struct tracer_opt *opts, int neg)
4481 {
4482 	struct tracer *trace = tracer_flags->trace;
4483 	int ret;
4484 
4485 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4486 	if (ret)
4487 		return ret;
4488 
4489 	if (neg)
4490 		tracer_flags->val &= ~opts->bit;
4491 	else
4492 		tracer_flags->val |= opts->bit;
4493 	return 0;
4494 }
4495 
4496 /* Try to assign a tracer specific option */
4497 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4498 {
4499 	struct tracer *trace = tr->current_trace;
4500 	struct tracer_flags *tracer_flags = trace->flags;
4501 	struct tracer_opt *opts = NULL;
4502 	int i;
4503 
4504 	for (i = 0; tracer_flags->opts[i].name; i++) {
4505 		opts = &tracer_flags->opts[i];
4506 
4507 		if (strcmp(cmp, opts->name) == 0)
4508 			return __set_tracer_option(tr, trace->flags, opts, neg);
4509 	}
4510 
4511 	return -EINVAL;
4512 }
4513 
4514 /* Some tracers require overwrite to stay enabled */
4515 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4516 {
4517 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4518 		return -1;
4519 
4520 	return 0;
4521 }
4522 
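/*
 * Set or clear a TRACE_ITER_* flag on @tr. The current tracer may veto
 * the change via ->flag_changed(); flags with side effects (cmdline and
 * tgid recording, fork following, overwrite mode, printk) also update
 * the relevant subsystems here.
 */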
4523 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4524 {
4525 	/* do nothing if flag is already set */
4526 	if (!!(tr->trace_flags & mask) == !!enabled)
4527 		return 0;
4528 
4529 	/* Give the tracer a chance to approve the change */
4530 	if (tr->current_trace->flag_changed)
4531 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4532 			return -EINVAL;
4533 
4534 	if (enabled)
4535 		tr->trace_flags |= mask;
4536 	else
4537 		tr->trace_flags &= ~mask;
4538 
4539 	if (mask == TRACE_ITER_RECORD_CMD)
4540 		trace_event_enable_cmd_record(enabled);
4541 
4542 	if (mask == TRACE_ITER_RECORD_TGID) {
4543 		if (!tgid_map)
4544 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4545 					   sizeof(*tgid_map),
4546 					   GFP_KERNEL);
4547 		if (!tgid_map) {
4548 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4549 			return -ENOMEM;
4550 		}
4551 
4552 		trace_event_enable_tgid_record(enabled);
4553 	}
4554 
4555 	if (mask == TRACE_ITER_EVENT_FORK)
4556 		trace_event_follow_fork(tr, enabled);
4557 
4558 	if (mask == TRACE_ITER_FUNC_FORK)
4559 		ftrace_pid_follow_fork(tr, enabled);
4560 
4561 	if (mask == TRACE_ITER_OVERWRITE) {
4562 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4563 #ifdef CONFIG_TRACER_MAX_TRACE
4564 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4565 #endif
4566 	}
4567 
4568 	if (mask == TRACE_ITER_PRINTK) {
4569 		trace_printk_start_stop_comm(enabled);
4570 		trace_printk_control(enabled);
4571 	}
4572 
4573 	return 0;
4574 }
4575 
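/*
 * Apply a single option token from user space or the boot command
 * line. A leading "no" clears the option (e.g. "print-parent" sets it,
 * "noprint-parent" clears it). Core options are tried first, then the
 * options of the current tracer.
 */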
4576 static int trace_set_options(struct trace_array *tr, char *option)
4577 {
4578 	char *cmp;
4579 	int neg = 0;
4580 	int ret;
4581 	size_t orig_len = strlen(option);
4582 	int len;
4583 
4584 	cmp = strstrip(option);
4585 
4586 	len = str_has_prefix(cmp, "no");
4587 	if (len)
4588 		neg = 1;
4589 
4590 	cmp += len;
4591 
4592 	mutex_lock(&trace_types_lock);
4593 
4594 	ret = match_string(trace_options, -1, cmp);
4595 	/* If no option could be set, test the specific tracer options */
4596 	if (ret < 0)
4597 		ret = set_tracer_option(tr, cmp, neg);
4598 	else
4599 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4600 
4601 	mutex_unlock(&trace_types_lock);
4602 
4603 	/*
4604 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4605 	 * turn it back into a space.
4606 	 */
4607 	if (orig_len > strlen(option))
4608 		option[strlen(option)] = ' ';
4609 
4610 	return ret;
4611 }
4612 
4613 static void __init apply_trace_boot_options(void)
4614 {
4615 	char *buf = trace_boot_options_buf;
4616 	char *option;
4617 
4618 	while (true) {
4619 		option = strsep(&buf, ",");
4620 
4621 		if (!option)
4622 			break;
4623 
4624 		if (*option)
4625 			trace_set_options(&global_trace, option);
4626 
4627 		/* Put back the comma to allow this to be called again */
4628 		if (buf)
4629 			*(buf - 1) = ',';
4630 	}
4631 }
4632 
4633 static ssize_t
4634 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4635 			size_t cnt, loff_t *ppos)
4636 {
4637 	struct seq_file *m = filp->private_data;
4638 	struct trace_array *tr = m->private;
4639 	char buf[64];
4640 	int ret;
4641 
4642 	if (cnt >= sizeof(buf))
4643 		return -EINVAL;
4644 
4645 	if (copy_from_user(buf, ubuf, cnt))
4646 		return -EFAULT;
4647 
4648 	buf[cnt] = 0;
4649 
4650 	ret = trace_set_options(tr, buf);
4651 	if (ret < 0)
4652 		return ret;
4653 
4654 	*ppos += cnt;
4655 
4656 	return cnt;
4657 }
4658 
4659 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4660 {
4661 	struct trace_array *tr = inode->i_private;
4662 	int ret;
4663 
4664 	if (tracing_disabled)
4665 		return -ENODEV;
4666 
4667 	if (trace_array_get(tr) < 0)
4668 		return -ENODEV;
4669 
4670 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4671 	if (ret < 0)
4672 		trace_array_put(tr);
4673 
4674 	return ret;
4675 }
4676 
4677 static const struct file_operations tracing_iter_fops = {
4678 	.open		= tracing_trace_options_open,
4679 	.read		= seq_read,
4680 	.llseek		= seq_lseek,
4681 	.release	= tracing_single_release_tr,
4682 	.write		= tracing_trace_options_write,
4683 };
4684 
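/*
 * Help text returned by tracing_readme_read() below; this is what is
 * shown when the tracefs "README" file is read.
 */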
4685 static const char readme_msg[] =
4686 	"tracing mini-HOWTO:\n\n"
4687 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4688 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4689 	" Important files:\n"
4690 	"  trace\t\t\t- The static contents of the buffer\n"
4691 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4692 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4693 	"  current_tracer\t- function and latency tracers\n"
4694 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4695 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4696 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4697 	"  trace_clock\t\t- change the clock used to order events\n"
4698 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4699 	"      global:   Synced across CPUs but slows tracing down.\n"
4700 	"     counter:   Not a clock, but just an increment\n"
4701 	"      uptime:   Jiffy counter from time of boot\n"
4702 	"        perf:   Same clock that perf events use\n"
4703 #ifdef CONFIG_X86_64
4704 	"     x86-tsc:   TSC cycle counter\n"
4705 #endif
4706 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4707 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4708 	"    absolute:   Absolute (standalone) timestamp\n"
4709 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4710 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4711 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4712 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4713 	"\t\t\t  Remove sub-buffer with rmdir\n"
4714 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4715 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4716 	"\t\t\t  option name\n"
4717 	"  saved_cmdlines_size\t- echo the number of commands to store in the comm-pid list\n"
4718 #ifdef CONFIG_DYNAMIC_FTRACE
4719 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4720 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4721 	"\t\t\t  functions\n"
4722 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4723 	"\t     modules: Can select a group via module\n"
4724 	"\t      Format: :mod:<module-name>\n"
4725 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4726 	"\t    triggers: a command to perform when function is hit\n"
4727 	"\t      Format: <function>:<trigger>[:count]\n"
4728 	"\t     trigger: traceon, traceoff\n"
4729 	"\t\t      enable_event:<system>:<event>\n"
4730 	"\t\t      disable_event:<system>:<event>\n"
4731 #ifdef CONFIG_STACKTRACE
4732 	"\t\t      stacktrace\n"
4733 #endif
4734 #ifdef CONFIG_TRACER_SNAPSHOT
4735 	"\t\t      snapshot\n"
4736 #endif
4737 	"\t\t      dump\n"
4738 	"\t\t      cpudump\n"
4739 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4740 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4741 	"\t     The first one will disable tracing every time do_fault is hit\n"
4742 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4743 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4744 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4745 	"\t       the counter will not decrement. It only decrements when the\n"
4746 	"\t       trigger did work\n"
4747 	"\t     To remove a trigger without a count:\n"
4748 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4749 	"\t     To remove a trigger with a count:\n"
4750 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4751 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4752 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4753 	"\t    modules: Can select a group via module command :mod:\n"
4754 	"\t    Does not accept triggers\n"
4755 #endif /* CONFIG_DYNAMIC_FTRACE */
4756 #ifdef CONFIG_FUNCTION_TRACER
4757 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4758 	"\t\t    (function)\n"
4759 #endif
4760 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4761 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4762 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4763 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4764 #endif
4765 #ifdef CONFIG_TRACER_SNAPSHOT
4766 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4767 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4768 	"\t\t\t  information\n"
4769 #endif
4770 #ifdef CONFIG_STACK_TRACER
4771 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4772 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4773 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4774 	"\t\t\t  new trace)\n"
4775 #ifdef CONFIG_DYNAMIC_FTRACE
4776 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4777 	"\t\t\t  traces\n"
4778 #endif
4779 #endif /* CONFIG_STACK_TRACER */
4780 #ifdef CONFIG_DYNAMIC_EVENTS
4781 	"  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4782 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4783 #endif
4784 #ifdef CONFIG_KPROBE_EVENTS
4785 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4786 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4787 #endif
4788 #ifdef CONFIG_UPROBE_EVENTS
4789 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4790 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4791 #endif
4792 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4793 	"\t  accepts: event-definitions (one definition per line)\n"
4794 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4795 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4796 #ifdef CONFIG_HIST_TRIGGERS
4797 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4798 #endif
4799 	"\t           -:[<group>/]<event>\n"
4800 #ifdef CONFIG_KPROBE_EVENTS
4801 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4802 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4803 #endif
4804 #ifdef CONFIG_UPROBE_EVENTS
4805 	"\t    place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4806 #endif
4807 	"\t     args: <name>=fetcharg[:type]\n"
4808 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4809 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4810 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4811 #else
4812 	"\t           $stack<index>, $stack, $retval, $comm\n"
4813 #endif
4814 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4815 	"\t           b<bit-width>@<bit-offset>/<container-size>,\n"
4816 	"\t           <type>\\[<array-size>\\]\n"
4817 #ifdef CONFIG_HIST_TRIGGERS
4818 	"\t    field: <stype> <name>;\n"
4819 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4820 	"\t           [unsigned] char/int/long\n"
4821 #endif
4822 #endif
4823 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4824 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4825 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4826 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4827 	"\t\t\t  events\n"
4828 	"      filter\t\t- If set, only events passing filter are traced\n"
4829 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4830 	"\t\t\t  <event>:\n"
4831 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4832 	"      filter\t\t- If set, only events passing filter are traced\n"
4833 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4834 	"\t    Format: <trigger>[:count][if <filter>]\n"
4835 	"\t   trigger: traceon, traceoff\n"
4836 	"\t            enable_event:<system>:<event>\n"
4837 	"\t            disable_event:<system>:<event>\n"
4838 #ifdef CONFIG_HIST_TRIGGERS
4839 	"\t            enable_hist:<system>:<event>\n"
4840 	"\t            disable_hist:<system>:<event>\n"
4841 #endif
4842 #ifdef CONFIG_STACKTRACE
4843 	"\t\t    stacktrace\n"
4844 #endif
4845 #ifdef CONFIG_TRACER_SNAPSHOT
4846 	"\t\t    snapshot\n"
4847 #endif
4848 #ifdef CONFIG_HIST_TRIGGERS
4849 	"\t\t    hist (see below)\n"
4850 #endif
4851 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4852 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4853 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4854 	"\t                  events/block/block_unplug/trigger\n"
4855 	"\t   The first disables tracing every time block_unplug is hit.\n"
4856 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4857 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4858 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4859 	"\t   Like function triggers, the counter is only decremented if it\n"
4860 	"\t    enabled or disabled tracing.\n"
4861 	"\t   To remove a trigger without a count:\n"
4862 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4863 	"\t   To remove a trigger with a count:\n"
4864 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4865 	"\t   Filters can be ignored when removing a trigger.\n"
4866 #ifdef CONFIG_HIST_TRIGGERS
4867 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4868 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4869 	"\t            [:values=<field1[,field2,...]>]\n"
4870 	"\t            [:sort=<field1[,field2,...]>]\n"
4871 	"\t            [:size=#entries]\n"
4872 	"\t            [:pause][:continue][:clear]\n"
4873 	"\t            [:name=histname1]\n"
4874 	"\t            [:<handler>.<action>]\n"
4875 	"\t            [if <filter>]\n\n"
4876 	"\t    When a matching event is hit, an entry is added to a hash\n"
4877 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4878 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4879 	"\t    correspond to fields in the event's format description.  Keys\n"
4880 	"\t    can be any field, or the special string 'stacktrace'.\n"
4881 	"\t    Compound keys consisting of up to two fields can be specified\n"
4882 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4883 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4884 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4885 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4886 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4887 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4888 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4889 	"\t    its histogram data will be shared with other triggers of the\n"
4890 	"\t    same name, and trigger hits will update this common data.\n\n"
4891 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4892 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4893 	"\t    triggers attached to an event, there will be a table for each\n"
4894 	"\t    trigger in the output.  The table displayed for a named\n"
4895 	"\t    trigger will be the same as any other instance having the\n"
4896 	"\t    same name.  The default format used to display a given field\n"
4897 	"\t    can be modified by appending any of the following modifiers\n"
4898 	"\t    to the field name, as applicable:\n\n"
4899 	"\t            .hex        display a number as a hex value\n"
4900 	"\t            .sym        display an address as a symbol\n"
4901 	"\t            .sym-offset display an address as a symbol and offset\n"
4902 	"\t            .execname   display a common_pid as a program name\n"
4903 	"\t            .syscall    display a syscall id as a syscall name\n"
4904 	"\t            .log2       display log2 value rather than raw number\n"
4905 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4906 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4907 	"\t    trigger or to start a hist trigger but not log any events\n"
4908 	"\t    until told to do so.  'continue' can be used to start or\n"
4909 	"\t    restart a paused hist trigger.\n\n"
4910 	"\t    The 'clear' parameter will clear the contents of a running\n"
4911 	"\t    hist trigger and leave its current paused/active state\n"
4912 	"\t    unchanged.\n\n"
4913 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4914 	"\t    have one event conditionally start and stop another event's\n"
4915 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4916 	"\t    the enable_event and disable_event triggers.\n\n"
4917 	"\t    Hist trigger handlers and actions are executed whenever a\n"
4918 	"\t    histogram entry is added or updated.  They take the form:\n\n"
4919 	"\t        <handler>.<action>\n\n"
4920 	"\t    The available handlers are:\n\n"
4921 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
4922 	"\t        onmax(var)               - invoke if var exceeds current max\n"
4923 	"\t        onchange(var)            - invoke action if var changes\n\n"
4924 	"\t    The available actions are:\n\n"
4925 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4926 	"\t        save(field,...)                      - save current event fields\n"
4927 #ifdef CONFIG_TRACER_SNAPSHOT
4928 	"\t        snapshot()                           - snapshot the trace buffer\n"
4929 #endif
4930 #endif
4931 ;
4932 
4933 static ssize_t
4934 tracing_readme_read(struct file *filp, char __user *ubuf,
4935 		       size_t cnt, loff_t *ppos)
4936 {
4937 	return simple_read_from_buffer(ubuf, cnt, ppos,
4938 					readme_msg, strlen(readme_msg));
4939 }
4940 
4941 static const struct file_operations tracing_readme_fops = {
4942 	.open		= tracing_open_generic,
4943 	.read		= tracing_readme_read,
4944 	.llseek		= generic_file_llseek,
4945 };
4946 
4947 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4948 {
4949 	int *ptr = v;
4950 
4951 	if (*pos || m->count)
4952 		ptr++;
4953 
4954 	(*pos)++;
4955 
4956 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4957 		if (trace_find_tgid(*ptr))
4958 			return ptr;
4959 	}
4960 
4961 	return NULL;
4962 }
4963 
4964 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4965 {
4966 	void *v;
4967 	loff_t l = 0;
4968 
4969 	if (!tgid_map)
4970 		return NULL;
4971 
4972 	v = &tgid_map[0];
4973 	while (l <= *pos) {
4974 		v = saved_tgids_next(m, v, &l);
4975 		if (!v)
4976 			return NULL;
4977 	}
4978 
4979 	return v;
4980 }
4981 
4982 static void saved_tgids_stop(struct seq_file *m, void *v)
4983 {
4984 }
4985 
4986 static int saved_tgids_show(struct seq_file *m, void *v)
4987 {
4988 	int pid = (int *)v - tgid_map;
4989 
4990 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4991 	return 0;
4992 }
4993 
4994 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4995 	.start		= saved_tgids_start,
4996 	.stop		= saved_tgids_stop,
4997 	.next		= saved_tgids_next,
4998 	.show		= saved_tgids_show,
4999 };
5000 
5001 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5002 {
5003 	if (tracing_disabled)
5004 		return -ENODEV;
5005 
5006 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5007 }
5008 
5009 
5010 static const struct file_operations tracing_saved_tgids_fops = {
5011 	.open		= tracing_saved_tgids_open,
5012 	.read		= seq_read,
5013 	.llseek		= seq_lseek,
5014 	.release	= seq_release,
5015 };
5016 
5017 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5018 {
5019 	unsigned int *ptr = v;
5020 
5021 	if (*pos || m->count)
5022 		ptr++;
5023 
5024 	(*pos)++;
5025 
5026 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5027 	     ptr++) {
5028 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5029 			continue;
5030 
5031 		return ptr;
5032 	}
5033 
5034 	return NULL;
5035 }
5036 
5037 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5038 {
5039 	void *v;
5040 	loff_t l = 0;
5041 
5042 	preempt_disable();
5043 	arch_spin_lock(&trace_cmdline_lock);
5044 
5045 	v = &savedcmd->map_cmdline_to_pid[0];
5046 	while (l <= *pos) {
5047 		v = saved_cmdlines_next(m, v, &l);
5048 		if (!v)
5049 			return NULL;
5050 	}
5051 
5052 	return v;
5053 }
5054 
5055 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5056 {
5057 	arch_spin_unlock(&trace_cmdline_lock);
5058 	preempt_enable();
5059 }
5060 
5061 static int saved_cmdlines_show(struct seq_file *m, void *v)
5062 {
5063 	char buf[TASK_COMM_LEN];
5064 	unsigned int *pid = v;
5065 
5066 	__trace_find_cmdline(*pid, buf);
5067 	seq_printf(m, "%d %s\n", *pid, buf);
5068 	return 0;
5069 }
5070 
5071 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5072 	.start		= saved_cmdlines_start,
5073 	.next		= saved_cmdlines_next,
5074 	.stop		= saved_cmdlines_stop,
5075 	.show		= saved_cmdlines_show,
5076 };
5077 
5078 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5079 {
5080 	if (tracing_disabled)
5081 		return -ENODEV;
5082 
5083 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5084 }
5085 
5086 static const struct file_operations tracing_saved_cmdlines_fops = {
5087 	.open		= tracing_saved_cmdlines_open,
5088 	.read		= seq_read,
5089 	.llseek		= seq_lseek,
5090 	.release	= seq_release,
5091 };
5092 
5093 static ssize_t
5094 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5095 				 size_t cnt, loff_t *ppos)
5096 {
5097 	char buf[64];
5098 	int r;
5099 
5100 	arch_spin_lock(&trace_cmdline_lock);
5101 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5102 	arch_spin_unlock(&trace_cmdline_lock);
5103 
5104 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5105 }
5106 
5107 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5108 {
5109 	kfree(s->saved_cmdlines);
5110 	kfree(s->map_cmdline_to_pid);
5111 	kfree(s);
5112 }
5113 
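/*
 * Allocate a new saved_cmdlines buffer holding @val entries, swap it in
 * under trace_cmdline_lock and free the old one.
 */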
5114 static int tracing_resize_saved_cmdlines(unsigned int val)
5115 {
5116 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5117 
5118 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5119 	if (!s)
5120 		return -ENOMEM;
5121 
5122 	if (allocate_cmdlines_buffer(val, s) < 0) {
5123 		kfree(s);
5124 		return -ENOMEM;
5125 	}
5126 
5127 	arch_spin_lock(&trace_cmdline_lock);
5128 	savedcmd_temp = savedcmd;
5129 	savedcmd = s;
5130 	arch_spin_unlock(&trace_cmdline_lock);
5131 	free_saved_cmdlines_buffer(savedcmd_temp);
5132 
5133 	return 0;
5134 }
5135 
5136 static ssize_t
5137 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5138 				  size_t cnt, loff_t *ppos)
5139 {
5140 	unsigned long val;
5141 	int ret;
5142 
5143 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5144 	if (ret)
5145 		return ret;
5146 
5147 	/* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
5148 	if (!val || val > PID_MAX_DEFAULT)
5149 		return -EINVAL;
5150 
5151 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5152 	if (ret < 0)
5153 		return ret;
5154 
5155 	*ppos += cnt;
5156 
5157 	return cnt;
5158 }
5159 
5160 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5161 	.open		= tracing_open_generic,
5162 	.read		= tracing_saved_cmdlines_size_read,
5163 	.write		= tracing_saved_cmdlines_size_write,
5164 };
5165 
5166 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5167 static union trace_eval_map_item *
5168 update_eval_map(union trace_eval_map_item *ptr)
5169 {
5170 	if (!ptr->map.eval_string) {
5171 		if (ptr->tail.next) {
5172 			ptr = ptr->tail.next;
5173 			/* Set ptr to the next real item (skip head) */
5174 			ptr++;
5175 		} else
5176 			return NULL;
5177 	}
5178 	return ptr;
5179 }
5180 
5181 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5182 {
5183 	union trace_eval_map_item *ptr = v;
5184 
5185 	/*
5186 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5187 	 * This really should never happen.
5188 	 */
5189 	ptr = update_eval_map(ptr);
5190 	if (WARN_ON_ONCE(!ptr))
5191 		return NULL;
5192 
5193 	ptr++;
5194 
5195 	(*pos)++;
5196 
5197 	ptr = update_eval_map(ptr);
5198 
5199 	return ptr;
5200 }
5201 
5202 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5203 {
5204 	union trace_eval_map_item *v;
5205 	loff_t l = 0;
5206 
5207 	mutex_lock(&trace_eval_mutex);
5208 
5209 	v = trace_eval_maps;
5210 	if (v)
5211 		v++;
5212 
5213 	while (v && l < *pos) {
5214 		v = eval_map_next(m, v, &l);
5215 	}
5216 
5217 	return v;
5218 }
5219 
5220 static void eval_map_stop(struct seq_file *m, void *v)
5221 {
5222 	mutex_unlock(&trace_eval_mutex);
5223 }
5224 
5225 static int eval_map_show(struct seq_file *m, void *v)
5226 {
5227 	union trace_eval_map_item *ptr = v;
5228 
5229 	seq_printf(m, "%s %ld (%s)\n",
5230 		   ptr->map.eval_string, ptr->map.eval_value,
5231 		   ptr->map.system);
5232 
5233 	return 0;
5234 }
5235 
5236 static const struct seq_operations tracing_eval_map_seq_ops = {
5237 	.start		= eval_map_start,
5238 	.next		= eval_map_next,
5239 	.stop		= eval_map_stop,
5240 	.show		= eval_map_show,
5241 };
5242 
5243 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5244 {
5245 	if (tracing_disabled)
5246 		return -ENODEV;
5247 
5248 	return seq_open(filp, &tracing_eval_map_seq_ops);
5249 }
5250 
5251 static const struct file_operations tracing_eval_map_fops = {
5252 	.open		= tracing_eval_map_open,
5253 	.read		= seq_read,
5254 	.llseek		= seq_lseek,
5255 	.release	= seq_release,
5256 };
5257 
5258 static inline union trace_eval_map_item *
5259 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5260 {
5261 	/* Return tail of array given the head */
5262 	return ptr + ptr->head.length + 1;
5263 }
5264 
5265 static void
5266 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5267 			   int len)
5268 {
5269 	struct trace_eval_map **stop;
5270 	struct trace_eval_map **map;
5271 	union trace_eval_map_item *map_array;
5272 	union trace_eval_map_item *ptr;
5273 
5274 	stop = start + len;
5275 
5276 	/*
5277 	 * The trace_eval_maps contains the map plus a head and tail item,
5278 	 * where the head holds the module and length of array, and the
5279 	 * tail holds a pointer to the next list.
5280 	 */
5281 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5282 	if (!map_array) {
5283 		pr_warn("Unable to allocate trace eval mapping\n");
5284 		return;
5285 	}
5286 
5287 	mutex_lock(&trace_eval_mutex);
5288 
5289 	if (!trace_eval_maps)
5290 		trace_eval_maps = map_array;
5291 	else {
5292 		ptr = trace_eval_maps;
5293 		for (;;) {
5294 			ptr = trace_eval_jmp_to_tail(ptr);
5295 			if (!ptr->tail.next)
5296 				break;
5297 			ptr = ptr->tail.next;
5298 
5299 		}
5300 		ptr->tail.next = map_array;
5301 	}
5302 	map_array->head.mod = mod;
5303 	map_array->head.length = len;
5304 	map_array++;
5305 
5306 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5307 		map_array->map = **map;
5308 		map_array++;
5309 	}
5310 	memset(map_array, 0, sizeof(*map_array));
5311 
5312 	mutex_unlock(&trace_eval_mutex);
5313 }
5314 
5315 static void trace_create_eval_file(struct dentry *d_tracer)
5316 {
5317 	trace_create_file("eval_map", 0444, d_tracer,
5318 			  NULL, &tracing_eval_map_fops);
5319 }
5320 
5321 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5322 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5323 static inline void trace_insert_eval_map_file(struct module *mod,
5324 			      struct trace_eval_map **start, int len) { }
5325 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5326 
5327 static void trace_insert_eval_map(struct module *mod,
5328 				  struct trace_eval_map **start, int len)
5329 {
5330 	struct trace_eval_map **map;
5331 
5332 	if (len <= 0)
5333 		return;
5334 
5335 	map = start;
5336 
5337 	trace_event_eval_update(map, len);
5338 
5339 	trace_insert_eval_map_file(mod, start, len);
5340 }
5341 
5342 static ssize_t
5343 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5344 		       size_t cnt, loff_t *ppos)
5345 {
5346 	struct trace_array *tr = filp->private_data;
5347 	char buf[MAX_TRACER_SIZE+2];
5348 	int r;
5349 
5350 	mutex_lock(&trace_types_lock);
5351 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5352 	mutex_unlock(&trace_types_lock);
5353 
5354 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5355 }
5356 
5357 int tracer_init(struct tracer *t, struct trace_array *tr)
5358 {
5359 	tracing_reset_online_cpus(&tr->trace_buffer);
5360 	return t->init(tr);
5361 }
5362 
5363 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5364 {
5365 	int cpu;
5366 
5367 	for_each_tracing_cpu(cpu)
5368 		per_cpu_ptr(buf->data, cpu)->entries = val;
5369 }
5370 
5371 #ifdef CONFIG_TRACER_MAX_TRACE
5372 /* resize @tr's buffer to the size of @size_tr's entries */
5373 /* resize @trace_buf's per-cpu entries to match @size_buf's entries */
5374 					struct trace_buffer *size_buf, int cpu_id)
5375 {
5376 	int cpu, ret = 0;
5377 
5378 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5379 		for_each_tracing_cpu(cpu) {
5380 			ret = ring_buffer_resize(trace_buf->buffer,
5381 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5382 			if (ret < 0)
5383 				break;
5384 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5385 				per_cpu_ptr(size_buf->data, cpu)->entries;
5386 		}
5387 	} else {
5388 		ret = ring_buffer_resize(trace_buf->buffer,
5389 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5390 		if (ret == 0)
5391 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5392 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5393 	}
5394 
5395 	return ret;
5396 }
5397 #endif /* CONFIG_TRACER_MAX_TRACE */
5398 
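/*
 * Resize the main ring buffer to @size. For the top level buffer with a
 * max_tr-using tracer, the max (snapshot) buffer is resized as well; if
 * the two end up with different sizes and cannot be brought back in
 * sync, tracing is disabled entirely.
 */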
5399 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5400 					unsigned long size, int cpu)
5401 {
5402 	int ret;
5403 
5404 	/*
5405 	 * If kernel or user changes the size of the ring buffer
5406 	 * we use the size that was given, and we can forget about
5407 	 * expanding it later.
5408 	 */
5409 	ring_buffer_expanded = true;
5410 
5411 	/* May be called before buffers are initialized */
5412 	if (!tr->trace_buffer.buffer)
5413 		return 0;
5414 
5415 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5416 	if (ret < 0)
5417 		return ret;
5418 
5419 #ifdef CONFIG_TRACER_MAX_TRACE
5420 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5421 	    !tr->current_trace->use_max_tr)
5422 		goto out;
5423 
5424 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5425 	if (ret < 0) {
5426 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5427 						     &tr->trace_buffer, cpu);
5428 		if (r < 0) {
5429 			/*
5430 			 * AARGH! We are left with different
5431 			 * size max buffer!!!!
5432 			 * The max buffer is our "snapshot" buffer.
5433 			 * When a tracer needs a snapshot (one of the
5434 			 * latency tracers), it swaps the max buffer
5435 			 * with the saved snapshot. We succeeded in updating
5436 			 * the size of the main buffer, but failed to update
5437 			 * the size of the max buffer. But when we tried
5438 			 * to reset the main buffer to the original size, we
5439 			 * failed there too. This is very unlikely to
5440 			 * happen, but if it does, warn and kill all
5441 			 * tracing.
5442 			 */
5443 			WARN_ON(1);
5444 			tracing_disabled = 1;
5445 		}
5446 		return ret;
5447 	}
5448 
5449 	if (cpu == RING_BUFFER_ALL_CPUS)
5450 		set_buffer_entries(&tr->max_buffer, size);
5451 	else
5452 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5453 
5454  out:
5455 #endif /* CONFIG_TRACER_MAX_TRACE */
5456 
5457 	if (cpu == RING_BUFFER_ALL_CPUS)
5458 		set_buffer_entries(&tr->trace_buffer, size);
5459 	else
5460 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5461 
5462 	return ret;
5463 }
5464 
5465 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5466 					  unsigned long size, int cpu_id)
5467 {
5468 	int ret = size;
5469 
5470 	mutex_lock(&trace_types_lock);
5471 
5472 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5473 		/* make sure this cpu is enabled in the mask */
5474 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5475 			ret = -EINVAL;
5476 			goto out;
5477 		}
5478 	}
5479 
5480 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5481 	if (ret < 0)
5482 		ret = -ENOMEM;
5483 
5484 out:
5485 	mutex_unlock(&trace_types_lock);
5486 
5487 	return ret;
5488 }
5489 
5490 
5491 /**
5492  * tracing_update_buffers - used by tracing facility to expand ring buffers
5493  *
5494  * To save memory when tracing is never used on a system that has it
5495  * configured in, the ring buffers are set to a minimum size. Once a
5496  * user starts to use the tracing facility, the buffers need to grow
5497  * to their default size.
5498  *
5499  * This function is to be called when a tracer is about to be used.
5500  */
5501 int tracing_update_buffers(void)
5502 {
5503 	int ret = 0;
5504 
5505 	mutex_lock(&trace_types_lock);
5506 	if (!ring_buffer_expanded)
5507 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5508 						RING_BUFFER_ALL_CPUS);
5509 	mutex_unlock(&trace_types_lock);
5510 
5511 	return ret;
5512 }
5513 
5514 struct trace_option_dentry;
5515 
5516 static void
5517 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5518 
5519 /*
5520  * Used to clear out the tracer before deletion of an instance.
5521  * Must have trace_types_lock held.
5522  */
5523 static void tracing_set_nop(struct trace_array *tr)
5524 {
5525 	if (tr->current_trace == &nop_trace)
5526 		return;
5527 
5528 	tr->current_trace->enabled--;
5529 
5530 	if (tr->current_trace->reset)
5531 		tr->current_trace->reset(tr);
5532 
5533 	tr->current_trace = &nop_trace;
5534 }
5535 
5536 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5537 {
5538 	/* Only enable if the directory has been created already. */
5539 	if (!tr->dir)
5540 		return;
5541 
5542 	create_trace_option_files(tr, t);
5543 }
5544 
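/*
 * Switch @tr to the tracer named @buf: expand the ring buffer if it is
 * still at its boot-time minimum, validate that the tracer may run on
 * this array, shut down the current tracer, free or allocate the
 * snapshot buffer as the new tracer requires, then init and enable it.
 */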
5545 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5546 {
5547 	struct tracer *t;
5548 #ifdef CONFIG_TRACER_MAX_TRACE
5549 	bool had_max_tr;
5550 #endif
5551 	int ret = 0;
5552 
5553 	mutex_lock(&trace_types_lock);
5554 
5555 	if (!ring_buffer_expanded) {
5556 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5557 						RING_BUFFER_ALL_CPUS);
5558 		if (ret < 0)
5559 			goto out;
5560 		ret = 0;
5561 	}
5562 
5563 	for (t = trace_types; t; t = t->next) {
5564 		if (strcmp(t->name, buf) == 0)
5565 			break;
5566 	}
5567 	if (!t) {
5568 		ret = -EINVAL;
5569 		goto out;
5570 	}
5571 	if (t == tr->current_trace)
5572 		goto out;
5573 
5574 #ifdef CONFIG_TRACER_SNAPSHOT
5575 	if (t->use_max_tr) {
5576 		arch_spin_lock(&tr->max_lock);
5577 		if (tr->cond_snapshot)
5578 			ret = -EBUSY;
5579 		arch_spin_unlock(&tr->max_lock);
5580 		if (ret)
5581 			goto out;
5582 	}
5583 #endif
5584 	/* Some tracers won't work on kernel command line */
5585 	/* Some tracers cannot be enabled from the kernel command line */
5586 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5587 			t->name);
5588 		goto out;
5589 	}
5590 
5591 	/* Some tracers are only allowed for the top level buffer */
5592 	if (!trace_ok_for_array(t, tr)) {
5593 		ret = -EINVAL;
5594 		goto out;
5595 	}
5596 
5597 	/* If trace pipe files are being read, we can't change the tracer */
5598 	if (tr->current_trace->ref) {
5599 		ret = -EBUSY;
5600 		goto out;
5601 	}
5602 
5603 	trace_branch_disable();
5604 
5605 	tr->current_trace->enabled--;
5606 
5607 	if (tr->current_trace->reset)
5608 		tr->current_trace->reset(tr);
5609 
5610 	/* Current trace needs to be nop_trace before synchronize_rcu */
5611 	tr->current_trace = &nop_trace;
5612 
5613 #ifdef CONFIG_TRACER_MAX_TRACE
5614 	had_max_tr = tr->allocated_snapshot;
5615 
5616 	if (had_max_tr && !t->use_max_tr) {
5617 		/*
5618 		 * We need to make sure that update_max_tr() sees that
5619 		 * current_trace changed to nop_trace, to keep it from
5620 		 * swapping the buffers after we resize them.
5621 		 * update_max_tr() is called with interrupts disabled,
5622 		 * so a synchronize_rcu() is sufficient.
5623 		 */
5624 		synchronize_rcu();
5625 		free_snapshot(tr);
5626 	}
5627 #endif
5628 
5629 #ifdef CONFIG_TRACER_MAX_TRACE
5630 	if (t->use_max_tr && !had_max_tr) {
5631 		ret = tracing_alloc_snapshot_instance(tr);
5632 		if (ret < 0)
5633 			goto out;
5634 	}
5635 #endif
5636 
5637 	if (t->init) {
5638 		ret = tracer_init(t, tr);
5639 		if (ret)
5640 			goto out;
5641 	}
5642 
5643 	tr->current_trace = t;
5644 	tr->current_trace->enabled++;
5645 	trace_branch_enable(tr);
5646  out:
5647 	mutex_unlock(&trace_types_lock);
5648 
5649 	return ret;
5650 }
5651 
5652 static ssize_t
5653 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5654 			size_t cnt, loff_t *ppos)
5655 {
5656 	struct trace_array *tr = filp->private_data;
5657 	char buf[MAX_TRACER_SIZE+1];
5658 	int i;
5659 	size_t ret;
5660 	int err;
5661 
5662 	ret = cnt;
5663 
5664 	if (cnt > MAX_TRACER_SIZE)
5665 		cnt = MAX_TRACER_SIZE;
5666 
5667 	if (copy_from_user(buf, ubuf, cnt))
5668 		return -EFAULT;
5669 
5670 	buf[cnt] = 0;
5671 
5672 	/* strip trailing whitespace. */
5673 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5674 		buf[i] = 0;
5675 
5676 	err = tracing_set_tracer(tr, buf);
5677 	if (err)
5678 		return err;
5679 
5680 	*ppos += ret;
5681 
5682 	return ret;
5683 }
5684 
5685 static ssize_t
5686 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5687 		   size_t cnt, loff_t *ppos)
5688 {
5689 	char buf[64];
5690 	int r;
5691 
5692 	r = snprintf(buf, sizeof(buf), "%ld\n",
5693 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5694 	if (r > sizeof(buf))
5695 		r = sizeof(buf);
5696 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5697 }
5698 
5699 static ssize_t
5700 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5701 		    size_t cnt, loff_t *ppos)
5702 {
5703 	unsigned long val;
5704 	int ret;
5705 
5706 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5707 	if (ret)
5708 		return ret;
5709 
5710 	*ptr = val * 1000;
5711 
5712 	return cnt;
5713 }
5714 
5715 static ssize_t
5716 tracing_thresh_read(struct file *filp, char __user *ubuf,
5717 		    size_t cnt, loff_t *ppos)
5718 {
5719 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5720 }
5721 
5722 static ssize_t
5723 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5724 		     size_t cnt, loff_t *ppos)
5725 {
5726 	struct trace_array *tr = filp->private_data;
5727 	int ret;
5728 
5729 	mutex_lock(&trace_types_lock);
5730 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5731 	if (ret < 0)
5732 		goto out;
5733 
5734 	if (tr->current_trace->update_thresh) {
5735 		ret = tr->current_trace->update_thresh(tr);
5736 		if (ret < 0)
5737 			goto out;
5738 	}
5739 
5740 	ret = cnt;
5741 out:
5742 	mutex_unlock(&trace_types_lock);
5743 
5744 	return ret;
5745 }
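
/*
 * Example (a sketch, paths assume tracefs at /sys/kernel/tracing):
 * tracing_thresh is exposed in microseconds even though it is stored
 * in nanoseconds; tracing_nsecs_write() above does the *1000
 * conversion and tracing_nsecs_read() converts back.
 *
 *   # echo 100 > /sys/kernel/tracing/tracing_thresh   (threshold = 100 us)
 *   # cat /sys/kernel/tracing/tracing_thresh
 *   100
 */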
5746 
5747 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5748 
5749 static ssize_t
5750 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5751 		     size_t cnt, loff_t *ppos)
5752 {
5753 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5754 }
5755 
5756 static ssize_t
5757 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5758 		      size_t cnt, loff_t *ppos)
5759 {
5760 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5761 }
5762 
5763 #endif
5764 
5765 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5766 {
5767 	struct trace_array *tr = inode->i_private;
5768 	struct trace_iterator *iter;
5769 	int ret = 0;
5770 
5771 	if (tracing_disabled)
5772 		return -ENODEV;
5773 
5774 	if (trace_array_get(tr) < 0)
5775 		return -ENODEV;
5776 
5777 	mutex_lock(&trace_types_lock);
5778 
5779 	/* create a buffer to store the information to pass to userspace */
5780 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5781 	if (!iter) {
5782 		ret = -ENOMEM;
5783 		__trace_array_put(tr);
5784 		goto out;
5785 	}
5786 
5787 	trace_seq_init(&iter->seq);
5788 	iter->trace = tr->current_trace;
5789 
5790 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5791 		ret = -ENOMEM;
5792 		goto fail;
5793 	}
5794 
5795 	/* trace pipe does not show start of buffer */
5796 	cpumask_setall(iter->started);
5797 
5798 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5799 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5800 
5801 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5802 	if (trace_clocks[tr->clock_id].in_ns)
5803 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5804 
5805 	iter->tr = tr;
5806 	iter->trace_buffer = &tr->trace_buffer;
5807 	iter->cpu_file = tracing_get_cpu(inode);
5808 	mutex_init(&iter->mutex);
5809 	filp->private_data = iter;
5810 
5811 	if (iter->trace->pipe_open)
5812 		iter->trace->pipe_open(iter);
5813 
5814 	nonseekable_open(inode, filp);
5815 
5816 	tr->current_trace->ref++;
5817 out:
5818 	mutex_unlock(&trace_types_lock);
5819 	return ret;
5820 
5821 fail:
5822 	kfree(iter);
5823 	__trace_array_put(tr);
5824 	mutex_unlock(&trace_types_lock);
5825 	return ret;
5826 }
5827 
5828 static int tracing_release_pipe(struct inode *inode, struct file *file)
5829 {
5830 	struct trace_iterator *iter = file->private_data;
5831 	struct trace_array *tr = inode->i_private;
5832 
5833 	mutex_lock(&trace_types_lock);
5834 
5835 	tr->current_trace->ref--;
5836 
5837 	if (iter->trace->pipe_close)
5838 		iter->trace->pipe_close(iter);
5839 
5840 	mutex_unlock(&trace_types_lock);
5841 
5842 	free_cpumask_var(iter->started);
5843 	mutex_destroy(&iter->mutex);
5844 	kfree(iter);
5845 
5846 	trace_array_put(tr);
5847 
5848 	return 0;
5849 }
5850 
5851 static __poll_t
5852 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5853 {
5854 	struct trace_array *tr = iter->tr;
5855 
5856 	/* Iterators are static, they should be filled or empty */
5857 	if (trace_buffer_iter(iter, iter->cpu_file))
5858 		return EPOLLIN | EPOLLRDNORM;
5859 
5860 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5861 		/*
5862 		 * Always select as readable when in blocking mode
5863 		 */
5864 		return EPOLLIN | EPOLLRDNORM;
5865 	else
5866 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5867 					     filp, poll_table);
5868 }
5869 
5870 static __poll_t
5871 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5872 {
5873 	struct trace_iterator *iter = filp->private_data;
5874 
5875 	return trace_poll(iter, filp, poll_table);
5876 }
5877 
5878 /* Must be called with iter->mutex held. */
5879 static int tracing_wait_pipe(struct file *filp)
5880 {
5881 	struct trace_iterator *iter = filp->private_data;
5882 	int ret;
5883 
5884 	while (trace_empty(iter)) {
5885 
5886 		if ((filp->f_flags & O_NONBLOCK)) {
5887 			return -EAGAIN;
5888 		}
5889 
5890 		/*
5891 		 * Only return EOF once tracing is disabled and we have
5892 		 * already read something. If tracing is disabled but
5893 		 * nothing has been read yet, keep blocking: this lets a
5894 		 * user cat this file and then enable tracing. After
5895 		 * something has been read, EOF is given whenever tracing
5896 		 * is disabled again.
5897 		 *
5898 		 * iter->pos will be 0 if we haven't read anything.
5899 		 */
5899 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5900 			break;
5901 
5902 		mutex_unlock(&iter->mutex);
5903 
5904 		ret = wait_on_pipe(iter, 0);
5905 
5906 		mutex_lock(&iter->mutex);
5907 
5908 		if (ret)
5909 			return ret;
5910 	}
5911 
5912 	return 1;
5913 }
5914 
5915 /*
5916  * Consumer reader.
5917  */
5918 static ssize_t
5919 tracing_read_pipe(struct file *filp, char __user *ubuf,
5920 		  size_t cnt, loff_t *ppos)
5921 {
5922 	struct trace_iterator *iter = filp->private_data;
5923 	ssize_t sret;
5924 
5925 	/*
5926 	 * Avoid more than one consumer on a single file descriptor.
5927 	 * This is just a matter of trace coherency; the ring buffer itself
5928 	 * is protected.
5929 	 */
5930 	mutex_lock(&iter->mutex);
5931 
5932 	/* return any leftover data */
5933 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5934 	if (sret != -EBUSY)
5935 		goto out;
5936 
5937 	trace_seq_init(&iter->seq);
5938 
5939 	if (iter->trace->read) {
5940 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5941 		if (sret)
5942 			goto out;
5943 	}
5944 
5945 waitagain:
5946 	sret = tracing_wait_pipe(filp);
5947 	if (sret <= 0)
5948 		goto out;
5949 
5950 	/* stop when tracing is finished */
5951 	if (trace_empty(iter)) {
5952 		sret = 0;
5953 		goto out;
5954 	}
5955 
5956 	if (cnt >= PAGE_SIZE)
5957 		cnt = PAGE_SIZE - 1;
5958 
5959 	/* reset all but tr, trace, and overruns */
5960 	memset(&iter->seq, 0,
5961 	       sizeof(struct trace_iterator) -
5962 	       offsetof(struct trace_iterator, seq));
5963 	cpumask_clear(iter->started);
5964 	iter->pos = -1;
5965 
5966 	trace_event_read_lock();
5967 	trace_access_lock(iter->cpu_file);
5968 	while (trace_find_next_entry_inc(iter) != NULL) {
5969 		enum print_line_t ret;
5970 		int save_len = iter->seq.seq.len;
5971 
5972 		ret = print_trace_line(iter);
5973 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5974 			/* don't print partial lines */
5975 			iter->seq.seq.len = save_len;
5976 			break;
5977 		}
5978 		if (ret != TRACE_TYPE_NO_CONSUME)
5979 			trace_consume(iter);
5980 
5981 		if (trace_seq_used(&iter->seq) >= cnt)
5982 			break;
5983 
5984 		/*
5985 		 * The full flag being set means we hit the trace_seq buffer
5986 		 * size and should have left via the partial-line check above.
5987 		 * If we get here, a trace_seq_* function is being misused.
5988 		 */
5989 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5990 			  iter->ent->type);
5991 	}
5992 	trace_access_unlock(iter->cpu_file);
5993 	trace_event_read_unlock();
5994 
5995 	/* Now copy what we have to the user */
5996 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5997 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5998 		trace_seq_init(&iter->seq);
5999 
6000 	/*
6001 	 * If there was nothing to send to user, in spite of consuming trace
6002 	 * entries, go back to wait for more entries.
6003 	 */
6004 	if (sret == -EBUSY)
6005 		goto waitagain;
6006 
6007 out:
6008 	mutex_unlock(&iter->mutex);
6009 
6010 	return sret;
6011 }
6012 
6013 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6014 				     unsigned int idx)
6015 {
6016 	__free_page(spd->pages[idx]);
6017 }
6018 
6019 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6020 	.confirm		= generic_pipe_buf_confirm,
6021 	.release		= generic_pipe_buf_release,
6022 	.steal			= generic_pipe_buf_steal,
6023 	.get			= generic_pipe_buf_get,
6024 };
6025 
6026 static size_t
6027 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6028 {
6029 	size_t count;
6030 	int save_len;
6031 	int ret;
6032 
6033 	/* Seq buffer is page-sized, exactly what we need. */
6034 	for (;;) {
6035 		save_len = iter->seq.seq.len;
6036 		ret = print_trace_line(iter);
6037 
6038 		if (trace_seq_has_overflowed(&iter->seq)) {
6039 			iter->seq.seq.len = save_len;
6040 			break;
6041 		}
6042 
6043 		/*
6044 		 * This should not be hit, because a partial line should
6045 		 * only be returned if iter->seq overflowed, which is
6046 		 * handled above. But check it anyway to be safe.
6047 		 */
6048 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6049 			iter->seq.seq.len = save_len;
6050 			break;
6051 		}
6052 
6053 		count = trace_seq_used(&iter->seq) - save_len;
6054 		if (rem < count) {
6055 			rem = 0;
6056 			iter->seq.seq.len = save_len;
6057 			break;
6058 		}
6059 
6060 		if (ret != TRACE_TYPE_NO_CONSUME)
6061 			trace_consume(iter);
6062 		rem -= count;
6063 		if (!trace_find_next_entry_inc(iter))	{
6064 			rem = 0;
6065 			iter->ent = NULL;
6066 			break;
6067 		}
6068 	}
6069 
6070 	return rem;
6071 }
6072 
6073 static ssize_t tracing_splice_read_pipe(struct file *filp,
6074 					loff_t *ppos,
6075 					struct pipe_inode_info *pipe,
6076 					size_t len,
6077 					unsigned int flags)
6078 {
6079 	struct page *pages_def[PIPE_DEF_BUFFERS];
6080 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6081 	struct trace_iterator *iter = filp->private_data;
6082 	struct splice_pipe_desc spd = {
6083 		.pages		= pages_def,
6084 		.partial	= partial_def,
6085 		.nr_pages	= 0, /* This gets updated below. */
6086 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6087 		.ops		= &tracing_pipe_buf_ops,
6088 		.spd_release	= tracing_spd_release_pipe,
6089 	};
6090 	ssize_t ret;
6091 	size_t rem;
6092 	unsigned int i;
6093 
6094 	if (splice_grow_spd(pipe, &spd))
6095 		return -ENOMEM;
6096 
6097 	mutex_lock(&iter->mutex);
6098 
6099 	if (iter->trace->splice_read) {
6100 		ret = iter->trace->splice_read(iter, filp,
6101 					       ppos, pipe, len, flags);
6102 		if (ret)
6103 			goto out_err;
6104 	}
6105 
6106 	ret = tracing_wait_pipe(filp);
6107 	if (ret <= 0)
6108 		goto out_err;
6109 
6110 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6111 		ret = -EFAULT;
6112 		goto out_err;
6113 	}
6114 
6115 	trace_event_read_lock();
6116 	trace_access_lock(iter->cpu_file);
6117 
6118 	/* Fill as many pages as possible. */
6119 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6120 		spd.pages[i] = alloc_page(GFP_KERNEL);
6121 		if (!spd.pages[i])
6122 			break;
6123 
6124 		rem = tracing_fill_pipe_page(rem, iter);
6125 
6126 		/* Copy the data into the page, so we can start over. */
6127 		ret = trace_seq_to_buffer(&iter->seq,
6128 					  page_address(spd.pages[i]),
6129 					  trace_seq_used(&iter->seq));
6130 		if (ret < 0) {
6131 			__free_page(spd.pages[i]);
6132 			break;
6133 		}
6134 		spd.partial[i].offset = 0;
6135 		spd.partial[i].len = trace_seq_used(&iter->seq);
6136 
6137 		trace_seq_init(&iter->seq);
6138 	}
6139 
6140 	trace_access_unlock(iter->cpu_file);
6141 	trace_event_read_unlock();
6142 	mutex_unlock(&iter->mutex);
6143 
6144 	spd.nr_pages = i;
6145 
6146 	if (i)
6147 		ret = splice_to_pipe(pipe, &spd);
6148 	else
6149 		ret = 0;
6150 out:
6151 	splice_shrink_spd(&spd);
6152 	return ret;
6153 
6154 out_err:
6155 	mutex_unlock(&iter->mutex);
6156 	goto out;
6157 }
6158 
6159 static ssize_t
6160 tracing_entries_read(struct file *filp, char __user *ubuf,
6161 		     size_t cnt, loff_t *ppos)
6162 {
6163 	struct inode *inode = file_inode(filp);
6164 	struct trace_array *tr = inode->i_private;
6165 	int cpu = tracing_get_cpu(inode);
6166 	char buf[64];
6167 	int r = 0;
6168 	ssize_t ret;
6169 
6170 	mutex_lock(&trace_types_lock);
6171 
6172 	if (cpu == RING_BUFFER_ALL_CPUS) {
6173 		int cpu, buf_size_same;
6174 		unsigned long size;
6175 
6176 		size = 0;
6177 		buf_size_same = 1;
6178 		/* check if all cpu sizes are same */
6179 		for_each_tracing_cpu(cpu) {
6180 			/* fill in the size from first enabled cpu */
6181 			if (size == 0)
6182 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6183 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6184 				buf_size_same = 0;
6185 				break;
6186 			}
6187 		}
6188 
6189 		if (buf_size_same) {
6190 			if (!ring_buffer_expanded)
6191 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6192 					    size >> 10,
6193 					    trace_buf_size >> 10);
6194 			else
6195 				r = sprintf(buf, "%lu\n", size >> 10);
6196 		} else
6197 			r = sprintf(buf, "X\n");
6198 	} else
6199 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6200 
6201 	mutex_unlock(&trace_types_lock);
6202 
6203 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6204 	return ret;
6205 }
6206 
6207 static ssize_t
6208 tracing_entries_write(struct file *filp, const char __user *ubuf,
6209 		      size_t cnt, loff_t *ppos)
6210 {
6211 	struct inode *inode = file_inode(filp);
6212 	struct trace_array *tr = inode->i_private;
6213 	unsigned long val;
6214 	int ret;
6215 
6216 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6217 	if (ret)
6218 		return ret;
6219 
6220 	/* must have at least 1 entry */
6221 	if (!val)
6222 		return -EINVAL;
6223 
6224 	/* value is in KB */
6225 	val <<= 10;
6226 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6227 	if (ret < 0)
6228 		return ret;
6229 
6230 	*ppos += cnt;
6231 
6232 	return cnt;
6233 }
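
/*
 * Example (a sketch, paths assume tracefs at /sys/kernel/tracing): the
 * value written to buffer_size_kb is interpreted as KB per CPU, hence
 * the "val <<= 10" above.
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb            (all CPUs)
 *   # echo 1024 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 *
 * The second form resizes only CPU 1; afterwards a read of the
 * top-level file prints "X" because the per-CPU sizes no longer match
 * (see tracing_entries_read() above).
 */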
6234 
6235 static ssize_t
6236 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6237 				size_t cnt, loff_t *ppos)
6238 {
6239 	struct trace_array *tr = filp->private_data;
6240 	char buf[64];
6241 	int r, cpu;
6242 	unsigned long size = 0, expanded_size = 0;
6243 
6244 	mutex_lock(&trace_types_lock);
6245 	for_each_tracing_cpu(cpu) {
6246 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6247 		if (!ring_buffer_expanded)
6248 			expanded_size += trace_buf_size >> 10;
6249 	}
6250 	if (ring_buffer_expanded)
6251 		r = sprintf(buf, "%lu\n", size);
6252 	else
6253 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6254 	mutex_unlock(&trace_types_lock);
6255 
6256 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6257 }
6258 
6259 static ssize_t
6260 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6261 			  size_t cnt, loff_t *ppos)
6262 {
6263 	/*
6264 	 * There is no need to read what the user has written; this function
6265 	 * exists only so that an "echo" into this file does not return an error.
6266 	 */
6267 
6268 	*ppos += cnt;
6269 
6270 	return cnt;
6271 }
6272 
6273 static int
6274 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6275 {
6276 	struct trace_array *tr = inode->i_private;
6277 
6278 	/* disable tracing ? */
6279 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6280 		tracer_tracing_off(tr);
6281 	/* resize the ring buffer to 0 */
6282 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6283 
6284 	trace_array_put(tr);
6285 
6286 	return 0;
6287 }
6288 
6289 static ssize_t
6290 tracing_mark_write(struct file *filp, const char __user *ubuf,
6291 					size_t cnt, loff_t *fpos)
6292 {
6293 	struct trace_array *tr = filp->private_data;
6294 	struct ring_buffer_event *event;
6295 	enum event_trigger_type tt = ETT_NONE;
6296 	struct ring_buffer *buffer;
6297 	struct print_entry *entry;
6298 	unsigned long irq_flags;
6299 	const char faulted[] = "<faulted>";
6300 	ssize_t written;
6301 	int size;
6302 	int len;
6303 
6304 /* Used in tracing_mark_raw_write() as well */
6305 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6306 
6307 	if (tracing_disabled)
6308 		return -EINVAL;
6309 
6310 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6311 		return -EINVAL;
6312 
6313 	if (cnt > TRACE_BUF_SIZE)
6314 		cnt = TRACE_BUF_SIZE;
6315 
6316 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6317 
6318 	local_save_flags(irq_flags);
6319 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6320 
6321 	/* If less than "<faulted>", then make sure we can still add that */
6322 	if (cnt < FAULTED_SIZE)
6323 		size += FAULTED_SIZE - cnt;
6324 
6325 	buffer = tr->trace_buffer.buffer;
6326 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6327 					    irq_flags, preempt_count());
6328 	if (unlikely(!event))
6329 		/* Ring buffer disabled, return as if not open for write */
6330 		return -EBADF;
6331 
6332 	entry = ring_buffer_event_data(event);
6333 	entry->ip = _THIS_IP_;
6334 
6335 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6336 	if (len) {
6337 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6338 		cnt = FAULTED_SIZE;
6339 		written = -EFAULT;
6340 	} else
6341 		written = cnt;
6342 	len = cnt;
6343 
6344 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6345 		/* do not add \n before testing triggers, but add \0 */
6346 		entry->buf[cnt] = '\0';
6347 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6348 	}
6349 
6350 	if (entry->buf[cnt - 1] != '\n') {
6351 		entry->buf[cnt] = '\n';
6352 		entry->buf[cnt + 1] = '\0';
6353 	} else
6354 		entry->buf[cnt] = '\0';
6355 
6356 	__buffer_unlock_commit(buffer, event);
6357 
6358 	if (tt)
6359 		event_triggers_post_call(tr->trace_marker_file, tt);
6360 
6361 	if (written > 0)
6362 		*fpos += written;
6363 
6364 	return written;
6365 }
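
/*
 * Example (a sketch of the user-space side; tracefs path assumed to be
 * /sys/kernel/tracing, #includes and error handling omitted): writes
 * to the trace_marker file land here and are recorded as TRACE_PRINT
 * events interleaved with the rest of the trace, which is handy for
 * correlating application activity with kernel events:
 *
 *   int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *   write(fd, "frame start\n", 12);
 *
 * or simply: echo "frame start" > /sys/kernel/tracing/trace_marker
 */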
6366 
6367 /* Limit it for now to 3K (including tag) */
6368 #define RAW_DATA_MAX_SIZE (1024*3)
6369 
6370 static ssize_t
6371 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6372 					size_t cnt, loff_t *fpos)
6373 {
6374 	struct trace_array *tr = filp->private_data;
6375 	struct ring_buffer_event *event;
6376 	struct ring_buffer *buffer;
6377 	struct raw_data_entry *entry;
6378 	const char faulted[] = "<faulted>";
6379 	unsigned long irq_flags;
6380 	ssize_t written;
6381 	int size;
6382 	int len;
6383 
6384 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6385 
6386 	if (tracing_disabled)
6387 		return -EINVAL;
6388 
6389 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6390 		return -EINVAL;
6391 
6392 	/* The marker must at least have a tag id */
6393 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6394 		return -EINVAL;
6395 
6396 	if (cnt > TRACE_BUF_SIZE)
6397 		cnt = TRACE_BUF_SIZE;
6398 
6399 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6400 
6401 	local_save_flags(irq_flags);
6402 	size = sizeof(*entry) + cnt;
6403 	if (cnt < FAULT_SIZE_ID)
6404 		size += FAULT_SIZE_ID - cnt;
6405 
6406 	buffer = tr->trace_buffer.buffer;
6407 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6408 					    irq_flags, preempt_count());
6409 	if (!event)
6410 		/* Ring buffer disabled, return as if not open for write */
6411 		return -EBADF;
6412 
6413 	entry = ring_buffer_event_data(event);
6414 
6415 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6416 	if (len) {
6417 		entry->id = -1;
6418 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6419 		written = -EFAULT;
6420 	} else
6421 		written = cnt;
6422 
6423 	__buffer_unlock_commit(buffer, event);
6424 
6425 	if (written > 0)
6426 		*fpos += written;
6427 
6428 	return written;
6429 }
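
/*
 * Example (a sketch of the user-space side; #includes and error
 * handling omitted): trace_marker_raw expects a binary payload whose
 * first 4 bytes are an application-chosen tag id, which is what the
 * __copy_from_user_inatomic() into &entry->id above relies on:
 *
 *   struct { unsigned int id; char payload[8]; } rec = { 42, "payload" };
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *   write(fd, &rec, sizeof(rec));
 */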
6430 
6431 static int tracing_clock_show(struct seq_file *m, void *v)
6432 {
6433 	struct trace_array *tr = m->private;
6434 	int i;
6435 
6436 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6437 		seq_printf(m,
6438 			"%s%s%s%s", i ? " " : "",
6439 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6440 			i == tr->clock_id ? "]" : "");
6441 	seq_putc(m, '\n');
6442 
6443 	return 0;
6444 }
6445 
6446 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6447 {
6448 	int i;
6449 
6450 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6451 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6452 			break;
6453 	}
6454 	if (i == ARRAY_SIZE(trace_clocks))
6455 		return -EINVAL;
6456 
6457 	mutex_lock(&trace_types_lock);
6458 
6459 	tr->clock_id = i;
6460 
6461 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6462 
6463 	/*
6464 	 * New clock may not be consistent with the previous clock.
6465 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6466 	 */
6467 	tracing_reset_online_cpus(&tr->trace_buffer);
6468 
6469 #ifdef CONFIG_TRACER_MAX_TRACE
6470 	if (tr->max_buffer.buffer)
6471 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6472 	tracing_reset_online_cpus(&tr->max_buffer);
6473 #endif
6474 
6475 	mutex_unlock(&trace_types_lock);
6476 
6477 	return 0;
6478 }
6479 
6480 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6481 				   size_t cnt, loff_t *fpos)
6482 {
6483 	struct seq_file *m = filp->private_data;
6484 	struct trace_array *tr = m->private;
6485 	char buf[64];
6486 	const char *clockstr;
6487 	int ret;
6488 
6489 	if (cnt >= sizeof(buf))
6490 		return -EINVAL;
6491 
6492 	if (copy_from_user(buf, ubuf, cnt))
6493 		return -EFAULT;
6494 
6495 	buf[cnt] = 0;
6496 
6497 	clockstr = strstrip(buf);
6498 
6499 	ret = tracing_set_clock(tr, clockstr);
6500 	if (ret)
6501 		return ret;
6502 
6503 	*fpos += cnt;
6504 
6505 	return cnt;
6506 }
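
/*
 * Example (a sketch, assuming tracefs at /sys/kernel/tracing): the
 * trace_clock file lists the available clocks with the current one in
 * brackets, and writing a name switches clocks. Note that switching
 * resets the ring buffer, as done in tracing_set_clock() above.
 *
 *   # cat /sys/kernel/tracing/trace_clock
 *   [local] global counter uptime perf mono ...   (list is config dependent)
 *   # echo mono > /sys/kernel/tracing/trace_clock
 */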
6507 
6508 static int tracing_clock_open(struct inode *inode, struct file *file)
6509 {
6510 	struct trace_array *tr = inode->i_private;
6511 	int ret;
6512 
6513 	if (tracing_disabled)
6514 		return -ENODEV;
6515 
6516 	if (trace_array_get(tr))
6517 		return -ENODEV;
6518 
6519 	ret = single_open(file, tracing_clock_show, inode->i_private);
6520 	if (ret < 0)
6521 		trace_array_put(tr);
6522 
6523 	return ret;
6524 }
6525 
6526 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6527 {
6528 	struct trace_array *tr = m->private;
6529 
6530 	mutex_lock(&trace_types_lock);
6531 
6532 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6533 		seq_puts(m, "delta [absolute]\n");
6534 	else
6535 		seq_puts(m, "[delta] absolute\n");
6536 
6537 	mutex_unlock(&trace_types_lock);
6538 
6539 	return 0;
6540 }
6541 
6542 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6543 {
6544 	struct trace_array *tr = inode->i_private;
6545 	int ret;
6546 
6547 	if (tracing_disabled)
6548 		return -ENODEV;
6549 
6550 	if (trace_array_get(tr))
6551 		return -ENODEV;
6552 
6553 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6554 	if (ret < 0)
6555 		trace_array_put(tr);
6556 
6557 	return ret;
6558 }
6559 
6560 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6561 {
6562 	int ret = 0;
6563 
6564 	mutex_lock(&trace_types_lock);
6565 
6566 	if (abs && tr->time_stamp_abs_ref++)
6567 		goto out;
6568 
6569 	if (!abs) {
6570 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6571 			ret = -EINVAL;
6572 			goto out;
6573 		}
6574 
6575 		if (--tr->time_stamp_abs_ref)
6576 			goto out;
6577 	}
6578 
6579 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6580 
6581 #ifdef CONFIG_TRACER_MAX_TRACE
6582 	if (tr->max_buffer.buffer)
6583 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6584 #endif
6585  out:
6586 	mutex_unlock(&trace_types_lock);
6587 
6588 	return ret;
6589 }
6590 
6591 struct ftrace_buffer_info {
6592 	struct trace_iterator	iter;
6593 	void			*spare;
6594 	unsigned int		spare_cpu;
6595 	unsigned int		read;
6596 };
6597 
6598 #ifdef CONFIG_TRACER_SNAPSHOT
6599 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6600 {
6601 	struct trace_array *tr = inode->i_private;
6602 	struct trace_iterator *iter;
6603 	struct seq_file *m;
6604 	int ret = 0;
6605 
6606 	if (trace_array_get(tr) < 0)
6607 		return -ENODEV;
6608 
6609 	if (file->f_mode & FMODE_READ) {
6610 		iter = __tracing_open(inode, file, true);
6611 		if (IS_ERR(iter))
6612 			ret = PTR_ERR(iter);
6613 	} else {
6614 		/* Writes still need the seq_file to hold the private data */
6615 		ret = -ENOMEM;
6616 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6617 		if (!m)
6618 			goto out;
6619 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6620 		if (!iter) {
6621 			kfree(m);
6622 			goto out;
6623 		}
6624 		ret = 0;
6625 
6626 		iter->tr = tr;
6627 		iter->trace_buffer = &tr->max_buffer;
6628 		iter->cpu_file = tracing_get_cpu(inode);
6629 		m->private = iter;
6630 		file->private_data = m;
6631 	}
6632 out:
6633 	if (ret < 0)
6634 		trace_array_put(tr);
6635 
6636 	return ret;
6637 }
6638 
6639 static ssize_t
6640 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6641 		       loff_t *ppos)
6642 {
6643 	struct seq_file *m = filp->private_data;
6644 	struct trace_iterator *iter = m->private;
6645 	struct trace_array *tr = iter->tr;
6646 	unsigned long val;
6647 	int ret;
6648 
6649 	ret = tracing_update_buffers();
6650 	if (ret < 0)
6651 		return ret;
6652 
6653 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6654 	if (ret)
6655 		return ret;
6656 
6657 	mutex_lock(&trace_types_lock);
6658 
6659 	if (tr->current_trace->use_max_tr) {
6660 		ret = -EBUSY;
6661 		goto out;
6662 	}
6663 
6664 	arch_spin_lock(&tr->max_lock);
6665 	if (tr->cond_snapshot)
6666 		ret = -EBUSY;
6667 	arch_spin_unlock(&tr->max_lock);
6668 	if (ret)
6669 		goto out;
6670 
6671 	switch (val) {
6672 	case 0:
6673 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6674 			ret = -EINVAL;
6675 			break;
6676 		}
6677 		if (tr->allocated_snapshot)
6678 			free_snapshot(tr);
6679 		break;
6680 	case 1:
6681 /* Only allow per-cpu swap if the ring buffer supports it */
6682 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6683 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6684 			ret = -EINVAL;
6685 			break;
6686 		}
6687 #endif
6688 		if (!tr->allocated_snapshot) {
6689 			ret = tracing_alloc_snapshot_instance(tr);
6690 			if (ret < 0)
6691 				break;
6692 		}
6693 		local_irq_disable();
6694 		/* Now, we're going to swap */
6695 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6696 			update_max_tr(tr, current, smp_processor_id(), NULL);
6697 		else
6698 			update_max_tr_single(tr, current, iter->cpu_file);
6699 		local_irq_enable();
6700 		break;
6701 	default:
6702 		if (tr->allocated_snapshot) {
6703 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6704 				tracing_reset_online_cpus(&tr->max_buffer);
6705 			else
6706 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6707 		}
6708 		break;
6709 	}
6710 
6711 	if (ret >= 0) {
6712 		*ppos += cnt;
6713 		ret = cnt;
6714 	}
6715 out:
6716 	mutex_unlock(&trace_types_lock);
6717 	return ret;
6718 }
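
/*
 * Example (a sketch, assuming tracefs at /sys/kernel/tracing): the
 * values handled by the switch above map onto the snapshot file
 * semantics:
 *
 *   # echo 1 > /sys/kernel/tracing/snapshot   allocate if needed and
 *                                             swap in a snapshot
 *   # cat /sys/kernel/tracing/snapshot        read the snapshot buffer
 *   # echo 2 > /sys/kernel/tracing/snapshot   clear the snapshot buffer
 *   # echo 0 > /sys/kernel/tracing/snapshot   free the snapshot buffer
 */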
6719 
6720 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6721 {
6722 	struct seq_file *m = file->private_data;
6723 	int ret;
6724 
6725 	ret = tracing_release(inode, file);
6726 
6727 	if (file->f_mode & FMODE_READ)
6728 		return ret;
6729 
6730 	/* If write only, the seq_file is just a stub */
6731 	if (m)
6732 		kfree(m->private);
6733 	kfree(m);
6734 
6735 	return 0;
6736 }
6737 
6738 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6739 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6740 				    size_t count, loff_t *ppos);
6741 static int tracing_buffers_release(struct inode *inode, struct file *file);
6742 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6743 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6744 
6745 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6746 {
6747 	struct ftrace_buffer_info *info;
6748 	int ret;
6749 
6750 	ret = tracing_buffers_open(inode, filp);
6751 	if (ret < 0)
6752 		return ret;
6753 
6754 	info = filp->private_data;
6755 
6756 	if (info->iter.trace->use_max_tr) {
6757 		tracing_buffers_release(inode, filp);
6758 		return -EBUSY;
6759 	}
6760 
6761 	info->iter.snapshot = true;
6762 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6763 
6764 	return ret;
6765 }
6766 
6767 #endif /* CONFIG_TRACER_SNAPSHOT */
6768 
6769 
6770 static const struct file_operations tracing_thresh_fops = {
6771 	.open		= tracing_open_generic,
6772 	.read		= tracing_thresh_read,
6773 	.write		= tracing_thresh_write,
6774 	.llseek		= generic_file_llseek,
6775 };
6776 
6777 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6778 static const struct file_operations tracing_max_lat_fops = {
6779 	.open		= tracing_open_generic,
6780 	.read		= tracing_max_lat_read,
6781 	.write		= tracing_max_lat_write,
6782 	.llseek		= generic_file_llseek,
6783 };
6784 #endif
6785 
6786 static const struct file_operations set_tracer_fops = {
6787 	.open		= tracing_open_generic,
6788 	.read		= tracing_set_trace_read,
6789 	.write		= tracing_set_trace_write,
6790 	.llseek		= generic_file_llseek,
6791 };
6792 
6793 static const struct file_operations tracing_pipe_fops = {
6794 	.open		= tracing_open_pipe,
6795 	.poll		= tracing_poll_pipe,
6796 	.read		= tracing_read_pipe,
6797 	.splice_read	= tracing_splice_read_pipe,
6798 	.release	= tracing_release_pipe,
6799 	.llseek		= no_llseek,
6800 };
6801 
6802 static const struct file_operations tracing_entries_fops = {
6803 	.open		= tracing_open_generic_tr,
6804 	.read		= tracing_entries_read,
6805 	.write		= tracing_entries_write,
6806 	.llseek		= generic_file_llseek,
6807 	.release	= tracing_release_generic_tr,
6808 };
6809 
6810 static const struct file_operations tracing_total_entries_fops = {
6811 	.open		= tracing_open_generic_tr,
6812 	.read		= tracing_total_entries_read,
6813 	.llseek		= generic_file_llseek,
6814 	.release	= tracing_release_generic_tr,
6815 };
6816 
6817 static const struct file_operations tracing_free_buffer_fops = {
6818 	.open		= tracing_open_generic_tr,
6819 	.write		= tracing_free_buffer_write,
6820 	.release	= tracing_free_buffer_release,
6821 };
6822 
6823 static const struct file_operations tracing_mark_fops = {
6824 	.open		= tracing_open_generic_tr,
6825 	.write		= tracing_mark_write,
6826 	.llseek		= generic_file_llseek,
6827 	.release	= tracing_release_generic_tr,
6828 };
6829 
6830 static const struct file_operations tracing_mark_raw_fops = {
6831 	.open		= tracing_open_generic_tr,
6832 	.write		= tracing_mark_raw_write,
6833 	.llseek		= generic_file_llseek,
6834 	.release	= tracing_release_generic_tr,
6835 };
6836 
6837 static const struct file_operations trace_clock_fops = {
6838 	.open		= tracing_clock_open,
6839 	.read		= seq_read,
6840 	.llseek		= seq_lseek,
6841 	.release	= tracing_single_release_tr,
6842 	.write		= tracing_clock_write,
6843 };
6844 
6845 static const struct file_operations trace_time_stamp_mode_fops = {
6846 	.open		= tracing_time_stamp_mode_open,
6847 	.read		= seq_read,
6848 	.llseek		= seq_lseek,
6849 	.release	= tracing_single_release_tr,
6850 };
6851 
6852 #ifdef CONFIG_TRACER_SNAPSHOT
6853 static const struct file_operations snapshot_fops = {
6854 	.open		= tracing_snapshot_open,
6855 	.read		= seq_read,
6856 	.write		= tracing_snapshot_write,
6857 	.llseek		= tracing_lseek,
6858 	.release	= tracing_snapshot_release,
6859 };
6860 
6861 static const struct file_operations snapshot_raw_fops = {
6862 	.open		= snapshot_raw_open,
6863 	.read		= tracing_buffers_read,
6864 	.release	= tracing_buffers_release,
6865 	.splice_read	= tracing_buffers_splice_read,
6866 	.llseek		= no_llseek,
6867 };
6868 
6869 #endif /* CONFIG_TRACER_SNAPSHOT */
6870 
6871 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6872 {
6873 	struct trace_array *tr = inode->i_private;
6874 	struct ftrace_buffer_info *info;
6875 	int ret;
6876 
6877 	if (tracing_disabled)
6878 		return -ENODEV;
6879 
6880 	if (trace_array_get(tr) < 0)
6881 		return -ENODEV;
6882 
6883 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6884 	if (!info) {
6885 		trace_array_put(tr);
6886 		return -ENOMEM;
6887 	}
6888 
6889 	mutex_lock(&trace_types_lock);
6890 
6891 	info->iter.tr		= tr;
6892 	info->iter.cpu_file	= tracing_get_cpu(inode);
6893 	info->iter.trace	= tr->current_trace;
6894 	info->iter.trace_buffer = &tr->trace_buffer;
6895 	info->spare		= NULL;
6896 	/* Force reading ring buffer for first read */
6897 	info->read		= (unsigned int)-1;
6898 
6899 	filp->private_data = info;
6900 
6901 	tr->current_trace->ref++;
6902 
6903 	mutex_unlock(&trace_types_lock);
6904 
6905 	ret = nonseekable_open(inode, filp);
6906 	if (ret < 0)
6907 		trace_array_put(tr);
6908 
6909 	return ret;
6910 }
6911 
6912 static __poll_t
6913 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6914 {
6915 	struct ftrace_buffer_info *info = filp->private_data;
6916 	struct trace_iterator *iter = &info->iter;
6917 
6918 	return trace_poll(iter, filp, poll_table);
6919 }
6920 
6921 static ssize_t
6922 tracing_buffers_read(struct file *filp, char __user *ubuf,
6923 		     size_t count, loff_t *ppos)
6924 {
6925 	struct ftrace_buffer_info *info = filp->private_data;
6926 	struct trace_iterator *iter = &info->iter;
6927 	ssize_t ret = 0;
6928 	ssize_t size;
6929 
6930 	if (!count)
6931 		return 0;
6932 
6933 #ifdef CONFIG_TRACER_MAX_TRACE
6934 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6935 		return -EBUSY;
6936 #endif
6937 
6938 	if (!info->spare) {
6939 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6940 							  iter->cpu_file);
6941 		if (IS_ERR(info->spare)) {
6942 			ret = PTR_ERR(info->spare);
6943 			info->spare = NULL;
6944 		} else {
6945 			info->spare_cpu = iter->cpu_file;
6946 		}
6947 	}
6948 	if (!info->spare)
6949 		return ret;
6950 
6951 	/* Do we have previous read data to read? */
6952 	if (info->read < PAGE_SIZE)
6953 		goto read;
6954 
6955  again:
6956 	trace_access_lock(iter->cpu_file);
6957 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6958 				    &info->spare,
6959 				    count,
6960 				    iter->cpu_file, 0);
6961 	trace_access_unlock(iter->cpu_file);
6962 
6963 	if (ret < 0) {
6964 		if (trace_empty(iter)) {
6965 			if ((filp->f_flags & O_NONBLOCK))
6966 				return -EAGAIN;
6967 
6968 			ret = wait_on_pipe(iter, 0);
6969 			if (ret)
6970 				return ret;
6971 
6972 			goto again;
6973 		}
6974 		return 0;
6975 	}
6976 
6977 	info->read = 0;
6978  read:
6979 	size = PAGE_SIZE - info->read;
6980 	if (size > count)
6981 		size = count;
6982 
6983 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6984 	if (ret == size)
6985 		return -EFAULT;
6986 
6987 	size -= ret;
6988 
6989 	*ppos += size;
6990 	info->read += size;
6991 
6992 	return size;
6993 }
6994 
6995 static int tracing_buffers_release(struct inode *inode, struct file *file)
6996 {
6997 	struct ftrace_buffer_info *info = file->private_data;
6998 	struct trace_iterator *iter = &info->iter;
6999 
7000 	mutex_lock(&trace_types_lock);
7001 
7002 	iter->tr->current_trace->ref--;
7003 
7004 	__trace_array_put(iter->tr);
7005 
7006 	if (info->spare)
7007 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
7008 					   info->spare_cpu, info->spare);
7009 	kfree(info);
7010 
7011 	mutex_unlock(&trace_types_lock);
7012 
7013 	return 0;
7014 }
7015 
7016 struct buffer_ref {
7017 	struct ring_buffer	*buffer;
7018 	void			*page;
7019 	int			cpu;
7020 	refcount_t		refcount;
7021 };
7022 
7023 static void buffer_ref_release(struct buffer_ref *ref)
7024 {
7025 	if (!refcount_dec_and_test(&ref->refcount))
7026 		return;
7027 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7028 	kfree(ref);
7029 }
7030 
7031 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7032 				    struct pipe_buffer *buf)
7033 {
7034 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7035 
7036 	buffer_ref_release(ref);
7037 	buf->private = 0;
7038 }
7039 
7040 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7041 				struct pipe_buffer *buf)
7042 {
7043 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7044 
7045 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7046 		return false;
7047 
7048 	refcount_inc(&ref->refcount);
7049 	return true;
7050 }
7051 
7052 /* Pipe buffer operations for a buffer. */
7053 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7054 	.confirm		= generic_pipe_buf_confirm,
7055 	.release		= buffer_pipe_buf_release,
7056 	.steal			= generic_pipe_buf_nosteal,
7057 	.get			= buffer_pipe_buf_get,
7058 };
7059 
7060 /*
7061  * Callback from splice_to_pipe(), used to release pages at the end of
7062  * the spd in case we errored out while filling the pipe.
7063  */
7064 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7065 {
7066 	struct buffer_ref *ref =
7067 		(struct buffer_ref *)spd->partial[i].private;
7068 
7069 	buffer_ref_release(ref);
7070 	spd->partial[i].private = 0;
7071 }
7072 
7073 static ssize_t
7074 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7075 			    struct pipe_inode_info *pipe, size_t len,
7076 			    unsigned int flags)
7077 {
7078 	struct ftrace_buffer_info *info = file->private_data;
7079 	struct trace_iterator *iter = &info->iter;
7080 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7081 	struct page *pages_def[PIPE_DEF_BUFFERS];
7082 	struct splice_pipe_desc spd = {
7083 		.pages		= pages_def,
7084 		.partial	= partial_def,
7085 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7086 		.ops		= &buffer_pipe_buf_ops,
7087 		.spd_release	= buffer_spd_release,
7088 	};
7089 	struct buffer_ref *ref;
7090 	int entries, i;
7091 	ssize_t ret = 0;
7092 
7093 #ifdef CONFIG_TRACER_MAX_TRACE
7094 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7095 		return -EBUSY;
7096 #endif
7097 
7098 	if (*ppos & (PAGE_SIZE - 1))
7099 		return -EINVAL;
7100 
7101 	if (len & (PAGE_SIZE - 1)) {
7102 		if (len < PAGE_SIZE)
7103 			return -EINVAL;
7104 		len &= PAGE_MASK;
7105 	}
7106 
7107 	if (splice_grow_spd(pipe, &spd))
7108 		return -ENOMEM;
7109 
7110  again:
7111 	trace_access_lock(iter->cpu_file);
7112 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7113 
7114 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7115 		struct page *page;
7116 		int r;
7117 
7118 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7119 		if (!ref) {
7120 			ret = -ENOMEM;
7121 			break;
7122 		}
7123 
7124 		refcount_set(&ref->refcount, 1);
7125 		ref->buffer = iter->trace_buffer->buffer;
7126 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7127 		if (IS_ERR(ref->page)) {
7128 			ret = PTR_ERR(ref->page);
7129 			ref->page = NULL;
7130 			kfree(ref);
7131 			break;
7132 		}
7133 		ref->cpu = iter->cpu_file;
7134 
7135 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7136 					  len, iter->cpu_file, 1);
7137 		if (r < 0) {
7138 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7139 						   ref->page);
7140 			kfree(ref);
7141 			break;
7142 		}
7143 
7144 		page = virt_to_page(ref->page);
7145 
7146 		spd.pages[i] = page;
7147 		spd.partial[i].len = PAGE_SIZE;
7148 		spd.partial[i].offset = 0;
7149 		spd.partial[i].private = (unsigned long)ref;
7150 		spd.nr_pages++;
7151 		*ppos += PAGE_SIZE;
7152 
7153 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7154 	}
7155 
7156 	trace_access_unlock(iter->cpu_file);
7157 	spd.nr_pages = i;
7158 
7159 	/* did we read anything? */
7160 	if (!spd.nr_pages) {
7161 		if (ret)
7162 			goto out;
7163 
7164 		ret = -EAGAIN;
7165 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7166 			goto out;
7167 
7168 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7169 		if (ret)
7170 			goto out;
7171 
7172 		goto again;
7173 	}
7174 
7175 	ret = splice_to_pipe(pipe, &spd);
7176 out:
7177 	splice_shrink_spd(&spd);
7178 
7179 	return ret;
7180 }
7181 
7182 static const struct file_operations tracing_buffers_fops = {
7183 	.open		= tracing_buffers_open,
7184 	.read		= tracing_buffers_read,
7185 	.poll		= tracing_buffers_poll,
7186 	.release	= tracing_buffers_release,
7187 	.splice_read	= tracing_buffers_splice_read,
7188 	.llseek		= no_llseek,
7189 };
7190 
7191 static ssize_t
7192 tracing_stats_read(struct file *filp, char __user *ubuf,
7193 		   size_t count, loff_t *ppos)
7194 {
7195 	struct inode *inode = file_inode(filp);
7196 	struct trace_array *tr = inode->i_private;
7197 	struct trace_buffer *trace_buf = &tr->trace_buffer;
7198 	int cpu = tracing_get_cpu(inode);
7199 	struct trace_seq *s;
7200 	unsigned long cnt;
7201 	unsigned long long t;
7202 	unsigned long usec_rem;
7203 
7204 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7205 	if (!s)
7206 		return -ENOMEM;
7207 
7208 	trace_seq_init(s);
7209 
7210 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7211 	trace_seq_printf(s, "entries: %ld\n", cnt);
7212 
7213 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7214 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7215 
7216 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7217 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7218 
7219 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7220 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7221 
7222 	if (trace_clocks[tr->clock_id].in_ns) {
7223 		/* local or global for trace_clock */
7224 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7225 		usec_rem = do_div(t, USEC_PER_SEC);
7226 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7227 								t, usec_rem);
7228 
7229 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7230 		usec_rem = do_div(t, USEC_PER_SEC);
7231 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7232 	} else {
7233 		/* counter or tsc mode for trace_clock */
7234 		trace_seq_printf(s, "oldest event ts: %llu\n",
7235 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7236 
7237 		trace_seq_printf(s, "now ts: %llu\n",
7238 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7239 	}
7240 
7241 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7242 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7243 
7244 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7245 	trace_seq_printf(s, "read events: %ld\n", cnt);
7246 
7247 	count = simple_read_from_buffer(ubuf, count, ppos,
7248 					s->buffer, trace_seq_used(s));
7249 
7250 	kfree(s);
7251 
7252 	return count;
7253 }
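
/*
 * The stats file produced above reports, per CPU (a sketch of the
 * field names only; see the trace_seq_printf() calls for the exact
 * format):
 *
 *   entries, overrun, commit overrun, bytes,
 *   oldest event ts, now ts, dropped events, read events
 */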
7254 
7255 static const struct file_operations tracing_stats_fops = {
7256 	.open		= tracing_open_generic_tr,
7257 	.read		= tracing_stats_read,
7258 	.llseek		= generic_file_llseek,
7259 	.release	= tracing_release_generic_tr,
7260 };
7261 
7262 #ifdef CONFIG_DYNAMIC_FTRACE
7263 
7264 static ssize_t
7265 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7266 		  size_t cnt, loff_t *ppos)
7267 {
7268 	unsigned long *p = filp->private_data;
7269 	char buf[64]; /* Not too big for a shallow stack */
7270 	int r;
7271 
7272 	r = scnprintf(buf, 63, "%ld", *p);
7273 	buf[r++] = '\n';
7274 
7275 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7276 }
7277 
7278 static const struct file_operations tracing_dyn_info_fops = {
7279 	.open		= tracing_open_generic,
7280 	.read		= tracing_read_dyn_info,
7281 	.llseek		= generic_file_llseek,
7282 };
7283 #endif /* CONFIG_DYNAMIC_FTRACE */
7284 
7285 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7286 static void
7287 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7288 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7289 		void *data)
7290 {
7291 	tracing_snapshot_instance(tr);
7292 }
7293 
7294 static void
7295 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7296 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7297 		      void *data)
7298 {
7299 	struct ftrace_func_mapper *mapper = data;
7300 	long *count = NULL;
7301 
7302 	if (mapper)
7303 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7304 
7305 	if (count) {
7306 
7307 		if (*count <= 0)
7308 			return;
7309 
7310 		(*count)--;
7311 	}
7312 
7313 	tracing_snapshot_instance(tr);
7314 }
7315 
7316 static int
7317 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7318 		      struct ftrace_probe_ops *ops, void *data)
7319 {
7320 	struct ftrace_func_mapper *mapper = data;
7321 	long *count = NULL;
7322 
7323 	seq_printf(m, "%ps:", (void *)ip);
7324 
7325 	seq_puts(m, "snapshot");
7326 
7327 	if (mapper)
7328 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7329 
7330 	if (count)
7331 		seq_printf(m, ":count=%ld\n", *count);
7332 	else
7333 		seq_puts(m, ":unlimited\n");
7334 
7335 	return 0;
7336 }
7337 
7338 static int
7339 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7340 		     unsigned long ip, void *init_data, void **data)
7341 {
7342 	struct ftrace_func_mapper *mapper = *data;
7343 
7344 	if (!mapper) {
7345 		mapper = allocate_ftrace_func_mapper();
7346 		if (!mapper)
7347 			return -ENOMEM;
7348 		*data = mapper;
7349 	}
7350 
7351 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7352 }
7353 
7354 static void
7355 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7356 		     unsigned long ip, void *data)
7357 {
7358 	struct ftrace_func_mapper *mapper = data;
7359 
7360 	if (!ip) {
7361 		if (!mapper)
7362 			return;
7363 		free_ftrace_func_mapper(mapper, NULL);
7364 		return;
7365 	}
7366 
7367 	ftrace_func_mapper_remove_ip(mapper, ip);
7368 }
7369 
7370 static struct ftrace_probe_ops snapshot_probe_ops = {
7371 	.func			= ftrace_snapshot,
7372 	.print			= ftrace_snapshot_print,
7373 };
7374 
7375 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7376 	.func			= ftrace_count_snapshot,
7377 	.print			= ftrace_snapshot_print,
7378 	.init			= ftrace_snapshot_init,
7379 	.free			= ftrace_snapshot_free,
7380 };
7381 
7382 static int
7383 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7384 			       char *glob, char *cmd, char *param, int enable)
7385 {
7386 	struct ftrace_probe_ops *ops;
7387 	void *count = (void *)-1;
7388 	char *number;
7389 	int ret;
7390 
7391 	if (!tr)
7392 		return -ENODEV;
7393 
7394 	/* hash funcs only work with set_ftrace_filter */
7395 	if (!enable)
7396 		return -EINVAL;
7397 
7398 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7399 
7400 	if (glob[0] == '!')
7401 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7402 
7403 	if (!param)
7404 		goto out_reg;
7405 
7406 	number = strsep(&param, ":");
7407 
7408 	if (!strlen(number))
7409 		goto out_reg;
7410 
7411 	/*
7412 	 * We use the callback data field (which is a pointer)
7413 	 * as our counter.
7414 	 */
7415 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7416 	if (ret)
7417 		return ret;
7418 
7419  out_reg:
7420 	ret = tracing_alloc_snapshot_instance(tr);
7421 	if (ret < 0)
7422 		goto out;
7423 
7424 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7425 
7426  out:
7427 	return ret < 0 ? ret : 0;
7428 }
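
/*
 * Example (a sketch, assuming tracefs at /sys/kernel/tracing): this
 * callback implements the "snapshot" command of set_ftrace_filter.
 *
 *   # echo 'do_exit:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *   # echo 'do_exit:snapshot:3' > /sys/kernel/tracing/set_ftrace_filter
 *   # echo '!do_exit:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *
 * The first form snapshots on every call to do_exit(), the second only
 * for the first three calls (the ":3" becomes the count parsed above),
 * and the '!' form removes the probe again.
 */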
7429 
7430 static struct ftrace_func_command ftrace_snapshot_cmd = {
7431 	.name			= "snapshot",
7432 	.func			= ftrace_trace_snapshot_callback,
7433 };
7434 
7435 static __init int register_snapshot_cmd(void)
7436 {
7437 	return register_ftrace_command(&ftrace_snapshot_cmd);
7438 }
7439 #else
7440 static inline __init int register_snapshot_cmd(void) { return 0; }
7441 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7442 
7443 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7444 {
7445 	if (WARN_ON(!tr->dir))
7446 		return ERR_PTR(-ENODEV);
7447 
7448 	/* Top directory uses NULL as the parent */
7449 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7450 		return NULL;
7451 
7452 	/* All sub buffers have a descriptor */
7453 	return tr->dir;
7454 }
7455 
7456 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7457 {
7458 	struct dentry *d_tracer;
7459 
7460 	if (tr->percpu_dir)
7461 		return tr->percpu_dir;
7462 
7463 	d_tracer = tracing_get_dentry(tr);
7464 	if (IS_ERR(d_tracer))
7465 		return NULL;
7466 
7467 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7468 
7469 	WARN_ONCE(!tr->percpu_dir,
7470 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7471 
7472 	return tr->percpu_dir;
7473 }
7474 
7475 static struct dentry *
7476 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7477 		      void *data, long cpu, const struct file_operations *fops)
7478 {
7479 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7480 
7481 	if (ret) /* See tracing_get_cpu() */
7482 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7483 	return ret;
7484 }
7485 
7486 static void
7487 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7488 {
7489 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7490 	struct dentry *d_cpu;
7491 	char cpu_dir[30]; /* 30 characters should be more than enough */
7492 
7493 	if (!d_percpu)
7494 		return;
7495 
7496 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7497 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7498 	if (!d_cpu) {
7499 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7500 		return;
7501 	}
7502 
7503 	/* per cpu trace_pipe */
7504 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7505 				tr, cpu, &tracing_pipe_fops);
7506 
7507 	/* per cpu trace */
7508 	trace_create_cpu_file("trace", 0644, d_cpu,
7509 				tr, cpu, &tracing_fops);
7510 
7511 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7512 				tr, cpu, &tracing_buffers_fops);
7513 
7514 	trace_create_cpu_file("stats", 0444, d_cpu,
7515 				tr, cpu, &tracing_stats_fops);
7516 
7517 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7518 				tr, cpu, &tracing_entries_fops);
7519 
7520 #ifdef CONFIG_TRACER_SNAPSHOT
7521 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7522 				tr, cpu, &snapshot_fops);
7523 
7524 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7525 				tr, cpu, &snapshot_raw_fops);
7526 #endif
7527 }
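
/*
 * The resulting layout (a sketch, with tracefs at /sys/kernel/tracing)
 * mirrors the top-level files on a per-CPU basis:
 *
 *   /sys/kernel/tracing/per_cpu/cpu0/trace
 *   /sys/kernel/tracing/per_cpu/cpu0/trace_pipe
 *   /sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw
 *   /sys/kernel/tracing/per_cpu/cpu0/stats
 *   /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 *   (plus snapshot and snapshot_raw when CONFIG_TRACER_SNAPSHOT is set)
 */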
7528 
7529 #ifdef CONFIG_FTRACE_SELFTEST
7530 /* Let selftest have access to static functions in this file */
7531 #include "trace_selftest.c"
7532 #endif
7533 
7534 static ssize_t
7535 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7536 			loff_t *ppos)
7537 {
7538 	struct trace_option_dentry *topt = filp->private_data;
7539 	char *buf;
7540 
7541 	if (topt->flags->val & topt->opt->bit)
7542 		buf = "1\n";
7543 	else
7544 		buf = "0\n";
7545 
7546 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7547 }
7548 
7549 static ssize_t
7550 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7551 			 loff_t *ppos)
7552 {
7553 	struct trace_option_dentry *topt = filp->private_data;
7554 	unsigned long val;
7555 	int ret;
7556 
7557 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7558 	if (ret)
7559 		return ret;
7560 
7561 	if (val != 0 && val != 1)
7562 		return -EINVAL;
7563 
7564 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7565 		mutex_lock(&trace_types_lock);
7566 		ret = __set_tracer_option(topt->tr, topt->flags,
7567 					  topt->opt, !val);
7568 		mutex_unlock(&trace_types_lock);
7569 		if (ret)
7570 			return ret;
7571 	}
7572 
7573 	*ppos += cnt;
7574 
7575 	return cnt;
7576 }
7577 
7578 
7579 static const struct file_operations trace_options_fops = {
7580 	.open = tracing_open_generic,
7581 	.read = trace_options_read,
7582 	.write = trace_options_write,
7583 	.llseek	= generic_file_llseek,
7584 };
7585 
7586 /*
7587  * In order to pass in both the trace_array descriptor and the index of
7588  * the flag that a trace option file represents, the trace_array has a
7589  * character array trace_flags_index[], where each element holds the
7590  * index of the bit for the flag it represents: index[0] == 0,
7591  * index[1] == 1, etc. The address of the element for a given flag is
7592  * passed to that flag option file's read/write callbacks.
7593  *
7594  * To extract both the index and the trace_array descriptor,
7595  * get_tr_index() uses the following algorithm.
7596  *
7597  *   idx = *ptr;
7598  *
7599  * Since the byte that ptr points to holds its own index within the
7600  * array (remember index[1] == 1), dereferencing it yields the index.
7601  *
7602  * Subtracting that index from ptr then gets us back to the start of
7603  * the array:
7604  *
7605  *   ptr - idx == &index[0]
7606  *
7607  * A simple container_of() on that pointer finally gets us to the
7608  * trace_array descriptor.
7609  */
7610 static void get_tr_index(void *data, struct trace_array **ptr,
7611 			 unsigned int *pindex)
7612 {
7613 	*pindex = *(unsigned char *)data;
7614 
7615 	*ptr = container_of(data - *pindex, struct trace_array,
7616 			    trace_flags_index);
7617 }
7618 
7619 static ssize_t
7620 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7621 			loff_t *ppos)
7622 {
7623 	void *tr_index = filp->private_data;
7624 	struct trace_array *tr;
7625 	unsigned int index;
7626 	char *buf;
7627 
7628 	get_tr_index(tr_index, &tr, &index);
7629 
7630 	if (tr->trace_flags & (1 << index))
7631 		buf = "1\n";
7632 	else
7633 		buf = "0\n";
7634 
7635 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7636 }
7637 
7638 static ssize_t
7639 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7640 			 loff_t *ppos)
7641 {
7642 	void *tr_index = filp->private_data;
7643 	struct trace_array *tr;
7644 	unsigned int index;
7645 	unsigned long val;
7646 	int ret;
7647 
7648 	get_tr_index(tr_index, &tr, &index);
7649 
7650 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7651 	if (ret)
7652 		return ret;
7653 
7654 	if (val != 0 && val != 1)
7655 		return -EINVAL;
7656 
7657 	mutex_lock(&trace_types_lock);
7658 	ret = set_tracer_flag(tr, 1 << index, val);
7659 	mutex_unlock(&trace_types_lock);
7660 
7661 	if (ret < 0)
7662 		return ret;
7663 
7664 	*ppos += cnt;
7665 
7666 	return cnt;
7667 }
7668 
7669 static const struct file_operations trace_options_core_fops = {
7670 	.open = tracing_open_generic,
7671 	.read = trace_options_core_read,
7672 	.write = trace_options_core_write,
7673 	.llseek = generic_file_llseek,
7674 };
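
/*
 * In short: trace_options_fops above backs the per-tracer option files
 * (each wrapping a struct trace_option_dentry), while
 * trace_options_core_fops backs the core trace_flags bits. Both appear
 * as 0/1 files under the per-instance "options" directory set up by
 * trace_options_init_dentry() below.
 */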
7675 
7676 struct dentry *trace_create_file(const char *name,
7677 				 umode_t mode,
7678 				 struct dentry *parent,
7679 				 void *data,
7680 				 const struct file_operations *fops)
7681 {
7682 	struct dentry *ret;
7683 
7684 	ret = tracefs_create_file(name, mode, parent, data, fops);
7685 	if (!ret)
7686 		pr_warn("Could not create tracefs '%s' entry\n", name);
7687 
7688 	return ret;
7689 }
7690 
7691 
7692 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7693 {
7694 	struct dentry *d_tracer;
7695 
7696 	if (tr->options)
7697 		return tr->options;
7698 
7699 	d_tracer = tracing_get_dentry(tr);
7700 	if (IS_ERR(d_tracer))
7701 		return NULL;
7702 
7703 	tr->options = tracefs_create_dir("options", d_tracer);
7704 	if (!tr->options) {
7705 		pr_warn("Could not create tracefs directory 'options'\n");
7706 		return NULL;
7707 	}
7708 
7709 	return tr->options;
7710 }
7711 
7712 static void
7713 create_trace_option_file(struct trace_array *tr,
7714 			 struct trace_option_dentry *topt,
7715 			 struct tracer_flags *flags,
7716 			 struct tracer_opt *opt)
7717 {
7718 	struct dentry *t_options;
7719 
7720 	t_options = trace_options_init_dentry(tr);
7721 	if (!t_options)
7722 		return;
7723 
7724 	topt->flags = flags;
7725 	topt->opt = opt;
7726 	topt->tr = tr;
7727 
7728 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7729 				    &trace_options_fops);
7730 
7731 }
7732 
7733 static void
7734 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7735 {
7736 	struct trace_option_dentry *topts;
7737 	struct trace_options *tr_topts;
7738 	struct tracer_flags *flags;
7739 	struct tracer_opt *opts;
7740 	int cnt;
7741 	int i;
7742 
7743 	if (!tracer)
7744 		return;
7745 
7746 	flags = tracer->flags;
7747 
7748 	if (!flags || !flags->opts)
7749 		return;
7750 
7751 	/*
7752 	 * If this is an instance, only create flags for tracers
7753 	 * the instance may have.
7754 	 */
7755 	if (!trace_ok_for_array(tracer, tr))
7756 		return;
7757 
7758 	for (i = 0; i < tr->nr_topts; i++) {
7759 		/* Make sure there are no duplicate flags. */
7760 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7761 			return;
7762 	}
7763 
7764 	opts = flags->opts;
7765 
7766 	for (cnt = 0; opts[cnt].name; cnt++)
7767 		;
7768 
7769 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7770 	if (!topts)
7771 		return;
7772 
7773 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7774 			    GFP_KERNEL);
7775 	if (!tr_topts) {
7776 		kfree(topts);
7777 		return;
7778 	}
7779 
7780 	tr->topts = tr_topts;
7781 	tr->topts[tr->nr_topts].tracer = tracer;
7782 	tr->topts[tr->nr_topts].topts = topts;
7783 	tr->nr_topts++;
7784 
7785 	for (cnt = 0; opts[cnt].name; cnt++) {
7786 		create_trace_option_file(tr, &topts[cnt], flags,
7787 					 &opts[cnt]);
7788 		WARN_ONCE(topts[cnt].entry == NULL,
7789 			  "Failed to create trace option: %s",
7790 			  opts[cnt].name);
7791 	}
7792 }
7793 
7794 static struct dentry *
7795 create_trace_option_core_file(struct trace_array *tr,
7796 			      const char *option, long index)
7797 {
7798 	struct dentry *t_options;
7799 
7800 	t_options = trace_options_init_dentry(tr);
7801 	if (!t_options)
7802 		return NULL;
7803 
7804 	return trace_create_file(option, 0644, t_options,
7805 				 (void *)&tr->trace_flags_index[index],
7806 				 &trace_options_core_fops);
7807 }
7808 
7809 static void create_trace_options_dir(struct trace_array *tr)
7810 {
7811 	struct dentry *t_options;
7812 	bool top_level = tr == &global_trace;
7813 	int i;
7814 
7815 	t_options = trace_options_init_dentry(tr);
7816 	if (!t_options)
7817 		return;
7818 
7819 	for (i = 0; trace_options[i]; i++) {
7820 		if (top_level ||
7821 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7822 			create_trace_option_core_file(tr, trace_options[i], i);
7823 	}
7824 }
7825 
7826 static ssize_t
7827 rb_simple_read(struct file *filp, char __user *ubuf,
7828 	       size_t cnt, loff_t *ppos)
7829 {
7830 	struct trace_array *tr = filp->private_data;
7831 	char buf[64];
7832 	int r;
7833 
7834 	r = tracer_tracing_is_on(tr);
7835 	r = sprintf(buf, "%d\n", r);
7836 
7837 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7838 }
7839 
7840 static ssize_t
7841 rb_simple_write(struct file *filp, const char __user *ubuf,
7842 		size_t cnt, loff_t *ppos)
7843 {
7844 	struct trace_array *tr = filp->private_data;
7845 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7846 	unsigned long val;
7847 	int ret;
7848 
7849 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7850 	if (ret)
7851 		return ret;
7852 
7853 	if (buffer) {
7854 		mutex_lock(&trace_types_lock);
7855 		if (!!val == tracer_tracing_is_on(tr)) {
7856 			val = 0; /* do nothing */
7857 		} else if (val) {
7858 			tracer_tracing_on(tr);
7859 			if (tr->current_trace->start)
7860 				tr->current_trace->start(tr);
7861 		} else {
7862 			tracer_tracing_off(tr);
7863 			if (tr->current_trace->stop)
7864 				tr->current_trace->stop(tr);
7865 		}
7866 		mutex_unlock(&trace_types_lock);
7867 	}
7868 
7869 	(*ppos)++;
7870 
7871 	return cnt;
7872 }
7873 
7874 static const struct file_operations rb_simple_fops = {
7875 	.open		= tracing_open_generic_tr,
7876 	.read		= rb_simple_read,
7877 	.write		= rb_simple_write,
7878 	.release	= tracing_release_generic_tr,
7879 	.llseek		= default_llseek,
7880 };
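
/*
 * Usage sketch (assuming tracefs is mounted at /sys/kernel/tracing):
 * these fops back the per-instance "tracing_on" file created in
 * init_tracer_tracefs() below. "echo 0 > tracing_on" calls
 * tracer_tracing_off() and the current tracer's ->stop() hook;
 * "echo 1 > tracing_on" re-enables writes via tracer_tracing_on() and
 * ->start(). Writing the value it already has is a no-op.
 */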
7881 
7882 static ssize_t
7883 buffer_percent_read(struct file *filp, char __user *ubuf,
7884 		    size_t cnt, loff_t *ppos)
7885 {
7886 	struct trace_array *tr = filp->private_data;
7887 	char buf[64];
7888 	int r;
7889 
7890 	r = tr->buffer_percent;
7891 	r = sprintf(buf, "%d\n", r);
7892 
7893 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7894 }
7895 
7896 static ssize_t
7897 buffer_percent_write(struct file *filp, const char __user *ubuf,
7898 		     size_t cnt, loff_t *ppos)
7899 {
7900 	struct trace_array *tr = filp->private_data;
7901 	unsigned long val;
7902 	int ret;
7903 
7904 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7905 	if (ret)
7906 		return ret;
7907 
7908 	if (val > 100)
7909 		return -EINVAL;
7910 
7911 	if (!val)
7912 		val = 1;
7913 
7914 	tr->buffer_percent = val;
7915 
7916 	(*ppos)++;
7917 
7918 	return cnt;
7919 }
7920 
7921 static const struct file_operations buffer_percent_fops = {
7922 	.open		= tracing_open_generic_tr,
7923 	.read		= buffer_percent_read,
7924 	.write		= buffer_percent_write,
7925 	.release	= tracing_release_generic_tr,
7926 	.llseek		= default_llseek,
7927 };
7928 
7929 struct dentry *trace_instance_dir;
7930 
7931 static void
7932 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7933 
7934 static int
7935 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7936 {
7937 	enum ring_buffer_flags rb_flags;
7938 
7939 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7940 
7941 	buf->tr = tr;
7942 
7943 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7944 	if (!buf->buffer)
7945 		return -ENOMEM;
7946 
7947 	buf->data = alloc_percpu(struct trace_array_cpu);
7948 	if (!buf->data) {
7949 		ring_buffer_free(buf->buffer);
7950 		buf->buffer = NULL;
7951 		return -ENOMEM;
7952 	}
7953 
7954 	/* Allocate the first page for all buffers */
7955 	set_buffer_entries(&tr->trace_buffer,
7956 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7957 
7958 	return 0;
7959 }
7960 
7961 static int allocate_trace_buffers(struct trace_array *tr, int size)
7962 {
7963 	int ret;
7964 
7965 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7966 	if (ret)
7967 		return ret;
7968 
7969 #ifdef CONFIG_TRACER_MAX_TRACE
7970 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7971 				    allocate_snapshot ? size : 1);
7972 	if (WARN_ON(ret)) {
7973 		ring_buffer_free(tr->trace_buffer.buffer);
7974 		tr->trace_buffer.buffer = NULL;
7975 		free_percpu(tr->trace_buffer.data);
7976 		tr->trace_buffer.data = NULL;
7977 		return -ENOMEM;
7978 	}
7979 	tr->allocated_snapshot = allocate_snapshot;
7980 
7981 	/*
7982 	 * Only the top level trace array gets its snapshot allocated
7983 	 * from the kernel command line.
7984 	 */
7985 	allocate_snapshot = false;
7986 #endif
7987 	return 0;
7988 }
7989 
7990 static void free_trace_buffer(struct trace_buffer *buf)
7991 {
7992 	if (buf->buffer) {
7993 		ring_buffer_free(buf->buffer);
7994 		buf->buffer = NULL;
7995 		free_percpu(buf->data);
7996 		buf->data = NULL;
7997 	}
7998 }
7999 
8000 static void free_trace_buffers(struct trace_array *tr)
8001 {
8002 	if (!tr)
8003 		return;
8004 
8005 	free_trace_buffer(&tr->trace_buffer);
8006 
8007 #ifdef CONFIG_TRACER_MAX_TRACE
8008 	free_trace_buffer(&tr->max_buffer);
8009 #endif
8010 }
8011 
8012 static void init_trace_flags_index(struct trace_array *tr)
8013 {
8014 	int i;
8015 
8016 	/* Used by the trace options files */
8017 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8018 		tr->trace_flags_index[i] = i;
8019 }
8020 
8021 static void __update_tracer_options(struct trace_array *tr)
8022 {
8023 	struct tracer *t;
8024 
8025 	for (t = trace_types; t; t = t->next)
8026 		add_tracer_options(tr, t);
8027 }
8028 
8029 static void update_tracer_options(struct trace_array *tr)
8030 {
8031 	mutex_lock(&trace_types_lock);
8032 	__update_tracer_options(tr);
8033 	mutex_unlock(&trace_types_lock);
8034 }
8035 
8036 static int instance_mkdir(const char *name)
8037 {
8038 	struct trace_array *tr;
8039 	int ret;
8040 
8041 	mutex_lock(&event_mutex);
8042 	mutex_lock(&trace_types_lock);
8043 
8044 	ret = -EEXIST;
8045 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8046 		if (tr->name && strcmp(tr->name, name) == 0)
8047 			goto out_unlock;
8048 	}
8049 
8050 	ret = -ENOMEM;
8051 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8052 	if (!tr)
8053 		goto out_unlock;
8054 
8055 	tr->name = kstrdup(name, GFP_KERNEL);
8056 	if (!tr->name)
8057 		goto out_free_tr;
8058 
8059 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8060 		goto out_free_tr;
8061 
8062 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8063 
8064 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8065 
8066 	raw_spin_lock_init(&tr->start_lock);
8067 
8068 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8069 
8070 	tr->current_trace = &nop_trace;
8071 
8072 	INIT_LIST_HEAD(&tr->systems);
8073 	INIT_LIST_HEAD(&tr->events);
8074 	INIT_LIST_HEAD(&tr->hist_vars);
8075 
8076 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8077 		goto out_free_tr;
8078 
8079 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8080 	if (!tr->dir)
8081 		goto out_free_tr;
8082 
8083 	ret = event_trace_add_tracer(tr->dir, tr);
8084 	if (ret) {
8085 		tracefs_remove_recursive(tr->dir);
8086 		goto out_free_tr;
8087 	}
8088 
8089 	ftrace_init_trace_array(tr);
8090 
8091 	init_tracer_tracefs(tr, tr->dir);
8092 	init_trace_flags_index(tr);
8093 	__update_tracer_options(tr);
8094 
8095 	list_add(&tr->list, &ftrace_trace_arrays);
8096 
8097 	mutex_unlock(&trace_types_lock);
8098 	mutex_unlock(&event_mutex);
8099 
8100 	return 0;
8101 
8102  out_free_tr:
8103 	free_trace_buffers(tr);
8104 	free_cpumask_var(tr->tracing_cpumask);
8105 	kfree(tr->name);
8106 	kfree(tr);
8107 
8108  out_unlock:
8109 	mutex_unlock(&trace_types_lock);
8110 	mutex_unlock(&event_mutex);
8111 
8112 	return ret;
8113 
8114 }
8115 
8116 static int instance_rmdir(const char *name)
8117 {
8118 	struct trace_array *tr;
8119 	int found = 0;
8120 	int ret;
8121 	int i;
8122 
8123 	mutex_lock(&event_mutex);
8124 	mutex_lock(&trace_types_lock);
8125 
8126 	ret = -ENODEV;
8127 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8128 		if (tr->name && strcmp(tr->name, name) == 0) {
8129 			found = 1;
8130 			break;
8131 		}
8132 	}
8133 	if (!found)
8134 		goto out_unlock;
8135 
8136 	ret = -EBUSY;
8137 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8138 		goto out_unlock;
8139 
8140 	list_del(&tr->list);
8141 
8142 	/* Disable all the flags that were enabled coming in */
8143 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8144 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8145 			set_tracer_flag(tr, 1 << i, 0);
8146 	}
8147 
8148 	tracing_set_nop(tr);
8149 	clear_ftrace_function_probes(tr);
8150 	event_trace_del_tracer(tr);
8151 	ftrace_clear_pids(tr);
8152 	ftrace_destroy_function_files(tr);
8153 	tracefs_remove_recursive(tr->dir);
8154 	free_trace_buffers(tr);
8155 
8156 	for (i = 0; i < tr->nr_topts; i++) {
8157 		kfree(tr->topts[i].topts);
8158 	}
8159 	kfree(tr->topts);
8160 
8161 	free_cpumask_var(tr->tracing_cpumask);
8162 	kfree(tr->name);
8163 	kfree(tr);
8164 
8165 	ret = 0;
8166 
8167  out_unlock:
8168 	mutex_unlock(&trace_types_lock);
8169 	mutex_unlock(&event_mutex);
8170 
8171 	return ret;
8172 }
8173 
8174 static __init void create_trace_instances(struct dentry *d_tracer)
8175 {
8176 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8177 							 instance_mkdir,
8178 							 instance_rmdir);
8179 	if (WARN_ON(!trace_instance_dir))
8180 		return;
8181 }
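
/*
 * Usage sketch (assuming tracefs is mounted at /sys/kernel/tracing):
 * "mkdir /sys/kernel/tracing/instances/foo" is routed to
 * instance_mkdir() above, which allocates a new trace_array with its
 * own ring buffers and tracefs files; "rmdir" of the same directory is
 * routed to instance_rmdir(), which returns -EBUSY while the instance
 * is still referenced and otherwise tears it all down again.
 */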
8182 
8183 static void
8184 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8185 {
8186 	struct trace_event_file *file;
8187 	int cpu;
8188 
8189 	trace_create_file("available_tracers", 0444, d_tracer,
8190 			tr, &show_traces_fops);
8191 
8192 	trace_create_file("current_tracer", 0644, d_tracer,
8193 			tr, &set_tracer_fops);
8194 
8195 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8196 			  tr, &tracing_cpumask_fops);
8197 
8198 	trace_create_file("trace_options", 0644, d_tracer,
8199 			  tr, &tracing_iter_fops);
8200 
8201 	trace_create_file("trace", 0644, d_tracer,
8202 			  tr, &tracing_fops);
8203 
8204 	trace_create_file("trace_pipe", 0444, d_tracer,
8205 			  tr, &tracing_pipe_fops);
8206 
8207 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8208 			  tr, &tracing_entries_fops);
8209 
8210 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8211 			  tr, &tracing_total_entries_fops);
8212 
8213 	trace_create_file("free_buffer", 0200, d_tracer,
8214 			  tr, &tracing_free_buffer_fops);
8215 
8216 	trace_create_file("trace_marker", 0220, d_tracer,
8217 			  tr, &tracing_mark_fops);
8218 
8219 	file = __find_event_file(tr, "ftrace", "print");
8220 	if (file && file->dir)
8221 		trace_create_file("trigger", 0644, file->dir, file,
8222 				  &event_trigger_fops);
8223 	tr->trace_marker_file = file;
8224 
8225 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8226 			  tr, &tracing_mark_raw_fops);
8227 
8228 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8229 			  &trace_clock_fops);
8230 
8231 	trace_create_file("tracing_on", 0644, d_tracer,
8232 			  tr, &rb_simple_fops);
8233 
8234 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8235 			  &trace_time_stamp_mode_fops);
8236 
8237 	tr->buffer_percent = 50;
8238 
8239 	trace_create_file("buffer_percent", 0444, d_tracer,
8240 			tr, &buffer_percent_fops);
8241 
8242 	create_trace_options_dir(tr);
8243 
8244 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8245 	trace_create_file("tracing_max_latency", 0644, d_tracer,
8246 			&tr->max_latency, &tracing_max_lat_fops);
8247 #endif
8248 
8249 	if (ftrace_create_function_files(tr, d_tracer))
8250 		WARN(1, "Could not allocate function filter files");
8251 
8252 #ifdef CONFIG_TRACER_SNAPSHOT
8253 	trace_create_file("snapshot", 0644, d_tracer,
8254 			  tr, &snapshot_fops);
8255 #endif
8256 
8257 	for_each_tracing_cpu(cpu)
8258 		tracing_init_tracefs_percpu(tr, cpu);
8259 
8260 	ftrace_init_tracefs(tr, d_tracer);
8261 }
8262 
8263 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8264 {
8265 	struct vfsmount *mnt;
8266 	struct file_system_type *type;
8267 
8268 	/*
8269 	 * To maintain backward compatibility for tools that mount
8270 	 * debugfs to get to the tracing facility, tracefs is automatically
8271 	 * mounted to the debugfs/tracing directory.
8272 	 */
8273 	type = get_fs_type("tracefs");
8274 	if (!type)
8275 		return NULL;
8276 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8277 	put_filesystem(type);
8278 	if (IS_ERR(mnt))
8279 		return NULL;
8280 	mntget(mnt);
8281 
8282 	return mnt;
8283 }
8284 
8285 /**
8286  * tracing_init_dentry - initialize top level trace array
8287  *
8288  * This is called when creating files or directories in the tracing
8289  * directory. It is called via fs_initcall() by any of the boot up code
8290  * and expects to return the dentry of the top level tracing directory.
8291  */
8292 struct dentry *tracing_init_dentry(void)
8293 {
8294 	struct trace_array *tr = &global_trace;
8295 
8296 	/* The top level trace array uses NULL as parent */
8297 	if (tr->dir)
8298 		return NULL;
8299 
8300 	if (WARN_ON(!tracefs_initialized()) ||
8301 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8302 		 WARN_ON(!debugfs_initialized())))
8303 		return ERR_PTR(-ENODEV);
8304 
8305 	/*
8306 	 * As there may still be users that expect the tracing
8307 	 * files to exist in debugfs/tracing, we must automount
8308 	 * the tracefs file system there, so older tools still
8309 	 * work with the newer kernel.
8310 	 */
8311 	tr->dir = debugfs_create_automount("tracing", NULL,
8312 					   trace_automount, NULL);
8313 	if (!tr->dir) {
8314 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
8315 		return ERR_PTR(-ENOMEM);
8316 	}
8317 
8318 	return NULL;
8319 }
8320 
8321 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8322 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8323 
8324 static void __init trace_eval_init(void)
8325 {
8326 	int len;
8327 
8328 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8329 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8330 }
8331 
8332 #ifdef CONFIG_MODULES
8333 static void trace_module_add_evals(struct module *mod)
8334 {
8335 	if (!mod->num_trace_evals)
8336 		return;
8337 
8338 	/*
8339 	 * Modules with bad taint do not have events created; do
8340 	 * not bother with their eval maps either.
8341 	 */
8342 	if (trace_module_has_bad_taint(mod))
8343 		return;
8344 
8345 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8346 }
8347 
8348 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8349 static void trace_module_remove_evals(struct module *mod)
8350 {
8351 	union trace_eval_map_item *map;
8352 	union trace_eval_map_item **last = &trace_eval_maps;
8353 
8354 	if (!mod->num_trace_evals)
8355 		return;
8356 
8357 	mutex_lock(&trace_eval_mutex);
8358 
8359 	map = trace_eval_maps;
8360 
8361 	while (map) {
8362 		if (map->head.mod == mod)
8363 			break;
8364 		map = trace_eval_jmp_to_tail(map);
8365 		last = &map->tail.next;
8366 		map = map->tail.next;
8367 	}
8368 	if (!map)
8369 		goto out;
8370 
8371 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8372 	kfree(map);
8373  out:
8374 	mutex_unlock(&trace_eval_mutex);
8375 }
8376 #else
8377 static inline void trace_module_remove_evals(struct module *mod) { }
8378 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8379 
8380 static int trace_module_notify(struct notifier_block *self,
8381 			       unsigned long val, void *data)
8382 {
8383 	struct module *mod = data;
8384 
8385 	switch (val) {
8386 	case MODULE_STATE_COMING:
8387 		trace_module_add_evals(mod);
8388 		break;
8389 	case MODULE_STATE_GOING:
8390 		trace_module_remove_evals(mod);
8391 		break;
8392 	}
8393 
8394 	return 0;
8395 }
8396 
8397 static struct notifier_block trace_module_nb = {
8398 	.notifier_call = trace_module_notify,
8399 	.priority = 0,
8400 };
8401 #endif /* CONFIG_MODULES */
8402 
8403 static __init int tracer_init_tracefs(void)
8404 {
8405 	struct dentry *d_tracer;
8406 
8407 	trace_access_lock_init();
8408 
8409 	d_tracer = tracing_init_dentry();
8410 	if (IS_ERR(d_tracer))
8411 		return 0;
8412 
8413 	event_trace_init();
8414 
8415 	init_tracer_tracefs(&global_trace, d_tracer);
8416 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8417 
8418 	trace_create_file("tracing_thresh", 0644, d_tracer,
8419 			&global_trace, &tracing_thresh_fops);
8420 
8421 	trace_create_file("README", 0444, d_tracer,
8422 			NULL, &tracing_readme_fops);
8423 
8424 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8425 			NULL, &tracing_saved_cmdlines_fops);
8426 
8427 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8428 			  NULL, &tracing_saved_cmdlines_size_fops);
8429 
8430 	trace_create_file("saved_tgids", 0444, d_tracer,
8431 			NULL, &tracing_saved_tgids_fops);
8432 
8433 	trace_eval_init();
8434 
8435 	trace_create_eval_file(d_tracer);
8436 
8437 #ifdef CONFIG_MODULES
8438 	register_module_notifier(&trace_module_nb);
8439 #endif
8440 
8441 #ifdef CONFIG_DYNAMIC_FTRACE
8442 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8443 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8444 #endif
8445 
8446 	create_trace_instances(d_tracer);
8447 
8448 	update_tracer_options(&global_trace);
8449 
8450 	return 0;
8451 }
8452 
8453 static int trace_panic_handler(struct notifier_block *this,
8454 			       unsigned long event, void *unused)
8455 {
8456 	if (ftrace_dump_on_oops)
8457 		ftrace_dump(ftrace_dump_on_oops);
8458 	return NOTIFY_OK;
8459 }
8460 
8461 static struct notifier_block trace_panic_notifier = {
8462 	.notifier_call  = trace_panic_handler,
8463 	.next           = NULL,
8464 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8465 };
8466 
8467 static int trace_die_handler(struct notifier_block *self,
8468 			     unsigned long val,
8469 			     void *data)
8470 {
8471 	switch (val) {
8472 	case DIE_OOPS:
8473 		if (ftrace_dump_on_oops)
8474 			ftrace_dump(ftrace_dump_on_oops);
8475 		break;
8476 	default:
8477 		break;
8478 	}
8479 	return NOTIFY_OK;
8480 }
8481 
8482 static struct notifier_block trace_die_notifier = {
8483 	.notifier_call = trace_die_handler,
8484 	.priority = 200
8485 };
8486 
8487 /*
8488  * printk is set to a max of 1024; we really don't need it that big.
8489  * Nothing should be printing 1000 characters anyway.
8490  */
8491 #define TRACE_MAX_PRINT		1000
8492 
8493 /*
8494  * Define here KERN_TRACE so that we have one place to modify
8495  * it if we decide to change what log level the ftrace dump
8496  * should be at.
8497  */
8498 #define KERN_TRACE		KERN_EMERG
8499 
8500 void
8501 trace_printk_seq(struct trace_seq *s)
8502 {
8503 	/* Probably should print a warning here. */
8504 	if (s->seq.len >= TRACE_MAX_PRINT)
8505 		s->seq.len = TRACE_MAX_PRINT;
8506 
8507 	/*
8508 	 * More paranoid code. Although the buffer size is set to
8509 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8510 	 * an extra layer of protection.
8511 	 */
8512 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8513 		s->seq.len = s->seq.size - 1;
8514 
8515 	/* Should already be NUL-terminated, but we are paranoid. */
8516 	s->buffer[s->seq.len] = 0;
8517 
8518 	printk(KERN_TRACE "%s", s->buffer);
8519 
8520 	trace_seq_init(s);
8521 }
8522 
8523 void trace_init_global_iter(struct trace_iterator *iter)
8524 {
8525 	iter->tr = &global_trace;
8526 	iter->trace = iter->tr->current_trace;
8527 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8528 	iter->trace_buffer = &global_trace.trace_buffer;
8529 
8530 	if (iter->trace && iter->trace->open)
8531 		iter->trace->open(iter);
8532 
8533 	/* Annotate start of buffers if we had overruns */
8534 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8535 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8536 
8537 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8538 	if (trace_clocks[iter->tr->clock_id].in_ns)
8539 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8540 }
8541 
8542 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8543 {
8544 	/* use static because iter can be a bit big for the stack */
8545 	static struct trace_iterator iter;
8546 	static atomic_t dump_running;
8547 	struct trace_array *tr = &global_trace;
8548 	unsigned int old_userobj;
8549 	unsigned long flags;
8550 	int cnt = 0, cpu;
8551 
8552 	/* Only allow one dump user at a time. */
8553 	if (atomic_inc_return(&dump_running) != 1) {
8554 		atomic_dec(&dump_running);
8555 		return;
8556 	}
8557 
8558 	/*
8559 	 * Always turn off tracing when we dump.
8560 	 * We don't need to show trace output of what happens
8561 	 * between multiple crashes.
8562 	 *
8563 	 * If the user does a sysrq-z, then they can re-enable
8564 	 * tracing with echo 1 > tracing_on.
8565 	 */
8566 	tracing_off();
8567 
8568 	local_irq_save(flags);
8569 	printk_nmi_direct_enter();
8570 
8571 	/* Simulate the iterator */
8572 	trace_init_global_iter(&iter);
8573 
8574 	for_each_tracing_cpu(cpu) {
8575 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8576 	}
8577 
8578 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8579 
8580 	/* don't look at user memory in panic mode */
8581 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8582 
8583 	switch (oops_dump_mode) {
8584 	case DUMP_ALL:
8585 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8586 		break;
8587 	case DUMP_ORIG:
8588 		iter.cpu_file = raw_smp_processor_id();
8589 		break;
8590 	case DUMP_NONE:
8591 		goto out_enable;
8592 	default:
8593 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8594 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8595 	}
8596 
8597 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8598 
8599 	/* Did function tracer already get disabled? */
8600 	if (ftrace_is_dead()) {
8601 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8602 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8603 	}
8604 
8605 	/*
8606 	 * We need to stop all tracing on all CPUs to read the
8607 	 * next buffer. This is a bit expensive, but is
8608 	 * not done often. We print all that we can read,
8609 	 * and then release the locks again.
8610 	 */
8611 
8612 	while (!trace_empty(&iter)) {
8613 
8614 		if (!cnt)
8615 			printk(KERN_TRACE "---------------------------------\n");
8616 
8617 		cnt++;
8618 
8619 		/* reset all but tr, trace, and overruns */
8620 		memset(&iter.seq, 0,
8621 		       sizeof(struct trace_iterator) -
8622 		       offsetof(struct trace_iterator, seq));
8623 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8624 		iter.pos = -1;
8625 
8626 		if (trace_find_next_entry_inc(&iter) != NULL) {
8627 			int ret;
8628 
8629 			ret = print_trace_line(&iter);
8630 			if (ret != TRACE_TYPE_NO_CONSUME)
8631 				trace_consume(&iter);
8632 		}
8633 		touch_nmi_watchdog();
8634 
8635 		trace_printk_seq(&iter.seq);
8636 	}
8637 
8638 	if (!cnt)
8639 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8640 	else
8641 		printk(KERN_TRACE "---------------------------------\n");
8642 
8643  out_enable:
8644 	tr->trace_flags |= old_userobj;
8645 
8646 	for_each_tracing_cpu(cpu) {
8647 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8648 	}
8649 	atomic_dec(&dump_running);
8650 	printk_nmi_direct_exit();
8651 	local_irq_restore(flags);
8652 }
8653 EXPORT_SYMBOL_GPL(ftrace_dump);
8654 
8655 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8656 {
8657 	char **argv;
8658 	int argc, ret;
8659 
8660 	argc = 0;
8661 	ret = 0;
8662 	argv = argv_split(GFP_KERNEL, buf, &argc);
8663 	if (!argv)
8664 		return -ENOMEM;
8665 
8666 	if (argc)
8667 		ret = createfn(argc, argv);
8668 
8669 	argv_free(argv);
8670 
8671 	return ret;
8672 }
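
/*
 * Illustrative example (a sketch, not kernel code): a line such as
 * "p:my_probe do_sys_open" written to a dynamic-event control file is
 * handed here by trace_parse_run_command() below; argv_split() yields
 * argc == 2 with argv == { "p:my_probe", "do_sys_open" }, and
 * createfn(argc, argv) is whatever parser the calling file registered
 * (the kprobe/uprobe event creators are typical users).
 */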
8673 
8674 #define WRITE_BUFSIZE  4096
8675 
8676 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8677 				size_t count, loff_t *ppos,
8678 				int (*createfn)(int, char **))
8679 {
8680 	char *kbuf, *buf, *tmp;
8681 	int ret = 0;
8682 	size_t done = 0;
8683 	size_t size;
8684 
8685 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8686 	if (!kbuf)
8687 		return -ENOMEM;
8688 
8689 	while (done < count) {
8690 		size = count - done;
8691 
8692 		if (size >= WRITE_BUFSIZE)
8693 			size = WRITE_BUFSIZE - 1;
8694 
8695 		if (copy_from_user(kbuf, buffer + done, size)) {
8696 			ret = -EFAULT;
8697 			goto out;
8698 		}
8699 		kbuf[size] = '\0';
8700 		buf = kbuf;
8701 		do {
8702 			tmp = strchr(buf, '\n');
8703 			if (tmp) {
8704 				*tmp = '\0';
8705 				size = tmp - buf + 1;
8706 			} else {
8707 				size = strlen(buf);
8708 				if (done + size < count) {
8709 					if (buf != kbuf)
8710 						break;
8711 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8712 					pr_warn("Line length is too long: Should be less than %d\n",
8713 						WRITE_BUFSIZE - 2);
8714 					ret = -EINVAL;
8715 					goto out;
8716 				}
8717 			}
8718 			done += size;
8719 
8720 			/* Remove comments */
8721 			tmp = strchr(buf, '#');
8722 
8723 			if (tmp)
8724 				*tmp = '\0';
8725 
8726 			ret = trace_run_command(buf, createfn);
8727 			if (ret)
8728 				goto out;
8729 			buf += size;
8730 
8731 		} while (done < count);
8732 	}
8733 	ret = done;
8734 
8735 out:
8736 	kfree(kbuf);
8737 
8738 	return ret;
8739 }
8740 
8741 __init static int tracer_alloc_buffers(void)
8742 {
8743 	int ring_buf_size;
8744 	int ret = -ENOMEM;
8745 
8746 	/*
8747 	 * Make sure we don't accidentally add more trace options
8748 	 * than we have bits for.
8749 	 */
8750 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8751 
8752 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8753 		goto out;
8754 
8755 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8756 		goto out_free_buffer_mask;
8757 
8758 	/* Only allocate trace_printk buffers if a trace_printk exists */
8759 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8760 		/* Must be called before global_trace.buffer is allocated */
8761 		trace_printk_init_buffers();
8762 
8763 	/* To save memory, keep the ring buffer size at its minimum */
8764 	if (ring_buffer_expanded)
8765 		ring_buf_size = trace_buf_size;
8766 	else
8767 		ring_buf_size = 1;
8768 
8769 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8770 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8771 
8772 	raw_spin_lock_init(&global_trace.start_lock);
8773 
8774 	/*
8775 	 * The prepare callback allocates some memory for the ring buffer. We
8776 	 * don't free the buffer if the CPU goes down. If we were to free
8777 	 * the buffer, then the user would lose any trace that was in the
8778 	 * buffer. The memory will be removed once the "instance" is removed.
8779 	 */
8780 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8781 				      "trace/RB:prepare", trace_rb_cpu_prepare,
8782 				      NULL);
8783 	if (ret < 0)
8784 		goto out_free_cpumask;
8785 	/* Used for event triggers */
8786 	ret = -ENOMEM;
8787 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8788 	if (!temp_buffer)
8789 		goto out_rm_hp_state;
8790 
8791 	if (trace_create_savedcmd() < 0)
8792 		goto out_free_temp_buffer;
8793 
8794 	/* TODO: make the number of buffers hot pluggable with CPUS */
8795 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8796 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8797 		WARN_ON(1);
8798 		goto out_free_savedcmd;
8799 	}
8800 
8801 	if (global_trace.buffer_disabled)
8802 		tracing_off();
8803 
8804 	if (trace_boot_clock) {
8805 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8806 		if (ret < 0)
8807 			pr_warn("Trace clock %s not defined, going back to default\n",
8808 				trace_boot_clock);
8809 	}
8810 
8811 	/*
8812 	 * register_tracer() might reference current_trace, so it
8813 	 * needs to be set before we register anything. This is
8814 	 * just a bootstrap of current_trace anyway.
8815 	 */
8816 	global_trace.current_trace = &nop_trace;
8817 
8818 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8819 
8820 	ftrace_init_global_array_ops(&global_trace);
8821 
8822 	init_trace_flags_index(&global_trace);
8823 
8824 	register_tracer(&nop_trace);
8825 
8826 	/* Function tracing may start here (via kernel command line) */
8827 	init_function_trace();
8828 
8829 	/* All seems OK, enable tracing */
8830 	tracing_disabled = 0;
8831 
8832 	atomic_notifier_chain_register(&panic_notifier_list,
8833 				       &trace_panic_notifier);
8834 
8835 	register_die_notifier(&trace_die_notifier);
8836 
8837 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8838 
8839 	INIT_LIST_HEAD(&global_trace.systems);
8840 	INIT_LIST_HEAD(&global_trace.events);
8841 	INIT_LIST_HEAD(&global_trace.hist_vars);
8842 	list_add(&global_trace.list, &ftrace_trace_arrays);
8843 
8844 	apply_trace_boot_options();
8845 
8846 	register_snapshot_cmd();
8847 
8848 	return 0;
8849 
8850 out_free_savedcmd:
8851 	free_saved_cmdlines_buffer(savedcmd);
8852 out_free_temp_buffer:
8853 	ring_buffer_free(temp_buffer);
8854 out_rm_hp_state:
8855 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8856 out_free_cpumask:
8857 	free_cpumask_var(global_trace.tracing_cpumask);
8858 out_free_buffer_mask:
8859 	free_cpumask_var(tracing_buffer_mask);
8860 out:
8861 	return ret;
8862 }
8863 
8864 void __init early_trace_init(void)
8865 {
8866 	if (tracepoint_printk) {
8867 		tracepoint_print_iter =
8868 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8869 		if (WARN_ON(!tracepoint_print_iter))
8870 			tracepoint_printk = 0;
8871 		else
8872 			static_key_enable(&tracepoint_printk_key.key);
8873 	}
8874 	tracer_alloc_buffers();
8875 }
8876 
8877 void __init trace_init(void)
8878 {
8879 	trace_event_init();
8880 }
8881 
8882 __init static int clear_boot_tracer(void)
8883 {
8884 	/*
8885 	 * The buffer holding the default boot-up tracer name lives in
8886 	 * an init section. This function is called at late_initcall time.
8887 	 * If the boot tracer was never registered, clear the pointer, to
8888 	 * prevent a later registration from accessing the buffer that is
8889 	 * about to be freed.
8890 	 */
8891 	if (!default_bootup_tracer)
8892 		return 0;
8893 
8894 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8895 	       default_bootup_tracer);
8896 	default_bootup_tracer = NULL;
8897 
8898 	return 0;
8899 }
8900 
8901 fs_initcall(tracer_init_tracefs);
8902 late_initcall_sync(clear_boot_tracer);
8903 
8904 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8905 __init static int tracing_set_default_clock(void)
8906 {
8907 	/* sched_clock_stable() is determined in late_initcall */
8908 	if (!trace_boot_clock && !sched_clock_stable()) {
8909 		printk(KERN_WARNING
8910 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8911 		       "If you want to keep using the local clock, then add:\n"
8912 		       "  \"trace_clock=local\"\n"
8913 		       "on the kernel command line\n");
8914 		tracing_set_clock(&global_trace, "global");
8915 	}
8916 
8917 	return 0;
8918 }
8919 late_initcall_sync(tracing_set_default_clock);
8920 #endif
8921