xref: /openbmc/linux/kernel/trace/trace.c (revision 9a8f3203)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47 
48 #include "trace.h"
49 #include "trace_output.h"
50 
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56 
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will look into the ring-buffer to count the
60  * entries inserted during the selftest, although concurrent
61  * insertions into the ring-buffer, such as trace_printk, could occur
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65 
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70 
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75 
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78 	{ }
79 };
80 
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84 	return 0;
85 }
86 
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93 
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but will turn to zero if the initialization
97  * of the tracer is successful. But that is the only place that sets
98  * this back to zero.
99  */
100 static int tracing_disabled = 1;
101 
102 cpumask_var_t __read_mostly	tracing_buffer_mask;
103 
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputting them to a
111  * serial console.
112  *
113  * It is off by default, but you can enable it either by specifying
114  * "ftrace_dump_on_oops" on the kernel command line, or by setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set it to 1 to dump the buffers of all CPUs.
117  * Set it to 2 to dump the buffer of the CPU that triggered the oops.
118  */
119 
120 enum ftrace_dump_mode ftrace_dump_on_oops;
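
/*
 * Illustrative usage, matching the settings described above: booting
 * with "ftrace_dump_on_oops=orig_cpu" dumps only the oopsing CPU's
 * buffer, while at run time
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *
 * arranges for the buffers of all CPUs to be dumped on an oops.
 */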
121 
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124 
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128 	struct module			*mod;
129 	unsigned long			length;
130 };
131 
132 union trace_eval_map_item;
133 
134 struct trace_eval_map_tail {
135 	/*
136 	 * "end" is first and points to NULL as it must be different
137 	 * than "mod" or "eval_string"
138 	 */
139 	union trace_eval_map_item	*next;
140 	const char			*end;	/* points to NULL */
141 };
142 
143 static DEFINE_MUTEX(trace_eval_mutex);
144 
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153 	struct trace_eval_map		map;
154 	struct trace_eval_map_head	head;
155 	struct trace_eval_map_tail	tail;
156 };
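
/*
 * Illustrative layout of one saved array, following the description
 * above (N = head.length):
 *
 *	[0]          head  (.mod, .length)
 *	[1] ... [N]  map   (the saved trace_eval_map entries)
 *	[N + 1]      tail  (.next points to the next saved array, or NULL)
 */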
157 
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160 
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 
163 #define MAX_TRACER_SIZE		100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166 
167 static bool allocate_snapshot;
168 
169 static int __init set_cmdline_ftrace(char *str)
170 {
171 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172 	default_bootup_tracer = bootup_tracer_buf;
173 	/* We are using ftrace early, expand it */
174 	ring_buffer_expanded = true;
175 	return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
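
/*
 * For example (illustrative): booting with "ftrace=function" selects the
 * function tracer as early as possible and, as set above, expands the
 * ring buffer from its minimal boot-up size.
 */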
178 
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181 	if (*str++ != '=' || !*str) {
182 		ftrace_dump_on_oops = DUMP_ALL;
183 		return 1;
184 	}
185 
186 	if (!strcmp("orig_cpu", str)) {
187 		ftrace_dump_on_oops = DUMP_ORIG;
188 		return 1;
189 	}
190 
191 	return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194 
195 static int __init stop_trace_on_warning(char *str)
196 {
197 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198 		__disable_trace_on_warning = 1;
199 	return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202 
203 static int __init boot_alloc_snapshot(char *str)
204 {
205 	allocate_snapshot = true;
206 	/* We also need the main ring buffer expanded */
207 	ring_buffer_expanded = true;
208 	return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
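
/*
 * For example (illustrative): booting with "alloc_snapshot" allocates the
 * spare snapshot buffer at boot time and, as noted above, expands the
 * main ring buffer as well.
 */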
211 
212 
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214 
215 static int __init set_trace_boot_options(char *str)
216 {
217 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218 	return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221 
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224 
225 static int __init set_trace_boot_clock(char *str)
226 {
227 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228 	trace_boot_clock = trace_boot_clock_buf;
229 	return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232 
233 static int __init set_tracepoint_printk(char *str)
234 {
235 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236 		tracepoint_printk = 1;
237 	return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240 
241 unsigned long long ns2usecs(u64 nsec)
242 {
243 	nsec += 500;
244 	do_div(nsec, 1000);
245 	return nsec;
246 }
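
/* e.g. ns2usecs(1499) == 1 but ns2usecs(1500) == 2: rounds to the nearest usec */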
247 
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS						\
250 	(FUNCTION_DEFAULT_FLAGS |					\
251 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
252 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
253 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
254 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255 
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
258 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259 
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263 
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269 	.trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271 
272 LIST_HEAD(ftrace_trace_arrays);
273 
274 int trace_array_get(struct trace_array *this_tr)
275 {
276 	struct trace_array *tr;
277 	int ret = -ENODEV;
278 
279 	mutex_lock(&trace_types_lock);
280 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281 		if (tr == this_tr) {
282 			tr->ref++;
283 			ret = 0;
284 			break;
285 		}
286 	}
287 	mutex_unlock(&trace_types_lock);
288 
289 	return ret;
290 }
291 
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294 	WARN_ON(!this_tr->ref);
295 	this_tr->ref--;
296 }
297 
298 void trace_array_put(struct trace_array *this_tr)
299 {
300 	mutex_lock(&trace_types_lock);
301 	__trace_array_put(this_tr);
302 	mutex_unlock(&trace_types_lock);
303 }
304 
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306 			      struct ring_buffer *buffer,
307 			      struct ring_buffer_event *event)
308 {
309 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310 	    !filter_match_preds(call->filter, rec)) {
311 		__trace_event_discard_commit(buffer, event);
312 		return 1;
313 	}
314 
315 	return 0;
316 }
317 
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320 	vfree(pid_list->pids);
321 	kfree(pid_list);
322 }
323 
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
329  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334 	/*
335 	 * If pid_max changed after filtered_pids was created, we
336 	 * by default ignore all pids greater than the previous pid_max.
337 	 */
338 	if (search_pid >= filtered_pids->pid_max)
339 		return false;
340 
341 	return test_bit(search_pid, filtered_pids->pids);
342 }
343 
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356 	/*
357 	 * Return false, because if filtered_pids does not exist,
358 	 * all pids are good to trace.
359 	 */
360 	if (!filtered_pids)
361 		return false;
362 
363 	return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365 
366 /**
367  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * If adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379 				  struct task_struct *self,
380 				  struct task_struct *task)
381 {
382 	if (!pid_list)
383 		return;
384 
385 	/* For forks, we only add if the forking task is listed */
386 	if (self) {
387 		if (!trace_find_filtered_pid(pid_list, self->pid))
388 			return;
389 	}
390 
391 	/* Sorry, but we don't support pid_max changing after setting */
392 	if (task->pid >= pid_list->pid_max)
393 		return;
394 
395 	/* "self" is set for forks, and NULL for exits */
396 	if (self)
397 		set_bit(task->pid, pid_list->pids);
398 	else
399 		clear_bit(task->pid, pid_list->pids);
400 }
401 
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
405  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416 	unsigned long pid = (unsigned long)v;
417 
418 	(*pos)++;
419 
420 	/* pid already is +1 of the actual previous bit */
421 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422 
423 	/* Return pid + 1 to allow zero to be represented */
424 	if (pid < pid_list->pid_max)
425 		return (void *)(pid + 1);
426 
427 	return NULL;
428 }
429 
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443 	unsigned long pid;
444 	loff_t l = 0;
445 
446 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447 	if (pid >= pid_list->pid_max)
448 		return NULL;
449 
450 	/* Return pid + 1 so that zero can be the exit value */
451 	for (pid++; pid && l < *pos;
452 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453 		;
454 	return (void *)pid;
455 }
456 
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467 	unsigned long pid = (unsigned long)v - 1;
468 
469 	seq_printf(m, "%lu\n", pid);
470 	return 0;
471 }
472 
473 /* 128 should be much more than enough */
474 #define PID_BUF_SIZE		127
475 
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477 		    struct trace_pid_list **new_pid_list,
478 		    const char __user *ubuf, size_t cnt)
479 {
480 	struct trace_pid_list *pid_list;
481 	struct trace_parser parser;
482 	unsigned long val;
483 	int nr_pids = 0;
484 	ssize_t read = 0;
485 	ssize_t ret = 0;
486 	loff_t pos;
487 	pid_t pid;
488 
489 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490 		return -ENOMEM;
491 
492 	/*
493 	 * Always recreate a new array. The write is an all or nothing
494 	 * operation. Always create a new array when adding new pids by
495 	 * the user. If the operation fails, then the current list is
496 	 * not modified.
497 	 */
498 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499 	if (!pid_list)
500 		return -ENOMEM;
501 
502 	pid_list->pid_max = READ_ONCE(pid_max);
503 
504 	/* Only truncating will shrink pid_max */
505 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506 		pid_list->pid_max = filtered_pids->pid_max;
507 
508 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509 	if (!pid_list->pids) {
510 		kfree(pid_list);
511 		return -ENOMEM;
512 	}
513 
514 	if (filtered_pids) {
515 		/* copy the current bits to the new max */
516 		for_each_set_bit(pid, filtered_pids->pids,
517 				 filtered_pids->pid_max) {
518 			set_bit(pid, pid_list->pids);
519 			nr_pids++;
520 		}
521 	}
522 
523 	while (cnt > 0) {
524 
525 		pos = 0;
526 
527 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
528 		if (ret < 0 || !trace_parser_loaded(&parser))
529 			break;
530 
531 		read += ret;
532 		ubuf += ret;
533 		cnt -= ret;
534 
535 		ret = -EINVAL;
536 		if (kstrtoul(parser.buffer, 0, &val))
537 			break;
538 		if (val >= pid_list->pid_max)
539 			break;
540 
541 		pid = (pid_t)val;
542 
543 		set_bit(pid, pid_list->pids);
544 		nr_pids++;
545 
546 		trace_parser_clear(&parser);
547 		ret = 0;
548 	}
549 	trace_parser_put(&parser);
550 
551 	if (ret < 0) {
552 		trace_free_pid_list(pid_list);
553 		return ret;
554 	}
555 
556 	if (!nr_pids) {
557 		/* Cleared the list of pids */
558 		trace_free_pid_list(pid_list);
559 		read = ret;
560 		pid_list = NULL;
561 	}
562 
563 	*new_pid_list = pid_list;
564 
565 	return read;
566 }
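
/*
 * Illustrative use (an assumption based on its callers, e.g. the tracefs
 * pid-filter files such as "set_event_pid"): writing "123 456" builds a
 * new list containing pids 123 and 456 in addition to whatever is already
 * in @filtered_pids; the old list is never modified, only replaced via
 * *new_pid_list by the caller.
 */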
567 
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570 	u64 ts;
571 
572 	/* Early boot up does not have a buffer yet */
573 	if (!buf->buffer)
574 		return trace_clock_local();
575 
576 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
577 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578 
579 	return ts;
580 }
581 
582 u64 ftrace_now(int cpu)
583 {
584 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586 
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" to be used in fast paths such as for
592  * the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598 	/*
599 	 * For quick access (irqsoff uses this in fast path), just
600 	 * return the mirror variable of the state of the ring buffer.
601 	 * It's a little racy, but we don't really care.
602 	 */
603 	smp_rmb();
604 	return !global_trace.buffer_disabled;
605 }
606 
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low number of 16384.
613  * If the dump on oops happens, you will appreciate not having
614  * to wait for all that output. Anyway, this is configurable at
615  * both boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
618 
619 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620 
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer		*trace_types __read_mostly;
623 
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628 
629 /*
630  * serialize the access of the ring buffer
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow other processes to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the event producer.
641  *   B) The page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different cpu ring
645  * buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650 
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654 
655 static inline void trace_access_lock(int cpu)
656 {
657 	if (cpu == RING_BUFFER_ALL_CPUS) {
658 		/* gain it for accessing the whole ring buffer. */
659 		down_write(&all_cpu_access_lock);
660 	} else {
661 		/* gain it for accessing a cpu ring buffer. */
662 
663 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664 		down_read(&all_cpu_access_lock);
665 
666 		/* Secondly block other access to this @cpu ring buffer. */
667 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
668 	}
669 }
670 
671 static inline void trace_access_unlock(int cpu)
672 {
673 	if (cpu == RING_BUFFER_ALL_CPUS) {
674 		up_write(&all_cpu_access_lock);
675 	} else {
676 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677 		up_read(&all_cpu_access_lock);
678 	}
679 }
680 
681 static inline void trace_access_lock_init(void)
682 {
683 	int cpu;
684 
685 	for_each_possible_cpu(cpu)
686 		mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688 
689 #else
690 
691 static DEFINE_MUTEX(access_lock);
692 
693 static inline void trace_access_lock(int cpu)
694 {
695 	(void)cpu;
696 	mutex_lock(&access_lock);
697 }
698 
699 static inline void trace_access_unlock(int cpu)
700 {
701 	(void)cpu;
702 	mutex_unlock(&access_lock);
703 }
704 
705 static inline void trace_access_lock_init(void)
706 {
707 }
708 
709 #endif
710 
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713 				 unsigned long flags,
714 				 int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716 				      struct ring_buffer *buffer,
717 				      unsigned long flags,
718 				      int skip, int pc, struct pt_regs *regs);
719 
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722 					unsigned long flags,
723 					int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727 				      struct ring_buffer *buffer,
728 				      unsigned long flags,
729 				      int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 
733 #endif
734 
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737 		  int type, unsigned long flags, int pc)
738 {
739 	struct trace_entry *ent = ring_buffer_event_data(event);
740 
741 	tracing_generic_entry_update(ent, flags, pc);
742 	ent->type = type;
743 }
744 
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747 			  int type,
748 			  unsigned long len,
749 			  unsigned long flags, int pc)
750 {
751 	struct ring_buffer_event *event;
752 
753 	event = ring_buffer_lock_reserve(buffer, len);
754 	if (event != NULL)
755 		trace_event_setup(event, type, flags, pc);
756 
757 	return event;
758 }
759 
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762 	if (tr->trace_buffer.buffer)
763 		ring_buffer_record_on(tr->trace_buffer.buffer);
764 	/*
765 	 * This flag is looked at when buffers haven't been allocated
766 	 * yet, or by some tracers (like irqsoff), that just want to
767 	 * know if the ring buffer has been disabled, but it can handle
768 	 * races of where it gets disabled but we still do a record.
769 	 * As the check is in the fast path of the tracers, it is more
770 	 * important to be fast than accurate.
771 	 */
772 	tr->buffer_disabled = 0;
773 	/* Make the flag seen by readers */
774 	smp_wmb();
775 }
776 
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785 	tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788 
789 
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793 	__this_cpu_write(trace_taskinfo_save, true);
794 
795 	/* If this is the temp buffer, we need to commit fully */
796 	if (this_cpu_read(trace_buffered_event) == event) {
797 		/* Length is in event->array[0] */
798 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
799 		/* Release the temp buffer */
800 		this_cpu_dec(trace_buffered_event_cnt);
801 	} else
802 		ring_buffer_unlock_commit(buffer, event);
803 }
804 
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:	   The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813 	struct ring_buffer_event *event;
814 	struct ring_buffer *buffer;
815 	struct print_entry *entry;
816 	unsigned long irq_flags;
817 	int alloc;
818 	int pc;
819 
820 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821 		return 0;
822 
823 	pc = preempt_count();
824 
825 	if (unlikely(tracing_selftest_running || tracing_disabled))
826 		return 0;
827 
828 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
829 
830 	local_save_flags(irq_flags);
831 	buffer = global_trace.trace_buffer.buffer;
832 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833 					    irq_flags, pc);
834 	if (!event)
835 		return 0;
836 
837 	entry = ring_buffer_event_data(event);
838 	entry->ip = ip;
839 
840 	memcpy(&entry->buf, str, size);
841 
842 	/* Add a newline if necessary */
843 	if (entry->buf[size - 1] != '\n') {
844 		entry->buf[size] = '\n';
845 		entry->buf[size + 1] = '\0';
846 	} else
847 		entry->buf[size] = '\0';
848 
849 	__buffer_unlock_commit(buffer, event);
850 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851 
852 	return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
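
/*
 * Illustrative use: this is normally reached via the trace_puts() macro
 * rather than called directly, e.g.
 *
 *	trace_puts("reached the slow path\n");
 *
 * which records the string when the TRACE_ITER_PRINTK trace flag is set
 * (checked above).
 */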
855 
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:	   The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863 	struct ring_buffer_event *event;
864 	struct ring_buffer *buffer;
865 	struct bputs_entry *entry;
866 	unsigned long irq_flags;
867 	int size = sizeof(struct bputs_entry);
868 	int pc;
869 
870 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871 		return 0;
872 
873 	pc = preempt_count();
874 
875 	if (unlikely(tracing_selftest_running || tracing_disabled))
876 		return 0;
877 
878 	local_save_flags(irq_flags);
879 	buffer = global_trace.trace_buffer.buffer;
880 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881 					    irq_flags, pc);
882 	if (!event)
883 		return 0;
884 
885 	entry = ring_buffer_event_data(event);
886 	entry->ip			= ip;
887 	entry->str			= str;
888 
889 	__buffer_unlock_commit(buffer, event);
890 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891 
892 	return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895 
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
898 {
899 	struct tracer *tracer = tr->current_trace;
900 	unsigned long flags;
901 
902 	if (in_nmi()) {
903 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904 		internal_trace_puts("*** snapshot is being ignored        ***\n");
905 		return;
906 	}
907 
908 	if (!tr->allocated_snapshot) {
909 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910 		internal_trace_puts("*** stopping trace here!   ***\n");
911 		tracing_off();
912 		return;
913 	}
914 
915 	/* Note, snapshot can not be used when the tracer uses it */
916 	if (tracer->use_max_tr) {
917 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919 		return;
920 	}
921 
922 	local_irq_save(flags);
923 	update_max_tr(tr, current, smp_processor_id(), cond_data);
924 	local_irq_restore(flags);
925 }
926 
927 void tracing_snapshot_instance(struct trace_array *tr)
928 {
929 	tracing_snapshot_instance_cond(tr, NULL);
930 }
931 
932 /**
933  * tracing_snapshot - take a snapshot of the current buffer.
934  *
935  * This causes a swap between the snapshot buffer and the current live
936  * tracing buffer. You can use this to take snapshots of the live
937  * trace when some condition is triggered, but continue to trace.
938  *
939  * Note, make sure to allocate the snapshot with either
940  * a tracing_snapshot_alloc(), or by doing it manually
941  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
942  *
943  * If the snapshot buffer is not allocated, it will stop tracing.
944  * Basically making a permanent snapshot.
945  */
946 void tracing_snapshot(void)
947 {
948 	struct trace_array *tr = &global_trace;
949 
950 	tracing_snapshot_instance(tr);
951 }
952 EXPORT_SYMBOL_GPL(tracing_snapshot);
953 
954 /**
955  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
956  * @tr:		The tracing instance to snapshot
957  * @cond_data:	The data to be tested conditionally, and possibly saved
958  *
959  * This is the same as tracing_snapshot() except that the snapshot is
960  * conditional - the snapshot will only happen if the
961  * cond_snapshot.update() implementation receiving the cond_data
962  * returns true, which means that the trace array's cond_snapshot
963  * update() operation used the cond_data to determine whether the
964  * snapshot should be taken, and if it was, presumably saved it along
965  * with the snapshot.
966  */
967 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
968 {
969 	tracing_snapshot_instance_cond(tr, cond_data);
970 }
971 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
972 
973 /**
974  * tracing_snapshot_cond_data - get the user data associated with a snapshot
975  * @tr:		The tracing instance
976  *
977  * When the user enables a conditional snapshot using
978  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
979  * with the snapshot.  This accessor is used to retrieve it.
980  *
981  * Should not be called from cond_snapshot.update(), since it takes
982  * the tr->max_lock lock, which the code calling
983  * cond_snapshot.update() has already done.
984  *
985  * Returns the cond_data associated with the trace array's snapshot.
986  */
987 void *tracing_cond_snapshot_data(struct trace_array *tr)
988 {
989 	void *cond_data = NULL;
990 
991 	arch_spin_lock(&tr->max_lock);
992 
993 	if (tr->cond_snapshot)
994 		cond_data = tr->cond_snapshot->cond_data;
995 
996 	arch_spin_unlock(&tr->max_lock);
997 
998 	return cond_data;
999 }
1000 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1001 
1002 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1003 					struct trace_buffer *size_buf, int cpu_id);
1004 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1005 
1006 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1007 {
1008 	int ret;
1009 
1010 	if (!tr->allocated_snapshot) {
1011 
1012 		/* allocate spare buffer */
1013 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1014 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1015 		if (ret < 0)
1016 			return ret;
1017 
1018 		tr->allocated_snapshot = true;
1019 	}
1020 
1021 	return 0;
1022 }
1023 
1024 static void free_snapshot(struct trace_array *tr)
1025 {
1026 	/*
1027 	 * We don't free the ring buffer; instead, we resize it because
1028 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1029 	 * we want to preserve it.
1030 	 */
1031 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1032 	set_buffer_entries(&tr->max_buffer, 1);
1033 	tracing_reset_online_cpus(&tr->max_buffer);
1034 	tr->allocated_snapshot = false;
1035 }
1036 
1037 /**
1038  * tracing_alloc_snapshot - allocate snapshot buffer.
1039  *
1040  * This only allocates the snapshot buffer if it isn't already
1041  * allocated - it doesn't also take a snapshot.
1042  *
1043  * This is meant to be used in cases where the snapshot buffer needs
1044  * to be set up for events that can't sleep but need to be able to
1045  * trigger a snapshot.
1046  */
1047 int tracing_alloc_snapshot(void)
1048 {
1049 	struct trace_array *tr = &global_trace;
1050 	int ret;
1051 
1052 	ret = tracing_alloc_snapshot_instance(tr);
1053 	WARN_ON(ret < 0);
1054 
1055 	return ret;
1056 }
1057 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1058 
1059 /**
1060  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1061  *
1062  * This is similar to tracing_snapshot(), but it will allocate the
1063  * snapshot buffer if it isn't already allocated. Use this only
1064  * where it is safe to sleep, as the allocation may sleep.
1065  *
1066  * This causes a swap between the snapshot buffer and the current live
1067  * tracing buffer. You can use this to take snapshots of the live
1068  * trace when some condition is triggered, but continue to trace.
1069  */
1070 void tracing_snapshot_alloc(void)
1071 {
1072 	int ret;
1073 
1074 	ret = tracing_alloc_snapshot();
1075 	if (ret < 0)
1076 		return;
1077 
1078 	tracing_snapshot();
1079 }
1080 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1081 
1082 /**
1083  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1084  * @tr:		The tracing instance
1085  * @cond_data:	User data to associate with the snapshot
1086  * @update:	Implementation of the cond_snapshot update function
1087  *
1088  * Check whether the conditional snapshot for the given instance has
1089  * already been enabled, or if the current tracer is already using a
1090  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1091  * save the cond_data and update function inside.
1092  *
1093  * Returns 0 if successful, error otherwise.
1094  */
1095 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1096 				 cond_update_fn_t update)
1097 {
1098 	struct cond_snapshot *cond_snapshot;
1099 	int ret = 0;
1100 
1101 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1102 	if (!cond_snapshot)
1103 		return -ENOMEM;
1104 
1105 	cond_snapshot->cond_data = cond_data;
1106 	cond_snapshot->update = update;
1107 
1108 	mutex_lock(&trace_types_lock);
1109 
1110 	ret = tracing_alloc_snapshot_instance(tr);
1111 	if (ret)
1112 		goto fail_unlock;
1113 
1114 	if (tr->current_trace->use_max_tr) {
1115 		ret = -EBUSY;
1116 		goto fail_unlock;
1117 	}
1118 
1119 	/*
1120 	 * The cond_snapshot can only change to NULL without the
1121 	 * trace_types_lock. We don't care if we race with it going
1122 	 * to NULL, but we want to make sure that it's not set to
1123 	 * something other than NULL when we get here, which we can
1124 	 * do safely with only holding the trace_types_lock and not
1125 	 * having to take the max_lock.
1126 	 */
1127 	if (tr->cond_snapshot) {
1128 		ret = -EBUSY;
1129 		goto fail_unlock;
1130 	}
1131 
1132 	arch_spin_lock(&tr->max_lock);
1133 	tr->cond_snapshot = cond_snapshot;
1134 	arch_spin_unlock(&tr->max_lock);
1135 
1136 	mutex_unlock(&trace_types_lock);
1137 
1138 	return ret;
1139 
1140  fail_unlock:
1141 	mutex_unlock(&trace_types_lock);
1142 	kfree(cond_snapshot);
1143 	return ret;
1144 }
1145 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1146 
1147 /**
1148  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1149  * @tr:		The tracing instance
1150  *
1151  * Check whether the conditional snapshot for the given instance is
1152  * enabled; if so, free the cond_snapshot associated with it,
1153  * otherwise return -EINVAL.
1154  *
1155  * Returns 0 if successful, error otherwise.
1156  */
1157 int tracing_snapshot_cond_disable(struct trace_array *tr)
1158 {
1159 	int ret = 0;
1160 
1161 	arch_spin_lock(&tr->max_lock);
1162 
1163 	if (!tr->cond_snapshot)
1164 		ret = -EINVAL;
1165 	else {
1166 		kfree(tr->cond_snapshot);
1167 		tr->cond_snapshot = NULL;
1168 	}
1169 
1170 	arch_spin_unlock(&tr->max_lock);
1171 
1172 	return ret;
1173 }
1174 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1175 #else
1176 void tracing_snapshot(void)
1177 {
1178 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1179 }
1180 EXPORT_SYMBOL_GPL(tracing_snapshot);
1181 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1182 {
1183 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1184 }
1185 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1186 int tracing_alloc_snapshot(void)
1187 {
1188 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1189 	return -ENODEV;
1190 }
1191 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1192 void tracing_snapshot_alloc(void)
1193 {
1194 	/* Give warning */
1195 	tracing_snapshot();
1196 }
1197 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200 	return NULL;
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1204 {
1205 	return -ENODEV;
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1208 int tracing_snapshot_cond_disable(struct trace_array *tr)
1209 {
1210 	return false;
1211 }
1212 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1213 #endif /* CONFIG_TRACER_SNAPSHOT */
1214 
1215 void tracer_tracing_off(struct trace_array *tr)
1216 {
1217 	if (tr->trace_buffer.buffer)
1218 		ring_buffer_record_off(tr->trace_buffer.buffer);
1219 	/*
1220 	 * This flag is looked at when buffers haven't been allocated
1221 	 * yet, or by some tracers (like irqsoff), that just want to
1222 	 * know if the ring buffer has been disabled, but it can handle
1223 	 * races of where it gets disabled but we still do a record.
1224 	 * As the check is in the fast path of the tracers, it is more
1225 	 * important to be fast than accurate.
1226 	 */
1227 	tr->buffer_disabled = 1;
1228 	/* Make the flag seen by readers */
1229 	smp_wmb();
1230 }
1231 
1232 /**
1233  * tracing_off - turn off tracing buffers
1234  *
1235  * This function stops the tracing buffers from recording data.
1236  * It does not disable any overhead the tracers themselves may
1237  * be causing. This function simply causes all recording to
1238  * the ring buffers to fail.
1239  */
1240 void tracing_off(void)
1241 {
1242 	tracer_tracing_off(&global_trace);
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_off);
1245 
1246 void disable_trace_on_warning(void)
1247 {
1248 	if (__disable_trace_on_warning)
1249 		tracing_off();
1250 }
1251 
1252 /**
1253  * tracer_tracing_is_on - show real state of ring buffer enabled
1254  * @tr : the trace array to know if ring buffer is enabled
1255  *
1256  * Shows real state of the ring buffer if it is enabled or not.
1257  */
1258 bool tracer_tracing_is_on(struct trace_array *tr)
1259 {
1260 	if (tr->trace_buffer.buffer)
1261 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1262 	return !tr->buffer_disabled;
1263 }
1264 
1265 /**
1266  * tracing_is_on - show state of ring buffers enabled
1267  */
1268 int tracing_is_on(void)
1269 {
1270 	return tracer_tracing_is_on(&global_trace);
1271 }
1272 EXPORT_SYMBOL_GPL(tracing_is_on);
1273 
1274 static int __init set_buf_size(char *str)
1275 {
1276 	unsigned long buf_size;
1277 
1278 	if (!str)
1279 		return 0;
1280 	buf_size = memparse(str, &str);
1281 	/* nr_entries can not be zero */
1282 	if (buf_size == 0)
1283 		return 0;
1284 	trace_buf_size = buf_size;
1285 	return 1;
1286 }
1287 __setup("trace_buf_size=", set_buf_size);
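
/*
 * For example (illustrative): booting with "trace_buf_size=16M" requests
 * a buffer of roughly 16 MB per cpu; memparse() accepts the usual K/M/G
 * suffixes and, as noted above, the size is rounded to page size when the
 * buffer is allocated.
 */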
1288 
1289 static int __init set_tracing_thresh(char *str)
1290 {
1291 	unsigned long threshold;
1292 	int ret;
1293 
1294 	if (!str)
1295 		return 0;
1296 	ret = kstrtoul(str, 0, &threshold);
1297 	if (ret < 0)
1298 		return 0;
1299 	tracing_thresh = threshold * 1000;
1300 	return 1;
1301 }
1302 __setup("tracing_thresh=", set_tracing_thresh);
1303 
1304 unsigned long nsecs_to_usecs(unsigned long nsecs)
1305 {
1306 	return nsecs / 1000;
1307 }
1308 
1309 /*
1310  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1311  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1312  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1313  * of strings in the order that the evals (enum) were defined.
1314  */
1315 #undef C
1316 #define C(a, b) b
1317 
1318 /* These must match the bit positions in trace_iterator_flags */
1319 static const char *trace_options[] = {
1320 	TRACE_FLAGS
1321 	NULL
1322 };
1323 
1324 static struct {
1325 	u64 (*func)(void);
1326 	const char *name;
1327 	int in_ns;		/* is this clock in nanoseconds? */
1328 } trace_clocks[] = {
1329 	{ trace_clock_local,		"local",	1 },
1330 	{ trace_clock_global,		"global",	1 },
1331 	{ trace_clock_counter,		"counter",	0 },
1332 	{ trace_clock_jiffies,		"uptime",	0 },
1333 	{ trace_clock,			"perf",		1 },
1334 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1335 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1336 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1337 	ARCH_TRACE_CLOCKS
1338 };
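
/*
 * Illustrative use: a clock from the table above is selected per trace
 * array through the tracefs "trace_clock" file, e.g.
 *
 *	echo global > /sys/kernel/tracing/trace_clock
 *
 * (or the older /sys/kernel/debug/tracing path used elsewhere in this file).
 */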
1339 
1340 bool trace_clock_in_ns(struct trace_array *tr)
1341 {
1342 	if (trace_clocks[tr->clock_id].in_ns)
1343 		return true;
1344 
1345 	return false;
1346 }
1347 
1348 /*
1349  * trace_parser_get_init - gets the buffer for trace parser
1350  */
1351 int trace_parser_get_init(struct trace_parser *parser, int size)
1352 {
1353 	memset(parser, 0, sizeof(*parser));
1354 
1355 	parser->buffer = kmalloc(size, GFP_KERNEL);
1356 	if (!parser->buffer)
1357 		return 1;
1358 
1359 	parser->size = size;
1360 	return 0;
1361 }
1362 
1363 /*
1364  * trace_parser_put - frees the buffer for trace parser
1365  */
1366 void trace_parser_put(struct trace_parser *parser)
1367 {
1368 	kfree(parser->buffer);
1369 	parser->buffer = NULL;
1370 }
1371 
1372 /*
1373  * trace_get_user - reads the user input string separated by space
1374  * (matched by isspace(ch))
1375  *
1376  * For each string found the 'struct trace_parser' is updated,
1377  * and the function returns.
1378  *
1379  * Returns number of bytes read.
1380  *
1381  * See kernel/trace/trace.h for 'struct trace_parser' details.
1382  */
1383 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1384 	size_t cnt, loff_t *ppos)
1385 {
1386 	char ch;
1387 	size_t read = 0;
1388 	ssize_t ret;
1389 
1390 	if (!*ppos)
1391 		trace_parser_clear(parser);
1392 
1393 	ret = get_user(ch, ubuf++);
1394 	if (ret)
1395 		goto out;
1396 
1397 	read++;
1398 	cnt--;
1399 
1400 	/*
1401 	 * If the parser is not finished with the last write, continue
1402 	 * reading the user input without skipping spaces.
1403 	 */
1404 	if (!parser->cont) {
1405 		/* skip white space */
1406 		while (cnt && isspace(ch)) {
1407 			ret = get_user(ch, ubuf++);
1408 			if (ret)
1409 				goto out;
1410 			read++;
1411 			cnt--;
1412 		}
1413 
1414 		parser->idx = 0;
1415 
1416 		/* only spaces were written */
1417 		if (isspace(ch) || !ch) {
1418 			*ppos += read;
1419 			ret = read;
1420 			goto out;
1421 		}
1422 	}
1423 
1424 	/* read the non-space input */
1425 	while (cnt && !isspace(ch) && ch) {
1426 		if (parser->idx < parser->size - 1)
1427 			parser->buffer[parser->idx++] = ch;
1428 		else {
1429 			ret = -EINVAL;
1430 			goto out;
1431 		}
1432 		ret = get_user(ch, ubuf++);
1433 		if (ret)
1434 			goto out;
1435 		read++;
1436 		cnt--;
1437 	}
1438 
1439 	/* We either got finished input or we have to wait for another call. */
1440 	if (isspace(ch) || !ch) {
1441 		parser->buffer[parser->idx] = 0;
1442 		parser->cont = false;
1443 	} else if (parser->idx < parser->size - 1) {
1444 		parser->cont = true;
1445 		parser->buffer[parser->idx++] = ch;
1446 		/* Make sure the parsed string always terminates with '\0'. */
1447 		parser->buffer[parser->idx] = 0;
1448 	} else {
1449 		ret = -EINVAL;
1450 		goto out;
1451 	}
1452 
1453 	*ppos += read;
1454 	ret = read;
1455 
1456 out:
1457 	return ret;
1458 }
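
/*
 * Illustrative behaviour: each call extracts at most one whitespace-
 * separated token into parser->buffer, so a write of "foo bar" needs two
 * calls to yield "foo" and then "bar"; a token cut off by the end of the
 * write is carried over to the next call via parser->cont.
 */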
1459 
1460 /* TODO add a seq_buf_to_buffer() */
1461 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1462 {
1463 	int len;
1464 
1465 	if (trace_seq_used(s) <= s->seq.readpos)
1466 		return -EBUSY;
1467 
1468 	len = trace_seq_used(s) - s->seq.readpos;
1469 	if (cnt > len)
1470 		cnt = len;
1471 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1472 
1473 	s->seq.readpos += cnt;
1474 	return cnt;
1475 }
1476 
1477 unsigned long __read_mostly	tracing_thresh;
1478 
1479 #ifdef CONFIG_TRACER_MAX_TRACE
1480 /*
1481  * Copy the new maximum trace into the separate maximum-trace
1482  * structure. (this way the maximum trace is permanently saved,
1483  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1484  */
1485 static void
1486 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1487 {
1488 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1489 	struct trace_buffer *max_buf = &tr->max_buffer;
1490 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1491 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1492 
1493 	max_buf->cpu = cpu;
1494 	max_buf->time_start = data->preempt_timestamp;
1495 
1496 	max_data->saved_latency = tr->max_latency;
1497 	max_data->critical_start = data->critical_start;
1498 	max_data->critical_end = data->critical_end;
1499 
1500 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1501 	max_data->pid = tsk->pid;
1502 	/*
1503 	 * If tsk == current, then use current_uid(), as that does not use
1504 	 * RCU. The irq tracer can be called out of RCU scope.
1505 	 */
1506 	if (tsk == current)
1507 		max_data->uid = current_uid();
1508 	else
1509 		max_data->uid = task_uid(tsk);
1510 
1511 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1512 	max_data->policy = tsk->policy;
1513 	max_data->rt_priority = tsk->rt_priority;
1514 
1515 	/* record this task's comm */
1516 	tracing_record_cmdline(tsk);
1517 }
1518 
1519 /**
1520  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1521  * @tr: tracer
1522  * @tsk: the task with the latency
1523  * @cpu: The cpu that initiated the trace.
1524  * @cond_data: User data associated with a conditional snapshot
1525  *
1526  * Flip the buffers between the @tr and the max_tr and record information
1527  * about which task was the cause of this latency.
1528  */
1529 void
1530 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1531 	      void *cond_data)
1532 {
1533 	if (tr->stop_count)
1534 		return;
1535 
1536 	WARN_ON_ONCE(!irqs_disabled());
1537 
1538 	if (!tr->allocated_snapshot) {
1539 		/* Only the nop tracer should hit this when disabling */
1540 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1541 		return;
1542 	}
1543 
1544 	arch_spin_lock(&tr->max_lock);
1545 
1546 	/* Inherit the recordable setting from trace_buffer */
1547 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1548 		ring_buffer_record_on(tr->max_buffer.buffer);
1549 	else
1550 		ring_buffer_record_off(tr->max_buffer.buffer);
1551 
1552 #ifdef CONFIG_TRACER_SNAPSHOT
1553 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1554 		goto out_unlock;
1555 #endif
1556 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1557 
1558 	__update_max_tr(tr, tsk, cpu);
1559 
1560  out_unlock:
1561 	arch_spin_unlock(&tr->max_lock);
1562 }
1563 
1564 /**
1565  * update_max_tr_single - only copy one trace over, and reset the rest
1566  * @tr: tracer
1567  * @tsk: task with the latency
1568  * @cpu: the cpu of the buffer to copy.
1569  *
1570  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1571  */
1572 void
1573 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1574 {
1575 	int ret;
1576 
1577 	if (tr->stop_count)
1578 		return;
1579 
1580 	WARN_ON_ONCE(!irqs_disabled());
1581 	if (!tr->allocated_snapshot) {
1582 		/* Only the nop tracer should hit this when disabling */
1583 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1584 		return;
1585 	}
1586 
1587 	arch_spin_lock(&tr->max_lock);
1588 
1589 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1590 
1591 	if (ret == -EBUSY) {
1592 		/*
1593 		 * We failed to swap the buffer due to a commit taking
1594 		 * place on this CPU. We fail to record, but we reset
1595 		 * the max trace buffer (no one writes directly to it)
1596 		 * and flag that it failed.
1597 		 */
1598 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1599 			"Failed to swap buffers due to commit in progress\n");
1600 	}
1601 
1602 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1603 
1604 	__update_max_tr(tr, tsk, cpu);
1605 	arch_spin_unlock(&tr->max_lock);
1606 }
1607 #endif /* CONFIG_TRACER_MAX_TRACE */
1608 
1609 static int wait_on_pipe(struct trace_iterator *iter, int full)
1610 {
1611 	/* Iterators are static, they should be filled or empty */
1612 	if (trace_buffer_iter(iter, iter->cpu_file))
1613 		return 0;
1614 
1615 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1616 				full);
1617 }
1618 
1619 #ifdef CONFIG_FTRACE_STARTUP_TEST
1620 static bool selftests_can_run;
1621 
1622 struct trace_selftests {
1623 	struct list_head		list;
1624 	struct tracer			*type;
1625 };
1626 
1627 static LIST_HEAD(postponed_selftests);
1628 
1629 static int save_selftest(struct tracer *type)
1630 {
1631 	struct trace_selftests *selftest;
1632 
1633 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1634 	if (!selftest)
1635 		return -ENOMEM;
1636 
1637 	selftest->type = type;
1638 	list_add(&selftest->list, &postponed_selftests);
1639 	return 0;
1640 }
1641 
1642 static int run_tracer_selftest(struct tracer *type)
1643 {
1644 	struct trace_array *tr = &global_trace;
1645 	struct tracer *saved_tracer = tr->current_trace;
1646 	int ret;
1647 
1648 	if (!type->selftest || tracing_selftest_disabled)
1649 		return 0;
1650 
1651 	/*
1652 	 * If a tracer registers early in boot up (before scheduling is
1653 	 * initialized and such), then do not run its selftests yet.
1654 	 * Instead, run it a little later in the boot process.
1655 	 */
1656 	if (!selftests_can_run)
1657 		return save_selftest(type);
1658 
1659 	/*
1660 	 * Run a selftest on this tracer.
1661 	 * Here we reset the trace buffer, and set the current
1662 	 * tracer to be this tracer. The tracer can then run some
1663 	 * internal tracing to verify that everything is in order.
1664 	 * If we fail, we do not register this tracer.
1665 	 */
1666 	tracing_reset_online_cpus(&tr->trace_buffer);
1667 
1668 	tr->current_trace = type;
1669 
1670 #ifdef CONFIG_TRACER_MAX_TRACE
1671 	if (type->use_max_tr) {
1672 		/* If we expanded the buffers, make sure the max is expanded too */
1673 		if (ring_buffer_expanded)
1674 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1675 					   RING_BUFFER_ALL_CPUS);
1676 		tr->allocated_snapshot = true;
1677 	}
1678 #endif
1679 
1680 	/* the test is responsible for initializing and enabling */
1681 	pr_info("Testing tracer %s: ", type->name);
1682 	ret = type->selftest(type, tr);
1683 	/* the test is responsible for resetting too */
1684 	tr->current_trace = saved_tracer;
1685 	if (ret) {
1686 		printk(KERN_CONT "FAILED!\n");
1687 		/* Add the warning after printing 'FAILED' */
1688 		WARN_ON(1);
1689 		return -1;
1690 	}
1691 	/* Only reset on passing, to avoid touching corrupted buffers */
1692 	tracing_reset_online_cpus(&tr->trace_buffer);
1693 
1694 #ifdef CONFIG_TRACER_MAX_TRACE
1695 	if (type->use_max_tr) {
1696 		tr->allocated_snapshot = false;
1697 
1698 		/* Shrink the max buffer again */
1699 		if (ring_buffer_expanded)
1700 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1701 					   RING_BUFFER_ALL_CPUS);
1702 	}
1703 #endif
1704 
1705 	printk(KERN_CONT "PASSED\n");
1706 	return 0;
1707 }
1708 
1709 static __init int init_trace_selftests(void)
1710 {
1711 	struct trace_selftests *p, *n;
1712 	struct tracer *t, **last;
1713 	int ret;
1714 
1715 	selftests_can_run = true;
1716 
1717 	mutex_lock(&trace_types_lock);
1718 
1719 	if (list_empty(&postponed_selftests))
1720 		goto out;
1721 
1722 	pr_info("Running postponed tracer tests:\n");
1723 
1724 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1725 		ret = run_tracer_selftest(p->type);
1726 		/* If the test fails, then warn and remove from available_tracers */
1727 		if (ret < 0) {
1728 			WARN(1, "tracer: %s failed selftest, disabling\n",
1729 			     p->type->name);
1730 			last = &trace_types;
1731 			for (t = trace_types; t; t = t->next) {
1732 				if (t == p->type) {
1733 					*last = t->next;
1734 					break;
1735 				}
1736 				last = &t->next;
1737 			}
1738 		}
1739 		list_del(&p->list);
1740 		kfree(p);
1741 	}
1742 
1743  out:
1744 	mutex_unlock(&trace_types_lock);
1745 
1746 	return 0;
1747 }
1748 core_initcall(init_trace_selftests);
1749 #else
1750 static inline int run_tracer_selftest(struct tracer *type)
1751 {
1752 	return 0;
1753 }
1754 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1755 
1756 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1757 
1758 static void __init apply_trace_boot_options(void);
1759 
1760 /**
1761  * register_tracer - register a tracer with the ftrace system.
1762  * @type: the plugin for the tracer
1763  *
1764  * Register a new plugin tracer.
1765  */
1766 int __init register_tracer(struct tracer *type)
1767 {
1768 	struct tracer *t;
1769 	int ret = 0;
1770 
1771 	if (!type->name) {
1772 		pr_info("Tracer must have a name\n");
1773 		return -1;
1774 	}
1775 
1776 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1777 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1778 		return -1;
1779 	}
1780 
1781 	mutex_lock(&trace_types_lock);
1782 
1783 	tracing_selftest_running = true;
1784 
1785 	for (t = trace_types; t; t = t->next) {
1786 		if (strcmp(type->name, t->name) == 0) {
1787 			/* already found */
1788 			pr_info("Tracer %s already registered\n",
1789 				type->name);
1790 			ret = -1;
1791 			goto out;
1792 		}
1793 	}
1794 
1795 	if (!type->set_flag)
1796 		type->set_flag = &dummy_set_flag;
1797 	if (!type->flags) {
1798 		/* allocate a dummy tracer_flags */
1799 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1800 		if (!type->flags) {
1801 			ret = -ENOMEM;
1802 			goto out;
1803 		}
1804 		type->flags->val = 0;
1805 		type->flags->opts = dummy_tracer_opt;
1806 	} else
1807 		if (!type->flags->opts)
1808 			type->flags->opts = dummy_tracer_opt;
1809 
1810 	/* store the tracer for __set_tracer_option */
1811 	type->flags->trace = type;
1812 
1813 	ret = run_tracer_selftest(type);
1814 	if (ret < 0)
1815 		goto out;
1816 
1817 	type->next = trace_types;
1818 	trace_types = type;
1819 	add_tracer_options(&global_trace, type);
1820 
1821  out:
1822 	tracing_selftest_running = false;
1823 	mutex_unlock(&trace_types_lock);
1824 
1825 	if (ret || !default_bootup_tracer)
1826 		goto out_unlock;
1827 
1828 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1829 		goto out_unlock;
1830 
1831 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1832 	/* Do we want this tracer to start on bootup? */
1833 	tracing_set_tracer(&global_trace, type->name);
1834 	default_bootup_tracer = NULL;
1835 
1836 	apply_trace_boot_options();
1837 
1838 	/* disable other selftests, since this will break them. */
1839 	tracing_selftest_disabled = true;
1840 #ifdef CONFIG_FTRACE_STARTUP_TEST
1841 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1842 	       type->name);
1843 #endif
1844 
1845  out_unlock:
1846 	return ret;
1847 }
1848 
1849 void tracing_reset(struct trace_buffer *buf, int cpu)
1850 {
1851 	struct ring_buffer *buffer = buf->buffer;
1852 
1853 	if (!buffer)
1854 		return;
1855 
1856 	ring_buffer_record_disable(buffer);
1857 
1858 	/* Make sure all commits have finished */
1859 	synchronize_rcu();
1860 	ring_buffer_reset_cpu(buffer, cpu);
1861 
1862 	ring_buffer_record_enable(buffer);
1863 }
1864 
1865 void tracing_reset_online_cpus(struct trace_buffer *buf)
1866 {
1867 	struct ring_buffer *buffer = buf->buffer;
1868 	int cpu;
1869 
1870 	if (!buffer)
1871 		return;
1872 
1873 	ring_buffer_record_disable(buffer);
1874 
1875 	/* Make sure all commits have finished */
1876 	synchronize_rcu();
1877 
1878 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1879 
1880 	for_each_online_cpu(cpu)
1881 		ring_buffer_reset_cpu(buffer, cpu);
1882 
1883 	ring_buffer_record_enable(buffer);
1884 }
1885 
1886 /* Must have trace_types_lock held */
1887 void tracing_reset_all_online_cpus(void)
1888 {
1889 	struct trace_array *tr;
1890 
1891 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1892 		if (!tr->clear_trace)
1893 			continue;
1894 		tr->clear_trace = false;
1895 		tracing_reset_online_cpus(&tr->trace_buffer);
1896 #ifdef CONFIG_TRACER_MAX_TRACE
1897 		tracing_reset_online_cpus(&tr->max_buffer);
1898 #endif
1899 	}
1900 }
1901 
1902 static int *tgid_map;
1903 
1904 #define SAVED_CMDLINES_DEFAULT 128
1905 #define NO_CMDLINE_MAP UINT_MAX
1906 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1907 struct saved_cmdlines_buffer {
1908 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1909 	unsigned *map_cmdline_to_pid;
1910 	unsigned cmdline_num;
1911 	int cmdline_idx;
1912 	char *saved_cmdlines;
1913 };
1914 static struct saved_cmdlines_buffer *savedcmd;
1915 
1916 /* temporarily disable recording */
1917 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1918 
1919 static inline char *get_saved_cmdlines(int idx)
1920 {
1921 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1922 }
1923 
1924 static inline void set_cmdline(int idx, const char *cmdline)
1925 {
1926 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1927 }
1928 
1929 static int allocate_cmdlines_buffer(unsigned int val,
1930 				    struct saved_cmdlines_buffer *s)
1931 {
1932 	s->map_cmdline_to_pid = kmalloc_array(val,
1933 					      sizeof(*s->map_cmdline_to_pid),
1934 					      GFP_KERNEL);
1935 	if (!s->map_cmdline_to_pid)
1936 		return -ENOMEM;
1937 
1938 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1939 	if (!s->saved_cmdlines) {
1940 		kfree(s->map_cmdline_to_pid);
1941 		return -ENOMEM;
1942 	}
1943 
1944 	s->cmdline_idx = 0;
1945 	s->cmdline_num = val;
1946 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1947 	       sizeof(s->map_pid_to_cmdline));
1948 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1949 	       val * sizeof(*s->map_cmdline_to_pid));
1950 
1951 	return 0;
1952 }
1953 
1954 static int trace_create_savedcmd(void)
1955 {
1956 	int ret;
1957 
1958 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1959 	if (!savedcmd)
1960 		return -ENOMEM;
1961 
1962 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1963 	if (ret < 0) {
1964 		kfree(savedcmd);
1965 		savedcmd = NULL;
1966 		return -ENOMEM;
1967 	}
1968 
1969 	return 0;
1970 }
1971 
1972 int is_tracing_stopped(void)
1973 {
1974 	return global_trace.stop_count;
1975 }
1976 
1977 /**
1978  * tracing_start - quick start of the tracer
1979  *
1980  * If tracing is enabled but was stopped by tracing_stop,
1981  * this will start the tracer back up.
1982  */
1983 void tracing_start(void)
1984 {
1985 	struct ring_buffer *buffer;
1986 	unsigned long flags;
1987 
1988 	if (tracing_disabled)
1989 		return;
1990 
1991 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1992 	if (--global_trace.stop_count) {
1993 		if (global_trace.stop_count < 0) {
1994 			/* Someone screwed up their debugging */
1995 			WARN_ON_ONCE(1);
1996 			global_trace.stop_count = 0;
1997 		}
1998 		goto out;
1999 	}
2000 
2001 	/* Prevent the buffers from switching */
2002 	arch_spin_lock(&global_trace.max_lock);
2003 
2004 	buffer = global_trace.trace_buffer.buffer;
2005 	if (buffer)
2006 		ring_buffer_record_enable(buffer);
2007 
2008 #ifdef CONFIG_TRACER_MAX_TRACE
2009 	buffer = global_trace.max_buffer.buffer;
2010 	if (buffer)
2011 		ring_buffer_record_enable(buffer);
2012 #endif
2013 
2014 	arch_spin_unlock(&global_trace.max_lock);
2015 
2016  out:
2017 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2018 }
2019 
2020 static void tracing_start_tr(struct trace_array *tr)
2021 {
2022 	struct ring_buffer *buffer;
2023 	unsigned long flags;
2024 
2025 	if (tracing_disabled)
2026 		return;
2027 
2028 	/* If global, we need to also start the max tracer */
2029 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2030 		return tracing_start();
2031 
2032 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2033 
2034 	if (--tr->stop_count) {
2035 		if (tr->stop_count < 0) {
2036 			/* Someone screwed up their debugging */
2037 			WARN_ON_ONCE(1);
2038 			tr->stop_count = 0;
2039 		}
2040 		goto out;
2041 	}
2042 
2043 	buffer = tr->trace_buffer.buffer;
2044 	if (buffer)
2045 		ring_buffer_record_enable(buffer);
2046 
2047  out:
2048 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2049 }
2050 
2051 /**
2052  * tracing_stop - quick stop of the tracer
2053  *
2054  * Light weight way to stop tracing. Use in conjunction with
2055  * tracing_start.
2056  */
2057 void tracing_stop(void)
2058 {
2059 	struct ring_buffer *buffer;
2060 	unsigned long flags;
2061 
2062 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2063 	if (global_trace.stop_count++)
2064 		goto out;
2065 
2066 	/* Prevent the buffers from switching */
2067 	arch_spin_lock(&global_trace.max_lock);
2068 
2069 	buffer = global_trace.trace_buffer.buffer;
2070 	if (buffer)
2071 		ring_buffer_record_disable(buffer);
2072 
2073 #ifdef CONFIG_TRACER_MAX_TRACE
2074 	buffer = global_trace.max_buffer.buffer;
2075 	if (buffer)
2076 		ring_buffer_record_disable(buffer);
2077 #endif
2078 
2079 	arch_spin_unlock(&global_trace.max_lock);
2080 
2081  out:
2082 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2083 }
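/*
 * Illustrative usage, not part of this file: tracing_stop() and
 * tracing_start() nest via stop_count, so a caller that needs the
 * buffers quiesced around a disruptive region simply brackets it.
 * do_something_disruptive() is a hypothetical placeholder.
 *
 *	tracing_stop();
 *	do_something_disruptive();
 *	tracing_start();
 */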
2084 
2085 static void tracing_stop_tr(struct trace_array *tr)
2086 {
2087 	struct ring_buffer *buffer;
2088 	unsigned long flags;
2089 
2090 	/* If global, we need to also stop the max tracer */
2091 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2092 		return tracing_stop();
2093 
2094 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2095 	if (tr->stop_count++)
2096 		goto out;
2097 
2098 	buffer = tr->trace_buffer.buffer;
2099 	if (buffer)
2100 		ring_buffer_record_disable(buffer);
2101 
2102  out:
2103 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2104 }
2105 
2106 static int trace_save_cmdline(struct task_struct *tsk)
2107 {
2108 	unsigned pid, idx;
2109 
2110 	/* treat recording of idle task as a success */
2111 	if (!tsk->pid)
2112 		return 1;
2113 
2114 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2115 		return 0;
2116 
2117 	/*
2118 	 * It's not the end of the world if we don't get
2119 	 * the lock, but we also don't want to spin
2120 	 * nor do we want to disable interrupts,
2121 	 * so if we miss here, then better luck next time.
2122 	 */
2123 	if (!arch_spin_trylock(&trace_cmdline_lock))
2124 		return 0;
2125 
2126 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2127 	if (idx == NO_CMDLINE_MAP) {
2128 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2129 
2130 		/*
2131 		 * Check whether the cmdline buffer at idx has a pid
2132 		 * mapped. We are going to overwrite that entry so we
2133 		 * need to clear the map_pid_to_cmdline. Otherwise we
2134 		 * would read the new comm for the old pid.
2135 		 */
2136 		pid = savedcmd->map_cmdline_to_pid[idx];
2137 		if (pid != NO_CMDLINE_MAP)
2138 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2139 
2140 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2141 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2142 
2143 		savedcmd->cmdline_idx = idx;
2144 	}
2145 
2146 	set_cmdline(idx, tsk->comm);
2147 
2148 	arch_spin_unlock(&trace_cmdline_lock);
2149 
2150 	return 1;
2151 }
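/*
 * Illustrative walk-through, not part of this file, assuming the
 * default cmdline_num of 128: the first time pid 4242 is saved,
 * map_pid_to_cmdline[4242] == NO_CMDLINE_MAP, so the next slot
 * idx = (cmdline_idx + 1) % 128 is claimed for it.  If that slot was
 * previously holding pid 314, map_pid_to_cmdline[314] is reset to
 * NO_CMDLINE_MAP first, so a later lookup of 314 falls back to "<...>"
 * rather than returning 4242's comm.
 */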
2152 
2153 static void __trace_find_cmdline(int pid, char comm[])
2154 {
2155 	unsigned map;
2156 
2157 	if (!pid) {
2158 		strcpy(comm, "<idle>");
2159 		return;
2160 	}
2161 
2162 	if (WARN_ON_ONCE(pid < 0)) {
2163 		strcpy(comm, "<XXX>");
2164 		return;
2165 	}
2166 
2167 	if (pid > PID_MAX_DEFAULT) {
2168 		strcpy(comm, "<...>");
2169 		return;
2170 	}
2171 
2172 	map = savedcmd->map_pid_to_cmdline[pid];
2173 	if (map != NO_CMDLINE_MAP)
2174 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2175 	else
2176 		strcpy(comm, "<...>");
2177 }
2178 
2179 void trace_find_cmdline(int pid, char comm[])
2180 {
2181 	preempt_disable();
2182 	arch_spin_lock(&trace_cmdline_lock);
2183 
2184 	__trace_find_cmdline(pid, comm);
2185 
2186 	arch_spin_unlock(&trace_cmdline_lock);
2187 	preempt_enable();
2188 }
2189 
2190 int trace_find_tgid(int pid)
2191 {
2192 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2193 		return 0;
2194 
2195 	return tgid_map[pid];
2196 }
2197 
2198 static int trace_save_tgid(struct task_struct *tsk)
2199 {
2200 	/* treat recording of idle task as a success */
2201 	if (!tsk->pid)
2202 		return 1;
2203 
2204 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2205 		return 0;
2206 
2207 	tgid_map[tsk->pid] = tsk->tgid;
2208 	return 1;
2209 }
2210 
2211 static bool tracing_record_taskinfo_skip(int flags)
2212 {
2213 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2214 		return true;
2215 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2216 		return true;
2217 	if (!__this_cpu_read(trace_taskinfo_save))
2218 		return true;
2219 	return false;
2220 }
2221 
2222 /**
2223  * tracing_record_taskinfo - record the task info of a task
2224  *
2225  * @task:  task to record
2226  * @flags: TRACE_RECORD_CMDLINE for recording comm
2227  *         TRACE_RECORD_TGID for recording tgid
2228  */
2229 void tracing_record_taskinfo(struct task_struct *task, int flags)
2230 {
2231 	bool done;
2232 
2233 	if (tracing_record_taskinfo_skip(flags))
2234 		return;
2235 
2236 	/*
2237 	 * Record as much task information as possible. If some fail, continue
2238 	 * to try to record the others.
2239 	 */
2240 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2241 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2242 
2243 	/* If recording any information failed, retry again soon. */
2244 	if (!done)
2245 		return;
2246 
2247 	__this_cpu_write(trace_taskinfo_save, false);
2248 }
2249 
2250 /**
2251  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2252  *
2253  * @prev:  previous task during sched_switch
2254  * @next:  next task during sched_switch
2255  * @flags: TRACE_RECORD_CMDLINE for recording comm
2256  *         TRACE_RECORD_TGID for recording tgid
2257  */
2258 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2259 					  struct task_struct *next, int flags)
2260 {
2261 	bool done;
2262 
2263 	if (tracing_record_taskinfo_skip(flags))
2264 		return;
2265 
2266 	/*
2267 	 * Record as much task information as possible. If some fail, continue
2268 	 * to try to record the others.
2269 	 */
2270 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2271 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2272 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2273 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2274 
2275 	/* If recording any information failed, retry again soon. */
2276 	if (!done)
2277 		return;
2278 
2279 	__this_cpu_write(trace_taskinfo_save, false);
2280 }
2281 
2282 /* Helpers to record specific task information */
2283 void tracing_record_cmdline(struct task_struct *task)
2284 {
2285 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2286 }
2287 
2288 void tracing_record_tgid(struct task_struct *task)
2289 {
2290 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2291 }
2292 
2293 /*
2294  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2295  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2296  * simplifies those functions and keeps them in sync.
2297  */
2298 enum print_line_t trace_handle_return(struct trace_seq *s)
2299 {
2300 	return trace_seq_has_overflowed(s) ?
2301 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2302 }
2303 EXPORT_SYMBOL_GPL(trace_handle_return);
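/*
 * Illustrative sketch, not part of this file: an event output callback
 * typically formats into iter->seq and returns through
 * trace_handle_return() so overflow is reported uniformly.  The
 * function name trace_my_event_print is hypothetical.
 *
 *	static enum print_line_t
 *	trace_my_event_print(struct trace_iterator *iter, int flags,
 *			     struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my_event on cpu %d\n", iter->cpu);
 *		return trace_handle_return(&iter->seq);
 *	}
 */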
2304 
2305 void
2306 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2307 			     int pc)
2308 {
2309 	struct task_struct *tsk = current;
2310 
2311 	entry->preempt_count		= pc & 0xff;
2312 	entry->pid			= (tsk) ? tsk->pid : 0;
2313 	entry->flags =
2314 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2315 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2316 #else
2317 		TRACE_FLAG_IRQS_NOSUPPORT |
2318 #endif
2319 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2320 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2321 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2322 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2323 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2324 }
2325 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2326 
2327 struct ring_buffer_event *
2328 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2329 			  int type,
2330 			  unsigned long len,
2331 			  unsigned long flags, int pc)
2332 {
2333 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2334 }
2335 
2336 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2337 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2338 static int trace_buffered_event_ref;
2339 
2340 /**
2341  * trace_buffered_event_enable - enable buffering events
2342  *
2343  * When events are being filtered, it is quicker to use a temporary
2344  * buffer to write the event data into if there's a likely chance
2345  * that it will not be committed. Discarding an already-reserved
2346  * ring buffer event is not as fast as committing one, and is much
2347  * slower than copying the data into a commit.
2348  *
2349  * When an event is to be filtered, per-CPU buffers are allocated to
2350  * write the event data into. If the event is filtered and discarded,
2351  * it is simply dropped; otherwise, the entire data is committed
2352  * in one shot.
2353  */
2354 void trace_buffered_event_enable(void)
2355 {
2356 	struct ring_buffer_event *event;
2357 	struct page *page;
2358 	int cpu;
2359 
2360 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2361 
2362 	if (trace_buffered_event_ref++)
2363 		return;
2364 
2365 	for_each_tracing_cpu(cpu) {
2366 		page = alloc_pages_node(cpu_to_node(cpu),
2367 					GFP_KERNEL | __GFP_NORETRY, 0);
2368 		if (!page)
2369 			goto failed;
2370 
2371 		event = page_address(page);
2372 		memset(event, 0, sizeof(*event));
2373 
2374 		per_cpu(trace_buffered_event, cpu) = event;
2375 
2376 		preempt_disable();
2377 		if (cpu == smp_processor_id() &&
2378 		    this_cpu_read(trace_buffered_event) !=
2379 		    per_cpu(trace_buffered_event, cpu))
2380 			WARN_ON_ONCE(1);
2381 		preempt_enable();
2382 	}
2383 
2384 	return;
2385  failed:
2386 	trace_buffered_event_disable();
2387 }
2388 
2389 static void enable_trace_buffered_event(void *data)
2390 {
2391 	/* Probably not needed, but do it anyway */
2392 	smp_rmb();
2393 	this_cpu_dec(trace_buffered_event_cnt);
2394 }
2395 
2396 static void disable_trace_buffered_event(void *data)
2397 {
2398 	this_cpu_inc(trace_buffered_event_cnt);
2399 }
2400 
2401 /**
2402  * trace_buffered_event_disable - disable buffering events
2403  *
2404  * When a filter is removed, it is faster to not use the buffered
2405  * events, and to commit directly into the ring buffer. Free up
2406  * the temp buffers when there are no more users. This requires
2407  * special synchronization with current events.
2408  */
2409 void trace_buffered_event_disable(void)
2410 {
2411 	int cpu;
2412 
2413 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2414 
2415 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2416 		return;
2417 
2418 	if (--trace_buffered_event_ref)
2419 		return;
2420 
2421 	preempt_disable();
2422 	/* For each CPU, set the buffer as used. */
2423 	smp_call_function_many(tracing_buffer_mask,
2424 			       disable_trace_buffered_event, NULL, 1);
2425 	preempt_enable();
2426 
2427 	/* Wait for all current users to finish */
2428 	synchronize_rcu();
2429 
2430 	for_each_tracing_cpu(cpu) {
2431 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2432 		per_cpu(trace_buffered_event, cpu) = NULL;
2433 	}
2434 	/*
2435 	 * Make sure trace_buffered_event is NULL before clearing
2436 	 * trace_buffered_event_cnt.
2437 	 */
2438 	smp_wmb();
2439 
2440 	preempt_disable();
2441 	/* Do the work on each cpu */
2442 	smp_call_function_many(tracing_buffer_mask,
2443 			       enable_trace_buffered_event, NULL, 1);
2444 	preempt_enable();
2445 }
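/*
 * Illustrative sketch, not part of this file: enable/disable calls are
 * paired by the event filtering code while holding event_mutex (the
 * WARN_ON_ONCE above documents that requirement).
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	mutex_unlock(&event_mutex);
 *
 * and later, when the last user of the filter goes away:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */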
2446 
2447 static struct ring_buffer *temp_buffer;
2448 
2449 struct ring_buffer_event *
2450 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2451 			  struct trace_event_file *trace_file,
2452 			  int type, unsigned long len,
2453 			  unsigned long flags, int pc)
2454 {
2455 	struct ring_buffer_event *entry;
2456 	int val;
2457 
2458 	*current_rb = trace_file->tr->trace_buffer.buffer;
2459 
2460 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2461 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2462 	    (entry = this_cpu_read(trace_buffered_event))) {
2463 		/* Try to use the per cpu buffer first */
2464 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2465 		if (val == 1) {
2466 			trace_event_setup(entry, type, flags, pc);
2467 			entry->array[0] = len;
2468 			return entry;
2469 		}
2470 		this_cpu_dec(trace_buffered_event_cnt);
2471 	}
2472 
2473 	entry = __trace_buffer_lock_reserve(*current_rb,
2474 					    type, len, flags, pc);
2475 	/*
2476 	 * If tracing is off, but we have triggers enabled
2477 	 * we still need to look at the event data. Use the temp_buffer
2478 	 * to store the trace event for the trigger to use. It's recursion
2479 	 * safe and will not be recorded anywhere.
2480 	 */
2481 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2482 		*current_rb = temp_buffer;
2483 		entry = __trace_buffer_lock_reserve(*current_rb,
2484 						    type, len, flags, pc);
2485 	}
2486 	return entry;
2487 }
2488 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2489 
2490 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2491 static DEFINE_MUTEX(tracepoint_printk_mutex);
2492 
2493 static void output_printk(struct trace_event_buffer *fbuffer)
2494 {
2495 	struct trace_event_call *event_call;
2496 	struct trace_event *event;
2497 	unsigned long flags;
2498 	struct trace_iterator *iter = tracepoint_print_iter;
2499 
2500 	/* We should never get here if iter is NULL */
2501 	if (WARN_ON_ONCE(!iter))
2502 		return;
2503 
2504 	event_call = fbuffer->trace_file->event_call;
2505 	if (!event_call || !event_call->event.funcs ||
2506 	    !event_call->event.funcs->trace)
2507 		return;
2508 
2509 	event = &fbuffer->trace_file->event_call->event;
2510 
2511 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2512 	trace_seq_init(&iter->seq);
2513 	iter->ent = fbuffer->entry;
2514 	event_call->event.funcs->trace(iter, 0, event);
2515 	trace_seq_putc(&iter->seq, 0);
2516 	printk("%s", iter->seq.buffer);
2517 
2518 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2519 }
2520 
2521 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2522 			     void __user *buffer, size_t *lenp,
2523 			     loff_t *ppos)
2524 {
2525 	int save_tracepoint_printk;
2526 	int ret;
2527 
2528 	mutex_lock(&tracepoint_printk_mutex);
2529 	save_tracepoint_printk = tracepoint_printk;
2530 
2531 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2532 
2533 	/*
2534 	 * This will force exiting early, as tracepoint_printk
2535 	 * is always zero when tracepoint_print_iter is not allocated.
2536 	 */
2537 	if (!tracepoint_print_iter)
2538 		tracepoint_printk = 0;
2539 
2540 	if (save_tracepoint_printk == tracepoint_printk)
2541 		goto out;
2542 
2543 	if (tracepoint_printk)
2544 		static_key_enable(&tracepoint_printk_key.key);
2545 	else
2546 		static_key_disable(&tracepoint_printk_key.key);
2547 
2548  out:
2549 	mutex_unlock(&tracepoint_printk_mutex);
2550 
2551 	return ret;
2552 }
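/*
 * Illustrative usage, not part of this file: assuming the sysctl is
 * wired up as kernel.tracepoint_printk, the knob can be flipped from
 * user space at run time:
 *
 *	echo 1 > /proc/sys/kernel/tracepoint_printk
 *	echo 0 > /proc/sys/kernel/tracepoint_printk
 *
 * The static key keeps the event commit fast path free of a test on
 * tracepoint_printk while the feature is off.
 */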
2553 
2554 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2555 {
2556 	if (static_key_false(&tracepoint_printk_key.key))
2557 		output_printk(fbuffer);
2558 
2559 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2560 				    fbuffer->event, fbuffer->entry,
2561 				    fbuffer->flags, fbuffer->pc);
2562 }
2563 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2564 
2565 /*
2566  * Skip 3:
2567  *
2568  *   trace_buffer_unlock_commit_regs()
2569  *   trace_event_buffer_commit()
2570  *   trace_event_raw_event_xxx()
2571  */
2572 # define STACK_SKIP 3
2573 
2574 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2575 				     struct ring_buffer *buffer,
2576 				     struct ring_buffer_event *event,
2577 				     unsigned long flags, int pc,
2578 				     struct pt_regs *regs)
2579 {
2580 	__buffer_unlock_commit(buffer, event);
2581 
2582 	/*
2583 	 * If regs is not set, then skip the necessary functions.
2584 	 * Note, we can still get here via blktrace, wakeup tracer
2585 	 * and mmiotrace, but that's ok if they lose a function or
2586 	 * two. They are not that meaningful.
2587 	 */
2588 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2589 	ftrace_trace_userstack(buffer, flags, pc);
2590 }
2591 
2592 /*
2593  * Similar to trace_buffer_unlock_commit_regs() but does not dump the stack.
2594  */
2595 void
2596 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2597 				   struct ring_buffer_event *event)
2598 {
2599 	__buffer_unlock_commit(buffer, event);
2600 }
2601 
2602 static void
2603 trace_process_export(struct trace_export *export,
2604 	       struct ring_buffer_event *event)
2605 {
2606 	struct trace_entry *entry;
2607 	unsigned int size = 0;
2608 
2609 	entry = ring_buffer_event_data(event);
2610 	size = ring_buffer_event_length(event);
2611 	export->write(export, entry, size);
2612 }
2613 
2614 static DEFINE_MUTEX(ftrace_export_lock);
2615 
2616 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2617 
2618 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2619 
2620 static inline void ftrace_exports_enable(void)
2621 {
2622 	static_branch_enable(&ftrace_exports_enabled);
2623 }
2624 
2625 static inline void ftrace_exports_disable(void)
2626 {
2627 	static_branch_disable(&ftrace_exports_enabled);
2628 }
2629 
2630 static void ftrace_exports(struct ring_buffer_event *event)
2631 {
2632 	struct trace_export *export;
2633 
2634 	preempt_disable_notrace();
2635 
2636 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2637 	while (export) {
2638 		trace_process_export(export, event);
2639 		export = rcu_dereference_raw_notrace(export->next);
2640 	}
2641 
2642 	preempt_enable_notrace();
2643 }
2644 
2645 static inline void
2646 add_trace_export(struct trace_export **list, struct trace_export *export)
2647 {
2648 	rcu_assign_pointer(export->next, *list);
2649 	/*
2650 	 * We are adding export to the list, but another
2651 	 * CPU might be walking that list. We need to make sure
2652 	 * the export->next pointer is valid before another CPU sees
2653 	 * the export pointer included in the list.
2654 	 */
2655 	rcu_assign_pointer(*list, export);
2656 }
2657 
2658 static inline int
2659 rm_trace_export(struct trace_export **list, struct trace_export *export)
2660 {
2661 	struct trace_export **p;
2662 
2663 	for (p = list; *p != NULL; p = &(*p)->next)
2664 		if (*p == export)
2665 			break;
2666 
2667 	if (*p != export)
2668 		return -1;
2669 
2670 	rcu_assign_pointer(*p, (*p)->next);
2671 
2672 	return 0;
2673 }
2674 
2675 static inline void
2676 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2677 {
2678 	if (*list == NULL)
2679 		ftrace_exports_enable();
2680 
2681 	add_trace_export(list, export);
2682 }
2683 
2684 static inline int
2685 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2686 {
2687 	int ret;
2688 
2689 	ret = rm_trace_export(list, export);
2690 	if (*list == NULL)
2691 		ftrace_exports_disable();
2692 
2693 	return ret;
2694 }
2695 
2696 int register_ftrace_export(struct trace_export *export)
2697 {
2698 	if (WARN_ON_ONCE(!export->write))
2699 		return -1;
2700 
2701 	mutex_lock(&ftrace_export_lock);
2702 
2703 	add_ftrace_export(&ftrace_exports_list, export);
2704 
2705 	mutex_unlock(&ftrace_export_lock);
2706 
2707 	return 0;
2708 }
2709 EXPORT_SYMBOL_GPL(register_ftrace_export);
2710 
2711 int unregister_ftrace_export(struct trace_export *export)
2712 {
2713 	int ret;
2714 
2715 	mutex_lock(&ftrace_export_lock);
2716 
2717 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2718 
2719 	mutex_unlock(&ftrace_export_lock);
2720 
2721 	return ret;
2722 }
2723 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
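/*
 * Illustrative sketch, not part of this file: a minimal ftrace export
 * that forwards raw function trace entries to some backing transport.
 * The names my_export_write and my_export are hypothetical.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		pr_debug("exporting %u bytes from %p\n", size, entry);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	unregister_ftrace_export(&my_export);
 */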
2724 
2725 void
2726 trace_function(struct trace_array *tr,
2727 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2728 	       int pc)
2729 {
2730 	struct trace_event_call *call = &event_function;
2731 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2732 	struct ring_buffer_event *event;
2733 	struct ftrace_entry *entry;
2734 
2735 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2736 					    flags, pc);
2737 	if (!event)
2738 		return;
2739 	entry	= ring_buffer_event_data(event);
2740 	entry->ip			= ip;
2741 	entry->parent_ip		= parent_ip;
2742 
2743 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2744 		if (static_branch_unlikely(&ftrace_exports_enabled))
2745 			ftrace_exports(event);
2746 		__buffer_unlock_commit(buffer, event);
2747 	}
2748 }
2749 
2750 #ifdef CONFIG_STACKTRACE
2751 
2752 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2753 struct ftrace_stack {
2754 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2755 };
2756 
2757 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2758 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2759 
2760 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2761 				 unsigned long flags,
2762 				 int skip, int pc, struct pt_regs *regs)
2763 {
2764 	struct trace_event_call *call = &event_kernel_stack;
2765 	struct ring_buffer_event *event;
2766 	struct stack_entry *entry;
2767 	struct stack_trace trace;
2768 	int use_stack;
2769 	int size = FTRACE_STACK_ENTRIES;
2770 
2771 	trace.nr_entries	= 0;
2772 	trace.skip		= skip;
2773 
2774 	/*
2775 	 * Add one, for this function and the call to save_stack_trace().
2776 	 * If regs is set, then these functions will not be in the way.
2777 	 */
2778 #ifndef CONFIG_UNWINDER_ORC
2779 	if (!regs)
2780 		trace.skip++;
2781 #endif
2782 
2783 	/*
2784 	 * Since events can happen in NMIs there's no safe way to
2785 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2786 	 * or NMI comes in, it will just have to fall back to the
2787 	 * FTRACE_STACK_ENTRIES held directly in the stack entry.
2788 	 */
2789 	preempt_disable_notrace();
2790 
2791 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2792 	/*
2793 	 * We don't need any atomic variables, just a barrier.
2794 	 * If an interrupt comes in, we don't care, because it would
2795 	 * have exited and put the counter back to what we want.
2796 	 * We just need a barrier to keep gcc from moving things
2797 	 * around.
2798 	 */
2799 	barrier();
2800 	if (use_stack == 1) {
2801 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2802 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2803 
2804 		if (regs)
2805 			save_stack_trace_regs(regs, &trace);
2806 		else
2807 			save_stack_trace(&trace);
2808 
2809 		if (trace.nr_entries > size)
2810 			size = trace.nr_entries;
2811 	} else
2812 		/* From now on, use_stack is a boolean */
2813 		use_stack = 0;
2814 
2815 	size *= sizeof(unsigned long);
2816 
2817 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2818 					    sizeof(*entry) + size, flags, pc);
2819 	if (!event)
2820 		goto out;
2821 	entry = ring_buffer_event_data(event);
2822 
2823 	memset(&entry->caller, 0, size);
2824 
2825 	if (use_stack)
2826 		memcpy(&entry->caller, trace.entries,
2827 		       trace.nr_entries * sizeof(unsigned long));
2828 	else {
2829 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2830 		trace.entries		= entry->caller;
2831 		if (regs)
2832 			save_stack_trace_regs(regs, &trace);
2833 		else
2834 			save_stack_trace(&trace);
2835 	}
2836 
2837 	entry->size = trace.nr_entries;
2838 
2839 	if (!call_filter_check_discard(call, entry, buffer, event))
2840 		__buffer_unlock_commit(buffer, event);
2841 
2842  out:
2843 	/* Again, don't let gcc optimize things here */
2844 	barrier();
2845 	__this_cpu_dec(ftrace_stack_reserve);
2846 	preempt_enable_notrace();
2847 
2848 }
2849 
2850 static inline void ftrace_trace_stack(struct trace_array *tr,
2851 				      struct ring_buffer *buffer,
2852 				      unsigned long flags,
2853 				      int skip, int pc, struct pt_regs *regs)
2854 {
2855 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2856 		return;
2857 
2858 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2859 }
2860 
2861 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2862 		   int pc)
2863 {
2864 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2865 
2866 	if (rcu_is_watching()) {
2867 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2868 		return;
2869 	}
2870 
2871 	/*
2872 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2873 	 * but if the above rcu_is_watching() failed, then the NMI
2874 	 * triggered someplace critical, and rcu_irq_enter() should
2875 	 * not be called from NMI.
2876 	 */
2877 	if (unlikely(in_nmi()))
2878 		return;
2879 
2880 	rcu_irq_enter_irqson();
2881 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2882 	rcu_irq_exit_irqson();
2883 }
2884 
2885 /**
2886  * trace_dump_stack - record a stack back trace in the trace buffer
2887  * @skip: Number of functions to skip (helper handlers)
2888  */
2889 void trace_dump_stack(int skip)
2890 {
2891 	unsigned long flags;
2892 
2893 	if (tracing_disabled || tracing_selftest_running)
2894 		return;
2895 
2896 	local_save_flags(flags);
2897 
2898 #ifndef CONFIG_UNWINDER_ORC
2899 	/* Skip 1 to skip this function. */
2900 	skip++;
2901 #endif
2902 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2903 			     flags, skip, preempt_count(), NULL);
2904 }
2905 EXPORT_SYMBOL_GPL(trace_dump_stack);
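/*
 * Illustrative usage, not part of this file: record a kernel stack
 * trace into the ring buffer from a code path being debugged, without
 * printing anything to the console:
 *
 *	trace_dump_stack(0);
 *
 * A non-zero skip hides that many helper frames from the output.
 */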
2906 
2907 static DEFINE_PER_CPU(int, user_stack_count);
2908 
2909 void
2910 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2911 {
2912 	struct trace_event_call *call = &event_user_stack;
2913 	struct ring_buffer_event *event;
2914 	struct userstack_entry *entry;
2915 	struct stack_trace trace;
2916 
2917 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2918 		return;
2919 
2920 	/*
2921 	 * NMIs cannot handle page faults, even with fixups.
2922 	 * Saving the user stack can (and often does) fault.
2923 	 */
2924 	if (unlikely(in_nmi()))
2925 		return;
2926 
2927 	/*
2928 	 * prevent recursion, since the user stack tracing may
2929 	 * trigger other kernel events.
2930 	 */
2931 	preempt_disable();
2932 	if (__this_cpu_read(user_stack_count))
2933 		goto out;
2934 
2935 	__this_cpu_inc(user_stack_count);
2936 
2937 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2938 					    sizeof(*entry), flags, pc);
2939 	if (!event)
2940 		goto out_drop_count;
2941 	entry	= ring_buffer_event_data(event);
2942 
2943 	entry->tgid		= current->tgid;
2944 	memset(&entry->caller, 0, sizeof(entry->caller));
2945 
2946 	trace.nr_entries	= 0;
2947 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2948 	trace.skip		= 0;
2949 	trace.entries		= entry->caller;
2950 
2951 	save_stack_trace_user(&trace);
2952 	if (!call_filter_check_discard(call, entry, buffer, event))
2953 		__buffer_unlock_commit(buffer, event);
2954 
2955  out_drop_count:
2956 	__this_cpu_dec(user_stack_count);
2957  out:
2958 	preempt_enable();
2959 }
2960 
2961 #ifdef UNUSED
2962 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2963 {
2964 	ftrace_trace_userstack(tr, flags, preempt_count());
2965 }
2966 #endif /* UNUSED */
2967 
2968 #endif /* CONFIG_STACKTRACE */
2969 
2970 /* created for use with alloc_percpu */
2971 struct trace_buffer_struct {
2972 	int nesting;
2973 	char buffer[4][TRACE_BUF_SIZE];
2974 };
2975 
2976 static struct trace_buffer_struct *trace_percpu_buffer;
2977 
2978 /*
2979  * This allows for lockless recording.  If we're nested too deeply, then
2980  * this returns NULL.
2981  */
2982 static char *get_trace_buf(void)
2983 {
2984 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2985 
2986 	if (!buffer || buffer->nesting >= 4)
2987 		return NULL;
2988 
2989 	buffer->nesting++;
2990 
2991 	/* Interrupts must see nesting incremented before we use the buffer */
2992 	barrier();
2993 	return &buffer->buffer[buffer->nesting][0];
2994 }
2995 
2996 static void put_trace_buf(void)
2997 {
2998 	/* Don't let the decrement of nesting leak before this */
2999 	barrier();
3000 	this_cpu_dec(trace_percpu_buffer->nesting);
3001 }
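/*
 * Illustrative sketch, not part of this file: users of the per-CPU
 * nesting buffers follow a strict get/put pattern with preemption
 * disabled, mirroring trace_vbprintk() below:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		vscnprintf(buf, TRACE_BUF_SIZE, fmt, args);
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */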
3002 
3003 static int alloc_percpu_trace_buffer(void)
3004 {
3005 	struct trace_buffer_struct *buffers;
3006 
3007 	buffers = alloc_percpu(struct trace_buffer_struct);
3008 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3009 		return -ENOMEM;
3010 
3011 	trace_percpu_buffer = buffers;
3012 	return 0;
3013 }
3014 
3015 static int buffers_allocated;
3016 
3017 void trace_printk_init_buffers(void)
3018 {
3019 	if (buffers_allocated)
3020 		return;
3021 
3022 	if (alloc_percpu_trace_buffer())
3023 		return;
3024 
3025 	/* trace_printk() is for debug use only. Don't use it in production. */
3026 
3027 	pr_warn("\n");
3028 	pr_warn("**********************************************************\n");
3029 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3030 	pr_warn("**                                                      **\n");
3031 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3032 	pr_warn("**                                                      **\n");
3033 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3034 	pr_warn("** unsafe for production use.                           **\n");
3035 	pr_warn("**                                                      **\n");
3036 	pr_warn("** If you see this message and you are not debugging    **\n");
3037 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3038 	pr_warn("**                                                      **\n");
3039 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3040 	pr_warn("**********************************************************\n");
3041 
3042 	/* Expand the buffers to set size */
3043 	tracing_update_buffers();
3044 
3045 	buffers_allocated = 1;
3046 
3047 	/*
3048 	 * trace_printk_init_buffers() can be called by modules.
3049 	 * If that happens, then we need to start cmdline recording
3050 	 * directly here. If the global_trace.trace_buffer.buffer is already
3051 	 * allocated, then this was called by module code.
3052 	 */
3053 	if (global_trace.trace_buffer.buffer)
3054 		tracing_start_cmdline_record();
3055 }
3056 
3057 void trace_printk_start_comm(void)
3058 {
3059 	/* Start tracing comms if trace printk is set */
3060 	if (!buffers_allocated)
3061 		return;
3062 	tracing_start_cmdline_record();
3063 }
3064 
3065 static void trace_printk_start_stop_comm(int enabled)
3066 {
3067 	if (!buffers_allocated)
3068 		return;
3069 
3070 	if (enabled)
3071 		tracing_start_cmdline_record();
3072 	else
3073 		tracing_stop_cmdline_record();
3074 }
3075 
3076 /**
3077  * trace_vbprintk - write binary msg to tracing buffer
3078  *
3079  */
3080 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3081 {
3082 	struct trace_event_call *call = &event_bprint;
3083 	struct ring_buffer_event *event;
3084 	struct ring_buffer *buffer;
3085 	struct trace_array *tr = &global_trace;
3086 	struct bprint_entry *entry;
3087 	unsigned long flags;
3088 	char *tbuffer;
3089 	int len = 0, size, pc;
3090 
3091 	if (unlikely(tracing_selftest_running || tracing_disabled))
3092 		return 0;
3093 
3094 	/* Don't pollute graph traces with trace_vprintk internals */
3095 	pause_graph_tracing();
3096 
3097 	pc = preempt_count();
3098 	preempt_disable_notrace();
3099 
3100 	tbuffer = get_trace_buf();
3101 	if (!tbuffer) {
3102 		len = 0;
3103 		goto out_nobuffer;
3104 	}
3105 
3106 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3107 
3108 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3109 		goto out;
3110 
3111 	local_save_flags(flags);
3112 	size = sizeof(*entry) + sizeof(u32) * len;
3113 	buffer = tr->trace_buffer.buffer;
3114 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3115 					    flags, pc);
3116 	if (!event)
3117 		goto out;
3118 	entry = ring_buffer_event_data(event);
3119 	entry->ip			= ip;
3120 	entry->fmt			= fmt;
3121 
3122 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3123 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3124 		__buffer_unlock_commit(buffer, event);
3125 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3126 	}
3127 
3128 out:
3129 	put_trace_buf();
3130 
3131 out_nobuffer:
3132 	preempt_enable_notrace();
3133 	unpause_graph_tracing();
3134 
3135 	return len;
3136 }
3137 EXPORT_SYMBOL_GPL(trace_vbprintk);
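/*
 * Illustrative usage, not part of this file: trace_vbprintk() is
 * normally reached through the trace_printk() macro, which stores the
 * format pointer plus binary arguments instead of a formatted string:
 *
 *	trace_printk("queueing work, pending=%d\n", pending);
 *
 * (pending is a hypothetical local.)  The output appears in the
 * tracefs "trace" file, not on the console.
 */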
3138 
3139 __printf(3, 0)
3140 static int
3141 __trace_array_vprintk(struct ring_buffer *buffer,
3142 		      unsigned long ip, const char *fmt, va_list args)
3143 {
3144 	struct trace_event_call *call = &event_print;
3145 	struct ring_buffer_event *event;
3146 	int len = 0, size, pc;
3147 	struct print_entry *entry;
3148 	unsigned long flags;
3149 	char *tbuffer;
3150 
3151 	if (tracing_disabled || tracing_selftest_running)
3152 		return 0;
3153 
3154 	/* Don't pollute graph traces with trace_vprintk internals */
3155 	pause_graph_tracing();
3156 
3157 	pc = preempt_count();
3158 	preempt_disable_notrace();
3159 
3160 
3161 	tbuffer = get_trace_buf();
3162 	if (!tbuffer) {
3163 		len = 0;
3164 		goto out_nobuffer;
3165 	}
3166 
3167 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3168 
3169 	local_save_flags(flags);
3170 	size = sizeof(*entry) + len + 1;
3171 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3172 					    flags, pc);
3173 	if (!event)
3174 		goto out;
3175 	entry = ring_buffer_event_data(event);
3176 	entry->ip = ip;
3177 
3178 	memcpy(&entry->buf, tbuffer, len + 1);
3179 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3180 		__buffer_unlock_commit(buffer, event);
3181 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3182 	}
3183 
3184 out:
3185 	put_trace_buf();
3186 
3187 out_nobuffer:
3188 	preempt_enable_notrace();
3189 	unpause_graph_tracing();
3190 
3191 	return len;
3192 }
3193 
3194 __printf(3, 0)
3195 int trace_array_vprintk(struct trace_array *tr,
3196 			unsigned long ip, const char *fmt, va_list args)
3197 {
3198 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3199 }
3200 
3201 __printf(3, 0)
3202 int trace_array_printk(struct trace_array *tr,
3203 		       unsigned long ip, const char *fmt, ...)
3204 {
3205 	int ret;
3206 	va_list ap;
3207 
3208 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3209 		return 0;
3210 
3211 	va_start(ap, fmt);
3212 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3213 	va_end(ap);
3214 	return ret;
3215 }
3216 
3217 __printf(3, 4)
3218 int trace_array_printk_buf(struct ring_buffer *buffer,
3219 			   unsigned long ip, const char *fmt, ...)
3220 {
3221 	int ret;
3222 	va_list ap;
3223 
3224 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3225 		return 0;
3226 
3227 	va_start(ap, fmt);
3228 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3229 	va_end(ap);
3230 	return ret;
3231 }
3232 
3233 __printf(2, 0)
3234 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3235 {
3236 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3237 }
3238 EXPORT_SYMBOL_GPL(trace_vprintk);
3239 
3240 static void trace_iterator_increment(struct trace_iterator *iter)
3241 {
3242 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3243 
3244 	iter->idx++;
3245 	if (buf_iter)
3246 		ring_buffer_read(buf_iter, NULL);
3247 }
3248 
3249 static struct trace_entry *
3250 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3251 		unsigned long *lost_events)
3252 {
3253 	struct ring_buffer_event *event;
3254 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3255 
3256 	if (buf_iter)
3257 		event = ring_buffer_iter_peek(buf_iter, ts);
3258 	else
3259 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3260 					 lost_events);
3261 
3262 	if (event) {
3263 		iter->ent_size = ring_buffer_event_length(event);
3264 		return ring_buffer_event_data(event);
3265 	}
3266 	iter->ent_size = 0;
3267 	return NULL;
3268 }
3269 
3270 static struct trace_entry *
3271 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3272 		  unsigned long *missing_events, u64 *ent_ts)
3273 {
3274 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3275 	struct trace_entry *ent, *next = NULL;
3276 	unsigned long lost_events = 0, next_lost = 0;
3277 	int cpu_file = iter->cpu_file;
3278 	u64 next_ts = 0, ts;
3279 	int next_cpu = -1;
3280 	int next_size = 0;
3281 	int cpu;
3282 
3283 	/*
3284 	 * If we are in a per_cpu trace file, don't bother iterating over
3285 	 * all CPUs; peek directly at that one CPU.
3286 	 */
3287 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3288 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3289 			return NULL;
3290 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3291 		if (ent_cpu)
3292 			*ent_cpu = cpu_file;
3293 
3294 		return ent;
3295 	}
3296 
3297 	for_each_tracing_cpu(cpu) {
3298 
3299 		if (ring_buffer_empty_cpu(buffer, cpu))
3300 			continue;
3301 
3302 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3303 
3304 		/*
3305 		 * Pick the entry with the smallest timestamp:
3306 		 */
3307 		if (ent && (!next || ts < next_ts)) {
3308 			next = ent;
3309 			next_cpu = cpu;
3310 			next_ts = ts;
3311 			next_lost = lost_events;
3312 			next_size = iter->ent_size;
3313 		}
3314 	}
3315 
3316 	iter->ent_size = next_size;
3317 
3318 	if (ent_cpu)
3319 		*ent_cpu = next_cpu;
3320 
3321 	if (ent_ts)
3322 		*ent_ts = next_ts;
3323 
3324 	if (missing_events)
3325 		*missing_events = next_lost;
3326 
3327 	return next;
3328 }
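/*
 * Illustrative note, not part of this file: __find_next_entry() is the
 * merge step of a merge sort across the per-CPU buffers.  If three
 * CPUs have head timestamps 100, 90 and 120, the entry with ts == 90
 * is returned and *ent_cpu identifies that CPU; repeated calls yield a
 * single stream ordered by timestamp.
 */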
3329 
3330 /* Find the next real entry, without updating the iterator itself */
3331 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3332 					  int *ent_cpu, u64 *ent_ts)
3333 {
3334 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3335 }
3336 
3337 /* Find the next real entry, and increment the iterator to the next entry */
3338 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3339 {
3340 	iter->ent = __find_next_entry(iter, &iter->cpu,
3341 				      &iter->lost_events, &iter->ts);
3342 
3343 	if (iter->ent)
3344 		trace_iterator_increment(iter);
3345 
3346 	return iter->ent ? iter : NULL;
3347 }
3348 
3349 static void trace_consume(struct trace_iterator *iter)
3350 {
3351 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3352 			    &iter->lost_events);
3353 }
3354 
3355 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3356 {
3357 	struct trace_iterator *iter = m->private;
3358 	int i = (int)*pos;
3359 	void *ent;
3360 
3361 	WARN_ON_ONCE(iter->leftover);
3362 
3363 	(*pos)++;
3364 
3365 	/* can't go backwards */
3366 	if (iter->idx > i)
3367 		return NULL;
3368 
3369 	if (iter->idx < 0)
3370 		ent = trace_find_next_entry_inc(iter);
3371 	else
3372 		ent = iter;
3373 
3374 	while (ent && iter->idx < i)
3375 		ent = trace_find_next_entry_inc(iter);
3376 
3377 	iter->pos = *pos;
3378 
3379 	return ent;
3380 }
3381 
3382 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3383 {
3384 	struct ring_buffer_event *event;
3385 	struct ring_buffer_iter *buf_iter;
3386 	unsigned long entries = 0;
3387 	u64 ts;
3388 
3389 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3390 
3391 	buf_iter = trace_buffer_iter(iter, cpu);
3392 	if (!buf_iter)
3393 		return;
3394 
3395 	ring_buffer_iter_reset(buf_iter);
3396 
3397 	/*
3398 	 * With the max latency tracers, it is possible that a reset
3399 	 * never took place on a cpu. This is evident when the timestamp
3400 	 * is before the start of the buffer.
3401 	 */
3402 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3403 		if (ts >= iter->trace_buffer->time_start)
3404 			break;
3405 		entries++;
3406 		ring_buffer_read(buf_iter, NULL);
3407 	}
3408 
3409 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3410 }
3411 
3412 /*
3413  * The current tracer is copied to avoid using a global lock
3414  * all around.
3415  */
3416 static void *s_start(struct seq_file *m, loff_t *pos)
3417 {
3418 	struct trace_iterator *iter = m->private;
3419 	struct trace_array *tr = iter->tr;
3420 	int cpu_file = iter->cpu_file;
3421 	void *p = NULL;
3422 	loff_t l = 0;
3423 	int cpu;
3424 
3425 	/*
3426 	 * copy the tracer to avoid using a global lock all around.
3427 	 * iter->trace is a copy of current_trace, the pointer to the
3428 	 * name may be used instead of a strcmp(), as iter->trace->name
3429 	 * will point to the same string as current_trace->name.
3430 	 */
3431 	mutex_lock(&trace_types_lock);
3432 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3433 		*iter->trace = *tr->current_trace;
3434 	mutex_unlock(&trace_types_lock);
3435 
3436 #ifdef CONFIG_TRACER_MAX_TRACE
3437 	if (iter->snapshot && iter->trace->use_max_tr)
3438 		return ERR_PTR(-EBUSY);
3439 #endif
3440 
3441 	if (!iter->snapshot)
3442 		atomic_inc(&trace_record_taskinfo_disabled);
3443 
3444 	if (*pos != iter->pos) {
3445 		iter->ent = NULL;
3446 		iter->cpu = 0;
3447 		iter->idx = -1;
3448 
3449 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3450 			for_each_tracing_cpu(cpu)
3451 				tracing_iter_reset(iter, cpu);
3452 		} else
3453 			tracing_iter_reset(iter, cpu_file);
3454 
3455 		iter->leftover = 0;
3456 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3457 			;
3458 
3459 	} else {
3460 		/*
3461 		 * If we overflowed the seq_file before, then we want
3462 		 * to just reuse the trace_seq buffer again.
3463 		 */
3464 		if (iter->leftover)
3465 			p = iter;
3466 		else {
3467 			l = *pos - 1;
3468 			p = s_next(m, p, &l);
3469 		}
3470 	}
3471 
3472 	trace_event_read_lock();
3473 	trace_access_lock(cpu_file);
3474 	return p;
3475 }
3476 
3477 static void s_stop(struct seq_file *m, void *p)
3478 {
3479 	struct trace_iterator *iter = m->private;
3480 
3481 #ifdef CONFIG_TRACER_MAX_TRACE
3482 	if (iter->snapshot && iter->trace->use_max_tr)
3483 		return;
3484 #endif
3485 
3486 	if (!iter->snapshot)
3487 		atomic_dec(&trace_record_taskinfo_disabled);
3488 
3489 	trace_access_unlock(iter->cpu_file);
3490 	trace_event_read_unlock();
3491 }
3492 
3493 static void
3494 get_total_entries(struct trace_buffer *buf,
3495 		  unsigned long *total, unsigned long *entries)
3496 {
3497 	unsigned long count;
3498 	int cpu;
3499 
3500 	*total = 0;
3501 	*entries = 0;
3502 
3503 	for_each_tracing_cpu(cpu) {
3504 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3505 		/*
3506 		 * If this buffer has skipped entries, then we hold all
3507 		 * entries for the trace and we need to ignore the
3508 		 * ones before the time stamp.
3509 		 */
3510 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3511 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3512 			/* total is the same as the entries */
3513 			*total += count;
3514 		} else
3515 			*total += count +
3516 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3517 		*entries += count;
3518 	}
3519 }
3520 
3521 static void print_lat_help_header(struct seq_file *m)
3522 {
3523 	seq_puts(m, "#                  _------=> CPU#            \n"
3524 		    "#                 / _-----=> irqs-off        \n"
3525 		    "#                | / _----=> need-resched    \n"
3526 		    "#                || / _---=> hardirq/softirq \n"
3527 		    "#                ||| / _--=> preempt-depth   \n"
3528 		    "#                |||| /     delay            \n"
3529 		    "#  cmd     pid   ||||| time  |   caller      \n"
3530 		    "#     \\   /      |||||  \\    |   /         \n");
3531 }
3532 
3533 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3534 {
3535 	unsigned long total;
3536 	unsigned long entries;
3537 
3538 	get_total_entries(buf, &total, &entries);
3539 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3540 		   entries, total, num_online_cpus());
3541 	seq_puts(m, "#\n");
3542 }
3543 
3544 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3545 				   unsigned int flags)
3546 {
3547 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3548 
3549 	print_event_info(buf, m);
3550 
3551 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3552 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3553 }
3554 
3555 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3556 				       unsigned int flags)
3557 {
3558 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3559 	const char tgid_space[] = "          ";
3560 	const char space[] = "  ";
3561 
3562 	print_event_info(buf, m);
3563 
3564 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3565 		   tgid ? tgid_space : space);
3566 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3567 		   tgid ? tgid_space : space);
3568 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3569 		   tgid ? tgid_space : space);
3570 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3571 		   tgid ? tgid_space : space);
3572 	seq_printf(m, "#                          %s||| /     delay\n",
3573 		   tgid ? tgid_space : space);
3574 	seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3575 		   tgid ? "   TGID   " : space);
3576 	seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3577 		   tgid ? "     |    " : space);
3578 }
3579 
3580 void
3581 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3582 {
3583 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3584 	struct trace_buffer *buf = iter->trace_buffer;
3585 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3586 	struct tracer *type = iter->trace;
3587 	unsigned long entries;
3588 	unsigned long total;
3589 	const char *name = "preemption";
3590 
3591 	name = type->name;
3592 
3593 	get_total_entries(buf, &total, &entries);
3594 
3595 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3596 		   name, UTS_RELEASE);
3597 	seq_puts(m, "# -----------------------------------"
3598 		 "---------------------------------\n");
3599 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3600 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3601 		   nsecs_to_usecs(data->saved_latency),
3602 		   entries,
3603 		   total,
3604 		   buf->cpu,
3605 #if defined(CONFIG_PREEMPT_NONE)
3606 		   "server",
3607 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3608 		   "desktop",
3609 #elif defined(CONFIG_PREEMPT)
3610 		   "preempt",
3611 #else
3612 		   "unknown",
3613 #endif
3614 		   /* These are reserved for later use */
3615 		   0, 0, 0, 0);
3616 #ifdef CONFIG_SMP
3617 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3618 #else
3619 	seq_puts(m, ")\n");
3620 #endif
3621 	seq_puts(m, "#    -----------------\n");
3622 	seq_printf(m, "#    | task: %.16s-%d "
3623 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3624 		   data->comm, data->pid,
3625 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3626 		   data->policy, data->rt_priority);
3627 	seq_puts(m, "#    -----------------\n");
3628 
3629 	if (data->critical_start) {
3630 		seq_puts(m, "#  => started at: ");
3631 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3632 		trace_print_seq(m, &iter->seq);
3633 		seq_puts(m, "\n#  => ended at:   ");
3634 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3635 		trace_print_seq(m, &iter->seq);
3636 		seq_puts(m, "\n#\n");
3637 	}
3638 
3639 	seq_puts(m, "#\n");
3640 }
3641 
3642 static void test_cpu_buff_start(struct trace_iterator *iter)
3643 {
3644 	struct trace_seq *s = &iter->seq;
3645 	struct trace_array *tr = iter->tr;
3646 
3647 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3648 		return;
3649 
3650 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3651 		return;
3652 
3653 	if (cpumask_available(iter->started) &&
3654 	    cpumask_test_cpu(iter->cpu, iter->started))
3655 		return;
3656 
3657 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3658 		return;
3659 
3660 	if (cpumask_available(iter->started))
3661 		cpumask_set_cpu(iter->cpu, iter->started);
3662 
3663 	/* Don't print started cpu buffer for the first entry of the trace */
3664 	if (iter->idx > 1)
3665 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3666 				iter->cpu);
3667 }
3668 
3669 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3670 {
3671 	struct trace_array *tr = iter->tr;
3672 	struct trace_seq *s = &iter->seq;
3673 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3674 	struct trace_entry *entry;
3675 	struct trace_event *event;
3676 
3677 	entry = iter->ent;
3678 
3679 	test_cpu_buff_start(iter);
3680 
3681 	event = ftrace_find_event(entry->type);
3682 
3683 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3684 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3685 			trace_print_lat_context(iter);
3686 		else
3687 			trace_print_context(iter);
3688 	}
3689 
3690 	if (trace_seq_has_overflowed(s))
3691 		return TRACE_TYPE_PARTIAL_LINE;
3692 
3693 	if (event)
3694 		return event->funcs->trace(iter, sym_flags, event);
3695 
3696 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3697 
3698 	return trace_handle_return(s);
3699 }
3700 
3701 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3702 {
3703 	struct trace_array *tr = iter->tr;
3704 	struct trace_seq *s = &iter->seq;
3705 	struct trace_entry *entry;
3706 	struct trace_event *event;
3707 
3708 	entry = iter->ent;
3709 
3710 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3711 		trace_seq_printf(s, "%d %d %llu ",
3712 				 entry->pid, iter->cpu, iter->ts);
3713 
3714 	if (trace_seq_has_overflowed(s))
3715 		return TRACE_TYPE_PARTIAL_LINE;
3716 
3717 	event = ftrace_find_event(entry->type);
3718 	if (event)
3719 		return event->funcs->raw(iter, 0, event);
3720 
3721 	trace_seq_printf(s, "%d ?\n", entry->type);
3722 
3723 	return trace_handle_return(s);
3724 }
3725 
3726 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3727 {
3728 	struct trace_array *tr = iter->tr;
3729 	struct trace_seq *s = &iter->seq;
3730 	unsigned char newline = '\n';
3731 	struct trace_entry *entry;
3732 	struct trace_event *event;
3733 
3734 	entry = iter->ent;
3735 
3736 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3737 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3738 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3739 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3740 		if (trace_seq_has_overflowed(s))
3741 			return TRACE_TYPE_PARTIAL_LINE;
3742 	}
3743 
3744 	event = ftrace_find_event(entry->type);
3745 	if (event) {
3746 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3747 		if (ret != TRACE_TYPE_HANDLED)
3748 			return ret;
3749 	}
3750 
3751 	SEQ_PUT_FIELD(s, newline);
3752 
3753 	return trace_handle_return(s);
3754 }
3755 
3756 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3757 {
3758 	struct trace_array *tr = iter->tr;
3759 	struct trace_seq *s = &iter->seq;
3760 	struct trace_entry *entry;
3761 	struct trace_event *event;
3762 
3763 	entry = iter->ent;
3764 
3765 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3766 		SEQ_PUT_FIELD(s, entry->pid);
3767 		SEQ_PUT_FIELD(s, iter->cpu);
3768 		SEQ_PUT_FIELD(s, iter->ts);
3769 		if (trace_seq_has_overflowed(s))
3770 			return TRACE_TYPE_PARTIAL_LINE;
3771 	}
3772 
3773 	event = ftrace_find_event(entry->type);
3774 	return event ? event->funcs->binary(iter, 0, event) :
3775 		TRACE_TYPE_HANDLED;
3776 }
3777 
3778 int trace_empty(struct trace_iterator *iter)
3779 {
3780 	struct ring_buffer_iter *buf_iter;
3781 	int cpu;
3782 
3783 	/* If we are looking at one CPU buffer, only check that one */
3784 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3785 		cpu = iter->cpu_file;
3786 		buf_iter = trace_buffer_iter(iter, cpu);
3787 		if (buf_iter) {
3788 			if (!ring_buffer_iter_empty(buf_iter))
3789 				return 0;
3790 		} else {
3791 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3792 				return 0;
3793 		}
3794 		return 1;
3795 	}
3796 
3797 	for_each_tracing_cpu(cpu) {
3798 		buf_iter = trace_buffer_iter(iter, cpu);
3799 		if (buf_iter) {
3800 			if (!ring_buffer_iter_empty(buf_iter))
3801 				return 0;
3802 		} else {
3803 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3804 				return 0;
3805 		}
3806 	}
3807 
3808 	return 1;
3809 }
3810 
3811 /*  Called with trace_event_read_lock() held. */
3812 enum print_line_t print_trace_line(struct trace_iterator *iter)
3813 {
3814 	struct trace_array *tr = iter->tr;
3815 	unsigned long trace_flags = tr->trace_flags;
3816 	enum print_line_t ret;
3817 
3818 	if (iter->lost_events) {
3819 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3820 				 iter->cpu, iter->lost_events);
3821 		if (trace_seq_has_overflowed(&iter->seq))
3822 			return TRACE_TYPE_PARTIAL_LINE;
3823 	}
3824 
3825 	if (iter->trace && iter->trace->print_line) {
3826 		ret = iter->trace->print_line(iter);
3827 		if (ret != TRACE_TYPE_UNHANDLED)
3828 			return ret;
3829 	}
3830 
3831 	if (iter->ent->type == TRACE_BPUTS &&
3832 			trace_flags & TRACE_ITER_PRINTK &&
3833 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3834 		return trace_print_bputs_msg_only(iter);
3835 
3836 	if (iter->ent->type == TRACE_BPRINT &&
3837 			trace_flags & TRACE_ITER_PRINTK &&
3838 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3839 		return trace_print_bprintk_msg_only(iter);
3840 
3841 	if (iter->ent->type == TRACE_PRINT &&
3842 			trace_flags & TRACE_ITER_PRINTK &&
3843 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3844 		return trace_print_printk_msg_only(iter);
3845 
3846 	if (trace_flags & TRACE_ITER_BIN)
3847 		return print_bin_fmt(iter);
3848 
3849 	if (trace_flags & TRACE_ITER_HEX)
3850 		return print_hex_fmt(iter);
3851 
3852 	if (trace_flags & TRACE_ITER_RAW)
3853 		return print_raw_fmt(iter);
3854 
3855 	return print_trace_fmt(iter);
3856 }
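/*
 * A rough sketch of the dispatch above: a tracer's own print_line()
 * callback gets first shot, then the printk-msg-only shortcuts, then the
 * bin/hex/raw flags, and finally the default human-readable format.
 * Assuming the usual option names declared in trace.h ("bin", "hex",
 * "raw") and a tracefs mount at /sys/kernel/tracing, the output format
 * can be switched at runtime, e.g.:
 *
 *   # echo hex > trace_options      - use print_hex_fmt()
 *   # echo raw > trace_options      - use print_raw_fmt()
 *   # echo noraw > trace_options    - back to print_trace_fmt()
 */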
3857 
3858 void trace_latency_header(struct seq_file *m)
3859 {
3860 	struct trace_iterator *iter = m->private;
3861 	struct trace_array *tr = iter->tr;
3862 
3863 	/* print nothing if the buffers are empty */
3864 	if (trace_empty(iter))
3865 		return;
3866 
3867 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3868 		print_trace_header(m, iter);
3869 
3870 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3871 		print_lat_help_header(m);
3872 }
3873 
3874 void trace_default_header(struct seq_file *m)
3875 {
3876 	struct trace_iterator *iter = m->private;
3877 	struct trace_array *tr = iter->tr;
3878 	unsigned long trace_flags = tr->trace_flags;
3879 
3880 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3881 		return;
3882 
3883 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3884 		/* print nothing if the buffers are empty */
3885 		if (trace_empty(iter))
3886 			return;
3887 		print_trace_header(m, iter);
3888 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3889 			print_lat_help_header(m);
3890 	} else {
3891 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3892 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3893 				print_func_help_header_irq(iter->trace_buffer,
3894 							   m, trace_flags);
3895 			else
3896 				print_func_help_header(iter->trace_buffer, m,
3897 						       trace_flags);
3898 		}
3899 	}
3900 }
3901 
3902 static void test_ftrace_alive(struct seq_file *m)
3903 {
3904 	if (!ftrace_is_dead())
3905 		return;
3906 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3907 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3908 }
3909 
3910 #ifdef CONFIG_TRACER_MAX_TRACE
3911 static void show_snapshot_main_help(struct seq_file *m)
3912 {
3913 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3914 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3915 		    "#                      Takes a snapshot of the main buffer.\n"
3916 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3917 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3918 		    "#                       is not a '0' or '1')\n");
3919 }
3920 
3921 static void show_snapshot_percpu_help(struct seq_file *m)
3922 {
3923 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3924 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3925 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3926 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3927 #else
3928 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3929 		    "#                     Must use main snapshot file to allocate.\n");
3930 #endif
3931 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3932 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3933 		    "#                       is not a '0' or '1')\n");
3934 }
3935 
3936 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3937 {
3938 	if (iter->tr->allocated_snapshot)
3939 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3940 	else
3941 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3942 
3943 	seq_puts(m, "# Snapshot commands:\n");
3944 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3945 		show_snapshot_main_help(m);
3946 	else
3947 		show_snapshot_percpu_help(m);
3948 }
3949 #else
3950 /* Should never be called */
3951 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3952 #endif
3953 
3954 static int s_show(struct seq_file *m, void *v)
3955 {
3956 	struct trace_iterator *iter = v;
3957 	int ret;
3958 
3959 	if (iter->ent == NULL) {
3960 		if (iter->tr) {
3961 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3962 			seq_puts(m, "#\n");
3963 			test_ftrace_alive(m);
3964 		}
3965 		if (iter->snapshot && trace_empty(iter))
3966 			print_snapshot_help(m, iter);
3967 		else if (iter->trace && iter->trace->print_header)
3968 			iter->trace->print_header(m);
3969 		else
3970 			trace_default_header(m);
3971 
3972 	} else if (iter->leftover) {
3973 		/*
3974 		 * If we filled the seq_file buffer earlier, we
3975 		 * want to just show it now.
3976 		 */
3977 		ret = trace_print_seq(m, &iter->seq);
3978 
3979 		/* ret should this time be zero, but you never know */
3980 		iter->leftover = ret;
3981 
3982 	} else {
3983 		print_trace_line(iter);
3984 		ret = trace_print_seq(m, &iter->seq);
3985 		/*
3986 		 * If we overflow the seq_file buffer, then it will
3987 		 * ask us for this data again at start up.
3988 		 * Use that instead.
3989 		 *  ret is 0 if seq_file write succeeded.
3990 		 *        -1 otherwise.
3991 		 */
3992 		iter->leftover = ret;
3993 	}
3994 
3995 	return 0;
3996 }
3997 
3998 /*
3999  * Should be used after trace_array_get(), trace_types_lock
4000  * ensures that i_cdev was already initialized.
4001  */
4002 static inline int tracing_get_cpu(struct inode *inode)
4003 {
4004 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4005 		return (long)inode->i_cdev - 1;
4006 	return RING_BUFFER_ALL_CPUS;
4007 }
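/*
 * Illustration of the encoding above: trace_create_cpu_file() (defined
 * later in this file) stashes "cpu + 1" in i_cdev for the per_cpu/cpuN/
 * files, so a NULL i_cdev unambiguously means the top-level file:
 *
 *   trace                 i_cdev == NULL  ->  RING_BUFFER_ALL_CPUS
 *   per_cpu/cpu0/trace    i_cdev == 1     ->  cpu 0
 *   per_cpu/cpu2/trace    i_cdev == 3     ->  cpu 2
 */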
4008 
4009 static const struct seq_operations tracer_seq_ops = {
4010 	.start		= s_start,
4011 	.next		= s_next,
4012 	.stop		= s_stop,
4013 	.show		= s_show,
4014 };
4015 
4016 static struct trace_iterator *
4017 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4018 {
4019 	struct trace_array *tr = inode->i_private;
4020 	struct trace_iterator *iter;
4021 	int cpu;
4022 
4023 	if (tracing_disabled)
4024 		return ERR_PTR(-ENODEV);
4025 
4026 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4027 	if (!iter)
4028 		return ERR_PTR(-ENOMEM);
4029 
4030 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4031 				    GFP_KERNEL);
4032 	if (!iter->buffer_iter)
4033 		goto release;
4034 
4035 	/*
4036 	 * We make a copy of the current tracer to avoid concurrent
4037 	 * changes on it while we are reading.
4038 	 */
4039 	mutex_lock(&trace_types_lock);
4040 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4041 	if (!iter->trace)
4042 		goto fail;
4043 
4044 	*iter->trace = *tr->current_trace;
4045 
4046 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4047 		goto fail;
4048 
4049 	iter->tr = tr;
4050 
4051 #ifdef CONFIG_TRACER_MAX_TRACE
4052 	/* Currently only the top directory has a snapshot */
4053 	if (tr->current_trace->print_max || snapshot)
4054 		iter->trace_buffer = &tr->max_buffer;
4055 	else
4056 #endif
4057 		iter->trace_buffer = &tr->trace_buffer;
4058 	iter->snapshot = snapshot;
4059 	iter->pos = -1;
4060 	iter->cpu_file = tracing_get_cpu(inode);
4061 	mutex_init(&iter->mutex);
4062 
4063 	/* Notify the tracer early, before we stop tracing. */
4064 	if (iter->trace && iter->trace->open)
4065 		iter->trace->open(iter);
4066 
4067 	/* Annotate start of buffers if we had overruns */
4068 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
4069 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4070 
4071 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4072 	if (trace_clocks[tr->clock_id].in_ns)
4073 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4074 
4075 	/* stop the trace while dumping if we are not opening "snapshot" */
4076 	if (!iter->snapshot)
4077 		tracing_stop_tr(tr);
4078 
4079 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4080 		for_each_tracing_cpu(cpu) {
4081 			iter->buffer_iter[cpu] =
4082 				ring_buffer_read_prepare(iter->trace_buffer->buffer,
4083 							 cpu, GFP_KERNEL);
4084 		}
4085 		ring_buffer_read_prepare_sync();
4086 		for_each_tracing_cpu(cpu) {
4087 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4088 			tracing_iter_reset(iter, cpu);
4089 		}
4090 	} else {
4091 		cpu = iter->cpu_file;
4092 		iter->buffer_iter[cpu] =
4093 			ring_buffer_read_prepare(iter->trace_buffer->buffer,
4094 						 cpu, GFP_KERNEL);
4095 		ring_buffer_read_prepare_sync();
4096 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4097 		tracing_iter_reset(iter, cpu);
4098 	}
4099 
4100 	mutex_unlock(&trace_types_lock);
4101 
4102 	return iter;
4103 
4104  fail:
4105 	mutex_unlock(&trace_types_lock);
4106 	kfree(iter->trace);
4107 	kfree(iter->buffer_iter);
4108 release:
4109 	seq_release_private(inode, file);
4110 	return ERR_PTR(-ENOMEM);
4111 }
4112 
4113 int tracing_open_generic(struct inode *inode, struct file *filp)
4114 {
4115 	if (tracing_disabled)
4116 		return -ENODEV;
4117 
4118 	filp->private_data = inode->i_private;
4119 	return 0;
4120 }
4121 
4122 bool tracing_is_disabled(void)
4123 {
4124 	return (tracing_disabled) ? true : false;
4125 }
4126 
4127 /*
4128  * Open and update trace_array ref count.
4129  * Must have the current trace_array passed to it.
4130  */
4131 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4132 {
4133 	struct trace_array *tr = inode->i_private;
4134 
4135 	if (tracing_disabled)
4136 		return -ENODEV;
4137 
4138 	if (trace_array_get(tr) < 0)
4139 		return -ENODEV;
4140 
4141 	filp->private_data = inode->i_private;
4142 
4143 	return 0;
4144 }
4145 
4146 static int tracing_release(struct inode *inode, struct file *file)
4147 {
4148 	struct trace_array *tr = inode->i_private;
4149 	struct seq_file *m = file->private_data;
4150 	struct trace_iterator *iter;
4151 	int cpu;
4152 
4153 	if (!(file->f_mode & FMODE_READ)) {
4154 		trace_array_put(tr);
4155 		return 0;
4156 	}
4157 
4158 	/* Writes do not use seq_file */
4159 	iter = m->private;
4160 	mutex_lock(&trace_types_lock);
4161 
4162 	for_each_tracing_cpu(cpu) {
4163 		if (iter->buffer_iter[cpu])
4164 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4165 	}
4166 
4167 	if (iter->trace && iter->trace->close)
4168 		iter->trace->close(iter);
4169 
4170 	if (!iter->snapshot)
4171 		/* reenable tracing if it was previously enabled */
4172 		tracing_start_tr(tr);
4173 
4174 	__trace_array_put(tr);
4175 
4176 	mutex_unlock(&trace_types_lock);
4177 
4178 	mutex_destroy(&iter->mutex);
4179 	free_cpumask_var(iter->started);
4180 	kfree(iter->trace);
4181 	kfree(iter->buffer_iter);
4182 	seq_release_private(inode, file);
4183 
4184 	return 0;
4185 }
4186 
4187 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4188 {
4189 	struct trace_array *tr = inode->i_private;
4190 
4191 	trace_array_put(tr);
4192 	return 0;
4193 }
4194 
4195 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4196 {
4197 	struct trace_array *tr = inode->i_private;
4198 
4199 	trace_array_put(tr);
4200 
4201 	return single_release(inode, file);
4202 }
4203 
4204 static int tracing_open(struct inode *inode, struct file *file)
4205 {
4206 	struct trace_array *tr = inode->i_private;
4207 	struct trace_iterator *iter;
4208 	int ret = 0;
4209 
4210 	if (trace_array_get(tr) < 0)
4211 		return -ENODEV;
4212 
4213 	/* If this file was opened for write, then erase contents */
4214 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4215 		int cpu = tracing_get_cpu(inode);
4216 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4217 
4218 #ifdef CONFIG_TRACER_MAX_TRACE
4219 		if (tr->current_trace->print_max)
4220 			trace_buf = &tr->max_buffer;
4221 #endif
4222 
4223 		if (cpu == RING_BUFFER_ALL_CPUS)
4224 			tracing_reset_online_cpus(trace_buf);
4225 		else
4226 			tracing_reset(trace_buf, cpu);
4227 	}
4228 
4229 	if (file->f_mode & FMODE_READ) {
4230 		iter = __tracing_open(inode, file, false);
4231 		if (IS_ERR(iter))
4232 			ret = PTR_ERR(iter);
4233 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4234 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4235 	}
4236 
4237 	if (ret < 0)
4238 		trace_array_put(tr);
4239 
4240 	return ret;
4241 }
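/*
 * Typical interactions with the "trace" file handled above (paths are
 * relative to the tracefs mount, e.g. /sys/kernel/tracing):
 *
 *   # cat trace                - read the whole buffer (non-consuming)
 *   # echo > trace             - open with O_TRUNC, clearing the buffer(s)
 *   # cat per_cpu/cpu1/trace   - read only CPU 1's buffer
 */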
4242 
4243 /*
4244  * Some tracers are not suitable for instance buffers.
4245  * A tracer is always available for the global array (toplevel)
4246  * or if it explicitly states that it is.
4247  */
4248 static bool
4249 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4250 {
4251 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4252 }
4253 
4254 /* Find the next tracer that this trace array may use */
4255 static struct tracer *
4256 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4257 {
4258 	while (t && !trace_ok_for_array(t, tr))
4259 		t = t->next;
4260 
4261 	return t;
4262 }
4263 
4264 static void *
4265 t_next(struct seq_file *m, void *v, loff_t *pos)
4266 {
4267 	struct trace_array *tr = m->private;
4268 	struct tracer *t = v;
4269 
4270 	(*pos)++;
4271 
4272 	if (t)
4273 		t = get_tracer_for_array(tr, t->next);
4274 
4275 	return t;
4276 }
4277 
4278 static void *t_start(struct seq_file *m, loff_t *pos)
4279 {
4280 	struct trace_array *tr = m->private;
4281 	struct tracer *t;
4282 	loff_t l = 0;
4283 
4284 	mutex_lock(&trace_types_lock);
4285 
4286 	t = get_tracer_for_array(tr, trace_types);
4287 	for (; t && l < *pos; t = t_next(m, t, &l))
4288 			;
4289 		;
4290 	return t;
4291 }
4292 
4293 static void t_stop(struct seq_file *m, void *p)
4294 {
4295 	mutex_unlock(&trace_types_lock);
4296 }
4297 
4298 static int t_show(struct seq_file *m, void *v)
4299 {
4300 	struct tracer *t = v;
4301 
4302 	if (!t)
4303 		return 0;
4304 
4305 	seq_puts(m, t->name);
4306 	if (t->next)
4307 		seq_putc(m, ' ');
4308 	else
4309 		seq_putc(m, '\n');
4310 
4311 	return 0;
4312 }
4313 
4314 static const struct seq_operations show_traces_seq_ops = {
4315 	.start		= t_start,
4316 	.next		= t_next,
4317 	.stop		= t_stop,
4318 	.show		= t_show,
4319 };
4320 
4321 static int show_traces_open(struct inode *inode, struct file *file)
4322 {
4323 	struct trace_array *tr = inode->i_private;
4324 	struct seq_file *m;
4325 	int ret;
4326 
4327 	if (tracing_disabled)
4328 		return -ENODEV;
4329 
4330 	ret = seq_open(file, &show_traces_seq_ops);
4331 	if (ret)
4332 		return ret;
4333 
4334 	m = file->private_data;
4335 	m->private = tr;
4336 
4337 	return 0;
4338 }
4339 
4340 static ssize_t
4341 tracing_write_stub(struct file *filp, const char __user *ubuf,
4342 		   size_t count, loff_t *ppos)
4343 {
4344 	return count;
4345 }
4346 
4347 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4348 {
4349 	int ret;
4350 
4351 	if (file->f_mode & FMODE_READ)
4352 		ret = seq_lseek(file, offset, whence);
4353 	else
4354 		file->f_pos = ret = 0;
4355 
4356 	return ret;
4357 }
4358 
4359 static const struct file_operations tracing_fops = {
4360 	.open		= tracing_open,
4361 	.read		= seq_read,
4362 	.write		= tracing_write_stub,
4363 	.llseek		= tracing_lseek,
4364 	.release	= tracing_release,
4365 };
4366 
4367 static const struct file_operations show_traces_fops = {
4368 	.open		= show_traces_open,
4369 	.read		= seq_read,
4370 	.release	= seq_release,
4371 	.llseek		= seq_lseek,
4372 };
4373 
4374 static ssize_t
4375 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4376 		     size_t count, loff_t *ppos)
4377 {
4378 	struct trace_array *tr = file_inode(filp)->i_private;
4379 	char *mask_str;
4380 	int len;
4381 
4382 	len = snprintf(NULL, 0, "%*pb\n",
4383 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4384 	mask_str = kmalloc(len, GFP_KERNEL);
4385 	if (!mask_str)
4386 		return -ENOMEM;
4387 
4388 	len = snprintf(mask_str, len, "%*pb\n",
4389 		       cpumask_pr_args(tr->tracing_cpumask));
4390 	if (len >= count) {
4391 		count = -EINVAL;
4392 		goto out_err;
4393 	}
4394 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4395 
4396 out_err:
4397 	kfree(mask_str);
4398 
4399 	return count;
4400 }
4401 
4402 static ssize_t
4403 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4404 		      size_t count, loff_t *ppos)
4405 {
4406 	struct trace_array *tr = file_inode(filp)->i_private;
4407 	cpumask_var_t tracing_cpumask_new;
4408 	int err, cpu;
4409 
4410 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4411 		return -ENOMEM;
4412 
4413 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4414 	if (err)
4415 		goto err_unlock;
4416 
4417 	local_irq_disable();
4418 	arch_spin_lock(&tr->max_lock);
4419 	for_each_tracing_cpu(cpu) {
4420 		/*
4421 		 * Increase/decrease the disabled counter if we are
4422 		 * about to flip a bit in the cpumask:
4423 		 */
4424 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4425 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4426 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4427 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4428 		}
4429 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4430 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4431 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4432 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4433 		}
4434 	}
4435 	arch_spin_unlock(&tr->max_lock);
4436 	local_irq_enable();
4437 
4438 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4439 	free_cpumask_var(tracing_cpumask_new);
4440 
4441 	return count;
4442 
4443 err_unlock:
4444 	free_cpumask_var(tracing_cpumask_new);
4445 
4446 	return err;
4447 }
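/*
 * The cpumask is read and written as a hex mask (the "%*pb" format used
 * in tracing_cpumask_read() above); for example, on a 4-CPU system:
 *
 *   # cat tracing_cpumask
 *   f
 *   # echo 3 > tracing_cpumask     - trace only CPUs 0 and 1
 *
 * Per-cpu recording is disabled/enabled on the fly as bits are cleared
 * or set, without discarding already-recorded events.
 */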
4448 
4449 static const struct file_operations tracing_cpumask_fops = {
4450 	.open		= tracing_open_generic_tr,
4451 	.read		= tracing_cpumask_read,
4452 	.write		= tracing_cpumask_write,
4453 	.release	= tracing_release_generic_tr,
4454 	.llseek		= generic_file_llseek,
4455 };
4456 
4457 static int tracing_trace_options_show(struct seq_file *m, void *v)
4458 {
4459 	struct tracer_opt *trace_opts;
4460 	struct trace_array *tr = m->private;
4461 	u32 tracer_flags;
4462 	int i;
4463 
4464 	mutex_lock(&trace_types_lock);
4465 	tracer_flags = tr->current_trace->flags->val;
4466 	trace_opts = tr->current_trace->flags->opts;
4467 
4468 	for (i = 0; trace_options[i]; i++) {
4469 		if (tr->trace_flags & (1 << i))
4470 			seq_printf(m, "%s\n", trace_options[i]);
4471 		else
4472 			seq_printf(m, "no%s\n", trace_options[i]);
4473 	}
4474 
4475 	for (i = 0; trace_opts[i].name; i++) {
4476 		if (tracer_flags & trace_opts[i].bit)
4477 			seq_printf(m, "%s\n", trace_opts[i].name);
4478 		else
4479 			seq_printf(m, "no%s\n", trace_opts[i].name);
4480 	}
4481 	mutex_unlock(&trace_types_lock);
4482 
4483 	return 0;
4484 }
4485 
4486 static int __set_tracer_option(struct trace_array *tr,
4487 			       struct tracer_flags *tracer_flags,
4488 			       struct tracer_opt *opts, int neg)
4489 {
4490 	struct tracer *trace = tracer_flags->trace;
4491 	int ret;
4492 
4493 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4494 	if (ret)
4495 		return ret;
4496 
4497 	if (neg)
4498 		tracer_flags->val &= ~opts->bit;
4499 	else
4500 		tracer_flags->val |= opts->bit;
4501 	return 0;
4502 }
4503 
4504 /* Try to assign a tracer specific option */
4505 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4506 {
4507 	struct tracer *trace = tr->current_trace;
4508 	struct tracer_flags *tracer_flags = trace->flags;
4509 	struct tracer_opt *opts = NULL;
4510 	int i;
4511 
4512 	for (i = 0; tracer_flags->opts[i].name; i++) {
4513 		opts = &tracer_flags->opts[i];
4514 
4515 		if (strcmp(cmp, opts->name) == 0)
4516 			return __set_tracer_option(tr, trace->flags, opts, neg);
4517 	}
4518 
4519 	return -EINVAL;
4520 }
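/*
 * Tracer-specific options show up in the same "trace_options" file as
 * the core flags (see tracing_trace_options_show() above), prefixed with
 * "no" when disabled. As an illustration, assuming the function_graph
 * tracer is current and provides a "funcgraph-duration" option:
 *
 *   # echo nofuncgraph-duration > trace_options
 *   # echo funcgraph-duration > trace_options
 */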
4521 
4522 /* Some tracers require overwrite to stay enabled */
4523 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4524 {
4525 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4526 		return -1;
4527 
4528 	return 0;
4529 }
4530 
4531 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4532 {
4533 	/* do nothing if the flag is already in the requested state */
4534 	if (!!(tr->trace_flags & mask) == !!enabled)
4535 		return 0;
4536 
4537 	/* Give the tracer a chance to approve the change */
4538 	if (tr->current_trace->flag_changed)
4539 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4540 			return -EINVAL;
4541 
4542 	if (enabled)
4543 		tr->trace_flags |= mask;
4544 	else
4545 		tr->trace_flags &= ~mask;
4546 
4547 	if (mask == TRACE_ITER_RECORD_CMD)
4548 		trace_event_enable_cmd_record(enabled);
4549 
4550 	if (mask == TRACE_ITER_RECORD_TGID) {
4551 		if (!tgid_map)
4552 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4553 					   sizeof(*tgid_map),
4554 					   GFP_KERNEL);
4555 		if (!tgid_map) {
4556 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4557 			return -ENOMEM;
4558 		}
4559 
4560 		trace_event_enable_tgid_record(enabled);
4561 	}
4562 
4563 	if (mask == TRACE_ITER_EVENT_FORK)
4564 		trace_event_follow_fork(tr, enabled);
4565 
4566 	if (mask == TRACE_ITER_FUNC_FORK)
4567 		ftrace_pid_follow_fork(tr, enabled);
4568 
4569 	if (mask == TRACE_ITER_OVERWRITE) {
4570 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4571 #ifdef CONFIG_TRACER_MAX_TRACE
4572 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4573 #endif
4574 	}
4575 
4576 	if (mask == TRACE_ITER_PRINTK) {
4577 		trace_printk_start_stop_comm(enabled);
4578 		trace_printk_control(enabled);
4579 	}
4580 
4581 	return 0;
4582 }
4583 
4584 static int trace_set_options(struct trace_array *tr, char *option)
4585 {
4586 	char *cmp;
4587 	int neg = 0;
4588 	int ret;
4589 	size_t orig_len = strlen(option);
4590 	int len;
4591 
4592 	cmp = strstrip(option);
4593 
4594 	len = str_has_prefix(cmp, "no");
4595 	if (len)
4596 		neg = 1;
4597 
4598 	cmp += len;
4599 
4600 	mutex_lock(&trace_types_lock);
4601 
4602 	ret = match_string(trace_options, -1, cmp);
4603 	/* If no option could be set, test the specific tracer options */
4604 	if (ret < 0)
4605 		ret = set_tracer_option(tr, cmp, neg);
4606 	else
4607 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4608 
4609 	mutex_unlock(&trace_types_lock);
4610 
4611 	/*
4612 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4613 	 * turn it back into a space.
4614 	 */
4615 	if (orig_len > strlen(option))
4616 		option[strlen(option)] = ' ';
4617 
4618 	return ret;
4619 }
4620 
4621 static void __init apply_trace_boot_options(void)
4622 {
4623 	char *buf = trace_boot_options_buf;
4624 	char *option;
4625 
4626 	while (true) {
4627 		option = strsep(&buf, ",");
4628 
4629 		if (!option)
4630 			break;
4631 
4632 		if (*option)
4633 			trace_set_options(&global_trace, option);
4634 
4635 		/* Put back the comma to allow this to be called again */
4636 		if (buf)
4637 			*(buf - 1) = ',';
4638 	}
4639 }
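/*
 * trace_boot_options_buf is filled from the "trace_options=" kernel
 * command-line parameter (parsed earlier in this file), so boot-time
 * defaults can be given as a comma-separated list, for instance:
 *
 *   trace_options=sym-offset,noirq-info
 *
 * Each entry is handed to trace_set_options() exactly as if it had been
 * written to the trace_options file.
 */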
4640 
4641 static ssize_t
4642 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4643 			size_t cnt, loff_t *ppos)
4644 {
4645 	struct seq_file *m = filp->private_data;
4646 	struct trace_array *tr = m->private;
4647 	char buf[64];
4648 	int ret;
4649 
4650 	if (cnt >= sizeof(buf))
4651 		return -EINVAL;
4652 
4653 	if (copy_from_user(buf, ubuf, cnt))
4654 		return -EFAULT;
4655 
4656 	buf[cnt] = 0;
4657 
4658 	ret = trace_set_options(tr, buf);
4659 	if (ret < 0)
4660 		return ret;
4661 
4662 	*ppos += cnt;
4663 
4664 	return cnt;
4665 }
4666 
4667 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4668 {
4669 	struct trace_array *tr = inode->i_private;
4670 	int ret;
4671 
4672 	if (tracing_disabled)
4673 		return -ENODEV;
4674 
4675 	if (trace_array_get(tr) < 0)
4676 		return -ENODEV;
4677 
4678 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4679 	if (ret < 0)
4680 		trace_array_put(tr);
4681 
4682 	return ret;
4683 }
4684 
4685 static const struct file_operations tracing_iter_fops = {
4686 	.open		= tracing_trace_options_open,
4687 	.read		= seq_read,
4688 	.llseek		= seq_lseek,
4689 	.release	= tracing_single_release_tr,
4690 	.write		= tracing_trace_options_write,
4691 };
4692 
4693 static const char readme_msg[] =
4694 	"tracing mini-HOWTO:\n\n"
4695 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4696 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4697 	" Important files:\n"
4698 	"  trace\t\t\t- The static contents of the buffer\n"
4699 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4700 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4701 	"  current_tracer\t- function and latency tracers\n"
4702 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4703 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4704 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4705 	"  trace_clock\t\t- change the clock used to order events\n"
4706 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4707 	"      global:   Synced across CPUs but slows tracing down.\n"
4708 	"     counter:   Not a clock, but just an increment\n"
4709 	"      uptime:   Jiffy counter from time of boot\n"
4710 	"        perf:   Same clock that perf events use\n"
4711 #ifdef CONFIG_X86_64
4712 	"     x86-tsc:   TSC cycle counter\n"
4713 #endif
4714 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4715 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4716 	"    absolute:   Absolute (standalone) timestamp\n"
4717 	"\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
4718 	"\n  trace_marker_raw\t\t- Writes to this file are inserted as binary data into the kernel buffer\n"
4719 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4720 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4721 	"\t\t\t  Remove sub-buffer with rmdir\n"
4722 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4723 	"\t\t\t  Disable an option by prefixing the option name\n"
4724 	"\t\t\t  with 'no'\n"
4725 	"  saved_cmdlines_size\t- echo the number of comm-pid pairs to keep in the list\n"
4726 #ifdef CONFIG_DYNAMIC_FTRACE
4727 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4728 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4729 	"\t\t\t  functions\n"
4730 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4731 	"\t     modules: Can select a group via module\n"
4732 	"\t      Format: :mod:<module-name>\n"
4733 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4734 	"\t    triggers: a command to perform when function is hit\n"
4735 	"\t      Format: <function>:<trigger>[:count]\n"
4736 	"\t     trigger: traceon, traceoff\n"
4737 	"\t\t      enable_event:<system>:<event>\n"
4738 	"\t\t      disable_event:<system>:<event>\n"
4739 #ifdef CONFIG_STACKTRACE
4740 	"\t\t      stacktrace\n"
4741 #endif
4742 #ifdef CONFIG_TRACER_SNAPSHOT
4743 	"\t\t      snapshot\n"
4744 #endif
4745 	"\t\t      dump\n"
4746 	"\t\t      cpudump\n"
4747 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4748 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4749 	"\t     The first one will disable tracing every time do_fault is hit\n"
4750 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4751 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4752 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4753 	"\t       the counter will not decrement. It only decrements when the\n"
4754 	"\t       trigger did work\n"
4755 	"\t     To remove trigger without count:\n"
4756 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4757 	"\t     To remove trigger with a count:\n"
4758 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4759 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4760 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4761 	"\t    modules: Can select a group via module command :mod:\n"
4762 	"\t    Does not accept triggers\n"
4763 #endif /* CONFIG_DYNAMIC_FTRACE */
4764 #ifdef CONFIG_FUNCTION_TRACER
4765 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4766 	"\t\t    (function)\n"
4767 #endif
4768 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4769 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4770 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4771 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4772 #endif
4773 #ifdef CONFIG_TRACER_SNAPSHOT
4774 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4775 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4776 	"\t\t\t  information\n"
4777 #endif
4778 #ifdef CONFIG_STACK_TRACER
4779 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4780 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4781 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4782 	"\t\t\t  new trace)\n"
4783 #ifdef CONFIG_DYNAMIC_FTRACE
4784 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4785 	"\t\t\t  traces\n"
4786 #endif
4787 #endif /* CONFIG_STACK_TRACER */
4788 #ifdef CONFIG_DYNAMIC_EVENTS
4789 	"  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4790 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4791 #endif
4792 #ifdef CONFIG_KPROBE_EVENTS
4793 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4794 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4795 #endif
4796 #ifdef CONFIG_UPROBE_EVENTS
4797 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4798 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4799 #endif
4800 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4801 	"\t  accepts: event-definitions (one definition per line)\n"
4802 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4803 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4804 #ifdef CONFIG_HIST_TRIGGERS
4805 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4806 #endif
4807 	"\t           -:[<group>/]<event>\n"
4808 #ifdef CONFIG_KPROBE_EVENTS
4809 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4810 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4811 #endif
4812 #ifdef CONFIG_UPROBE_EVENTS
4813 	"\t    place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4814 #endif
4815 	"\t     args: <name>=fetcharg[:type]\n"
4816 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4817 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4818 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4819 #else
4820 	"\t           $stack<index>, $stack, $retval, $comm\n"
4821 #endif
4822 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4823 	"\t           b<bit-width>@<bit-offset>/<container-size>,\n"
4824 	"\t           <type>\\[<array-size>\\]\n"
4825 #ifdef CONFIG_HIST_TRIGGERS
4826 	"\t    field: <stype> <name>;\n"
4827 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4828 	"\t           [unsigned] char/int/long\n"
4829 #endif
4830 #endif
4831 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4832 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4833 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4834 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4835 	"\t\t\t  events\n"
4836 	"      filter\t\t- If set, only events passing filter are traced\n"
4837 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4838 	"\t\t\t  <event>:\n"
4839 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4840 	"      filter\t\t- If set, only events passing filter are traced\n"
4841 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4842 	"\t    Format: <trigger>[:count][if <filter>]\n"
4843 	"\t   trigger: traceon, traceoff\n"
4844 	"\t            enable_event:<system>:<event>\n"
4845 	"\t            disable_event:<system>:<event>\n"
4846 #ifdef CONFIG_HIST_TRIGGERS
4847 	"\t            enable_hist:<system>:<event>\n"
4848 	"\t            disable_hist:<system>:<event>\n"
4849 #endif
4850 #ifdef CONFIG_STACKTRACE
4851 	"\t\t    stacktrace\n"
4852 #endif
4853 #ifdef CONFIG_TRACER_SNAPSHOT
4854 	"\t\t    snapshot\n"
4855 #endif
4856 #ifdef CONFIG_HIST_TRIGGERS
4857 	"\t\t    hist (see below)\n"
4858 #endif
4859 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4860 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4861 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4862 	"\t                  events/block/block_unplug/trigger\n"
4863 	"\t   The first disables tracing every time block_unplug is hit.\n"
4864 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4865 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4866 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4867 	"\t   Like function triggers, the counter is only decremented if it\n"
4868 	"\t    enabled or disabled tracing.\n"
4869 	"\t   To remove a trigger without a count:\n"
4870 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4871 	"\t   To remove a trigger with a count:\n"
4872 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4873 	"\t   Filters can be ignored when removing a trigger.\n"
4874 #ifdef CONFIG_HIST_TRIGGERS
4875 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4876 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4877 	"\t            [:values=<field1[,field2,...]>]\n"
4878 	"\t            [:sort=<field1[,field2,...]>]\n"
4879 	"\t            [:size=#entries]\n"
4880 	"\t            [:pause][:continue][:clear]\n"
4881 	"\t            [:name=histname1]\n"
4882 	"\t            [:<handler>.<action>]\n"
4883 	"\t            [if <filter>]\n\n"
4884 	"\t    When a matching event is hit, an entry is added to a hash\n"
4885 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4886 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4887 	"\t    correspond to fields in the event's format description.  Keys\n"
4888 	"\t    can be any field, or the special string 'stacktrace'.\n"
4889 	"\t    Compound keys consisting of up to two fields can be specified\n"
4890 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4891 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4892 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4893 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4894 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4895 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4896 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4897 	"\t    its histogram data will be shared with other triggers of the\n"
4898 	"\t    same name, and trigger hits will update this common data.\n\n"
4899 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4900 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4901 	"\t    triggers attached to an event, there will be a table for each\n"
4902 	"\t    trigger in the output.  The table displayed for a named\n"
4903 	"\t    trigger will be the same as any other instance having the\n"
4904 	"\t    same name.  The default format used to display a given field\n"
4905 	"\t    can be modified by appending any of the following modifiers\n"
4906 	"\t    to the field name, as applicable:\n\n"
4907 	"\t            .hex        display a number as a hex value\n"
4908 	"\t            .sym        display an address as a symbol\n"
4909 	"\t            .sym-offset display an address as a symbol and offset\n"
4910 	"\t            .execname   display a common_pid as a program name\n"
4911 	"\t            .syscall    display a syscall id as a syscall name\n"
4912 	"\t            .log2       display log2 value rather than raw number\n"
4913 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4914 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4915 	"\t    trigger or to start a hist trigger but not log any events\n"
4916 	"\t    until told to do so.  'continue' can be used to start or\n"
4917 	"\t    restart a paused hist trigger.\n\n"
4918 	"\t    The 'clear' parameter will clear the contents of a running\n"
4919 	"\t    hist trigger and leave its current paused/active state\n"
4920 	"\t    unchanged.\n\n"
4921 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4922 	"\t    have one event conditionally start and stop another event's\n"
4923 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4924 	"\t    the enable_event and disable_event triggers.\n\n"
4925 	"\t    Hist trigger handlers and actions are executed whenever a\n"
4926 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
4927 	"\t    histogram entry is added or updated.  They take the form:\n\n"
4928 	"\t    The available handlers are:\n\n"
4929 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
4930 	"\t        onmax(var)               - invoke if var exceeds current max\n"
4931 	"\t        onchange(var)            - invoke action if var changes\n\n"
4932 	"\t    The available actions are:\n\n"
4933 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4934 	"\t        save(field,...)                      - save current event fields\n"
4935 #ifdef CONFIG_TRACER_SNAPSHOT
4936 	"\t        snapshot()                           - snapshot the trace buffer\n"
4937 #endif
4938 #endif
4939 ;
4940 
4941 static ssize_t
4942 tracing_readme_read(struct file *filp, char __user *ubuf,
4943 		       size_t cnt, loff_t *ppos)
4944 {
4945 	return simple_read_from_buffer(ubuf, cnt, ppos,
4946 					readme_msg, strlen(readme_msg));
4947 }
4948 
4949 static const struct file_operations tracing_readme_fops = {
4950 	.open		= tracing_open_generic,
4951 	.read		= tracing_readme_read,
4952 	.llseek		= generic_file_llseek,
4953 };
4954 
4955 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4956 {
4957 	int *ptr = v;
4958 
4959 	if (*pos || m->count)
4960 		ptr++;
4961 
4962 	(*pos)++;
4963 
4964 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4965 		if (trace_find_tgid(*ptr))
4966 			return ptr;
4967 	}
4968 
4969 	return NULL;
4970 }
4971 
4972 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4973 {
4974 	void *v;
4975 	loff_t l = 0;
4976 
4977 	if (!tgid_map)
4978 		return NULL;
4979 
4980 	v = &tgid_map[0];
4981 	while (l <= *pos) {
4982 		v = saved_tgids_next(m, v, &l);
4983 		if (!v)
4984 			return NULL;
4985 	}
4986 
4987 	return v;
4988 }
4989 
4990 static void saved_tgids_stop(struct seq_file *m, void *v)
4991 {
4992 }
4993 
4994 static int saved_tgids_show(struct seq_file *m, void *v)
4995 {
4996 	int pid = (int *)v - tgid_map;
4997 
4998 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4999 	return 0;
5000 }
5001 
5002 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5003 	.start		= saved_tgids_start,
5004 	.stop		= saved_tgids_stop,
5005 	.next		= saved_tgids_next,
5006 	.show		= saved_tgids_show,
5007 };
5008 
5009 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5010 {
5011 	if (tracing_disabled)
5012 		return -ENODEV;
5013 
5014 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5015 }
5016 
5017 
5018 static const struct file_operations tracing_saved_tgids_fops = {
5019 	.open		= tracing_saved_tgids_open,
5020 	.read		= seq_read,
5021 	.llseek		= seq_lseek,
5022 	.release	= seq_release,
5023 };
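/*
 * The "saved_tgids" file above prints one "<pid> <tgid>" pair per line.
 * tgid_map is only allocated once the record-tgid option is enabled
 * (see the TRACE_ITER_RECORD_TGID handling in set_tracer_flag()), e.g.:
 *
 *   # echo 1 > options/record-tgid
 *   # cat saved_tgids
 *   1023 1023
 *   1024 1023
 */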
5024 
5025 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5026 {
5027 	unsigned int *ptr = v;
5028 
5029 	if (*pos || m->count)
5030 		ptr++;
5031 
5032 	(*pos)++;
5033 
5034 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5035 	     ptr++) {
5036 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5037 			continue;
5038 
5039 		return ptr;
5040 	}
5041 
5042 	return NULL;
5043 }
5044 
5045 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5046 {
5047 	void *v;
5048 	loff_t l = 0;
5049 
5050 	preempt_disable();
5051 	arch_spin_lock(&trace_cmdline_lock);
5052 
5053 	v = &savedcmd->map_cmdline_to_pid[0];
5054 	while (l <= *pos) {
5055 		v = saved_cmdlines_next(m, v, &l);
5056 		if (!v)
5057 			return NULL;
5058 	}
5059 
5060 	return v;
5061 }
5062 
5063 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5064 {
5065 	arch_spin_unlock(&trace_cmdline_lock);
5066 	preempt_enable();
5067 }
5068 
5069 static int saved_cmdlines_show(struct seq_file *m, void *v)
5070 {
5071 	char buf[TASK_COMM_LEN];
5072 	unsigned int *pid = v;
5073 
5074 	__trace_find_cmdline(*pid, buf);
5075 	seq_printf(m, "%d %s\n", *pid, buf);
5076 	return 0;
5077 }
5078 
5079 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5080 	.start		= saved_cmdlines_start,
5081 	.next		= saved_cmdlines_next,
5082 	.stop		= saved_cmdlines_stop,
5083 	.show		= saved_cmdlines_show,
5084 };
5085 
5086 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5087 {
5088 	if (tracing_disabled)
5089 		return -ENODEV;
5090 
5091 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5092 }
5093 
5094 static const struct file_operations tracing_saved_cmdlines_fops = {
5095 	.open		= tracing_saved_cmdlines_open,
5096 	.read		= seq_read,
5097 	.llseek		= seq_lseek,
5098 	.release	= seq_release,
5099 };
5100 
5101 static ssize_t
5102 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5103 				 size_t cnt, loff_t *ppos)
5104 {
5105 	char buf[64];
5106 	int r;
5107 
5108 	arch_spin_lock(&trace_cmdline_lock);
5109 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5110 	arch_spin_unlock(&trace_cmdline_lock);
5111 
5112 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5113 }
5114 
5115 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5116 {
5117 	kfree(s->saved_cmdlines);
5118 	kfree(s->map_cmdline_to_pid);
5119 	kfree(s);
5120 }
5121 
5122 static int tracing_resize_saved_cmdlines(unsigned int val)
5123 {
5124 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5125 
5126 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5127 	if (!s)
5128 		return -ENOMEM;
5129 
5130 	if (allocate_cmdlines_buffer(val, s) < 0) {
5131 		kfree(s);
5132 		return -ENOMEM;
5133 	}
5134 
5135 	arch_spin_lock(&trace_cmdline_lock);
5136 	savedcmd_temp = savedcmd;
5137 	savedcmd = s;
5138 	arch_spin_unlock(&trace_cmdline_lock);
5139 	free_saved_cmdlines_buffer(savedcmd_temp);
5140 
5141 	return 0;
5142 }
5143 
5144 static ssize_t
5145 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5146 				  size_t cnt, loff_t *ppos)
5147 {
5148 	unsigned long val;
5149 	int ret;
5150 
5151 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5152 	if (ret)
5153 		return ret;
5154 
5155 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5156 	if (!val || val > PID_MAX_DEFAULT)
5157 		return -EINVAL;
5158 
5159 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5160 	if (ret < 0)
5161 		return ret;
5162 
5163 	*ppos += cnt;
5164 
5165 	return cnt;
5166 }
5167 
5168 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5169 	.open		= tracing_open_generic,
5170 	.read		= tracing_saved_cmdlines_size_read,
5171 	.write		= tracing_saved_cmdlines_size_write,
5172 };
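/*
 * Resizing example for the interface above. Values must be between 1
 * and PID_MAX_DEFAULT, and the previous comm<->pid mappings are thrown
 * away when the buffer is reallocated (the default is typically 128):
 *
 *   # cat saved_cmdlines_size
 *   128
 *   # echo 1024 > saved_cmdlines_size
 */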
5173 
5174 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5175 static union trace_eval_map_item *
5176 update_eval_map(union trace_eval_map_item *ptr)
5177 {
5178 	if (!ptr->map.eval_string) {
5179 		if (ptr->tail.next) {
5180 			ptr = ptr->tail.next;
5181 			/* Set ptr to the next real item (skip head) */
5182 			ptr++;
5183 		} else
5184 			return NULL;
5185 	}
5186 	return ptr;
5187 }
5188 
5189 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5190 {
5191 	union trace_eval_map_item *ptr = v;
5192 
5193 	/*
5194 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5195 	 * This really should never happen.
5196 	 */
5197 	ptr = update_eval_map(ptr);
5198 	if (WARN_ON_ONCE(!ptr))
5199 		return NULL;
5200 
5201 	ptr++;
5202 
5203 	(*pos)++;
5204 
5205 	ptr = update_eval_map(ptr);
5206 
5207 	return ptr;
5208 }
5209 
5210 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5211 {
5212 	union trace_eval_map_item *v;
5213 	loff_t l = 0;
5214 
5215 	mutex_lock(&trace_eval_mutex);
5216 
5217 	v = trace_eval_maps;
5218 	if (v)
5219 		v++;
5220 
5221 	while (v && l < *pos) {
5222 		v = eval_map_next(m, v, &l);
5223 	}
5224 
5225 	return v;
5226 }
5227 
5228 static void eval_map_stop(struct seq_file *m, void *v)
5229 {
5230 	mutex_unlock(&trace_eval_mutex);
5231 }
5232 
5233 static int eval_map_show(struct seq_file *m, void *v)
5234 {
5235 	union trace_eval_map_item *ptr = v;
5236 
5237 	seq_printf(m, "%s %ld (%s)\n",
5238 		   ptr->map.eval_string, ptr->map.eval_value,
5239 		   ptr->map.system);
5240 
5241 	return 0;
5242 }
5243 
5244 static const struct seq_operations tracing_eval_map_seq_ops = {
5245 	.start		= eval_map_start,
5246 	.next		= eval_map_next,
5247 	.stop		= eval_map_stop,
5248 	.show		= eval_map_show,
5249 };
5250 
5251 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5252 {
5253 	if (tracing_disabled)
5254 		return -ENODEV;
5255 
5256 	return seq_open(filp, &tracing_eval_map_seq_ops);
5257 }
5258 
5259 static const struct file_operations tracing_eval_map_fops = {
5260 	.open		= tracing_eval_map_open,
5261 	.read		= seq_read,
5262 	.llseek		= seq_lseek,
5263 	.release	= seq_release,
5264 };
5265 
5266 static inline union trace_eval_map_item *
5267 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5268 {
5269 	/* Return tail of array given the head */
5270 	return ptr + ptr->head.length + 1;
5271 }
5272 
5273 static void
5274 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5275 			   int len)
5276 {
5277 	struct trace_eval_map **stop;
5278 	struct trace_eval_map **map;
5279 	union trace_eval_map_item *map_array;
5280 	union trace_eval_map_item *ptr;
5281 
5282 	stop = start + len;
5283 
5284 	/*
5285 	 * The trace_eval_maps contains the map plus a head and tail item,
5286 	 * where the head holds the module and length of array, and the
5287 	 * tail holds a pointer to the next list.
5288 	 */
5289 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5290 	if (!map_array) {
5291 		pr_warn("Unable to allocate trace eval mapping\n");
5292 		return;
5293 	}
5294 
5295 	mutex_lock(&trace_eval_mutex);
5296 
5297 	if (!trace_eval_maps)
5298 		trace_eval_maps = map_array;
5299 	else {
5300 		ptr = trace_eval_maps;
5301 		for (;;) {
5302 			ptr = trace_eval_jmp_to_tail(ptr);
5303 			if (!ptr->tail.next)
5304 				break;
5305 			ptr = ptr->tail.next;
5306 
5307 		}
5308 		ptr->tail.next = map_array;
5309 	}
5310 	map_array->head.mod = mod;
5311 	map_array->head.length = len;
5312 	map_array++;
5313 
5314 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5315 		map_array->map = **map;
5316 		map_array++;
5317 	}
5318 	memset(map_array, 0, sizeof(*map_array));
5319 
5320 	mutex_unlock(&trace_eval_mutex);
5321 }
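/*
 * Layout of one block appended above, for a module with "len" maps:
 *
 *   map_array[0]           head  (mod, length = len)
 *   map_array[1 .. len]    map   (one trace_eval_map copied per entry)
 *   map_array[len + 1]     tail  (zeroed; tail.next chains to the next
 *                                 module's block, if any)
 *
 * trace_eval_jmp_to_tail() relies on this layout to hop from a head
 * item straight to its tail item.
 */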
5322 
5323 static void trace_create_eval_file(struct dentry *d_tracer)
5324 {
5325 	trace_create_file("eval_map", 0444, d_tracer,
5326 			  NULL, &tracing_eval_map_fops);
5327 }
5328 
5329 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5330 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5331 static inline void trace_insert_eval_map_file(struct module *mod,
5332 			      struct trace_eval_map **start, int len) { }
5333 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5334 
5335 static void trace_insert_eval_map(struct module *mod,
5336 				  struct trace_eval_map **start, int len)
5337 {
5338 	struct trace_eval_map **map;
5339 
5340 	if (len <= 0)
5341 		return;
5342 
5343 	map = start;
5344 
5345 	trace_event_eval_update(map, len);
5346 
5347 	trace_insert_eval_map_file(mod, start, len);
5348 }
5349 
5350 static ssize_t
5351 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5352 		       size_t cnt, loff_t *ppos)
5353 {
5354 	struct trace_array *tr = filp->private_data;
5355 	char buf[MAX_TRACER_SIZE+2];
5356 	int r;
5357 
5358 	mutex_lock(&trace_types_lock);
5359 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5360 	mutex_unlock(&trace_types_lock);
5361 
5362 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5363 }
5364 
5365 int tracer_init(struct tracer *t, struct trace_array *tr)
5366 {
5367 	tracing_reset_online_cpus(&tr->trace_buffer);
5368 	return t->init(tr);
5369 }
5370 
5371 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5372 {
5373 	int cpu;
5374 
5375 	for_each_tracing_cpu(cpu)
5376 		per_cpu_ptr(buf->data, cpu)->entries = val;
5377 }
5378 
5379 #ifdef CONFIG_TRACER_MAX_TRACE
5380 /* resize @trace_buf's per-cpu entries to match @size_buf's entries */
5381 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5382 					struct trace_buffer *size_buf, int cpu_id)
5383 {
5384 	int cpu, ret = 0;
5385 
5386 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5387 		for_each_tracing_cpu(cpu) {
5388 			ret = ring_buffer_resize(trace_buf->buffer,
5389 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5390 			if (ret < 0)
5391 				break;
5392 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5393 				per_cpu_ptr(size_buf->data, cpu)->entries;
5394 		}
5395 	} else {
5396 		ret = ring_buffer_resize(trace_buf->buffer,
5397 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5398 		if (ret == 0)
5399 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5400 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5401 	}
5402 
5403 	return ret;
5404 }
5405 #endif /* CONFIG_TRACER_MAX_TRACE */
5406 
5407 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5408 					unsigned long size, int cpu)
5409 {
5410 	int ret;
5411 
5412 	/*
5413 	 * If kernel or user changes the size of the ring buffer
5414 	 * we use the size that was given, and we can forget about
5415 	 * expanding it later.
5416 	 */
5417 	ring_buffer_expanded = true;
5418 
5419 	/* May be called before buffers are initialized */
5420 	if (!tr->trace_buffer.buffer)
5421 		return 0;
5422 
5423 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5424 	if (ret < 0)
5425 		return ret;
5426 
5427 #ifdef CONFIG_TRACER_MAX_TRACE
5428 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5429 	    !tr->current_trace->use_max_tr)
5430 		goto out;
5431 
5432 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5433 	if (ret < 0) {
5434 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5435 						     &tr->trace_buffer, cpu);
5436 		if (r < 0) {
5437 			/*
5438 			 * AARGH! We are left with different
5439 			 * size max buffer!!!!
5440 			 * The max buffer is our "snapshot" buffer.
5441 			 * When a tracer needs a snapshot (one of the
5442 			 * latency tracers), it swaps the max buffer
5443 		 * with the saved snapshot. We succeeded in updating
5444 		 * the size of the main buffer, but failed to
5445 			 * update the size of the max buffer. But when we tried
5446 			 * to reset the main buffer to the original size, we
5447 			 * failed there too. This is very unlikely to
5448 			 * happen, but if it does, warn and kill all
5449 			 * tracing.
5450 			 */
5451 			WARN_ON(1);
5452 			tracing_disabled = 1;
5453 		}
5454 		return ret;
5455 	}
5456 
5457 	if (cpu == RING_BUFFER_ALL_CPUS)
5458 		set_buffer_entries(&tr->max_buffer, size);
5459 	else
5460 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5461 
5462  out:
5463 #endif /* CONFIG_TRACER_MAX_TRACE */
5464 
5465 	if (cpu == RING_BUFFER_ALL_CPUS)
5466 		set_buffer_entries(&tr->trace_buffer, size);
5467 	else
5468 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5469 
5470 	return ret;
5471 }
5472 
5473 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5474 					  unsigned long size, int cpu_id)
5475 {
5476 	int ret = size;
5477 
5478 	mutex_lock(&trace_types_lock);
5479 
5480 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5481 		/* make sure, this cpu is enabled in the mask */
5482 		/* make sure this cpu is enabled in the mask */
5483 			ret = -EINVAL;
5484 			goto out;
5485 		}
5486 	}
5487 
5488 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5489 	if (ret < 0)
5490 		ret = -ENOMEM;
5491 
5492 out:
5493 	mutex_unlock(&trace_types_lock);
5494 
5495 	return ret;
5496 }
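/*
 * The user-visible knobs for this path are the buffer_size_kb files;
 * their write handlers (defined later in this file) convert kilobytes
 * to bytes before calling tracing_resize_ring_buffer(). Illustration:
 *
 *   # echo 4096 > buffer_size_kb               - resize every CPU's buffer
 *   # echo 1024 > per_cpu/cpu2/buffer_size_kb  - resize only CPU 2
 */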
5497 
5498 
5499 /**
5500  * tracing_update_buffers - used by tracing facility to expand ring buffers
5501  *
5502  * To save memory when tracing is configured in but never used, the
5503  * ring buffers are set to a minimum size. But once a user starts to
5504  * use the tracing facility, they need to grow to their default
5505  * size.
5506  *
5507  * This function is to be called when a tracer is about to be used.
5508  */
5509 int tracing_update_buffers(void)
5510 {
5511 	int ret = 0;
5512 
5513 	mutex_lock(&trace_types_lock);
5514 	if (!ring_buffer_expanded)
5515 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5516 						RING_BUFFER_ALL_CPUS);
5517 	mutex_unlock(&trace_types_lock);
5518 
5519 	return ret;
5520 }
5521 
5522 struct trace_option_dentry;
5523 
5524 static void
5525 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5526 
5527 /*
5528  * Used to clear out the tracer before deletion of an instance.
5529  * Must have trace_types_lock held.
5530  */
5531 static void tracing_set_nop(struct trace_array *tr)
5532 {
5533 	if (tr->current_trace == &nop_trace)
5534 		return;
5535 
5536 	tr->current_trace->enabled--;
5537 
5538 	if (tr->current_trace->reset)
5539 		tr->current_trace->reset(tr);
5540 
5541 	tr->current_trace = &nop_trace;
5542 }
5543 
5544 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5545 {
5546 	/* Only enable if the directory has been created already. */
5547 	if (!tr->dir)
5548 		return;
5549 
5550 	create_trace_option_files(tr, t);
5551 }
5552 
5553 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5554 {
5555 	struct tracer *t;
5556 #ifdef CONFIG_TRACER_MAX_TRACE
5557 	bool had_max_tr;
5558 #endif
5559 	int ret = 0;
5560 
5561 	mutex_lock(&trace_types_lock);
5562 
5563 	if (!ring_buffer_expanded) {
5564 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5565 						RING_BUFFER_ALL_CPUS);
5566 		if (ret < 0)
5567 			goto out;
5568 		ret = 0;
5569 	}
5570 
5571 	for (t = trace_types; t; t = t->next) {
5572 		if (strcmp(t->name, buf) == 0)
5573 			break;
5574 	}
5575 	if (!t) {
5576 		ret = -EINVAL;
5577 		goto out;
5578 	}
5579 	if (t == tr->current_trace)
5580 		goto out;
5581 
5582 #ifdef CONFIG_TRACER_SNAPSHOT
5583 	if (t->use_max_tr) {
5584 		arch_spin_lock(&tr->max_lock);
5585 		if (tr->cond_snapshot)
5586 			ret = -EBUSY;
5587 		arch_spin_unlock(&tr->max_lock);
5588 		if (ret)
5589 			goto out;
5590 	}
5591 #endif
5592 	/* Some tracers won't work on kernel command line */
5593 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5594 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5595 			t->name);
5596 		goto out;
5597 	}
5598 
5599 	/* Some tracers are only allowed for the top level buffer */
5600 	if (!trace_ok_for_array(t, tr)) {
5601 		ret = -EINVAL;
5602 		goto out;
5603 	}
5604 
5605 	/* If trace pipe files are being read, we can't change the tracer */
5606 	if (tr->current_trace->ref) {
5607 		ret = -EBUSY;
5608 		goto out;
5609 	}
5610 
5611 	trace_branch_disable();
5612 
5613 	tr->current_trace->enabled--;
5614 
5615 	if (tr->current_trace->reset)
5616 		tr->current_trace->reset(tr);
5617 
5618 	/* Current trace needs to be nop_trace before synchronize_rcu */
5619 	tr->current_trace = &nop_trace;
5620 
5621 #ifdef CONFIG_TRACER_MAX_TRACE
5622 	had_max_tr = tr->allocated_snapshot;
5623 
5624 	if (had_max_tr && !t->use_max_tr) {
5625 		/*
5626 		 * We need to make sure that update_max_tr() sees that
5627 		 * current_trace changed to nop_trace to keep it from
5628 		 * swapping the buffers after we resize it.
5629 		 * update_max_tr() is called with interrupts disabled,
5630 		 * so a synchronize_rcu() is sufficient.
5631 		 */
5632 		synchronize_rcu();
5633 		free_snapshot(tr);
5634 	}
5635 #endif
5636 
5637 #ifdef CONFIG_TRACER_MAX_TRACE
5638 	if (t->use_max_tr && !had_max_tr) {
5639 		ret = tracing_alloc_snapshot_instance(tr);
5640 		if (ret < 0)
5641 			goto out;
5642 	}
5643 #endif
5644 
5645 	if (t->init) {
5646 		ret = tracer_init(t, tr);
5647 		if (ret)
5648 			goto out;
5649 	}
5650 
5651 	tr->current_trace = t;
5652 	tr->current_trace->enabled++;
5653 	trace_branch_enable(tr);
5654  out:
5655 	mutex_unlock(&trace_types_lock);
5656 
5657 	return ret;
5658 }
5659 
5660 static ssize_t
5661 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5662 			size_t cnt, loff_t *ppos)
5663 {
5664 	struct trace_array *tr = filp->private_data;
5665 	char buf[MAX_TRACER_SIZE+1];
5666 	int i;
5667 	size_t ret;
5668 	int err;
5669 
5670 	ret = cnt;
5671 
5672 	if (cnt > MAX_TRACER_SIZE)
5673 		cnt = MAX_TRACER_SIZE;
5674 
5675 	if (copy_from_user(buf, ubuf, cnt))
5676 		return -EFAULT;
5677 
5678 	buf[cnt] = 0;
5679 
5680 	/* strip ending whitespace. */
5681 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5682 		buf[i] = 0;
5683 
5684 	err = tracing_set_tracer(tr, buf);
5685 	if (err)
5686 		return err;
5687 
5688 	*ppos += ret;
5689 
5690 	return ret;
5691 }
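/*
 * Example usage, assuming tracefs is mounted at /sys/kernel/tracing and
 * this handler is wired up to the current_tracer file (see set_tracer_fops
 * below): switching to, e.g., the function tracer is simply
 *
 *   echo function > /sys/kernel/tracing/current_tracer
 *
 * Trailing whitespace (such as the newline that echo appends) is stripped
 * above before the name is looked up in trace_types.
 */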
5692 
5693 static ssize_t
5694 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5695 		   size_t cnt, loff_t *ppos)
5696 {
5697 	char buf[64];
5698 	int r;
5699 
5700 	r = snprintf(buf, sizeof(buf), "%ld\n",
5701 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5702 	if (r > sizeof(buf))
5703 		r = sizeof(buf);
5704 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5705 }
5706 
5707 static ssize_t
5708 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5709 		    size_t cnt, loff_t *ppos)
5710 {
5711 	unsigned long val;
5712 	int ret;
5713 
5714 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5715 	if (ret)
5716 		return ret;
5717 
5718 	*ptr = val * 1000;
5719 
5720 	return cnt;
5721 }
5722 
5723 static ssize_t
5724 tracing_thresh_read(struct file *filp, char __user *ubuf,
5725 		    size_t cnt, loff_t *ppos)
5726 {
5727 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5728 }
5729 
5730 static ssize_t
5731 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5732 		     size_t cnt, loff_t *ppos)
5733 {
5734 	struct trace_array *tr = filp->private_data;
5735 	int ret;
5736 
5737 	mutex_lock(&trace_types_lock);
5738 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5739 	if (ret < 0)
5740 		goto out;
5741 
5742 	if (tr->current_trace->update_thresh) {
5743 		ret = tr->current_trace->update_thresh(tr);
5744 		if (ret < 0)
5745 			goto out;
5746 	}
5747 
5748 	ret = cnt;
5749 out:
5750 	mutex_unlock(&trace_types_lock);
5751 
5752 	return ret;
5753 }
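/*
 * Example usage, assuming tracefs is mounted at /sys/kernel/tracing:
 * tracing_thresh takes a value in microseconds (the internal value is
 * kept in nanoseconds, hence the * 1000 in tracing_nsecs_write() and the
 * nsecs_to_usecs() in tracing_nsecs_read() above), so a 100 usec latency
 * threshold would be set with:
 *
 *   echo 100 > /sys/kernel/tracing/tracing_thresh
 */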
5754 
5755 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5756 
5757 static ssize_t
5758 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5759 		     size_t cnt, loff_t *ppos)
5760 {
5761 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5762 }
5763 
5764 static ssize_t
5765 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5766 		      size_t cnt, loff_t *ppos)
5767 {
5768 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5769 }
5770 
5771 #endif
5772 
5773 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5774 {
5775 	struct trace_array *tr = inode->i_private;
5776 	struct trace_iterator *iter;
5777 	int ret = 0;
5778 
5779 	if (tracing_disabled)
5780 		return -ENODEV;
5781 
5782 	if (trace_array_get(tr) < 0)
5783 		return -ENODEV;
5784 
5785 	mutex_lock(&trace_types_lock);
5786 
5787 	/* create a buffer to store the information to pass to userspace */
5788 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5789 	if (!iter) {
5790 		ret = -ENOMEM;
5791 		__trace_array_put(tr);
5792 		goto out;
5793 	}
5794 
5795 	trace_seq_init(&iter->seq);
5796 	iter->trace = tr->current_trace;
5797 
5798 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5799 		ret = -ENOMEM;
5800 		goto fail;
5801 	}
5802 
5803 	/* trace pipe does not show start of buffer */
5804 	cpumask_setall(iter->started);
5805 
5806 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5807 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5808 
5809 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5810 	if (trace_clocks[tr->clock_id].in_ns)
5811 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5812 
5813 	iter->tr = tr;
5814 	iter->trace_buffer = &tr->trace_buffer;
5815 	iter->cpu_file = tracing_get_cpu(inode);
5816 	mutex_init(&iter->mutex);
5817 	filp->private_data = iter;
5818 
5819 	if (iter->trace->pipe_open)
5820 		iter->trace->pipe_open(iter);
5821 
5822 	nonseekable_open(inode, filp);
5823 
5824 	tr->current_trace->ref++;
5825 out:
5826 	mutex_unlock(&trace_types_lock);
5827 	return ret;
5828 
5829 fail:
5830 	kfree(iter);
5831 	__trace_array_put(tr);
5832 	mutex_unlock(&trace_types_lock);
5833 	return ret;
5834 }
5835 
5836 static int tracing_release_pipe(struct inode *inode, struct file *file)
5837 {
5838 	struct trace_iterator *iter = file->private_data;
5839 	struct trace_array *tr = inode->i_private;
5840 
5841 	mutex_lock(&trace_types_lock);
5842 
5843 	tr->current_trace->ref--;
5844 
5845 	if (iter->trace->pipe_close)
5846 		iter->trace->pipe_close(iter);
5847 
5848 	mutex_unlock(&trace_types_lock);
5849 
5850 	free_cpumask_var(iter->started);
5851 	mutex_destroy(&iter->mutex);
5852 	kfree(iter);
5853 
5854 	trace_array_put(tr);
5855 
5856 	return 0;
5857 }
5858 
5859 static __poll_t
5860 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5861 {
5862 	struct trace_array *tr = iter->tr;
5863 
5864 	/* Iterators are static; they should be either filled or empty */
5865 	if (trace_buffer_iter(iter, iter->cpu_file))
5866 		return EPOLLIN | EPOLLRDNORM;
5867 
5868 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5869 		/*
5870 		 * Always select as readable when in blocking mode
5871 		 */
5872 		return EPOLLIN | EPOLLRDNORM;
5873 	else
5874 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5875 					     filp, poll_table);
5876 }
5877 
5878 static __poll_t
5879 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5880 {
5881 	struct trace_iterator *iter = filp->private_data;
5882 
5883 	return trace_poll(iter, filp, poll_table);
5884 }
5885 
5886 /* Must be called with iter->mutex held. */
5887 static int tracing_wait_pipe(struct file *filp)
5888 {
5889 	struct trace_iterator *iter = filp->private_data;
5890 	int ret;
5891 
5892 	while (trace_empty(iter)) {
5893 
5894 		if ((filp->f_flags & O_NONBLOCK)) {
5895 			return -EAGAIN;
5896 		}
5897 
5898 		/*
5899 		 * We block until there is something to read, or until tracing
5900 		 * is disabled after we have already read something. We still
5901 		 * block if tracing is disabled but we have never read anything;
5902 		 * this allows a user to cat this file and then enable tracing.
5903 		 * Once we have read something, we give an EOF when tracing is again disabled.
5904 		 *
5905 		 * iter->pos will be 0 if we haven't read anything.
5906 		 */
5907 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5908 			break;
5909 
5910 		mutex_unlock(&iter->mutex);
5911 
5912 		ret = wait_on_pipe(iter, 0);
5913 
5914 		mutex_lock(&iter->mutex);
5915 
5916 		if (ret)
5917 			return ret;
5918 	}
5919 
5920 	return 1;
5921 }
5922 
5923 /*
5924  * Consumer reader.
5925  */
5926 static ssize_t
5927 tracing_read_pipe(struct file *filp, char __user *ubuf,
5928 		  size_t cnt, loff_t *ppos)
5929 {
5930 	struct trace_iterator *iter = filp->private_data;
5931 	ssize_t sret;
5932 
5933 	/*
5934 	 * Avoid more than one consumer on a single file descriptor.
5935 	 * This is just a matter of trace coherency; the ring buffer itself
5936 	 * is protected.
5937 	 */
5938 	mutex_lock(&iter->mutex);
5939 
5940 	/* return any leftover data */
5941 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5942 	if (sret != -EBUSY)
5943 		goto out;
5944 
5945 	trace_seq_init(&iter->seq);
5946 
5947 	if (iter->trace->read) {
5948 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5949 		if (sret)
5950 			goto out;
5951 	}
5952 
5953 waitagain:
5954 	sret = tracing_wait_pipe(filp);
5955 	if (sret <= 0)
5956 		goto out;
5957 
5958 	/* stop when tracing is finished */
5959 	if (trace_empty(iter)) {
5960 		sret = 0;
5961 		goto out;
5962 	}
5963 
5964 	if (cnt >= PAGE_SIZE)
5965 		cnt = PAGE_SIZE - 1;
5966 
5967 	/* reset all but tr, trace, and overruns */
5968 	memset(&iter->seq, 0,
5969 	       sizeof(struct trace_iterator) -
5970 	       offsetof(struct trace_iterator, seq));
5971 	cpumask_clear(iter->started);
5972 	iter->pos = -1;
5973 
5974 	trace_event_read_lock();
5975 	trace_access_lock(iter->cpu_file);
5976 	while (trace_find_next_entry_inc(iter) != NULL) {
5977 		enum print_line_t ret;
5978 		int save_len = iter->seq.seq.len;
5979 
5980 		ret = print_trace_line(iter);
5981 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5982 			/* don't print partial lines */
5983 			iter->seq.seq.len = save_len;
5984 			break;
5985 		}
5986 		if (ret != TRACE_TYPE_NO_CONSUME)
5987 			trace_consume(iter);
5988 
5989 		if (trace_seq_used(&iter->seq) >= cnt)
5990 			break;
5991 
5992 		/*
5993 		 * If the full flag is set, we reached the trace_seq buffer size
5994 		 * and should have left via the partial-line condition above;
5995 		 * one of the trace_seq_* functions is not being used properly.
5996 		 */
5997 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5998 			  iter->ent->type);
5999 	}
6000 	trace_access_unlock(iter->cpu_file);
6001 	trace_event_read_unlock();
6002 
6003 	/* Now copy what we have to the user */
6004 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6005 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6006 		trace_seq_init(&iter->seq);
6007 
6008 	/*
6009 	 * If there was nothing to send to user, in spite of consuming trace
6010 	 * entries, go back to wait for more entries.
6011 	 */
6012 	if (sret == -EBUSY)
6013 		goto waitagain;
6014 
6015 out:
6016 	mutex_unlock(&iter->mutex);
6017 
6018 	return sret;
6019 }
6020 
6021 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6022 				     unsigned int idx)
6023 {
6024 	__free_page(spd->pages[idx]);
6025 }
6026 
6027 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6028 	.confirm		= generic_pipe_buf_confirm,
6029 	.release		= generic_pipe_buf_release,
6030 	.steal			= generic_pipe_buf_steal,
6031 	.get			= generic_pipe_buf_get,
6032 };
6033 
6034 static size_t
6035 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6036 {
6037 	size_t count;
6038 	int save_len;
6039 	int ret;
6040 
6041 	/* Seq buffer is page-sized, exactly what we need. */
6042 	for (;;) {
6043 		save_len = iter->seq.seq.len;
6044 		ret = print_trace_line(iter);
6045 
6046 		if (trace_seq_has_overflowed(&iter->seq)) {
6047 			iter->seq.seq.len = save_len;
6048 			break;
6049 		}
6050 
6051 		/*
6052 		 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6053 		 * should only be returned if iter->seq overflowed. But check
6054 		 * it anyway to be safe.
6055 		 */
6056 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6057 			iter->seq.seq.len = save_len;
6058 			break;
6059 		}
6060 
6061 		count = trace_seq_used(&iter->seq) - save_len;
6062 		if (rem < count) {
6063 			rem = 0;
6064 			iter->seq.seq.len = save_len;
6065 			break;
6066 		}
6067 
6068 		if (ret != TRACE_TYPE_NO_CONSUME)
6069 			trace_consume(iter);
6070 		rem -= count;
6071 		if (!trace_find_next_entry_inc(iter))	{
6072 			rem = 0;
6073 			iter->ent = NULL;
6074 			break;
6075 		}
6076 	}
6077 
6078 	return rem;
6079 }
6080 
6081 static ssize_t tracing_splice_read_pipe(struct file *filp,
6082 					loff_t *ppos,
6083 					struct pipe_inode_info *pipe,
6084 					size_t len,
6085 					unsigned int flags)
6086 {
6087 	struct page *pages_def[PIPE_DEF_BUFFERS];
6088 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6089 	struct trace_iterator *iter = filp->private_data;
6090 	struct splice_pipe_desc spd = {
6091 		.pages		= pages_def,
6092 		.partial	= partial_def,
6093 		.nr_pages	= 0, /* This gets updated below. */
6094 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6095 		.ops		= &tracing_pipe_buf_ops,
6096 		.spd_release	= tracing_spd_release_pipe,
6097 	};
6098 	ssize_t ret;
6099 	size_t rem;
6100 	unsigned int i;
6101 
6102 	if (splice_grow_spd(pipe, &spd))
6103 		return -ENOMEM;
6104 
6105 	mutex_lock(&iter->mutex);
6106 
6107 	if (iter->trace->splice_read) {
6108 		ret = iter->trace->splice_read(iter, filp,
6109 					       ppos, pipe, len, flags);
6110 		if (ret)
6111 			goto out_err;
6112 	}
6113 
6114 	ret = tracing_wait_pipe(filp);
6115 	if (ret <= 0)
6116 		goto out_err;
6117 
6118 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6119 		ret = -EFAULT;
6120 		goto out_err;
6121 	}
6122 
6123 	trace_event_read_lock();
6124 	trace_access_lock(iter->cpu_file);
6125 
6126 	/* Fill as many pages as possible. */
6127 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6128 		spd.pages[i] = alloc_page(GFP_KERNEL);
6129 		if (!spd.pages[i])
6130 			break;
6131 
6132 		rem = tracing_fill_pipe_page(rem, iter);
6133 
6134 		/* Copy the data into the page, so we can start over. */
6135 		ret = trace_seq_to_buffer(&iter->seq,
6136 					  page_address(spd.pages[i]),
6137 					  trace_seq_used(&iter->seq));
6138 		if (ret < 0) {
6139 			__free_page(spd.pages[i]);
6140 			break;
6141 		}
6142 		spd.partial[i].offset = 0;
6143 		spd.partial[i].len = trace_seq_used(&iter->seq);
6144 
6145 		trace_seq_init(&iter->seq);
6146 	}
6147 
6148 	trace_access_unlock(iter->cpu_file);
6149 	trace_event_read_unlock();
6150 	mutex_unlock(&iter->mutex);
6151 
6152 	spd.nr_pages = i;
6153 
6154 	if (i)
6155 		ret = splice_to_pipe(pipe, &spd);
6156 	else
6157 		ret = 0;
6158 out:
6159 	splice_shrink_spd(&spd);
6160 	return ret;
6161 
6162 out_err:
6163 	mutex_unlock(&iter->mutex);
6164 	goto out;
6165 }
6166 
6167 static ssize_t
6168 tracing_entries_read(struct file *filp, char __user *ubuf,
6169 		     size_t cnt, loff_t *ppos)
6170 {
6171 	struct inode *inode = file_inode(filp);
6172 	struct trace_array *tr = inode->i_private;
6173 	int cpu = tracing_get_cpu(inode);
6174 	char buf[64];
6175 	int r = 0;
6176 	ssize_t ret;
6177 
6178 	mutex_lock(&trace_types_lock);
6179 
6180 	if (cpu == RING_BUFFER_ALL_CPUS) {
6181 		int cpu, buf_size_same;
6182 		unsigned long size;
6183 
6184 		size = 0;
6185 		buf_size_same = 1;
6186 		/* check if all cpu sizes are the same */
6187 		for_each_tracing_cpu(cpu) {
6188 			/* fill in the size from first enabled cpu */
6189 			if (size == 0)
6190 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6191 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6192 				buf_size_same = 0;
6193 				break;
6194 			}
6195 		}
6196 
6197 		if (buf_size_same) {
6198 			if (!ring_buffer_expanded)
6199 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6200 					    size >> 10,
6201 					    trace_buf_size >> 10);
6202 			else
6203 				r = sprintf(buf, "%lu\n", size >> 10);
6204 		} else
6205 			r = sprintf(buf, "X\n");
6206 	} else
6207 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6208 
6209 	mutex_unlock(&trace_types_lock);
6210 
6211 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6212 	return ret;
6213 }
6214 
6215 static ssize_t
6216 tracing_entries_write(struct file *filp, const char __user *ubuf,
6217 		      size_t cnt, loff_t *ppos)
6218 {
6219 	struct inode *inode = file_inode(filp);
6220 	struct trace_array *tr = inode->i_private;
6221 	unsigned long val;
6222 	int ret;
6223 
6224 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6225 	if (ret)
6226 		return ret;
6227 
6228 	/* must have at least 1 entry */
6229 	if (!val)
6230 		return -EINVAL;
6231 
6232 	/* value is in KB */
6233 	val <<= 10;
6234 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6235 	if (ret < 0)
6236 		return ret;
6237 
6238 	*ppos += cnt;
6239 
6240 	return cnt;
6241 }
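/*
 * Example usage, assuming tracefs is mounted at /sys/kernel/tracing and
 * this handler backs the buffer_size_kb files (see tracing_entries_fops
 * below and the per_cpu variant created in tracing_init_tracefs_percpu()):
 * the value written is interpreted as kilobytes per CPU (note the
 * "val <<= 10" above), so
 *
 *   echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * resizes every per-CPU buffer to 4 MB, while writing to
 * per_cpu/cpu0/buffer_size_kb resizes only CPU 0's buffer.
 */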
6242 
6243 static ssize_t
6244 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6245 				size_t cnt, loff_t *ppos)
6246 {
6247 	struct trace_array *tr = filp->private_data;
6248 	char buf[64];
6249 	int r, cpu;
6250 	unsigned long size = 0, expanded_size = 0;
6251 
6252 	mutex_lock(&trace_types_lock);
6253 	for_each_tracing_cpu(cpu) {
6254 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6255 		if (!ring_buffer_expanded)
6256 			expanded_size += trace_buf_size >> 10;
6257 	}
6258 	if (ring_buffer_expanded)
6259 		r = sprintf(buf, "%lu\n", size);
6260 	else
6261 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6262 	mutex_unlock(&trace_types_lock);
6263 
6264 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6265 }
6266 
6267 static ssize_t
6268 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6269 			  size_t cnt, loff_t *ppos)
6270 {
6271 	/*
6272 	 * There is no need to read what the user has written; this function
6273 	 * exists just to make sure that "echo" does not return an error.
6274 	 */
6275 
6276 	*ppos += cnt;
6277 
6278 	return cnt;
6279 }
6280 
6281 static int
6282 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6283 {
6284 	struct trace_array *tr = inode->i_private;
6285 
6286 	/* disable tracing ? */
6287 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6288 		tracer_tracing_off(tr);
6289 	/* resize the ring buffer to 0 */
6290 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6291 
6292 	trace_array_put(tr);
6293 
6294 	return 0;
6295 }
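/*
 * A hedged note on the semantics implemented above for the file backed by
 * tracing_free_buffer_fops (conventionally named free_buffer): writes are
 * accepted but ignored, and it is the final close of the file that shrinks
 * the ring buffer to zero (and, if the stop-on-free trace option is set,
 * turns tracing off). A tool can therefore keep this file open and rely on
 * the buffer being freed even if it exits unexpectedly.
 */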
6296 
6297 static ssize_t
6298 tracing_mark_write(struct file *filp, const char __user *ubuf,
6299 					size_t cnt, loff_t *fpos)
6300 {
6301 	struct trace_array *tr = filp->private_data;
6302 	struct ring_buffer_event *event;
6303 	enum event_trigger_type tt = ETT_NONE;
6304 	struct ring_buffer *buffer;
6305 	struct print_entry *entry;
6306 	unsigned long irq_flags;
6307 	const char faulted[] = "<faulted>";
6308 	ssize_t written;
6309 	int size;
6310 	int len;
6311 
6312 /* Used in tracing_mark_raw_write() as well */
6313 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6314 
6315 	if (tracing_disabled)
6316 		return -EINVAL;
6317 
6318 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6319 		return -EINVAL;
6320 
6321 	if (cnt > TRACE_BUF_SIZE)
6322 		cnt = TRACE_BUF_SIZE;
6323 
6324 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6325 
6326 	local_save_flags(irq_flags);
6327 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6328 
6329 	/* If less than "<faulted>", then make sure we can still add that */
6330 	if (cnt < FAULTED_SIZE)
6331 		size += FAULTED_SIZE - cnt;
6332 
6333 	buffer = tr->trace_buffer.buffer;
6334 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6335 					    irq_flags, preempt_count());
6336 	if (unlikely(!event))
6337 		/* Ring buffer disabled, return as if not open for write */
6338 		return -EBADF;
6339 
6340 	entry = ring_buffer_event_data(event);
6341 	entry->ip = _THIS_IP_;
6342 
6343 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6344 	if (len) {
6345 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6346 		cnt = FAULTED_SIZE;
6347 		written = -EFAULT;
6348 	} else
6349 		written = cnt;
6350 	len = cnt;
6351 
6352 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6353 		/* do not add \n before testing triggers, but add \0 */
6354 		entry->buf[cnt] = '\0';
6355 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6356 	}
6357 
6358 	if (entry->buf[cnt - 1] != '\n') {
6359 		entry->buf[cnt] = '\n';
6360 		entry->buf[cnt + 1] = '\0';
6361 	} else
6362 		entry->buf[cnt] = '\0';
6363 
6364 	__buffer_unlock_commit(buffer, event);
6365 
6366 	if (tt)
6367 		event_triggers_post_call(tr->trace_marker_file, tt);
6368 
6369 	if (written > 0)
6370 		*fpos += written;
6371 
6372 	return written;
6373 }
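/*
 * Example usage, assuming tracefs is mounted at /sys/kernel/tracing and
 * this handler backs the trace_marker file:
 *
 *   echo "hit the interesting point" > /sys/kernel/tracing/trace_marker
 *
 * The string shows up in the trace as a TRACE_PRINT event; a trailing
 * newline is added above if the writer did not supply one.
 */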
6374 
6375 /* Limit it for now to 3K (including tag) */
6376 #define RAW_DATA_MAX_SIZE (1024*3)
6377 
6378 static ssize_t
6379 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6380 					size_t cnt, loff_t *fpos)
6381 {
6382 	struct trace_array *tr = filp->private_data;
6383 	struct ring_buffer_event *event;
6384 	struct ring_buffer *buffer;
6385 	struct raw_data_entry *entry;
6386 	const char faulted[] = "<faulted>";
6387 	unsigned long irq_flags;
6388 	ssize_t written;
6389 	int size;
6390 	int len;
6391 
6392 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6393 
6394 	if (tracing_disabled)
6395 		return -EINVAL;
6396 
6397 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6398 		return -EINVAL;
6399 
6400 	/* The marker must at least have a tag id */
6401 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6402 		return -EINVAL;
6403 
6404 	if (cnt > TRACE_BUF_SIZE)
6405 		cnt = TRACE_BUF_SIZE;
6406 
6407 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6408 
6409 	local_save_flags(irq_flags);
6410 	size = sizeof(*entry) + cnt;
6411 	if (cnt < FAULT_SIZE_ID)
6412 		size += FAULT_SIZE_ID - cnt;
6413 
6414 	buffer = tr->trace_buffer.buffer;
6415 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6416 					    irq_flags, preempt_count());
6417 	if (!event)
6418 		/* Ring buffer disabled, return as if not open for write */
6419 		return -EBADF;
6420 
6421 	entry = ring_buffer_event_data(event);
6422 
6423 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6424 	if (len) {
6425 		entry->id = -1;
6426 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6427 		written = -EFAULT;
6428 	} else
6429 		written = cnt;
6430 
6431 	__buffer_unlock_commit(buffer, event);
6432 
6433 	if (written > 0)
6434 		*fpos += written;
6435 
6436 	return written;
6437 }
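/*
 * Unlike the plain trace_marker, the raw variant above expects a binary
 * payload whose first sizeof(unsigned int) bytes are a tag id, followed by
 * arbitrary data (up to RAW_DATA_MAX_SIZE bytes in total). A minimal user
 * space sketch, with the path and tag value purely as assumptions:
 *
 *   unsigned int buf[2] = { 0x1234, 42 };  // tag id, then payload
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *   write(fd, buf, sizeof(buf));
 *   close(fd);
 */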
6438 
6439 static int tracing_clock_show(struct seq_file *m, void *v)
6440 {
6441 	struct trace_array *tr = m->private;
6442 	int i;
6443 
6444 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6445 		seq_printf(m,
6446 			"%s%s%s%s", i ? " " : "",
6447 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6448 			i == tr->clock_id ? "]" : "");
6449 	seq_putc(m, '\n');
6450 
6451 	return 0;
6452 }
6453 
6454 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6455 {
6456 	int i;
6457 
6458 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6459 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6460 			break;
6461 	}
6462 	if (i == ARRAY_SIZE(trace_clocks))
6463 		return -EINVAL;
6464 
6465 	mutex_lock(&trace_types_lock);
6466 
6467 	tr->clock_id = i;
6468 
6469 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6470 
6471 	/*
6472 	 * The new clock may not be consistent with the previous clock.
6473 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6474 	 */
6475 	tracing_reset_online_cpus(&tr->trace_buffer);
6476 
6477 #ifdef CONFIG_TRACER_MAX_TRACE
6478 	if (tr->max_buffer.buffer)
6479 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6480 	tracing_reset_online_cpus(&tr->max_buffer);
6481 #endif
6482 
6483 	mutex_unlock(&trace_types_lock);
6484 
6485 	return 0;
6486 }
6487 
6488 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6489 				   size_t cnt, loff_t *fpos)
6490 {
6491 	struct seq_file *m = filp->private_data;
6492 	struct trace_array *tr = m->private;
6493 	char buf[64];
6494 	const char *clockstr;
6495 	int ret;
6496 
6497 	if (cnt >= sizeof(buf))
6498 		return -EINVAL;
6499 
6500 	if (copy_from_user(buf, ubuf, cnt))
6501 		return -EFAULT;
6502 
6503 	buf[cnt] = 0;
6504 
6505 	clockstr = strstrip(buf);
6506 
6507 	ret = tracing_set_clock(tr, clockstr);
6508 	if (ret)
6509 		return ret;
6510 
6511 	*fpos += cnt;
6512 
6513 	return cnt;
6514 }
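/*
 * Example usage, assuming tracefs is mounted at /sys/kernel/tracing:
 * reading trace_clock lists the available clocks with the current one in
 * brackets (see tracing_clock_show() above), and writing a clock name
 * switches to it and resets the buffers:
 *
 *   cat /sys/kernel/tracing/trace_clock
 *   echo global > /sys/kernel/tracing/trace_clock
 */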
6515 
6516 static int tracing_clock_open(struct inode *inode, struct file *file)
6517 {
6518 	struct trace_array *tr = inode->i_private;
6519 	int ret;
6520 
6521 	if (tracing_disabled)
6522 		return -ENODEV;
6523 
6524 	if (trace_array_get(tr))
6525 		return -ENODEV;
6526 
6527 	ret = single_open(file, tracing_clock_show, inode->i_private);
6528 	if (ret < 0)
6529 		trace_array_put(tr);
6530 
6531 	return ret;
6532 }
6533 
6534 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6535 {
6536 	struct trace_array *tr = m->private;
6537 
6538 	mutex_lock(&trace_types_lock);
6539 
6540 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6541 		seq_puts(m, "delta [absolute]\n");
6542 	else
6543 		seq_puts(m, "[delta] absolute\n");
6544 
6545 	mutex_unlock(&trace_types_lock);
6546 
6547 	return 0;
6548 }
6549 
6550 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6551 {
6552 	struct trace_array *tr = inode->i_private;
6553 	int ret;
6554 
6555 	if (tracing_disabled)
6556 		return -ENODEV;
6557 
6558 	if (trace_array_get(tr))
6559 		return -ENODEV;
6560 
6561 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6562 	if (ret < 0)
6563 		trace_array_put(tr);
6564 
6565 	return ret;
6566 }
6567 
6568 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6569 {
6570 	int ret = 0;
6571 
6572 	mutex_lock(&trace_types_lock);
6573 
6574 	if (abs && tr->time_stamp_abs_ref++)
6575 		goto out;
6576 
6577 	if (!abs) {
6578 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6579 			ret = -EINVAL;
6580 			goto out;
6581 		}
6582 
6583 		if (--tr->time_stamp_abs_ref)
6584 			goto out;
6585 	}
6586 
6587 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6588 
6589 #ifdef CONFIG_TRACER_MAX_TRACE
6590 	if (tr->max_buffer.buffer)
6591 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6592 #endif
6593  out:
6594 	mutex_unlock(&trace_types_lock);
6595 
6596 	return ret;
6597 }
6598 
6599 struct ftrace_buffer_info {
6600 	struct trace_iterator	iter;
6601 	void			*spare;
6602 	unsigned int		spare_cpu;
6603 	unsigned int		read;
6604 };
6605 
6606 #ifdef CONFIG_TRACER_SNAPSHOT
6607 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6608 {
6609 	struct trace_array *tr = inode->i_private;
6610 	struct trace_iterator *iter;
6611 	struct seq_file *m;
6612 	int ret = 0;
6613 
6614 	if (trace_array_get(tr) < 0)
6615 		return -ENODEV;
6616 
6617 	if (file->f_mode & FMODE_READ) {
6618 		iter = __tracing_open(inode, file, true);
6619 		if (IS_ERR(iter))
6620 			ret = PTR_ERR(iter);
6621 	} else {
6622 		/* Writes still need the seq_file to hold the private data */
6623 		ret = -ENOMEM;
6624 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6625 		if (!m)
6626 			goto out;
6627 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6628 		if (!iter) {
6629 			kfree(m);
6630 			goto out;
6631 		}
6632 		ret = 0;
6633 
6634 		iter->tr = tr;
6635 		iter->trace_buffer = &tr->max_buffer;
6636 		iter->cpu_file = tracing_get_cpu(inode);
6637 		m->private = iter;
6638 		file->private_data = m;
6639 	}
6640 out:
6641 	if (ret < 0)
6642 		trace_array_put(tr);
6643 
6644 	return ret;
6645 }
6646 
6647 static ssize_t
6648 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6649 		       loff_t *ppos)
6650 {
6651 	struct seq_file *m = filp->private_data;
6652 	struct trace_iterator *iter = m->private;
6653 	struct trace_array *tr = iter->tr;
6654 	unsigned long val;
6655 	int ret;
6656 
6657 	ret = tracing_update_buffers();
6658 	if (ret < 0)
6659 		return ret;
6660 
6661 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6662 	if (ret)
6663 		return ret;
6664 
6665 	mutex_lock(&trace_types_lock);
6666 
6667 	if (tr->current_trace->use_max_tr) {
6668 		ret = -EBUSY;
6669 		goto out;
6670 	}
6671 
6672 	arch_spin_lock(&tr->max_lock);
6673 	if (tr->cond_snapshot)
6674 		ret = -EBUSY;
6675 	arch_spin_unlock(&tr->max_lock);
6676 	if (ret)
6677 		goto out;
6678 
6679 	switch (val) {
6680 	case 0:
6681 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6682 			ret = -EINVAL;
6683 			break;
6684 		}
6685 		if (tr->allocated_snapshot)
6686 			free_snapshot(tr);
6687 		break;
6688 	case 1:
6689 /* Only allow per-cpu swap if the ring buffer supports it */
6690 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6691 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6692 			ret = -EINVAL;
6693 			break;
6694 		}
6695 #endif
6696 		if (!tr->allocated_snapshot) {
6697 			ret = tracing_alloc_snapshot_instance(tr);
6698 			if (ret < 0)
6699 				break;
6700 		}
6701 		local_irq_disable();
6702 		/* Now, we're going to swap */
6703 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6704 			update_max_tr(tr, current, smp_processor_id(), NULL);
6705 		else
6706 			update_max_tr_single(tr, current, iter->cpu_file);
6707 		local_irq_enable();
6708 		break;
6709 	default:
6710 		if (tr->allocated_snapshot) {
6711 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6712 				tracing_reset_online_cpus(&tr->max_buffer);
6713 			else
6714 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6715 		}
6716 		break;
6717 	}
6718 
6719 	if (ret >= 0) {
6720 		*ppos += cnt;
6721 		ret = cnt;
6722 	}
6723 out:
6724 	mutex_unlock(&trace_types_lock);
6725 	return ret;
6726 }
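/*
 * To summarize the switch above: writing 0 to the snapshot file frees the
 * snapshot buffer (only allowed on the all-CPUs file), writing 1 allocates
 * it if needed and swaps the current trace into the snapshot, and any other
 * value simply clears the snapshot buffer's contents. For example, assuming
 * tracefs is mounted at /sys/kernel/tracing:
 *
 *   echo 1 > /sys/kernel/tracing/snapshot
 *   cat /sys/kernel/tracing/snapshot
 */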
6727 
6728 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6729 {
6730 	struct seq_file *m = file->private_data;
6731 	int ret;
6732 
6733 	ret = tracing_release(inode, file);
6734 
6735 	if (file->f_mode & FMODE_READ)
6736 		return ret;
6737 
6738 	/* If write only, the seq_file is just a stub */
6739 	if (m)
6740 		kfree(m->private);
6741 	kfree(m);
6742 
6743 	return 0;
6744 }
6745 
6746 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6747 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6748 				    size_t count, loff_t *ppos);
6749 static int tracing_buffers_release(struct inode *inode, struct file *file);
6750 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6751 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6752 
6753 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6754 {
6755 	struct ftrace_buffer_info *info;
6756 	int ret;
6757 
6758 	ret = tracing_buffers_open(inode, filp);
6759 	if (ret < 0)
6760 		return ret;
6761 
6762 	info = filp->private_data;
6763 
6764 	if (info->iter.trace->use_max_tr) {
6765 		tracing_buffers_release(inode, filp);
6766 		return -EBUSY;
6767 	}
6768 
6769 	info->iter.snapshot = true;
6770 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6771 
6772 	return ret;
6773 }
6774 
6775 #endif /* CONFIG_TRACER_SNAPSHOT */
6776 
6777 
6778 static const struct file_operations tracing_thresh_fops = {
6779 	.open		= tracing_open_generic,
6780 	.read		= tracing_thresh_read,
6781 	.write		= tracing_thresh_write,
6782 	.llseek		= generic_file_llseek,
6783 };
6784 
6785 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6786 static const struct file_operations tracing_max_lat_fops = {
6787 	.open		= tracing_open_generic,
6788 	.read		= tracing_max_lat_read,
6789 	.write		= tracing_max_lat_write,
6790 	.llseek		= generic_file_llseek,
6791 };
6792 #endif
6793 
6794 static const struct file_operations set_tracer_fops = {
6795 	.open		= tracing_open_generic,
6796 	.read		= tracing_set_trace_read,
6797 	.write		= tracing_set_trace_write,
6798 	.llseek		= generic_file_llseek,
6799 };
6800 
6801 static const struct file_operations tracing_pipe_fops = {
6802 	.open		= tracing_open_pipe,
6803 	.poll		= tracing_poll_pipe,
6804 	.read		= tracing_read_pipe,
6805 	.splice_read	= tracing_splice_read_pipe,
6806 	.release	= tracing_release_pipe,
6807 	.llseek		= no_llseek,
6808 };
6809 
6810 static const struct file_operations tracing_entries_fops = {
6811 	.open		= tracing_open_generic_tr,
6812 	.read		= tracing_entries_read,
6813 	.write		= tracing_entries_write,
6814 	.llseek		= generic_file_llseek,
6815 	.release	= tracing_release_generic_tr,
6816 };
6817 
6818 static const struct file_operations tracing_total_entries_fops = {
6819 	.open		= tracing_open_generic_tr,
6820 	.read		= tracing_total_entries_read,
6821 	.llseek		= generic_file_llseek,
6822 	.release	= tracing_release_generic_tr,
6823 };
6824 
6825 static const struct file_operations tracing_free_buffer_fops = {
6826 	.open		= tracing_open_generic_tr,
6827 	.write		= tracing_free_buffer_write,
6828 	.release	= tracing_free_buffer_release,
6829 };
6830 
6831 static const struct file_operations tracing_mark_fops = {
6832 	.open		= tracing_open_generic_tr,
6833 	.write		= tracing_mark_write,
6834 	.llseek		= generic_file_llseek,
6835 	.release	= tracing_release_generic_tr,
6836 };
6837 
6838 static const struct file_operations tracing_mark_raw_fops = {
6839 	.open		= tracing_open_generic_tr,
6840 	.write		= tracing_mark_raw_write,
6841 	.llseek		= generic_file_llseek,
6842 	.release	= tracing_release_generic_tr,
6843 };
6844 
6845 static const struct file_operations trace_clock_fops = {
6846 	.open		= tracing_clock_open,
6847 	.read		= seq_read,
6848 	.llseek		= seq_lseek,
6849 	.release	= tracing_single_release_tr,
6850 	.write		= tracing_clock_write,
6851 };
6852 
6853 static const struct file_operations trace_time_stamp_mode_fops = {
6854 	.open		= tracing_time_stamp_mode_open,
6855 	.read		= seq_read,
6856 	.llseek		= seq_lseek,
6857 	.release	= tracing_single_release_tr,
6858 };
6859 
6860 #ifdef CONFIG_TRACER_SNAPSHOT
6861 static const struct file_operations snapshot_fops = {
6862 	.open		= tracing_snapshot_open,
6863 	.read		= seq_read,
6864 	.write		= tracing_snapshot_write,
6865 	.llseek		= tracing_lseek,
6866 	.release	= tracing_snapshot_release,
6867 };
6868 
6869 static const struct file_operations snapshot_raw_fops = {
6870 	.open		= snapshot_raw_open,
6871 	.read		= tracing_buffers_read,
6872 	.release	= tracing_buffers_release,
6873 	.splice_read	= tracing_buffers_splice_read,
6874 	.llseek		= no_llseek,
6875 };
6876 
6877 #endif /* CONFIG_TRACER_SNAPSHOT */
6878 
6879 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6880 {
6881 	struct trace_array *tr = inode->i_private;
6882 	struct ftrace_buffer_info *info;
6883 	int ret;
6884 
6885 	if (tracing_disabled)
6886 		return -ENODEV;
6887 
6888 	if (trace_array_get(tr) < 0)
6889 		return -ENODEV;
6890 
6891 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6892 	if (!info) {
6893 		trace_array_put(tr);
6894 		return -ENOMEM;
6895 	}
6896 
6897 	mutex_lock(&trace_types_lock);
6898 
6899 	info->iter.tr		= tr;
6900 	info->iter.cpu_file	= tracing_get_cpu(inode);
6901 	info->iter.trace	= tr->current_trace;
6902 	info->iter.trace_buffer = &tr->trace_buffer;
6903 	info->spare		= NULL;
6904 	/* Force reading ring buffer for first read */
6905 	info->read		= (unsigned int)-1;
6906 
6907 	filp->private_data = info;
6908 
6909 	tr->current_trace->ref++;
6910 
6911 	mutex_unlock(&trace_types_lock);
6912 
6913 	ret = nonseekable_open(inode, filp);
6914 	if (ret < 0)
6915 		trace_array_put(tr);
6916 
6917 	return ret;
6918 }
6919 
6920 static __poll_t
6921 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6922 {
6923 	struct ftrace_buffer_info *info = filp->private_data;
6924 	struct trace_iterator *iter = &info->iter;
6925 
6926 	return trace_poll(iter, filp, poll_table);
6927 }
6928 
6929 static ssize_t
6930 tracing_buffers_read(struct file *filp, char __user *ubuf,
6931 		     size_t count, loff_t *ppos)
6932 {
6933 	struct ftrace_buffer_info *info = filp->private_data;
6934 	struct trace_iterator *iter = &info->iter;
6935 	ssize_t ret = 0;
6936 	ssize_t size;
6937 
6938 	if (!count)
6939 		return 0;
6940 
6941 #ifdef CONFIG_TRACER_MAX_TRACE
6942 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6943 		return -EBUSY;
6944 #endif
6945 
6946 	if (!info->spare) {
6947 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6948 							  iter->cpu_file);
6949 		if (IS_ERR(info->spare)) {
6950 			ret = PTR_ERR(info->spare);
6951 			info->spare = NULL;
6952 		} else {
6953 			info->spare_cpu = iter->cpu_file;
6954 		}
6955 	}
6956 	if (!info->spare)
6957 		return ret;
6958 
6959 	/* Do we have previous read data to read? */
6960 	if (info->read < PAGE_SIZE)
6961 		goto read;
6962 
6963  again:
6964 	trace_access_lock(iter->cpu_file);
6965 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6966 				    &info->spare,
6967 				    count,
6968 				    iter->cpu_file, 0);
6969 	trace_access_unlock(iter->cpu_file);
6970 
6971 	if (ret < 0) {
6972 		if (trace_empty(iter)) {
6973 			if ((filp->f_flags & O_NONBLOCK))
6974 				return -EAGAIN;
6975 
6976 			ret = wait_on_pipe(iter, 0);
6977 			if (ret)
6978 				return ret;
6979 
6980 			goto again;
6981 		}
6982 		return 0;
6983 	}
6984 
6985 	info->read = 0;
6986  read:
6987 	size = PAGE_SIZE - info->read;
6988 	if (size > count)
6989 		size = count;
6990 
6991 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6992 	if (ret == size)
6993 		return -EFAULT;
6994 
6995 	size -= ret;
6996 
6997 	*ppos += size;
6998 	info->read += size;
6999 
7000 	return size;
7001 }
7002 
7003 static int tracing_buffers_release(struct inode *inode, struct file *file)
7004 {
7005 	struct ftrace_buffer_info *info = file->private_data;
7006 	struct trace_iterator *iter = &info->iter;
7007 
7008 	mutex_lock(&trace_types_lock);
7009 
7010 	iter->tr->current_trace->ref--;
7011 
7012 	__trace_array_put(iter->tr);
7013 
7014 	if (info->spare)
7015 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
7016 					   info->spare_cpu, info->spare);
7017 	kfree(info);
7018 
7019 	mutex_unlock(&trace_types_lock);
7020 
7021 	return 0;
7022 }
7023 
7024 struct buffer_ref {
7025 	struct ring_buffer	*buffer;
7026 	void			*page;
7027 	int			cpu;
7028 	int			ref;
7029 };
7030 
7031 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7032 				    struct pipe_buffer *buf)
7033 {
7034 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7035 
7036 	if (--ref->ref)
7037 		return;
7038 
7039 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7040 	kfree(ref);
7041 	buf->private = 0;
7042 }
7043 
7044 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7045 				struct pipe_buffer *buf)
7046 {
7047 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7048 
7049 	ref->ref++;
7050 }
7051 
7052 /* Pipe buffer operations for a buffer. */
7053 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7054 	.confirm		= generic_pipe_buf_confirm,
7055 	.release		= buffer_pipe_buf_release,
7056 	.steal			= generic_pipe_buf_steal,
7057 	.get			= buffer_pipe_buf_get,
7058 };
7059 
7060 /*
7061  * Callback from splice_to_pipe(), used to release some pages at the
7062  * end of the spd in case we errored out while filling the pipe.
7063  */
7064 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7065 {
7066 	struct buffer_ref *ref =
7067 		(struct buffer_ref *)spd->partial[i].private;
7068 
7069 	if (--ref->ref)
7070 		return;
7071 
7072 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7073 	kfree(ref);
7074 	spd->partial[i].private = 0;
7075 }
7076 
7077 static ssize_t
7078 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7079 			    struct pipe_inode_info *pipe, size_t len,
7080 			    unsigned int flags)
7081 {
7082 	struct ftrace_buffer_info *info = file->private_data;
7083 	struct trace_iterator *iter = &info->iter;
7084 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7085 	struct page *pages_def[PIPE_DEF_BUFFERS];
7086 	struct splice_pipe_desc spd = {
7087 		.pages		= pages_def,
7088 		.partial	= partial_def,
7089 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7090 		.ops		= &buffer_pipe_buf_ops,
7091 		.spd_release	= buffer_spd_release,
7092 	};
7093 	struct buffer_ref *ref;
7094 	int entries, i;
7095 	ssize_t ret = 0;
7096 
7097 #ifdef CONFIG_TRACER_MAX_TRACE
7098 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7099 		return -EBUSY;
7100 #endif
7101 
7102 	if (*ppos & (PAGE_SIZE - 1))
7103 		return -EINVAL;
7104 
7105 	if (len & (PAGE_SIZE - 1)) {
7106 		if (len < PAGE_SIZE)
7107 			return -EINVAL;
7108 		len &= PAGE_MASK;
7109 	}
7110 
7111 	if (splice_grow_spd(pipe, &spd))
7112 		return -ENOMEM;
7113 
7114  again:
7115 	trace_access_lock(iter->cpu_file);
7116 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7117 
7118 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7119 		struct page *page;
7120 		int r;
7121 
7122 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7123 		if (!ref) {
7124 			ret = -ENOMEM;
7125 			break;
7126 		}
7127 
7128 		ref->ref = 1;
7129 		ref->buffer = iter->trace_buffer->buffer;
7130 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7131 		if (IS_ERR(ref->page)) {
7132 			ret = PTR_ERR(ref->page);
7133 			ref->page = NULL;
7134 			kfree(ref);
7135 			break;
7136 		}
7137 		ref->cpu = iter->cpu_file;
7138 
7139 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7140 					  len, iter->cpu_file, 1);
7141 		if (r < 0) {
7142 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7143 						   ref->page);
7144 			kfree(ref);
7145 			break;
7146 		}
7147 
7148 		page = virt_to_page(ref->page);
7149 
7150 		spd.pages[i] = page;
7151 		spd.partial[i].len = PAGE_SIZE;
7152 		spd.partial[i].offset = 0;
7153 		spd.partial[i].private = (unsigned long)ref;
7154 		spd.nr_pages++;
7155 		*ppos += PAGE_SIZE;
7156 
7157 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7158 	}
7159 
7160 	trace_access_unlock(iter->cpu_file);
7161 	spd.nr_pages = i;
7162 
7163 	/* did we read anything? */
7164 	if (!spd.nr_pages) {
7165 		if (ret)
7166 			goto out;
7167 
7168 		ret = -EAGAIN;
7169 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7170 			goto out;
7171 
7172 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7173 		if (ret)
7174 			goto out;
7175 
7176 		goto again;
7177 	}
7178 
7179 	ret = splice_to_pipe(pipe, &spd);
7180 out:
7181 	splice_shrink_spd(&spd);
7182 
7183 	return ret;
7184 }
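/*
 * Note on the checks above: trace_pipe_raw hands out whole ring-buffer
 * pages, so both the file offset and the requested length must be
 * page-aligned (a short length below PAGE_SIZE is rejected outright).
 * Tools such as trace-cmd typically splice() these pages straight into a
 * file to record binary trace data without copying it through user space.
 */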
7185 
7186 static const struct file_operations tracing_buffers_fops = {
7187 	.open		= tracing_buffers_open,
7188 	.read		= tracing_buffers_read,
7189 	.poll		= tracing_buffers_poll,
7190 	.release	= tracing_buffers_release,
7191 	.splice_read	= tracing_buffers_splice_read,
7192 	.llseek		= no_llseek,
7193 };
7194 
7195 static ssize_t
7196 tracing_stats_read(struct file *filp, char __user *ubuf,
7197 		   size_t count, loff_t *ppos)
7198 {
7199 	struct inode *inode = file_inode(filp);
7200 	struct trace_array *tr = inode->i_private;
7201 	struct trace_buffer *trace_buf = &tr->trace_buffer;
7202 	int cpu = tracing_get_cpu(inode);
7203 	struct trace_seq *s;
7204 	unsigned long cnt;
7205 	unsigned long long t;
7206 	unsigned long usec_rem;
7207 
7208 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7209 	if (!s)
7210 		return -ENOMEM;
7211 
7212 	trace_seq_init(s);
7213 
7214 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7215 	trace_seq_printf(s, "entries: %ld\n", cnt);
7216 
7217 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7218 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7219 
7220 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7221 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7222 
7223 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7224 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7225 
7226 	if (trace_clocks[tr->clock_id].in_ns) {
7227 		/* local or global for trace_clock */
7228 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7229 		usec_rem = do_div(t, USEC_PER_SEC);
7230 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7231 								t, usec_rem);
7232 
7233 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7234 		usec_rem = do_div(t, USEC_PER_SEC);
7235 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7236 	} else {
7237 		/* counter or tsc mode for trace_clock */
7238 		trace_seq_printf(s, "oldest event ts: %llu\n",
7239 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7240 
7241 		trace_seq_printf(s, "now ts: %llu\n",
7242 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7243 	}
7244 
7245 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7246 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7247 
7248 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7249 	trace_seq_printf(s, "read events: %ld\n", cnt);
7250 
7251 	count = simple_read_from_buffer(ubuf, count, ppos,
7252 					s->buffer, trace_seq_used(s));
7253 
7254 	kfree(s);
7255 
7256 	return count;
7257 }
7258 
7259 static const struct file_operations tracing_stats_fops = {
7260 	.open		= tracing_open_generic_tr,
7261 	.read		= tracing_stats_read,
7262 	.llseek		= generic_file_llseek,
7263 	.release	= tracing_release_generic_tr,
7264 };
7265 
7266 #ifdef CONFIG_DYNAMIC_FTRACE
7267 
7268 static ssize_t
7269 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7270 		  size_t cnt, loff_t *ppos)
7271 {
7272 	unsigned long *p = filp->private_data;
7273 	char buf[64]; /* Not too big for a shallow stack */
7274 	int r;
7275 
7276 	r = scnprintf(buf, 63, "%ld", *p);
7277 	buf[r++] = '\n';
7278 
7279 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7280 }
7281 
7282 static const struct file_operations tracing_dyn_info_fops = {
7283 	.open		= tracing_open_generic,
7284 	.read		= tracing_read_dyn_info,
7285 	.llseek		= generic_file_llseek,
7286 };
7287 #endif /* CONFIG_DYNAMIC_FTRACE */
7288 
7289 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7290 static void
7291 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7292 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7293 		void *data)
7294 {
7295 	tracing_snapshot_instance(tr);
7296 }
7297 
7298 static void
7299 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7300 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7301 		      void *data)
7302 {
7303 	struct ftrace_func_mapper *mapper = data;
7304 	long *count = NULL;
7305 
7306 	if (mapper)
7307 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7308 
7309 	if (count) {
7310 
7311 		if (*count <= 0)
7312 			return;
7313 
7314 		(*count)--;
7315 	}
7316 
7317 	tracing_snapshot_instance(tr);
7318 }
7319 
7320 static int
7321 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7322 		      struct ftrace_probe_ops *ops, void *data)
7323 {
7324 	struct ftrace_func_mapper *mapper = data;
7325 	long *count = NULL;
7326 
7327 	seq_printf(m, "%ps:", (void *)ip);
7328 
7329 	seq_puts(m, "snapshot");
7330 
7331 	if (mapper)
7332 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7333 
7334 	if (count)
7335 		seq_printf(m, ":count=%ld\n", *count);
7336 	else
7337 		seq_puts(m, ":unlimited\n");
7338 
7339 	return 0;
7340 }
7341 
7342 static int
7343 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7344 		     unsigned long ip, void *init_data, void **data)
7345 {
7346 	struct ftrace_func_mapper *mapper = *data;
7347 
7348 	if (!mapper) {
7349 		mapper = allocate_ftrace_func_mapper();
7350 		if (!mapper)
7351 			return -ENOMEM;
7352 		*data = mapper;
7353 	}
7354 
7355 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7356 }
7357 
7358 static void
7359 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7360 		     unsigned long ip, void *data)
7361 {
7362 	struct ftrace_func_mapper *mapper = data;
7363 
7364 	if (!ip) {
7365 		if (!mapper)
7366 			return;
7367 		free_ftrace_func_mapper(mapper, NULL);
7368 		return;
7369 	}
7370 
7371 	ftrace_func_mapper_remove_ip(mapper, ip);
7372 }
7373 
7374 static struct ftrace_probe_ops snapshot_probe_ops = {
7375 	.func			= ftrace_snapshot,
7376 	.print			= ftrace_snapshot_print,
7377 };
7378 
7379 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7380 	.func			= ftrace_count_snapshot,
7381 	.print			= ftrace_snapshot_print,
7382 	.init			= ftrace_snapshot_init,
7383 	.free			= ftrace_snapshot_free,
7384 };
7385 
7386 static int
7387 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7388 			       char *glob, char *cmd, char *param, int enable)
7389 {
7390 	struct ftrace_probe_ops *ops;
7391 	void *count = (void *)-1;
7392 	char *number;
7393 	int ret;
7394 
7395 	if (!tr)
7396 		return -ENODEV;
7397 
7398 	/* hash funcs only work with set_ftrace_filter */
7399 	if (!enable)
7400 		return -EINVAL;
7401 
7402 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7403 
7404 	if (glob[0] == '!')
7405 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7406 
7407 	if (!param)
7408 		goto out_reg;
7409 
7410 	number = strsep(&param, ":");
7411 
7412 	if (!strlen(number))
7413 		goto out_reg;
7414 
7415 	/*
7416 	 * We use the callback data field (which is a pointer)
7417 	 * as our counter.
7418 	 */
7419 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7420 	if (ret)
7421 		return ret;
7422 
7423  out_reg:
7424 	ret = tracing_alloc_snapshot_instance(tr);
7425 	if (ret < 0)
7426 		goto out;
7427 
7428 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7429 
7430  out:
7431 	return ret < 0 ? ret : 0;
7432 }
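/*
 * Example usage of the "snapshot" command registered below, assuming
 * tracefs is mounted at /sys/kernel/tracing: take a snapshot the first
 * five times schedule() is hit, then remove the counted probe again:
 *
 *   echo 'schedule:snapshot:5' > /sys/kernel/tracing/set_ftrace_filter
 *   echo '!schedule:snapshot:0' > /sys/kernel/tracing/set_ftrace_filter
 *
 * The optional ":count" is parsed above with strsep() and stored in the
 * probe's data field; a leading '!' unregisters the matching probe.
 */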
7433 
7434 static struct ftrace_func_command ftrace_snapshot_cmd = {
7435 	.name			= "snapshot",
7436 	.func			= ftrace_trace_snapshot_callback,
7437 };
7438 
7439 static __init int register_snapshot_cmd(void)
7440 {
7441 	return register_ftrace_command(&ftrace_snapshot_cmd);
7442 }
7443 #else
7444 static inline __init int register_snapshot_cmd(void) { return 0; }
7445 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7446 
7447 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7448 {
7449 	if (WARN_ON(!tr->dir))
7450 		return ERR_PTR(-ENODEV);
7451 
7452 	/* Top directory uses NULL as the parent */
7453 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7454 		return NULL;
7455 
7456 	/* All sub buffers have a descriptor */
7457 	return tr->dir;
7458 }
7459 
7460 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7461 {
7462 	struct dentry *d_tracer;
7463 
7464 	if (tr->percpu_dir)
7465 		return tr->percpu_dir;
7466 
7467 	d_tracer = tracing_get_dentry(tr);
7468 	if (IS_ERR(d_tracer))
7469 		return NULL;
7470 
7471 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7472 
7473 	WARN_ONCE(!tr->percpu_dir,
7474 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7475 
7476 	return tr->percpu_dir;
7477 }
7478 
7479 static struct dentry *
7480 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7481 		      void *data, long cpu, const struct file_operations *fops)
7482 {
7483 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7484 
7485 	if (ret) /* See tracing_get_cpu() */
7486 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7487 	return ret;
7488 }
7489 
7490 static void
7491 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7492 {
7493 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7494 	struct dentry *d_cpu;
7495 	char cpu_dir[30]; /* 30 characters should be more than enough */
7496 
7497 	if (!d_percpu)
7498 		return;
7499 
7500 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7501 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7502 	if (!d_cpu) {
7503 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7504 		return;
7505 	}
7506 
7507 	/* per cpu trace_pipe */
7508 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7509 				tr, cpu, &tracing_pipe_fops);
7510 
7511 	/* per cpu trace */
7512 	trace_create_cpu_file("trace", 0644, d_cpu,
7513 				tr, cpu, &tracing_fops);
7514 
7515 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7516 				tr, cpu, &tracing_buffers_fops);
7517 
7518 	trace_create_cpu_file("stats", 0444, d_cpu,
7519 				tr, cpu, &tracing_stats_fops);
7520 
7521 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7522 				tr, cpu, &tracing_entries_fops);
7523 
7524 #ifdef CONFIG_TRACER_SNAPSHOT
7525 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7526 				tr, cpu, &snapshot_fops);
7527 
7528 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7529 				tr, cpu, &snapshot_raw_fops);
7530 #endif
7531 }
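/*
 * The resulting per-instance layout, assuming tracefs is mounted at
 * /sys/kernel/tracing, is one directory per CPU:
 *
 *   per_cpu/cpu0/{trace,trace_pipe,trace_pipe_raw,stats,buffer_size_kb}
 *
 * plus snapshot and snapshot_raw when CONFIG_TRACER_SNAPSHOT is enabled.
 */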
7532 
7533 #ifdef CONFIG_FTRACE_SELFTEST
7534 /* Let selftest have access to static functions in this file */
7535 #include "trace_selftest.c"
7536 #endif
7537 
7538 static ssize_t
7539 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7540 			loff_t *ppos)
7541 {
7542 	struct trace_option_dentry *topt = filp->private_data;
7543 	char *buf;
7544 
7545 	if (topt->flags->val & topt->opt->bit)
7546 		buf = "1\n";
7547 	else
7548 		buf = "0\n";
7549 
7550 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7551 }
7552 
7553 static ssize_t
7554 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7555 			 loff_t *ppos)
7556 {
7557 	struct trace_option_dentry *topt = filp->private_data;
7558 	unsigned long val;
7559 	int ret;
7560 
7561 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7562 	if (ret)
7563 		return ret;
7564 
7565 	if (val != 0 && val != 1)
7566 		return -EINVAL;
7567 
7568 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7569 		mutex_lock(&trace_types_lock);
7570 		ret = __set_tracer_option(topt->tr, topt->flags,
7571 					  topt->opt, !val);
7572 		mutex_unlock(&trace_types_lock);
7573 		if (ret)
7574 			return ret;
7575 	}
7576 
7577 	*ppos += cnt;
7578 
7579 	return cnt;
7580 }
7581 
7582 
7583 static const struct file_operations trace_options_fops = {
7584 	.open = tracing_open_generic,
7585 	.read = trace_options_read,
7586 	.write = trace_options_write,
7587 	.llseek	= generic_file_llseek,
7588 };
7589 
7590 /*
7591  * In order to pass in both the trace_array descriptor and the index of
7592  * the flag that the trace option file represents, the trace_array
7593  * has a character array of trace_flags_index[], which holds the index
7594  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7595  * The address of this character array is passed to the flag option file
7596  * read/write callbacks.
7597  *
7598  * In order to extract both the index and the trace_array descriptor,
7599  * get_tr_index() uses the following algorithm.
7600  *
7601  *   idx = *ptr;
7602  *
7603  * The pointer itself is the address of one element of the index array,
7604  * and that element's value is its own index (remember index[1] == 1).
7605  *
7606  * Then, to get the trace_array descriptor, we subtract that index
7607  * from the pointer, which gets us to the start of the array itself:
7608  *
7609  *   ptr - idx == &index[0]
7610  *
7611  * Then a simple container_of() from that pointer gets us to the
7612  * trace_array descriptor.
7613  */
7614 static void get_tr_index(void *data, struct trace_array **ptr,
7615 			 unsigned int *pindex)
7616 {
7617 	*pindex = *(unsigned char *)data;
7618 
7619 	*ptr = container_of(data - *pindex, struct trace_array,
7620 			    trace_flags_index);
7621 }
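/*
 * A concrete walk-through of the recovery above, assuming the option file
 * for flag bit 3 was created with data == &tr->trace_flags_index[3]
 * (pointer arithmetic shown roughly, as a sketch):
 *
 *   idx  = *(unsigned char *)data;   // == 3
 *   base = data - idx;               // == tr->trace_flags_index
 *   tr   = container_of(base, struct trace_array, trace_flags_index);
 */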
7622 
7623 static ssize_t
7624 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7625 			loff_t *ppos)
7626 {
7627 	void *tr_index = filp->private_data;
7628 	struct trace_array *tr;
7629 	unsigned int index;
7630 	char *buf;
7631 
7632 	get_tr_index(tr_index, &tr, &index);
7633 
7634 	if (tr->trace_flags & (1 << index))
7635 		buf = "1\n";
7636 	else
7637 		buf = "0\n";
7638 
7639 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7640 }
7641 
7642 static ssize_t
7643 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7644 			 loff_t *ppos)
7645 {
7646 	void *tr_index = filp->private_data;
7647 	struct trace_array *tr;
7648 	unsigned int index;
7649 	unsigned long val;
7650 	int ret;
7651 
7652 	get_tr_index(tr_index, &tr, &index);
7653 
7654 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7655 	if (ret)
7656 		return ret;
7657 
7658 	if (val != 0 && val != 1)
7659 		return -EINVAL;
7660 
7661 	mutex_lock(&trace_types_lock);
7662 	ret = set_tracer_flag(tr, 1 << index, val);
7663 	mutex_unlock(&trace_types_lock);
7664 
7665 	if (ret < 0)
7666 		return ret;
7667 
7668 	*ppos += cnt;
7669 
7670 	return cnt;
7671 }
7672 
7673 static const struct file_operations trace_options_core_fops = {
7674 	.open = tracing_open_generic,
7675 	.read = trace_options_core_read,
7676 	.write = trace_options_core_write,
7677 	.llseek = generic_file_llseek,
7678 };
7679 
7680 struct dentry *trace_create_file(const char *name,
7681 				 umode_t mode,
7682 				 struct dentry *parent,
7683 				 void *data,
7684 				 const struct file_operations *fops)
7685 {
7686 	struct dentry *ret;
7687 
7688 	ret = tracefs_create_file(name, mode, parent, data, fops);
7689 	if (!ret)
7690 		pr_warn("Could not create tracefs '%s' entry\n", name);
7691 
7692 	return ret;
7693 }
7694 
7695 
7696 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7697 {
7698 	struct dentry *d_tracer;
7699 
7700 	if (tr->options)
7701 		return tr->options;
7702 
7703 	d_tracer = tracing_get_dentry(tr);
7704 	if (IS_ERR(d_tracer))
7705 		return NULL;
7706 
7707 	tr->options = tracefs_create_dir("options", d_tracer);
7708 	if (!tr->options) {
7709 		pr_warn("Could not create tracefs directory 'options'\n");
7710 		return NULL;
7711 	}
7712 
7713 	return tr->options;
7714 }
7715 
7716 static void
7717 create_trace_option_file(struct trace_array *tr,
7718 			 struct trace_option_dentry *topt,
7719 			 struct tracer_flags *flags,
7720 			 struct tracer_opt *opt)
7721 {
7722 	struct dentry *t_options;
7723 
7724 	t_options = trace_options_init_dentry(tr);
7725 	if (!t_options)
7726 		return;
7727 
7728 	topt->flags = flags;
7729 	topt->opt = opt;
7730 	topt->tr = tr;
7731 
7732 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7733 				    &trace_options_fops);
7734 
7735 }
7736 
7737 static void
7738 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7739 {
7740 	struct trace_option_dentry *topts;
7741 	struct trace_options *tr_topts;
7742 	struct tracer_flags *flags;
7743 	struct tracer_opt *opts;
7744 	int cnt;
7745 	int i;
7746 
7747 	if (!tracer)
7748 		return;
7749 
7750 	flags = tracer->flags;
7751 
7752 	if (!flags || !flags->opts)
7753 		return;
7754 
7755 	/*
7756 	 * If this is an instance, only create flags for tracers
7757 	 * the instance may have.
7758 	 */
7759 	if (!trace_ok_for_array(tracer, tr))
7760 		return;
7761 
7762 	for (i = 0; i < tr->nr_topts; i++) {
7763 		/* Make sure there are no duplicate flags. */
7764 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7765 			return;
7766 	}
7767 
7768 	opts = flags->opts;
7769 
7770 	for (cnt = 0; opts[cnt].name; cnt++)
7771 		;
7772 
7773 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7774 	if (!topts)
7775 		return;
7776 
7777 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7778 			    GFP_KERNEL);
7779 	if (!tr_topts) {
7780 		kfree(topts);
7781 		return;
7782 	}
7783 
7784 	tr->topts = tr_topts;
7785 	tr->topts[tr->nr_topts].tracer = tracer;
7786 	tr->topts[tr->nr_topts].topts = topts;
7787 	tr->nr_topts++;
7788 
7789 	for (cnt = 0; opts[cnt].name; cnt++) {
7790 		create_trace_option_file(tr, &topts[cnt], flags,
7791 					 &opts[cnt]);
7792 		WARN_ONCE(topts[cnt].entry == NULL,
7793 			  "Failed to create trace option: %s",
7794 			  opts[cnt].name);
7795 	}
7796 }
7797 
7798 static struct dentry *
7799 create_trace_option_core_file(struct trace_array *tr,
7800 			      const char *option, long index)
7801 {
7802 	struct dentry *t_options;
7803 
7804 	t_options = trace_options_init_dentry(tr);
7805 	if (!t_options)
7806 		return NULL;
7807 
7808 	return trace_create_file(option, 0644, t_options,
7809 				 (void *)&tr->trace_flags_index[index],
7810 				 &trace_options_core_fops);
7811 }
7812 
7813 static void create_trace_options_dir(struct trace_array *tr)
7814 {
7815 	struct dentry *t_options;
7816 	bool top_level = tr == &global_trace;
7817 	int i;
7818 
7819 	t_options = trace_options_init_dentry(tr);
7820 	if (!t_options)
7821 		return;
7822 
7823 	for (i = 0; trace_options[i]; i++) {
7824 		if (top_level ||
7825 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7826 			create_trace_option_core_file(tr, trace_options[i], i);
7827 	}
7828 }
7829 
7830 static ssize_t
7831 rb_simple_read(struct file *filp, char __user *ubuf,
7832 	       size_t cnt, loff_t *ppos)
7833 {
7834 	struct trace_array *tr = filp->private_data;
7835 	char buf[64];
7836 	int r;
7837 
7838 	r = tracer_tracing_is_on(tr);
7839 	r = sprintf(buf, "%d\n", r);
7840 
7841 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7842 }
7843 
7844 static ssize_t
7845 rb_simple_write(struct file *filp, const char __user *ubuf,
7846 		size_t cnt, loff_t *ppos)
7847 {
7848 	struct trace_array *tr = filp->private_data;
7849 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7850 	unsigned long val;
7851 	int ret;
7852 
7853 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7854 	if (ret)
7855 		return ret;
7856 
7857 	if (buffer) {
7858 		mutex_lock(&trace_types_lock);
7859 		if (!!val == tracer_tracing_is_on(tr)) {
7860 			val = 0; /* do nothing */
7861 		} else if (val) {
7862 			tracer_tracing_on(tr);
7863 			if (tr->current_trace->start)
7864 				tr->current_trace->start(tr);
7865 		} else {
7866 			tracer_tracing_off(tr);
7867 			if (tr->current_trace->stop)
7868 				tr->current_trace->stop(tr);
7869 		}
7870 		mutex_unlock(&trace_types_lock);
7871 	}
7872 
7873 	(*ppos)++;
7874 
7875 	return cnt;
7876 }
7877 
7878 static const struct file_operations rb_simple_fops = {
7879 	.open		= tracing_open_generic_tr,
7880 	.read		= rb_simple_read,
7881 	.write		= rb_simple_write,
7882 	.release	= tracing_release_generic_tr,
7883 	.llseek		= default_llseek,
7884 };
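
/*
 * Usage note: these operations back the per-instance "tracing_on" file.
 * Writing "0" stops recording into the ring buffer and invokes the
 * current tracer's ->stop() callback; writing "1" turns recording back
 * on and invokes ->start(). Reading returns the state as "0\n" or "1\n".
 */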
7885 
7886 static ssize_t
7887 buffer_percent_read(struct file *filp, char __user *ubuf,
7888 		    size_t cnt, loff_t *ppos)
7889 {
7890 	struct trace_array *tr = filp->private_data;
7891 	char buf[64];
7892 	int r;
7893 
7894 	r = tr->buffer_percent;
7895 	r = sprintf(buf, "%d\n", r);
7896 
7897 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7898 }
7899 
7900 static ssize_t
7901 buffer_percent_write(struct file *filp, const char __user *ubuf,
7902 		     size_t cnt, loff_t *ppos)
7903 {
7904 	struct trace_array *tr = filp->private_data;
7905 	unsigned long val;
7906 	int ret;
7907 
7908 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7909 	if (ret)
7910 		return ret;
7911 
7912 	if (val > 100)
7913 		return -EINVAL;
7914 
7915 	if (!val)
7916 		val = 1;
7917 
7918 	tr->buffer_percent = val;
7919 
7920 	(*ppos)++;
7921 
7922 	return cnt;
7923 }
7924 
7925 static const struct file_operations buffer_percent_fops = {
7926 	.open		= tracing_open_generic_tr,
7927 	.read		= buffer_percent_read,
7928 	.write		= buffer_percent_write,
7929 	.release	= tracing_release_generic_tr,
7930 	.llseek		= default_llseek,
7931 };
7932 
7933 struct dentry *trace_instance_dir;
7934 
7935 static void
7936 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7937 
7938 static int
7939 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7940 {
7941 	enum ring_buffer_flags rb_flags;
7942 
7943 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7944 
7945 	buf->tr = tr;
7946 
7947 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7948 	if (!buf->buffer)
7949 		return -ENOMEM;
7950 
7951 	buf->data = alloc_percpu(struct trace_array_cpu);
7952 	if (!buf->data) {
7953 		ring_buffer_free(buf->buffer);
7954 		buf->buffer = NULL;
7955 		return -ENOMEM;
7956 	}
7957 
7958 	/* Allocate the first page for all buffers */
7959 	set_buffer_entries(&tr->trace_buffer,
7960 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7961 
7962 	return 0;
7963 }
7964 
7965 static int allocate_trace_buffers(struct trace_array *tr, int size)
7966 {
7967 	int ret;
7968 
7969 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7970 	if (ret)
7971 		return ret;
7972 
7973 #ifdef CONFIG_TRACER_MAX_TRACE
7974 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7975 				    allocate_snapshot ? size : 1);
7976 	if (WARN_ON(ret)) {
7977 		ring_buffer_free(tr->trace_buffer.buffer);
7978 		tr->trace_buffer.buffer = NULL;
7979 		free_percpu(tr->trace_buffer.data);
7980 		tr->trace_buffer.data = NULL;
7981 		return -ENOMEM;
7982 	}
7983 	tr->allocated_snapshot = allocate_snapshot;
7984 
7985 	/*
7986 	 * Only the top level trace array gets its snapshot allocated
7987 	 * from the kernel command line.
7988 	 */
7989 	allocate_snapshot = false;
7990 #endif
7991 	return 0;
7992 }
7993 
7994 static void free_trace_buffer(struct trace_buffer *buf)
7995 {
7996 	if (buf->buffer) {
7997 		ring_buffer_free(buf->buffer);
7998 		buf->buffer = NULL;
7999 		free_percpu(buf->data);
8000 		buf->data = NULL;
8001 	}
8002 }
8003 
8004 static void free_trace_buffers(struct trace_array *tr)
8005 {
8006 	if (!tr)
8007 		return;
8008 
8009 	free_trace_buffer(&tr->trace_buffer);
8010 
8011 #ifdef CONFIG_TRACER_MAX_TRACE
8012 	free_trace_buffer(&tr->max_buffer);
8013 #endif
8014 }
8015 
8016 static void init_trace_flags_index(struct trace_array *tr)
8017 {
8018 	int i;
8019 
8020 	/* Used by the trace options files */
8021 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8022 		tr->trace_flags_index[i] = i;
8023 }
8024 
8025 static void __update_tracer_options(struct trace_array *tr)
8026 {
8027 	struct tracer *t;
8028 
8029 	for (t = trace_types; t; t = t->next)
8030 		add_tracer_options(tr, t);
8031 }
8032 
8033 static void update_tracer_options(struct trace_array *tr)
8034 {
8035 	mutex_lock(&trace_types_lock);
8036 	__update_tracer_options(tr);
8037 	mutex_unlock(&trace_types_lock);
8038 }
8039 
8040 static int instance_mkdir(const char *name)
8041 {
8042 	struct trace_array *tr;
8043 	int ret;
8044 
8045 	mutex_lock(&event_mutex);
8046 	mutex_lock(&trace_types_lock);
8047 
8048 	ret = -EEXIST;
8049 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8050 		if (tr->name && strcmp(tr->name, name) == 0)
8051 			goto out_unlock;
8052 	}
8053 
8054 	ret = -ENOMEM;
8055 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8056 	if (!tr)
8057 		goto out_unlock;
8058 
8059 	tr->name = kstrdup(name, GFP_KERNEL);
8060 	if (!tr->name)
8061 		goto out_free_tr;
8062 
8063 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8064 		goto out_free_tr;
8065 
8066 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8067 
8068 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8069 
8070 	raw_spin_lock_init(&tr->start_lock);
8071 
8072 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8073 
8074 	tr->current_trace = &nop_trace;
8075 
8076 	INIT_LIST_HEAD(&tr->systems);
8077 	INIT_LIST_HEAD(&tr->events);
8078 	INIT_LIST_HEAD(&tr->hist_vars);
8079 
8080 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8081 		goto out_free_tr;
8082 
8083 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8084 	if (!tr->dir)
8085 		goto out_free_tr;
8086 
8087 	ret = event_trace_add_tracer(tr->dir, tr);
8088 	if (ret) {
8089 		tracefs_remove_recursive(tr->dir);
8090 		goto out_free_tr;
8091 	}
8092 
8093 	ftrace_init_trace_array(tr);
8094 
8095 	init_tracer_tracefs(tr, tr->dir);
8096 	init_trace_flags_index(tr);
8097 	__update_tracer_options(tr);
8098 
8099 	list_add(&tr->list, &ftrace_trace_arrays);
8100 
8101 	mutex_unlock(&trace_types_lock);
8102 	mutex_unlock(&event_mutex);
8103 
8104 	return 0;
8105 
8106  out_free_tr:
8107 	free_trace_buffers(tr);
8108 	free_cpumask_var(tr->tracing_cpumask);
8109 	kfree(tr->name);
8110 	kfree(tr);
8111 
8112  out_unlock:
8113 	mutex_unlock(&trace_types_lock);
8114 	mutex_unlock(&event_mutex);
8115 
8116 	return ret;
8117 
8118 }
8119 
8120 static int instance_rmdir(const char *name)
8121 {
8122 	struct trace_array *tr;
8123 	int found = 0;
8124 	int ret;
8125 	int i;
8126 
8127 	mutex_lock(&event_mutex);
8128 	mutex_lock(&trace_types_lock);
8129 
8130 	ret = -ENODEV;
8131 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8132 		if (tr->name && strcmp(tr->name, name) == 0) {
8133 			found = 1;
8134 			break;
8135 		}
8136 	}
8137 	if (!found)
8138 		goto out_unlock;
8139 
8140 	ret = -EBUSY;
8141 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8142 		goto out_unlock;
8143 
8144 	list_del(&tr->list);
8145 
8146 	/* Disable all the flags that were enabled coming in */
8147 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8148 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8149 			set_tracer_flag(tr, 1 << i, 0);
8150 	}
8151 
8152 	tracing_set_nop(tr);
8153 	clear_ftrace_function_probes(tr);
8154 	event_trace_del_tracer(tr);
8155 	ftrace_clear_pids(tr);
8156 	ftrace_destroy_function_files(tr);
8157 	tracefs_remove_recursive(tr->dir);
8158 	free_trace_buffers(tr);
8159 
8160 	for (i = 0; i < tr->nr_topts; i++) {
8161 		kfree(tr->topts[i].topts);
8162 	}
8163 	kfree(tr->topts);
8164 
8165 	free_cpumask_var(tr->tracing_cpumask);
8166 	kfree(tr->name);
8167 	kfree(tr);
8168 
8169 	ret = 0;
8170 
8171  out_unlock:
8172 	mutex_unlock(&trace_types_lock);
8173 	mutex_unlock(&event_mutex);
8174 
8175 	return ret;
8176 }
8177 
8178 static __init void create_trace_instances(struct dentry *d_tracer)
8179 {
8180 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8181 							 instance_mkdir,
8182 							 instance_rmdir);
8183 	if (WARN_ON(!trace_instance_dir))
8184 		return;
8185 }
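
/*
 * A minimal user-space sketch of how the instances directory is used
 * (assumes tracefs is mounted at /sys/kernel/tracing and uses a
 * hypothetical instance name "foo"):
 *
 *	#include <sys/stat.h>
 *	#include <sys/types.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		// tracefs calls instance_mkdir("foo") for this mkdir
 *		if (mkdir("/sys/kernel/tracing/instances/foo", 0755))
 *			return 1;
 *
 *		// the new directory has its own trace, trace_pipe,
 *		// current_tracer, options/, etc. (see init_tracer_tracefs())
 *
 *		// tracefs calls instance_rmdir("foo"); this fails with
 *		// EBUSY while the instance is still referenced
 *		return rmdir("/sys/kernel/tracing/instances/foo");
 *	}
 */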
8186 
8187 static void
8188 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8189 {
8190 	struct trace_event_file *file;
8191 	int cpu;
8192 
8193 	trace_create_file("available_tracers", 0444, d_tracer,
8194 			tr, &show_traces_fops);
8195 
8196 	trace_create_file("current_tracer", 0644, d_tracer,
8197 			tr, &set_tracer_fops);
8198 
8199 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8200 			  tr, &tracing_cpumask_fops);
8201 
8202 	trace_create_file("trace_options", 0644, d_tracer,
8203 			  tr, &tracing_iter_fops);
8204 
8205 	trace_create_file("trace", 0644, d_tracer,
8206 			  tr, &tracing_fops);
8207 
8208 	trace_create_file("trace_pipe", 0444, d_tracer,
8209 			  tr, &tracing_pipe_fops);
8210 
8211 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8212 			  tr, &tracing_entries_fops);
8213 
8214 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8215 			  tr, &tracing_total_entries_fops);
8216 
8217 	trace_create_file("free_buffer", 0200, d_tracer,
8218 			  tr, &tracing_free_buffer_fops);
8219 
8220 	trace_create_file("trace_marker", 0220, d_tracer,
8221 			  tr, &tracing_mark_fops);
8222 
8223 	file = __find_event_file(tr, "ftrace", "print");
8224 	if (file && file->dir)
8225 		trace_create_file("trigger", 0644, file->dir, file,
8226 				  &event_trigger_fops);
8227 	tr->trace_marker_file = file;
8228 
8229 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8230 			  tr, &tracing_mark_raw_fops);
8231 
8232 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8233 			  &trace_clock_fops);
8234 
8235 	trace_create_file("tracing_on", 0644, d_tracer,
8236 			  tr, &rb_simple_fops);
8237 
8238 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8239 			  &trace_time_stamp_mode_fops);
8240 
8241 	tr->buffer_percent = 50;
8242 
8243 	trace_create_file("buffer_percent", 0444, d_tracer,
8244 			tr, &buffer_percent_fops);
8245 
8246 	create_trace_options_dir(tr);
8247 
8248 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8249 	trace_create_file("tracing_max_latency", 0644, d_tracer,
8250 			&tr->max_latency, &tracing_max_lat_fops);
8251 #endif
8252 
8253 	if (ftrace_create_function_files(tr, d_tracer))
8254 		WARN(1, "Could not allocate function filter files");
8255 
8256 #ifdef CONFIG_TRACER_SNAPSHOT
8257 	trace_create_file("snapshot", 0644, d_tracer,
8258 			  tr, &snapshot_fops);
8259 #endif
8260 
8261 	for_each_tracing_cpu(cpu)
8262 		tracing_init_tracefs_percpu(tr, cpu);
8263 
8264 	ftrace_init_tracefs(tr, d_tracer);
8265 }
8266 
8267 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8268 {
8269 	struct vfsmount *mnt;
8270 	struct file_system_type *type;
8271 
8272 	/*
8273 	 * To maintain backward compatibility for tools that mount
8274 	 * debugfs to get to the tracing facility, tracefs is automatically
8275 	 * mounted to the debugfs/tracing directory.
8276 	 */
8277 	type = get_fs_type("tracefs");
8278 	if (!type)
8279 		return NULL;
8280 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8281 	put_filesystem(type);
8282 	if (IS_ERR(mnt))
8283 		return NULL;
8284 	mntget(mnt);
8285 
8286 	return mnt;
8287 }
8288 
8289 /**
8290  * tracing_init_dentry - initialize top level trace array
8291  *
8292  * This is called when creating files or directories in the tracing
8293  * directory. It is called via fs_initcall() by the boot-up code and
8294  * is expected to return the dentry of the top level tracing directory.
8295  */
8296 struct dentry *tracing_init_dentry(void)
8297 {
8298 	struct trace_array *tr = &global_trace;
8299 
8300 	/* The top level trace array uses  NULL as parent */
8301 	if (tr->dir)
8302 		return NULL;
8303 
8304 	if (WARN_ON(!tracefs_initialized()) ||
8305 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8306 		 WARN_ON(!debugfs_initialized())))
8307 		return ERR_PTR(-ENODEV);
8308 
8309 	/*
8310 	 * As there may still be users that expect the tracing
8311 	 * files to exist in debugfs/tracing, we must automount
8312 	 * the tracefs file system there, so older tools still
8313 	 * work with the newer kernel.
8314 	 */
8315 	tr->dir = debugfs_create_automount("tracing", NULL,
8316 					   trace_automount, NULL);
8317 	if (!tr->dir) {
8318 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
8319 		return ERR_PTR(-ENOMEM);
8320 	}
8321 
8322 	return NULL;
8323 }
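
/*
 * A minimal sketch of the expected calling pattern (hypothetical file name
 * and file_operations; the real users are fs_initcalls like
 * tracer_init_tracefs() below):
 *
 *	static __init int my_feature_init_tracefs(void)
 *	{
 *		struct dentry *d_tracer;
 *
 *		d_tracer = tracing_init_dentry();
 *		if (IS_ERR(d_tracer))
 *			return 0;
 *
 *		trace_create_file("my_feature", 0444, d_tracer,
 *				  NULL, &my_feature_fops);
 *		return 0;
 *	}
 *	fs_initcall(my_feature_init_tracefs);
 */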
8324 
8325 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8326 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8327 
8328 static void __init trace_eval_init(void)
8329 {
8330 	int len;
8331 
8332 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8333 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8334 }
8335 
8336 #ifdef CONFIG_MODULES
8337 static void trace_module_add_evals(struct module *mod)
8338 {
8339 	if (!mod->num_trace_evals)
8340 		return;
8341 
8342 	/*
8343 	 * Modules with bad taint do not have events created, do
8344 	 * not bother with enums either.
8345 	 */
8346 	if (trace_module_has_bad_taint(mod))
8347 		return;
8348 
8349 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8350 }
8351 
8352 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8353 static void trace_module_remove_evals(struct module *mod)
8354 {
8355 	union trace_eval_map_item *map;
8356 	union trace_eval_map_item **last = &trace_eval_maps;
8357 
8358 	if (!mod->num_trace_evals)
8359 		return;
8360 
8361 	mutex_lock(&trace_eval_mutex);
8362 
8363 	map = trace_eval_maps;
8364 
8365 	while (map) {
8366 		if (map->head.mod == mod)
8367 			break;
8368 		map = trace_eval_jmp_to_tail(map);
8369 		last = &map->tail.next;
8370 		map = map->tail.next;
8371 	}
8372 	if (!map)
8373 		goto out;
8374 
8375 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8376 	kfree(map);
8377  out:
8378 	mutex_unlock(&trace_eval_mutex);
8379 }
8380 #else
8381 static inline void trace_module_remove_evals(struct module *mod) { }
8382 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8383 
8384 static int trace_module_notify(struct notifier_block *self,
8385 			       unsigned long val, void *data)
8386 {
8387 	struct module *mod = data;
8388 
8389 	switch (val) {
8390 	case MODULE_STATE_COMING:
8391 		trace_module_add_evals(mod);
8392 		break;
8393 	case MODULE_STATE_GOING:
8394 		trace_module_remove_evals(mod);
8395 		break;
8396 	}
8397 
8398 	return 0;
8399 }
8400 
8401 static struct notifier_block trace_module_nb = {
8402 	.notifier_call = trace_module_notify,
8403 	.priority = 0,
8404 };
8405 #endif /* CONFIG_MODULES */
8406 
8407 static __init int tracer_init_tracefs(void)
8408 {
8409 	struct dentry *d_tracer;
8410 
8411 	trace_access_lock_init();
8412 
8413 	d_tracer = tracing_init_dentry();
8414 	if (IS_ERR(d_tracer))
8415 		return 0;
8416 
8417 	event_trace_init();
8418 
8419 	init_tracer_tracefs(&global_trace, d_tracer);
8420 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8421 
8422 	trace_create_file("tracing_thresh", 0644, d_tracer,
8423 			&global_trace, &tracing_thresh_fops);
8424 
8425 	trace_create_file("README", 0444, d_tracer,
8426 			NULL, &tracing_readme_fops);
8427 
8428 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8429 			NULL, &tracing_saved_cmdlines_fops);
8430 
8431 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8432 			  NULL, &tracing_saved_cmdlines_size_fops);
8433 
8434 	trace_create_file("saved_tgids", 0444, d_tracer,
8435 			NULL, &tracing_saved_tgids_fops);
8436 
8437 	trace_eval_init();
8438 
8439 	trace_create_eval_file(d_tracer);
8440 
8441 #ifdef CONFIG_MODULES
8442 	register_module_notifier(&trace_module_nb);
8443 #endif
8444 
8445 #ifdef CONFIG_DYNAMIC_FTRACE
8446 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8447 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8448 #endif
8449 
8450 	create_trace_instances(d_tracer);
8451 
8452 	update_tracer_options(&global_trace);
8453 
8454 	return 0;
8455 }
8456 
8457 static int trace_panic_handler(struct notifier_block *this,
8458 			       unsigned long event, void *unused)
8459 {
8460 	if (ftrace_dump_on_oops)
8461 		ftrace_dump(ftrace_dump_on_oops);
8462 	return NOTIFY_OK;
8463 }
8464 
8465 static struct notifier_block trace_panic_notifier = {
8466 	.notifier_call  = trace_panic_handler,
8467 	.next           = NULL,
8468 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8469 };
8470 
8471 static int trace_die_handler(struct notifier_block *self,
8472 			     unsigned long val,
8473 			     void *data)
8474 {
8475 	switch (val) {
8476 	case DIE_OOPS:
8477 		if (ftrace_dump_on_oops)
8478 			ftrace_dump(ftrace_dump_on_oops);
8479 		break;
8480 	default:
8481 		break;
8482 	}
8483 	return NOTIFY_OK;
8484 }
8485 
8486 static struct notifier_block trace_die_notifier = {
8487 	.notifier_call = trace_die_handler,
8488 	.priority = 200
8489 };
8490 
8491 /*
8492  * printk is limited to a maximum of 1024 characters; we really don't
8493  * need it that big. Nothing should be printing 1000 characters anyway.
8494  */
8495 #define TRACE_MAX_PRINT		1000
8496 
8497 /*
8498  * Define here KERN_TRACE so that we have one place to modify
8499  * it if we decide to change what log level the ftrace dump
8500  * should be at.
8501  */
8502 #define KERN_TRACE		KERN_EMERG
8503 
8504 void
8505 trace_printk_seq(struct trace_seq *s)
8506 {
8507 	/* Probably should print a warning here. */
8508 	if (s->seq.len >= TRACE_MAX_PRINT)
8509 		s->seq.len = TRACE_MAX_PRINT;
8510 
8511 	/*
8512 	 * Extra paranoia: although the buffer size is set to PAGE_SIZE
8513 	 * and TRACE_MAX_PRINT is only 1000, clamp the length anyway as
8514 	 * an additional layer of protection.
8515 	 */
8516 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8517 		s->seq.len = s->seq.size - 1;
8518 
8519 	/* Should already be NUL-terminated, but we are paranoid. */
8520 	s->buffer[s->seq.len] = 0;
8521 
8522 	printk(KERN_TRACE "%s", s->buffer);
8523 
8524 	trace_seq_init(s);
8525 }
8526 
8527 void trace_init_global_iter(struct trace_iterator *iter)
8528 {
8529 	iter->tr = &global_trace;
8530 	iter->trace = iter->tr->current_trace;
8531 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8532 	iter->trace_buffer = &global_trace.trace_buffer;
8533 
8534 	if (iter->trace && iter->trace->open)
8535 		iter->trace->open(iter);
8536 
8537 	/* Annotate start of buffers if we had overruns */
8538 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8539 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8540 
8541 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8542 	if (trace_clocks[iter->tr->clock_id].in_ns)
8543 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8544 }
8545 
8546 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8547 {
8548 	/* use static because iter can be a bit big for the stack */
8549 	static struct trace_iterator iter;
8550 	static atomic_t dump_running;
8551 	struct trace_array *tr = &global_trace;
8552 	unsigned int old_userobj;
8553 	unsigned long flags;
8554 	int cnt = 0, cpu;
8555 
8556 	/* Only allow one dump user at a time. */
8557 	if (atomic_inc_return(&dump_running) != 1) {
8558 		atomic_dec(&dump_running);
8559 		return;
8560 	}
8561 
8562 	/*
8563 	 * Always turn off tracing when we dump.
8564 	 * We don't need to show trace output of what happens
8565 	 * between multiple crashes.
8566 	 *
8567 	 * If the user does a sysrq-z, then they can re-enable
8568 	 * tracing with echo 1 > tracing_on.
8569 	 */
8570 	tracing_off();
8571 
8572 	local_irq_save(flags);
8573 	printk_nmi_direct_enter();
8574 
8575 	/* Simulate the iterator */
8576 	trace_init_global_iter(&iter);
8577 
8578 	for_each_tracing_cpu(cpu) {
8579 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8580 	}
8581 
8582 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8583 
8584 	/* don't look at user memory in panic mode */
8585 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8586 
8587 	switch (oops_dump_mode) {
8588 	case DUMP_ALL:
8589 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8590 		break;
8591 	case DUMP_ORIG:
8592 		iter.cpu_file = raw_smp_processor_id();
8593 		break;
8594 	case DUMP_NONE:
8595 		goto out_enable;
8596 	default:
8597 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8598 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8599 	}
8600 
8601 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8602 
8603 	/* Did function tracer already get disabled? */
8604 	if (ftrace_is_dead()) {
8605 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8606 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8607 	}
8608 
8609 	/*
8610 	 * We need to stop all tracing on all CPUs to read
8611 	 * the next buffer. This is a bit expensive, but is
8612 	 * not done often. We read everything we can,
8613 	 * and then release the locks again.
8614 	 */
8615 
8616 	while (!trace_empty(&iter)) {
8617 
8618 		if (!cnt)
8619 			printk(KERN_TRACE "---------------------------------\n");
8620 
8621 		cnt++;
8622 
8623 		/* reset all but tr, trace, and overruns */
8624 		memset(&iter.seq, 0,
8625 		       sizeof(struct trace_iterator) -
8626 		       offsetof(struct trace_iterator, seq));
8627 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8628 		iter.pos = -1;
8629 
8630 		if (trace_find_next_entry_inc(&iter) != NULL) {
8631 			int ret;
8632 
8633 			ret = print_trace_line(&iter);
8634 			if (ret != TRACE_TYPE_NO_CONSUME)
8635 				trace_consume(&iter);
8636 		}
8637 		touch_nmi_watchdog();
8638 
8639 		trace_printk_seq(&iter.seq);
8640 	}
8641 
8642 	if (!cnt)
8643 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8644 	else
8645 		printk(KERN_TRACE "---------------------------------\n");
8646 
8647  out_enable:
8648 	tr->trace_flags |= old_userobj;
8649 
8650 	for_each_tracing_cpu(cpu) {
8651 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8652 	}
8653 	atomic_dec(&dump_running);
8654 	printk_nmi_direct_exit();
8655 	local_irq_restore(flags);
8656 }
8657 EXPORT_SYMBOL_GPL(ftrace_dump);
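
/*
 * A minimal sketch of a direct caller of the export above (hypothetical
 * condition; normally the dump is driven by ftrace_dump_on_oops via the
 * panic/die handlers above):
 *
 *	if (something_went_horribly_wrong)
 *		ftrace_dump(DUMP_ALL);	// dump every CPU's buffer to the console
 */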
8658 
8659 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8660 {
8661 	char **argv;
8662 	int argc, ret;
8663 
8664 	argc = 0;
8665 	ret = 0;
8666 	argv = argv_split(GFP_KERNEL, buf, &argc);
8667 	if (!argv)
8668 		return -ENOMEM;
8669 
8670 	if (argc)
8671 		ret = createfn(argc, argv);
8672 
8673 	argv_free(argv);
8674 
8675 	return ret;
8676 }
8677 
8678 #define WRITE_BUFSIZE  4096
8679 
8680 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8681 				size_t count, loff_t *ppos,
8682 				int (*createfn)(int, char **))
8683 {
8684 	char *kbuf, *buf, *tmp;
8685 	int ret = 0;
8686 	size_t done = 0;
8687 	size_t size;
8688 
8689 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8690 	if (!kbuf)
8691 		return -ENOMEM;
8692 
8693 	while (done < count) {
8694 		size = count - done;
8695 
8696 		if (size >= WRITE_BUFSIZE)
8697 			size = WRITE_BUFSIZE - 1;
8698 
8699 		if (copy_from_user(kbuf, buffer + done, size)) {
8700 			ret = -EFAULT;
8701 			goto out;
8702 		}
8703 		kbuf[size] = '\0';
8704 		buf = kbuf;
8705 		do {
8706 			tmp = strchr(buf, '\n');
8707 			if (tmp) {
8708 				*tmp = '\0';
8709 				size = tmp - buf + 1;
8710 			} else {
8711 				size = strlen(buf);
8712 				if (done + size < count) {
8713 					if (buf != kbuf)
8714 						break;
8715 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8716 					pr_warn("Line length is too long: Should be less than %d\n",
8717 						WRITE_BUFSIZE - 2);
8718 					ret = -EINVAL;
8719 					goto out;
8720 				}
8721 			}
8722 			done += size;
8723 
8724 			/* Remove comments */
8725 			tmp = strchr(buf, '#');
8726 
8727 			if (tmp)
8728 				*tmp = '\0';
8729 
8730 			ret = trace_run_command(buf, createfn);
8731 			if (ret)
8732 				goto out;
8733 			buf += size;
8734 
8735 		} while (done < count);
8736 	}
8737 	ret = done;
8738 
8739 out:
8740 	kfree(kbuf);
8741 
8742 	return ret;
8743 }
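
/*
 * A minimal sketch of the intended calling pattern (hypothetical names;
 * the real callers are the dynamic event interfaces, e.g. kprobe and
 * uprobe events):
 *
 *	static int my_create_fn(int argc, char **argv)
 *	{
 *		// argv[0] .. argv[argc - 1] hold one command line with
 *		// '#' comments stripped and split on whitespace
 *		return 0;
 *	}
 *
 *	static ssize_t my_write(struct file *file, const char __user *buffer,
 *				size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       my_create_fn);
 *	}
 */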
8744 
8745 __init static int tracer_alloc_buffers(void)
8746 {
8747 	int ring_buf_size;
8748 	int ret = -ENOMEM;
8749 
8750 	/*
8751 	 * Make sure we don't accidentally add more trace options
8752 	 * than we have bits for.
8753 	 */
8754 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8755 
8756 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8757 		goto out;
8758 
8759 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8760 		goto out_free_buffer_mask;
8761 
8762 	/* Only allocate trace_printk buffers if a trace_printk exists */
8763 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8764 		/* Must be called before global_trace.buffer is allocated */
8765 		trace_printk_init_buffers();
8766 
8767 	/* To save memory, keep the ring buffer size to its minimum */
8768 	if (ring_buffer_expanded)
8769 		ring_buf_size = trace_buf_size;
8770 	else
8771 		ring_buf_size = 1;
8772 
8773 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8774 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8775 
8776 	raw_spin_lock_init(&global_trace.start_lock);
8777 
8778 	/*
8779 	 * The prepare callback allocates some memory for the ring buffer. We
8780 	 * don't free the buffer if the CPU goes down. If we were to free
8781 	 * the buffer, then the user would lose any trace that was in the
8782 	 * buffer. The memory will be removed once the "instance" is removed.
8783 	 */
8784 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8785 				      "trace/RB:preapre", trace_rb_cpu_prepare,
8786 				      NULL);
8787 	if (ret < 0)
8788 		goto out_free_cpumask;
8789 	/* Used for event triggers */
8790 	ret = -ENOMEM;
8791 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8792 	if (!temp_buffer)
8793 		goto out_rm_hp_state;
8794 
8795 	if (trace_create_savedcmd() < 0)
8796 		goto out_free_temp_buffer;
8797 
8798 	/* TODO: make the number of buffers hot pluggable with CPUs */
8799 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8800 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8801 		WARN_ON(1);
8802 		goto out_free_savedcmd;
8803 	}
8804 
8805 	if (global_trace.buffer_disabled)
8806 		tracing_off();
8807 
8808 	if (trace_boot_clock) {
8809 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8810 		if (ret < 0)
8811 			pr_warn("Trace clock %s not defined, going back to default\n",
8812 				trace_boot_clock);
8813 	}
8814 
8815 	/*
8816 	 * register_tracer() might reference current_trace, so it
8817 	 * needs to be set before we register anything. This is
8818 	 * just a bootstrap of current_trace anyway.
8819 	 */
8820 	global_trace.current_trace = &nop_trace;
8821 
8822 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8823 
8824 	ftrace_init_global_array_ops(&global_trace);
8825 
8826 	init_trace_flags_index(&global_trace);
8827 
8828 	register_tracer(&nop_trace);
8829 
8830 	/* Function tracing may start here (via kernel command line) */
8831 	init_function_trace();
8832 
8833 	/* All seems OK, enable tracing */
8834 	tracing_disabled = 0;
8835 
8836 	atomic_notifier_chain_register(&panic_notifier_list,
8837 				       &trace_panic_notifier);
8838 
8839 	register_die_notifier(&trace_die_notifier);
8840 
8841 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8842 
8843 	INIT_LIST_HEAD(&global_trace.systems);
8844 	INIT_LIST_HEAD(&global_trace.events);
8845 	INIT_LIST_HEAD(&global_trace.hist_vars);
8846 	list_add(&global_trace.list, &ftrace_trace_arrays);
8847 
8848 	apply_trace_boot_options();
8849 
8850 	register_snapshot_cmd();
8851 
8852 	return 0;
8853 
8854 out_free_savedcmd:
8855 	free_saved_cmdlines_buffer(savedcmd);
8856 out_free_temp_buffer:
8857 	ring_buffer_free(temp_buffer);
8858 out_rm_hp_state:
8859 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8860 out_free_cpumask:
8861 	free_cpumask_var(global_trace.tracing_cpumask);
8862 out_free_buffer_mask:
8863 	free_cpumask_var(tracing_buffer_mask);
8864 out:
8865 	return ret;
8866 }
8867 
8868 void __init early_trace_init(void)
8869 {
8870 	if (tracepoint_printk) {
8871 		tracepoint_print_iter =
8872 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8873 		if (WARN_ON(!tracepoint_print_iter))
8874 			tracepoint_printk = 0;
8875 		else
8876 			static_key_enable(&tracepoint_printk_key.key);
8877 	}
8878 	tracer_alloc_buffers();
8879 }
8880 
8881 void __init trace_init(void)
8882 {
8883 	trace_event_init();
8884 }
8885 
8886 __init static int clear_boot_tracer(void)
8887 {
8888 	/*
8889 	 * The default bootup tracer name is stored in an init section
8890 	 * that is about to be freed. This function runs at late_initcall
8891 	 * time; if the boot tracer was never found, clear the pointer so
8892 	 * that a later registration cannot access the soon-to-be-freed
8893 	 * buffer.
8894 	 */
8895 	if (!default_bootup_tracer)
8896 		return 0;
8897 
8898 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8899 	       default_bootup_tracer);
8900 	default_bootup_tracer = NULL;
8901 
8902 	return 0;
8903 }
8904 
8905 fs_initcall(tracer_init_tracefs);
8906 late_initcall_sync(clear_boot_tracer);
8907 
8908 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8909 __init static int tracing_set_default_clock(void)
8910 {
8911 	/* sched_clock_stable() is determined in late_initcall */
8912 	if (!trace_boot_clock && !sched_clock_stable()) {
8913 		printk(KERN_WARNING
8914 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8915 		       "If you want to keep using the local clock, then add:\n"
8916 		       "  \"trace_clock=local\"\n"
8917 		       "on the kernel command line\n");
8918 		tracing_set_clock(&global_trace, "global");
8919 	}
8920 
8921 	return 0;
8922 }
8923 late_initcall_sync(tracing_set_default_clock);
8924 #endif
8925